From dc9df2a676685f217081ddfaea6f8b5709a5bca0 Mon Sep 17 00:00:00 2001 From: "Matthew R. Norman" Date: Wed, 24 Feb 2016 07:46:13 -0500 Subject: [PATCH 01/27] preqx_acc target added to HOMME Restructured OpenACC code to be added through cmake targets. -Added the preqx_acc directory -Changed bndry_mod, derivative_mod, edge_mod, prim_advection_mod, viscosity_mod to have a mod_base in src/share -Added module files for those modules in src/preqx that "inherit" the base modules as a template for new solvers -Added module files for those modules in src/preqx_acc that "inherit" base modules and add / overwrite with OpenACC functionality -Separated prim_advection_mod and vertremap_mod into their own files. -Added solver_init_mod for all solvers to give OpenACC the ability to place things on the GPU. -Changed /CMakeLists.txt to recognize preqx_acc and BUILD_PREQX_ACC in the cmake process Tested, and both preqx and preqx_acc build successfully. --- components/homme/CMakeLists.txt | 5 + .../compile_scripts/thatchroof/compile.cpu | 0 .../thatchroof/compile.openacc | 12 +- .../thatchroof/customFlags.cmake.openacc | 17 - components/homme/src/preqx/CMakeLists.txt | 25 +- components/homme/src/preqx/bndry_mod.F90 | 8 + components/homme/src/preqx/derivative_mod.F90 | 8 + components/homme/src/preqx/edge_mod.F90 | 8 + .../homme/src/preqx/prim_advection_mod.F90 | 8 + .../homme/src/preqx/solver_init_mod.F90 | 8 + components/homme/src/preqx/vertremap_mod.F90 | 8 + components/homme/src/preqx/viscosity_mod.F90 | 8 + components/homme/src/preqx_acc/CMakeLists.txt | 171 ++++ .../bndry_mod.F90} | 28 +- .../homme/src/preqx_acc/config.h.cmake.in | 61 ++ .../derivative_mod.F90} | 51 +- .../edge_mod.F90} | 59 +- .../openacc_utils_mod.F90 | 24 - .../prim_advection_mod.F90} | 114 ++- .../homme/src/preqx_acc/solver_init_mod.F90 | 39 + .../homme/src/preqx_acc/vertremap_mod.F90 | 8 + .../viscosity_mod.F90} | 58 +- .../{bndry_mod.F90 => bndry_mod_base.F90} | 4 +- ...vative_mod.F90 => 
derivative_mod_base.F90} | 4 +- .../share/{edge_mod.F90 => edge_mod_base.F90} | 4 +- ...on_mod.F90 => prim_advection_mod_base.F90} | 852 +----------------- .../homme/src/share/prim_driver_mod.F90 | 13 +- .../homme/src/share/solver_init_mod_base.F90 | 25 + .../homme/src/share/vertremap_mod_base.F90 | 767 ++++++++++++++++ ...scosity_mod.F90 => viscosity_mod_base.F90} | 4 +- .../openacc_work/thatchroof.cpu.job | 2 +- .../openacc_work/thatchroof.openacc.job | 8 +- 32 files changed, 1370 insertions(+), 1041 deletions(-) mode change 100644 => 100755 components/homme/compile_scripts/thatchroof/compile.cpu mode change 100644 => 100755 components/homme/compile_scripts/thatchroof/compile.openacc create mode 100644 components/homme/src/preqx/bndry_mod.F90 create mode 100644 components/homme/src/preqx/derivative_mod.F90 create mode 100644 components/homme/src/preqx/edge_mod.F90 create mode 100644 components/homme/src/preqx/prim_advection_mod.F90 create mode 100644 components/homme/src/preqx/solver_init_mod.F90 create mode 100644 components/homme/src/preqx/vertremap_mod.F90 create mode 100644 components/homme/src/preqx/viscosity_mod.F90 create mode 100644 components/homme/src/preqx_acc/CMakeLists.txt rename components/homme/src/{share/openacc/bndry_openacc_mod.F90 => preqx_acc/bndry_mod.F90} (97%) create mode 100644 components/homme/src/preqx_acc/config.h.cmake.in rename components/homme/src/{share/openacc/derivative_openacc_mod.F90 => preqx_acc/derivative_mod.F90} (84%) rename components/homme/src/{share/openacc/edge_openacc_mod.F90 => preqx_acc/edge_mod.F90} (92%) rename components/homme/src/{share/openacc => preqx_acc}/openacc_utils_mod.F90 (77%) rename components/homme/src/{share/openacc/prim_advection_openacc_mod.F90 => preqx_acc/prim_advection_mod.F90} (89%) create mode 100644 components/homme/src/preqx_acc/solver_init_mod.F90 create mode 100644 components/homme/src/preqx_acc/vertremap_mod.F90 rename components/homme/src/{share/openacc/viscosity_openacc_mod.F90 => 
preqx_acc/viscosity_mod.F90} (59%) rename components/homme/src/share/{bndry_mod.F90 => bndry_mod_base.F90} (99%) rename components/homme/src/share/{derivative_mod.F90 => derivative_mod_base.F90} (99%) rename components/homme/src/share/{edge_mod.F90 => edge_mod_base.F90} (99%) rename components/homme/src/share/{prim_advection_mod.F90 => prim_advection_mod_base.F90} (73%) create mode 100644 components/homme/src/share/solver_init_mod_base.F90 create mode 100644 components/homme/src/share/vertremap_mod_base.F90 rename components/homme/src/share/{viscosity_mod.F90 => viscosity_mod_base.F90} (99%) mode change 100644 => 100755 components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job mode change 100644 => 100755 components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job diff --git a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt index d3458a68476..e226bad7043 100644 --- a/components/homme/CMakeLists.txt +++ b/components/homme/CMakeLists.txt @@ -56,6 +56,7 @@ ENDIF () # Some options for conditional builds OPTION(BUILD_HOMME_SWEQX "Shallow water equations FEM" ON) OPTION(BUILD_HOMME_PREQX "Primitive equations FEM" ON) +OPTION(BUILD_HOMME_PREQX_ACC "Primitive equations FEM with OpenACC" OFF) OPTION(BUILD_HOMME_SWDGX "Shallow water equations DG" OFF) OPTION(BUILD_HOMME_PRIMDGX "Primitive equations DG" OFF) OPTION(BUILD_HOMME_SWIM "Shallow water equations implicit" OFF) @@ -175,6 +176,10 @@ IF(${BUILD_HOMME_PREQX}) ADD_SUBDIRECTORY(src/preqx) ENDIF() +IF(${BUILD_HOMME_PREQX_ACC}) + ADD_SUBDIRECTORY(src/preqx_acc) +ENDIF() + IF(${BUILD_HOMME_SWDGX}) ADD_SUBDIRECTORY(src/swdgx) ENDIF() diff --git a/components/homme/compile_scripts/thatchroof/compile.cpu b/components/homme/compile_scripts/thatchroof/compile.cpu old mode 100644 new mode 100755 diff --git a/components/homme/compile_scripts/thatchroof/compile.openacc b/components/homme/compile_scripts/thatchroof/compile.openacc old mode 100644 new mode 100755 index c7b896db37e..b0f055ea508 
--- a/components/homme/compile_scripts/thatchroof/compile.openacc +++ b/components/homme/compile_scripts/thatchroof/compile.openacc @@ -20,7 +20,7 @@ cmake \ -DCMAKE_Fortran_COMPILER=mpif90 \ -DCMAKE_C_COMPILER=mpicc \ -DCMAKE_CXX_COMPILER=mpiCC \ - -DOPT_FLAGS="-fast -acc -Mcuda=7.5,cc35,ptxinfo -ta=tesla,pin -Minfo=accel" \ + -DOPT_FLAGS="-fast -acc -Mcuda=7.5,cc35,ptxinfo -ta=tesla,pin -Minfo=accel" \ -DDEBUG_FLAGS=" " \ -DNETCDF_DIR=$NETCDF_DIR \ -DWITH_PNETCDF=FALSE \ @@ -30,6 +30,8 @@ cmake \ -DBUILD_HOMME_SWDGX=FALSE \ -DBUILD_HOMME_SWEQX=FALSE \ -DBUILD_HOMME_PRIMDGX=FALSE \ + -DBUILD_HOMME_PREQX=FALSE \ + -DBUILD_HOMME_PREQX_ACC=TRUE \ -DENABLE_OPENMP=TRUE \ -DHOMME_PROJID=STF006 \ -DENABLE_OPENACC=TRUE \ @@ -39,11 +41,11 @@ cmake \ -DZLIB_DIR=$ZLIB_PATH \ -DSZIP_DIR=$SZIP_PATH \ -DCURL_DIR=/opt/curl-7.45_pgi15.9 \ - -DCMAKE_EXE_LINKER_FLAGS:STRING='-ta=tesla,pin -Mcuda=7.0,cc35 -acc -L$NETCDF_DIR/lib -lnetcdf -lnetcdff -ldl' \ + -DCMAKE_EXE_LINKER_FLAGS:STRING='-ta=tesla,pin -Mcuda=7.5,cc35 -acc -L$NETCDF_DIR/lib -lnetcdf -lnetcdff -ldl' \ \ \ \ - -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts//thatchroof/customFlags.cmake.openacc \ + -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/thatchroof/customFlags.cmake.openacc \ $HOMME_ROOT fi # -DOPT_FLAGS="-fastsse -Mvect" \ @@ -54,7 +56,7 @@ make clean || exit -1 fi if [ $do_make -eq 1 ]; then -make -j16 preqx || exit -1 +make -j16 preqx_acc || exit -1 mkdir -p $HOMME_ROOT/build/preqx -cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 +cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 fi diff --git a/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc b/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc index 12506a81cb6..248d7c27be8 100644 --- a/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc +++ b/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc @@ -1,13 +1,6 @@ 
SET(CUSTOM_FLAG_FILES ${HOMME_SOURCE_DIR}/physics/column_model_mod.F90 ${HOMME_SOURCE_DIR}/src/share/prim_driver_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90 - ) SET_SOURCE_FILES_PROPERTIES( @@ -17,14 +10,4 @@ SET_SOURCE_FILES_PROPERTIES( " -Mpreprocess -Mextend -Mflushz -O0 -mp -acc -Mcuda=6.5,cc35,ptxinfo -Minfo=accel -ta=tesla,pin" ) -SET_SOURCE_FILES_PROPERTIES( - ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90 - PROPERTIES GENERATED TRUE COMPILE_FLAGS - " -Mpreprocess -Mextend -Mflushz -fast -mp -acc -Mcuda=6.5,cc35,ptxinfo -Minfo=accel -ta=tesla,pin -Minline=reshape" -) diff --git a/components/homme/src/preqx/CMakeLists.txt b/components/homme/src/preqx/CMakeLists.txt index c28852c3005..bb59fe33d9c 100644 --- a/components/homme/src/preqx/CMakeLists.txt +++ b/components/homme/src/preqx/CMakeLists.txt @@ -16,12 +16,20 @@ SET (PREQX_INCLUDE_DIRS ${PIO_INCLUDE_DIRS} ${SRC_SHARE_DIR} CACHE INTERNAL "include directories for preqx") SET(PREQX_SRCS_F90 - ${SRC_SHARE_DIR}/openacc/bndry_openacc_mod.F90 - ${SRC_SHARE_DIR}/openacc/derivative_openacc_mod.F90 - ${SRC_SHARE_DIR}/openacc/edge_openacc_mod.F90 - ${SRC_SHARE_DIR}/openacc/openacc_utils_mod.F90 - ${SRC_SHARE_DIR}/openacc/prim_advection_openacc_mod.F90 - ${SRC_SHARE_DIR}/openacc/viscosity_openacc_mod.F90 + 
${SRC_SHARE_DIR}/bndry_mod_base.F90 + ${SRC_SHARE_DIR}/derivative_mod_base.F90 + ${SRC_SHARE_DIR}/edge_mod_base.F90 + ${SRC_SHARE_DIR}/prim_advection_mod_base.F90 + ${SRC_SHARE_DIR}/solver_init_mod_base.F90 + ${SRC_SHARE_DIR}/vertremap_mod_base.F90 + ${SRC_SHARE_DIR}/viscosity_mod_base.F90 + ${SRC_DIR}/preqx/bndry_mod.F90 + ${SRC_DIR}/preqx/derivative_mod.F90 + ${SRC_DIR}/preqx/edge_mod.F90 + ${SRC_DIR}/preqx/prim_advection_mod.F90 + ${SRC_DIR}/preqx/solver_init_mod.F90 + ${SRC_DIR}/preqx/vertremap_mod.F90 + ${SRC_DIR}/preqx/viscosity_mod.F90 ${SRC_SHARE_DIR}/arch_switch_mod.F90 ${SRC_SHARE_DIR}/kinds.F90 ${SRC_SHARE_DIR}/dimensions_mod.F90 @@ -30,7 +38,6 @@ SET(PREQX_SRCS_F90 ${SRC_SHARE_DIR}/coordinate_systems_mod.F90 ${SRC_SHARE_DIR}/params_mod.F90 ${SRC_SHARE_DIR}/gridgraph_mod.F90 - ${SRC_SHARE_DIR}/edge_mod.F90 ${SRC_SHARE_DIR}/element_mod.F90 ${SRC_SHARE_DIR}/parallel_mod.F90 ${SRC_SHARE_DIR}/hybrid_mod.F90 @@ -43,9 +50,6 @@ SET(PREQX_SRCS_F90 ${SRC_SHARE_DIR}/schedtype_mod.F90 ${SRC_SHARE_DIR}/edgetype_mod.F90 ${SRC_SHARE_DIR}/thread_mod.F90 - ${SRC_SHARE_DIR}/bndry_mod.F90 - ${SRC_SHARE_DIR}/derivative_mod.F90 - ${SRC_SHARE_DIR}/viscosity_mod.F90 ${SRC_SHARE_DIR}/unit_tests_mod.F90 ${SRC_SHARE_DIR}/spacecurve_mod.F90 ${SRC_SHARE_DIR}/cube_mod.F90 @@ -93,7 +97,6 @@ SET(PREQX_SRCS_F90 ${SRC_DIR}/prim_restart_mod.F90 ${SRC_SHARE_DIR}/prim_si_ref_mod.F90 ${SRC_SHARE_DIR}/prim_advance_mod.F90 - ${SRC_SHARE_DIR}/prim_advection_mod.F90 ${SRC_DIR}/forcing_mod.F90 ${SRC_SHARE_DIR}/metis_mod.F90 ${SRC_SHARE_DIR}/prim_driver_mod.F90 diff --git a/components/homme/src/preqx/bndry_mod.F90 b/components/homme/src/preqx/bndry_mod.F90 new file mode 100644 index 00000000000..2ef89f27f70 --- /dev/null +++ b/components/homme/src/preqx/bndry_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module bndry_mod + use bndry_mod_base + implicit none +end module bndry_mod diff --git a/components/homme/src/preqx/derivative_mod.F90 
b/components/homme/src/preqx/derivative_mod.F90 new file mode 100644 index 00000000000..461fe9c8323 --- /dev/null +++ b/components/homme/src/preqx/derivative_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module derivative_mod + use derivative_mod_base + implicit none +end module derivative_mod diff --git a/components/homme/src/preqx/edge_mod.F90 b/components/homme/src/preqx/edge_mod.F90 new file mode 100644 index 00000000000..c7f39fe20f6 --- /dev/null +++ b/components/homme/src/preqx/edge_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module edge_mod + use edge_mod_base + implicit none +end module edge_mod diff --git a/components/homme/src/preqx/prim_advection_mod.F90 b/components/homme/src/preqx/prim_advection_mod.F90 new file mode 100644 index 00000000000..948ec0ad3fc --- /dev/null +++ b/components/homme/src/preqx/prim_advection_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module prim_advection_mod + use prim_advection_mod_base + implicit none +end module prim_advection_mod diff --git a/components/homme/src/preqx/solver_init_mod.F90 b/components/homme/src/preqx/solver_init_mod.F90 new file mode 100644 index 00000000000..224928eeb25 --- /dev/null +++ b/components/homme/src/preqx/solver_init_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module solver_init_mod + use solver_init_mod_base + implicit none +end module solver_init_mod diff --git a/components/homme/src/preqx/vertremap_mod.F90 b/components/homme/src/preqx/vertremap_mod.F90 new file mode 100644 index 00000000000..b9e537a6dcf --- /dev/null +++ b/components/homme/src/preqx/vertremap_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module vertremap_mod + use vertremap_mod_base + implicit none +end module vertremap_mod diff --git a/components/homme/src/preqx/viscosity_mod.F90 b/components/homme/src/preqx/viscosity_mod.F90 new file mode 100644 index 
00000000000..80e849ae94c --- /dev/null +++ b/components/homme/src/preqx/viscosity_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module viscosity_mod + use viscosity_mod_base + implicit none +end module viscosity_mod diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt new file mode 100644 index 00000000000..ca97ca51e1b --- /dev/null +++ b/components/homme/src/preqx_acc/CMakeLists.txt @@ -0,0 +1,171 @@ +# Relevant Directories within the HOMME repository +# holding source files needed for building the library +# and executable preqx. + + + +SET(UTILS_SHARE_DIR ${HOMME_SOURCE_DIR}/utils/csm_share) +SET(SRC_DIR ${HOMME_SOURCE_DIR}/src) +SET(SRC_SHARE_DIR ${HOMME_SOURCE_DIR}/src/share) +SET(PHYSICS_DIR ${HOMME_SOURCE_DIR}/physics) +SET(UTILS_TIMING_DIR ${CMAKE_BINARY_DIR}/utils/timing) + +# Make INCLUDE_DIRS global so the tests can access it +SET (PREQX_ACC_INCLUDE_DIRS ${PIO_INCLUDE_DIRS} + ${UTILS_TIMING_DIR} + ${SRC_SHARE_DIR} CACHE INTERNAL "include directories for preqx") + +SET(PREQX_SRCS_F90 + ${SRC_SHARE_DIR}/bndry_mod_base.F90 + ${SRC_SHARE_DIR}/derivative_mod_base.F90 + ${SRC_SHARE_DIR}/edge_mod_base.F90 + ${SRC_SHARE_DIR}/prim_advection_mod_base.F90 + ${SRC_SHARE_DIR}/solver_init_mod_base.F90 + ${SRC_SHARE_DIR}/vertremap_mod_base.F90 + ${SRC_SHARE_DIR}/viscosity_mod_base.F90 + ${SRC_DIR}/preqx_acc/bndry_mod.F90 + ${SRC_DIR}/preqx_acc/derivative_mod.F90 + ${SRC_DIR}/preqx_acc/edge_mod.F90 + ${SRC_DIR}/preqx_acc/openacc_utils_mod.F90 + ${SRC_DIR}/preqx_acc/prim_advection_mod.F90 + ${SRC_DIR}/preqx_acc/solver_init_mod.F90 + ${SRC_DIR}/preqx_acc/vertremap_mod.F90 + ${SRC_DIR}/preqx_acc/viscosity_mod.F90 + ${SRC_SHARE_DIR}/arch_switch_mod.F90 + ${SRC_SHARE_DIR}/kinds.F90 + ${SRC_SHARE_DIR}/dimensions_mod.F90 + ${SRC_SHARE_DIR}/control_mod.F90 + ${SRC_SHARE_DIR}/physical_constants.F90 + ${SRC_SHARE_DIR}/coordinate_systems_mod.F90 + ${SRC_SHARE_DIR}/params_mod.F90 + 
${SRC_SHARE_DIR}/gridgraph_mod.F90 + ${SRC_SHARE_DIR}/element_mod.F90 + ${SRC_SHARE_DIR}/parallel_mod.F90 + ${SRC_SHARE_DIR}/hybrid_mod.F90 + ${SRC_SHARE_DIR}/reduction_mod.F90 + ${SRC_SHARE_DIR}/quadrature_mod.F90 + ${SRC_SHARE_DIR}/global_norms_mod.F90 + ${SRC_SHARE_DIR}/ll_mod.F90 + ${SRC_SHARE_DIR}/metagraph_mod.F90 + ${SRC_SHARE_DIR}/schedule_mod.F90 + ${SRC_SHARE_DIR}/schedtype_mod.F90 + ${SRC_SHARE_DIR}/edgetype_mod.F90 + ${SRC_SHARE_DIR}/thread_mod.F90 + ${SRC_SHARE_DIR}/unit_tests_mod.F90 + ${SRC_SHARE_DIR}/spacecurve_mod.F90 + ${SRC_SHARE_DIR}/cube_mod.F90 + ${SRC_SHARE_DIR}/interpolate_mod.F90 + ${SRC_SHARE_DIR}/fvm_bsp_mod.F90 + ${SRC_SHARE_DIR}/fvm_transformation_mod.F90 + ${SRC_SHARE_DIR}/dof_mod.F90 + ${SRC_DIR}/common_io_mod.F90 + ${SRC_SHARE_DIR}/spelt_mod.F90 + ${SRC_SHARE_DIR}/fvm_mod.F90 + ${SRC_SHARE_DIR}/fvm_control_volume_mod.F90 + ${SRC_SHARE_DIR}/fvm_line_integrals_mod.F90 + ${SRC_SHARE_DIR}/fvm_line_integrals_flux_mod.F90 + ${SRC_SHARE_DIR}/fvm_analytic_mod.F90 + ${SRC_SHARE_DIR}/fvm_filter_mod.F90 + ${SRC_SHARE_DIR}/fvm_reconstruction_mod.F90 + ${SRC_DIR}/pio_io_mod.F90 + ${SRC_DIR}/repro_sum_mod.F90 + ${SRC_SHARE_DIR}/time_mod.F90 + ${SRC_SHARE_DIR}/physics_mod.F90 + ${SRC_SHARE_DIR}/hybvcoord_mod.F90 + ${SRC_DIR}/interp_movie_mod.F90 + ${UTILS_SHARE_DIR}/shr_kind_mod.F90 + ${UTILS_SHARE_DIR}/shr_mpi_mod.F90 + ${UTILS_SHARE_DIR}/shr_sys_mod.F90 + ${UTILS_SHARE_DIR}/shr_file_mod.F90 + ${UTILS_SHARE_DIR}/shr_vmath_mod.F90 + ${UTILS_SHARE_DIR}/shr_const_mod.F90 + ${UTILS_SHARE_DIR}/shr_spfn_mod.F90 + ${SRC_SHARE_DIR}/domain_mod.F90 + ${SRC_DIR}/restart_io_mod.F90 + ${SRC_SHARE_DIR}/cg_mod.F90 + ${SRC_SHARE_DIR}/linear_algebra_mod.F90 + ${SRC_SHARE_DIR}/solver_mod.F90 + ${SRC_SHARE_DIR}/filter_mod.F90 + ${SRC_DIR}/common_movie_mod.F90 + ${SRC_SHARE_DIR}/namelist_mod.F90 + ${SRC_SHARE_DIR}/mass_matrix_mod.F90 + ${SRC_DIR}/checksum_mod.F90 + ${SRC_DIR}/baroclinic_inst_mod.F90 + ${SRC_SHARE_DIR}/diffusion_mod.F90 + 
${SRC_SHARE_DIR}/prim_si_mod.F90 + ${SRC_DIR}/asp_tests.F90 + ${SRC_SHARE_DIR}/prim_state_mod.F90 + ${SRC_DIR}/prim_restart_mod.F90 + ${SRC_SHARE_DIR}/prim_si_ref_mod.F90 + ${SRC_SHARE_DIR}/prim_advance_mod.F90 + ${SRC_DIR}/forcing_mod.F90 + ${SRC_SHARE_DIR}/metis_mod.F90 + ${SRC_SHARE_DIR}/prim_driver_mod.F90 + ${SRC_DIR}/vertical_mod.F90 + ${SRC_DIR}/prim_movie_mod.F90 + ${SRC_DIR}/surfaces_mod.F90 + ${SRC_DIR}/interpolate_driver_mod.F90 + ${SRC_DIR}/netcdf_io_mod.F90 + ${SRC_SHARE_DIR}/mesh_mod.F90 + ${SRC_SHARE_DIR}/Manager.F90 + ${SRC_DIR}/prim_main.F90 + ${SRC_DIR}/ref_state_mod.F90 + ${PHYSICS_DIR}/physics_types_mod.F90 + ${PHYSICS_DIR}/physics_io_mod.F90 + ${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90 + ${PHYSICS_DIR}/column_model_mod.F90 + ${PHYSICS_DIR}/column_types_mod.F90 +) + +SET(PREQX_SRCS_C + ${UTILS_SHARE_DIR}/shr_vmath_fwrap.c + ${SRC_DIR}/jrio.c +) + +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () + +# Make SRCS global so the tests can access it +SET(PREQX_ACC_SRCS ${PREQX_SRCS} ${PREQX_SRCS_C} ${PREQX_SRCS_F90} CACHE INTERNAL "Sources for preqx") + +# Set up defaults +IF (NOT PREQX_NP) + SET (PREQX_NP 4) +ENDIF () +IF (NOT PREQX_PLEV) + SET (PREQX_PLEV 20) +ENDIF () +IF (NOT PREQX_USE_PIO) + SET (PREQX_USE_PIO FALSE) +ENDIF () +IF (NOT PREQX_USE_ENERGY) + SET (PREQX_USE_ENERGY FALSE) +ENDIF () +IF (NOT PREQX_NC) + SET (PREQX_NC 4) +ENDIF () +IF (PREQX_QSIZE_D) + SET (QSIZE_D ${PREQX_QSIZE_D}) +ENDIF () + +MESSAGE(STATUS "Building preqx with:") +MESSAGE(STATUS " NP = ${PREQX_NP}") +MESSAGE(STATUS " NC = ${PREQX_NC}") +MESSAGE(STATUS " PLEV = ${PREQX_PLEV}") +MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}") +MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}") + +IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP}) + SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading") +ENDIF () + +# The following macro 
handles the compilation with the appropriate variables +############################################################################ +# createTestExec(exec_name exec_type NP PLEV USE_PIO USE_ENERGY) +############################################################################ +createTestExec(preqx_acc preqx_acc ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} + ${PREQX_USE_PIO} ${PREQX_USE_ENERGY}) + + diff --git a/components/homme/src/share/openacc/bndry_openacc_mod.F90 b/components/homme/src/preqx_acc/bndry_mod.F90 similarity index 97% rename from components/homme/src/share/openacc/bndry_openacc_mod.F90 rename to components/homme/src/preqx_acc/bndry_mod.F90 index 783f884c00f..b0d86e73ec2 100644 --- a/components/homme/src/share/openacc/bndry_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/bndry_mod.F90 @@ -3,14 +3,16 @@ #include "config.h" #endif -module bndry_openacc_mod -#if USE_OPENACC - use kinds , only: real_kind +module bndry_mod + use bndry_mod_base + use parallel_mod, only : syncmp,parallel_t,abortmp,iam + use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t + use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads + use kinds, only: real_kind implicit none - private - integer, parameter :: maxCycles = 20 - integer, parameter :: maxChunks = 64 - real(kind=real_kind), parameter :: chunk_denom = 1.e5 + integer, parameter, private :: maxCycles = 20 + integer, parameter, private :: maxChunks = 64 + real(kind=real_kind), parameter, private :: chunk_denom = 1.e5 type send_stager_t integer :: nUpdateHost, nSendComp @@ -24,13 +26,8 @@ module bndry_openacc_mod integer :: beg(maxchunks), end(maxchunks), len(maxchunks), asyncid(maxchunks), tag(maxchunks), req(maxchunks) end type recv_stager_t - type(send_stager_t) :: stg_send(maxCycles) - type(recv_stager_t) :: stg_recv(maxCycles) - - public :: bndry_exchangeS_simple_overlap - public :: bndry_exchangeV_simple_overlap - public :: bndry_exchangeV_timing - public :: 
bndry_exchangeV_finer_overlap + type(send_stager_t), private :: stg_send(maxCycles) + type(recv_stager_t), private :: stg_recv(maxCycles) contains @@ -634,6 +631,5 @@ function mpi_irecv_openacc_stage(buf, count, source, tag_root, comm, ierror, nch if (stg_recv(myid)%nUpdateDev == nchunks) finished = .true. end function mpi_irecv_openacc_stage -#endif -end module bndry_openacc_mod +end module bndry_mod diff --git a/components/homme/src/preqx_acc/config.h.cmake.in b/components/homme/src/preqx_acc/config.h.cmake.in new file mode 100644 index 00000000000..8c2c17cb05f --- /dev/null +++ b/components/homme/src/preqx_acc/config.h.cmake.in @@ -0,0 +1,61 @@ +/* PRIM Options */ + +/* Energy Diagnostics */ +#cmakedefine ENERGY_DIAGNOSTICS + +/* Using PRIMITIVE */ +#define _PRIM + +/* WK_GRAD */ +#define _WK_GRAD 1 + +/* _PRESTART */ +#define _PRESTART 1 + +/* Using MPI */ +#define _MPI 1 + +/* PIO with interpolation */ +#cmakedefine PIO_INTERP + +/* Number of cells NC*NC in each element (Default value NC=4) */ +#define NC @NUM_CELLS@ + +/* Number of POINTS in the element (Default value is 4) */ +#define NP @NUM_POINTS@ + +/* Number of POINTS in the element (Default value is 1) */ +#define PLEV @NUM_PLEV@ + +/* Number of tracers (Default value 4 if not specified) */ +#cmakedefine QSIZE_D @QSIZE_D@ + +/* Basic PIO */ +#cmakedefine PIO + +/* Whether to use CUDA Fortran */ +#cmakedefine01 USE_CUDA_FORTRAN + +/* Whether to use OPENACC */ +#cmakedefine01 USE_OPENACC + +/* Quadruple-precision */ +#cmakedefine01 HOMME_QUAD_PREC + +/* Horizontal OpenMP (over elements) */ +#cmakedefine HORIZ_OPENMP + +/* Vertical OpenMP (within elements) */ +#cmakedefine COLUMN_OPENMP + +/* Nested OpenMP (both horizontal and vertical) */ +#cmakedefine NESTED_OPENMP + +/* Zoltan partitioning library */ +#cmakedefine01 HAVE_ZOLTAN + +/* Enable persistent MPI comm */ +#cmakedefine MPI_PERSISTENT + +/* Using Accelerator */ +#cmakedefine IS_ACCELERATOR diff --git 
a/components/homme/src/share/openacc/derivative_openacc_mod.F90 b/components/homme/src/preqx_acc/derivative_mod.F90 similarity index 84% rename from components/homme/src/share/openacc/derivative_openacc_mod.F90 rename to components/homme/src/preqx_acc/derivative_mod.F90 index 4676d05a053..3833b48bfc8 100644 --- a/components/homme/src/share/openacc/derivative_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/derivative_mod.F90 @@ -3,22 +3,25 @@ #include "config.h" #endif -module derivative_openacc_mod -#if USE_OPENACC - use kinds, only: real_kind - use dimensions_mod, only: np, nelemd +module derivative_mod + use derivative_mod_base + use derivative_mod_base, only: laplace_sphere_wk_base => laplace_sphere_wk, & + divergence_sphere_wk_base => divergence_sphere_wk, & + gradient_sphere_base => gradient_sphere, & + divergence_sphere_base => divergence_sphere + use kinds, only : real_kind, longdouble_kind + use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev + use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi + use parallel_mod, only : abortmp + ! needed for spherical differential operators: + use physical_constants, only : rrearth + use element_mod, only : element_t + use control_mod, only : hypervis_scaling, hypervis_power implicit none - private - - public :: laplace_sphere_wk - public :: divergence_sphere_wk - public :: gradient_sphere - public :: divergence_sphere contains - subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,ntl,tl) - use derivative_mod, only: derivative_t + subroutine laplace_sphere_wk_openacc(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,ntl,tl) use element_mod, only: element_t use control_mod, only: hypervis_scaling, hypervis_power implicit none @@ -35,7 +38,7 @@ subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,n integer :: i,j,k,ie ! 
Local real(kind=real_kind) :: oldgrads(2) - call gradient_sphere(s,deriv,elem(:),grads,len,nets,nete,ntl,tl) + call gradient_sphere_openacc(s,deriv,elem(:),grads,len,nets,nete,ntl,tl) !$acc parallel loop gang vector collapse(4) present(grads,elem(:)) private(oldgrads) do ie = nets , nete do k = 1 , len @@ -58,12 +61,11 @@ subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,n enddo ! note: divergnece_sphere and divergence_sphere_wk are identical *after* bndry_exchange ! if input is C_0. Here input is not C_0, so we should use divergence_sphere_wk(). - call divergence_sphere_wk(grads,deriv,elem(:),laplace,len,nets,nete,ntl,tl) - end subroutine laplace_sphere_wk + call divergence_sphere_wk_openacc(grads,deriv,elem(:),laplace,len,nets,nete,ntl,tl) + end subroutine laplace_sphere_wk_openacc - subroutine divergence_sphere_wk(v,deriv,elem,div,len,nets,nete,ntl,tl) + subroutine divergence_sphere_wk_openacc(v,deriv,elem,div,len,nets,nete,ntl,tl) use element_mod, only: element_t - use derivative_mod, only: derivative_t use physical_constants, only: rrearth implicit none ! input: v = velocity in lat-lon coordinates @@ -117,11 +119,10 @@ subroutine divergence_sphere_wk(v,deriv,elem,div,len,nets,nete,ntl,tl) enddo enddo enddo - end subroutine divergence_sphere_wk + end subroutine divergence_sphere_wk_openacc - subroutine gradient_sphere(s,deriv,elem,ds,len,nets,nete,ntl,tl) + subroutine gradient_sphere_openacc(s,deriv,elem,ds,len,nets,nete,ntl,tl) use element_mod, only: element_t - use derivative_mod, only: derivative_t use physical_constants, only: rrearth implicit none ! input s: scalar @@ -171,13 +172,12 @@ subroutine gradient_sphere(s,deriv,elem,ds,len,nets,nete,ntl,tl) enddo enddo enddo - end subroutine gradient_sphere + end subroutine gradient_sphere_openacc - subroutine divergence_sphere(v,deriv,elem,div,len,nets,nete,ntl,tl) + subroutine divergence_sphere_openacc(v,deriv,elem,div,len,nets,nete,ntl,tl) ! 
input: v = velocity in lat-lon coordinates ! ouput: div(v) spherical divergence of v use element_mod , only: element_t - use derivative_mod, only: derivative_t use physical_constants, only: rrearth implicit none real(kind=real_kind), intent(in ) :: v(np,np,2,len,ntl,nelemd) ! in lat-lon coordinates @@ -227,8 +227,7 @@ subroutine divergence_sphere(v,deriv,elem,div,len,nets,nete,ntl,tl) enddo enddo enddo - end subroutine divergence_sphere + end subroutine divergence_sphere_openacc -#endif -end module derivative_openacc_mod +end module derivative_mod diff --git a/components/homme/src/share/openacc/edge_openacc_mod.F90 b/components/homme/src/preqx_acc/edge_mod.F90 similarity index 92% rename from components/homme/src/share/openacc/edge_openacc_mod.F90 rename to components/homme/src/preqx_acc/edge_mod.F90 index 29178a9fda1..43d054b0ae7 100644 --- a/components/homme/src/share/openacc/edge_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/edge_mod.F90 @@ -3,24 +3,28 @@ #include "config.h" #endif -module edge_openacc_mod -#if USE_OPENACC - use kinds, only: real_kind, int_kind, log_kind - use dimensions_mod, only: max_neigh_edges,nelemd,np,max_corner_elem +module edge_mod + use edge_mod_base + use edge_mod_base, only: edgeSpack_base => edgeSpack, & + edgeSunpackMin_base => edgeSunpackMin, & + edgeSunpackMax_base => edgeSunpackMax + use kinds, only : int_kind, log_kind, real_kind + use dimensions_mod, only : max_neigh_edges, nelemd, np + use perf_mod, only: t_startf, t_stopf, t_adj_detailf ! 
_EXTERNAL + use thread_mod, only: nthreadshoriz, omp_get_num_threads, omp_get_thread_num + use coordinate_systems_mod, only : cartesian3D_t + use schedtype_mod, only : cycle_t, schedule_t, schedule + use parallel_mod, only : abortmp, haltmp, MPIreal_t, iam,parallel_t, & + MAX_ACTIVE_MSG, HME_status_size, BNDRY_TAG_BASE + use edgetype_mod, only : edgedescriptor_t, edgebuffer_t, & + Longedgebuffer_t, Ghostbuffertr_t, Ghostbuffer3d_t, initedgebuffer_callid + use element_mod, only : element_t implicit none - private - public :: edgeSpack - public :: edgeSunpackMin - public :: edgeSunpackMax - public :: edgeVpack - public :: edgeVunpack - public :: edgeVunpackMin - public :: edgeVunpackMax contains - subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeSpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : max_corner_elem use control_mod , only : north, south, east, west, neast, nwest, seast, swest use perf_mod , only : t_startf, t_stopf @@ -66,9 +70,9 @@ subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_s_pack') - end subroutine edgeSpack + end subroutine edgeSpack_openacc - subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeSunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, t_stopf @@ -125,9 +129,9 @@ subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_s_unpack_min') - end subroutine edgeSunpackMin + end subroutine edgeSunpackMin_openacc - subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeSunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, 
t_stopf @@ -184,9 +188,9 @@ subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_s_unpack_max') - end subroutine edgeSunpackMax + end subroutine edgeSunpackMax_openacc - subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeVpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : max_corner_elem use control_mod , only : north, south, east, west, neast, nwest, seast, swest use perf_mod , only : t_startf, t_stopf @@ -247,9 +251,9 @@ subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_pack') - end subroutine edgeVpack + end subroutine edgeVpack_openacc - subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeVunpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, t_stopf @@ -324,9 +328,9 @@ subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_unpack') - end subroutine edgeVunpack + end subroutine edgeVunpack_openacc - subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeVunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, t_stopf @@ -401,9 +405,9 @@ subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_unpack_min') - end subroutine edgeVunpackMin + end subroutine edgeVunpackMin_openacc - subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeVunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, 
t_stopf @@ -478,8 +482,7 @@ subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_unpack_max') - end subroutine edgeVunpackMax + end subroutine edgeVunpackMax_openacc -#endif -end module edge_openacc_mod +end module edge_mod diff --git a/components/homme/src/share/openacc/openacc_utils_mod.F90 b/components/homme/src/preqx_acc/openacc_utils_mod.F90 similarity index 77% rename from components/homme/src/share/openacc/openacc_utils_mod.F90 rename to components/homme/src/preqx_acc/openacc_utils_mod.F90 index 5b716e1849a..5e8542fb727 100644 --- a/components/homme/src/share/openacc/openacc_utils_mod.F90 +++ b/components/homme/src/preqx_acc/openacc_utils_mod.F90 @@ -4,7 +4,6 @@ #endif module openacc_utils_mod -#if USE_OPENACC use kinds, only: real_kind use dimensions_mod, only: nelemd implicit none @@ -16,7 +15,6 @@ module openacc_utils_mod public :: update_device_async public :: copy_ondev public :: copy_ondev_async - public :: arch_init2 public :: acc_async_test_wrap contains @@ -30,27 +28,6 @@ function acc_async_test_wrap( asyncid ) result(rslt) rslt = acc_async_test(asyncid) end function acc_async_test_wrap - subroutine arch_init2( elem , deriv ) - use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj - use derivative_mod, only: derivative_t - implicit none - type(element_t) , intent(in) :: elem(:) - type(derivative_t), intent(in) :: deriv - integer :: ie - !$omp barrier - !$omp master - - !$acc enter data pcreate(state_Qdp,derived_vn0,derived_divdp,derived_divdp_proj) - !$acc enter data pcopyin(elem(1:nelemd),deriv) - do ie = 1 , nelemd - !$acc enter data pcopyin(elem(ie)%desc) - !$acc enter data pcopyin(elem(ie)%desc%putmapP,elem(ie)%desc%getmapP,elem(ie)%desc%reverse) - enddo - - !$omp end master - !$omp barrier - end subroutine arch_init2 - subroutine copy_qdp_h2d( elem , tl ) use element_mod, only: element_t, state_qdp implicit none @@ -119,6 +96,5 @@ subroutine 
copy_ondev_async(dest,src,len,id) enddo end subroutine copy_ondev_async -#endif end module openacc_utils_mod diff --git a/components/homme/src/share/openacc/prim_advection_openacc_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 similarity index 89% rename from components/homme/src/share/openacc/prim_advection_openacc_mod.F90 rename to components/homme/src/preqx_acc/prim_advection_mod.F90 index ba1c7fa48ad..629b76e613b 100644 --- a/components/homme/src/share/openacc/prim_advection_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -6,34 +6,52 @@ #include "config.h" #endif -module prim_advection_openacc_mod -#if USE_OPENACC - use kinds , only: real_kind, int_kind, log_kind - use dimensions_mod , only: np,nlevp,nlev,qsize,qsize_d,max_corner_elem,max_neigh_edges,nelemd - use element_mod , only: timelevels - use edgetype_mod , only: EdgeBuffer_t - use derivative_mod , only: derivative_t +module prim_advection_mod + use prim_advection_mod_base + use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_base => Prim_Advec_Tracers_remap, & + prim_advec_init1_base => prim_advec_init1, & + prim_advec_init2_base => prim_advec_init2, & + prim_advec_init_deriv_base => prim_advec_init_deriv, & + deriv_base => deriv, & + Prim_Advec_Tracers_remap_rk2_base => Prim_Advec_Tracers_remap_rk2 + use kinds, only : real_kind + use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd + use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp + use element_mod, only : element_t + use fvm_control_volume_mod, only : fvm_struct + use spelt_mod, only : spelt_struct + use filter_mod, only : filter_t, filter_P + use hybvcoord_mod, only : hvcoord_t + use time_mod, only : TimeLevel_t, smooth, TimeLevel_Qdp + use prim_si_mod, only : preq_pressure + use diffusion_mod, only : scalar_diffusion, diffusion_init + use control_mod, only : integration, test_case, filter_freq_advection, hypervis_order, & + statefreq, 
moisture, TRACERADV_TOTAL_DIVERGENCE, TRACERADV_UGRADQ, & + nu_q, nu_p, limiter_option, hypervis_subcycle_q, rsplit + use edge_mod, only : edgevpack, edgerotate, edgevunpack, initedgebuffer, initedgesbuffer, & + edgevunpackmin, initghostbuffer3D + + use edgetype_mod, only : EdgeDescriptor_t, EdgeBuffer_t, ghostbuffer3D_t + use hybrid_mod, only : hybrid_t + use bndry_mod, only : bndry_exchangev + use perf_mod, only : t_startf, t_stopf, t_barrierf ! _EXTERNAL + use parallel_mod, only : abortmp + use derivative_mod, only: derivative_t implicit none - private - type (derivative_t), public, allocatable :: deriv(:) ! derivative struct (nthreads) - real(kind=real_kind), allocatable :: qmin(:,:,:), qmax(:,:,:) - real(kind=real_kind), allocatable :: dp0(:) - real(kind=real_kind), allocatable :: Qtens_biharmonic(:,:,:,:,:) - real(kind=real_kind), allocatable :: Qtens(:,:,:,:,:) - real(kind=real_kind), allocatable :: grads_tracer(:,:,:,:,:,:) - real(kind=real_kind), allocatable :: dp_star(:,:,:,:) - type (EdgeBuffer_t) :: edgeAdv, edgeAdvQ3, edgeAdv_p1, edgeAdvQ2, edgeAdv1, edgeAdv3, edgeMinMax - integer,parameter :: DSSeta = 1 - integer,parameter :: DSSomega = 2 - integer,parameter :: DSSdiv_vdp_ave = 3 - integer,parameter :: DSSno_var = -1 - real(kind=real_kind), allocatable :: data_pack(:,:,:,:), data_pack2(:,:,:,:) - logical :: first_time = .true. - - public :: Prim_Advec_Tracers_remap - public :: prim_advec_init1 - public :: prim_advec_init2 - public :: prim_advec_init_deriv + type (derivative_t), allocatable :: deriv(:) ! 
derivative struct (nthreads) + real(kind=real_kind), private, allocatable :: qmin(:,:,:), qmax(:,:,:) + real(kind=real_kind), private, allocatable :: dp0(:) + real(kind=real_kind), private, allocatable :: Qtens_biharmonic(:,:,:,:,:) + real(kind=real_kind), private, allocatable :: Qtens(:,:,:,:,:) + real(kind=real_kind), private, allocatable :: grads_tracer(:,:,:,:,:,:) + real(kind=real_kind), private, allocatable :: dp_star(:,:,:,:) + type (EdgeBuffer_t), private :: edgeAdv, edgeAdvQ3, edgeAdv_p1, edgeAdvQ2, edgeAdv1, edgeAdv3, edgeMinMax + integer,parameter, private :: DSSeta = 1 + integer,parameter, private :: DSSomega = 2 + integer,parameter, private :: DSSdiv_vdp_ave = 3 + integer,parameter, private :: DSSno_var = -1 + real(kind=real_kind), allocatable, private :: data_pack(:,:,:,:), data_pack2(:,:,:,:) + logical, private :: first_time = .true. contains @@ -112,7 +130,7 @@ subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , flt , hybrid , dt use filter_mod , only: filter_t use time_mod , only: TimeLevel_t, TimeLevel_Qdp use control_mod , only: limiter_option, nu_p, qsplit - use bndry_openacc_mod, only: bndry_exchangeV_timing + use bndry_mod, only: bndry_exchangeV_timing implicit none type (element_t) , intent(inout) :: elem(:) type (derivative_t) , intent(in ) :: deriv @@ -325,9 +343,9 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , use perf_mod , only: t_startf, t_stopf ! 
_EXTERNAL use hybvcoord_mod , only: hvcoord_t use control_mod , only: nu_q, hypervis_order, hypervis_subcycle_q, nu_p - use viscosity_openacc_mod, only: biharmonic_wk_scalar - use edge_openacc_mod , only: edgeVpack, edgeVunpack - use bndry_openacc_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap + use viscosity_mod, only: biharmonic_wk_scalar_openacc + use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc + use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap implicit none type (EdgeBuffer_t) , intent(inout) :: edgeAdv_dontuse type (element_t) , intent(inout), target :: elem(:) @@ -388,7 +406,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , !$omp end master !$omp barrier ! compute biharmonic operator. Qtens = input and output - call biharmonic_wk_scalar( elem , Qtens , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd ) + call biharmonic_wk_scalar_openacc( elem , Qtens , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd ) !$omp barrier !$omp master !$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:),qtens) @@ -407,7 +425,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , enddo call limiter2d_zero(state_Qdp,2,nt_qdp) call t_startf('ah_scalar_PEU') - call edgeVpack(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) + call edgeVpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) !$omp end master !$omp barrier @@ -417,7 +435,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , !$omp barrier !$omp master - call edgeVunpack(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) + call edgeVunpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) call t_stopf('ah_scalar_PEU') !$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:)) do ie = 1 , nelemd @@ -441,7 +459,6 @@ end subroutine advance_hypervis_scalar subroutine qdp_time_avg( elem , rkstage , 
n0_qdp , np1_qdp , limiter_option , nu_p , nets , nete ) use element_mod, only: element_t, state_qdp - use control_mod, only: limiter_option implicit none type(element_t) , intent(inout) :: elem(:) integer , intent(in ) :: rkstage , n0_qdp , np1_qdp , nets , nete , limiter_option @@ -488,10 +505,10 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , use control_mod , only: limiter_option, nu_p, nu_q use perf_mod , only: t_startf, t_stopf use element_mod , only: derived_divdp_proj, state_qdp, derived_vn0, derived_divdp - use derivative_openacc_mod, only: divergence_sphere - use viscosity_openacc_mod , only: biharmonic_wk_scalar, neighbor_minmax - use edge_openacc_mod , only: edgeVpack, edgeVunpack - use bndry_openacc_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap + use derivative_mod, only: divergence_sphere_openacc + use viscosity_mod , only: biharmonic_wk_scalar_openacc, neighbor_minmax_openacc + use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc + use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap implicit none integer , intent(in ) :: np1_qdp, n0_qdp real (kind=real_kind), intent(in ) :: dt @@ -606,7 +623,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , enddo !$omp end master !$omp barrier - if ( rhs_multiplier == 0 ) call neighbor_minmax(elem,hybrid,edgeMinMax,1,nelemd,qmin,qmax) + if ( rhs_multiplier == 0 ) call neighbor_minmax_openacc(elem,hybrid,edgeMinMax,1,nelemd,qmin,qmax) ! 
compute biharmonic mixing term if ( rhs_multiplier == 2 ) then rhs_viss = 3 @@ -633,8 +650,8 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , !$omp end master !$omp barrier endif - call biharmonic_wk_scalar( elem , qtens_biharmonic , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd ) - call neighbor_minmax( elem , hybrid , edgeMinMax , 1 , nelemd , qmin , qmax ) + call biharmonic_wk_scalar_openacc( elem , qtens_biharmonic , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd ) + call neighbor_minmax_openacc( elem , hybrid , edgeMinMax , 1 , nelemd , qmin , qmax ) !$omp barrier !$omp master !$acc parallel loop gang vector collapse(4) present(qtens_biharmonic,dp0,elem(:)) @@ -716,7 +733,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , enddo enddo enddo - call divergence_sphere( grads_tracer , deriv , elem(:) , qtens , nlev*qsize , 1 , nelemd , 1 , 1 ) + call divergence_sphere_openacc( grads_tracer , deriv , elem(:) , qtens , nlev*qsize , 1 , nelemd , 1 , 1 ) !$acc parallel loop gang vector collapse(5) present(qtens,state_qdp,qtens_biharmonic) do ie = 1 , nelemd ! advance Qdp @@ -761,7 +778,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , ! note: eta_dot_dpdn is actually dimension nlev+1, but nlev+1 data is ! 
all zero so we only have to DSS 1:nlev call t_startf('eus_PEU') - call edgeVpack(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) + call edgeVpack_openacc(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) !$omp end master !$omp barrier @@ -771,7 +788,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , !$omp barrier !$omp master - call edgeVunpack( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) + call edgeVunpack_openacc( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) call t_stopf('eus_PEU') !$acc parallel loop gang vector collapse(4) present(state_Qdp,elem(:)) do ie = 1 , nelemd @@ -939,7 +956,7 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp ) use edge_mod , only: edgeVpack, edgeVunpack use bndry_mod , only: bndry_exchangeV use control_mod , only: limiter_option - use derivative_openacc_mod, only: divergence_sphere + use derivative_mod, only: divergence_sphere_openacc use openacc_utils_mod , only: copy_ondev use perf_mod , only: t_startf, t_stopf implicit none @@ -954,7 +971,7 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp ) !$omp barrier !$omp master !$acc update device(derived_vn0) - call divergence_sphere(derived_vn0,deriv,elem,derived_divdp,nlev,1,nelemd,1,1) + call divergence_sphere_openacc(derived_vn0,deriv,elem,derived_divdp,nlev,1,nelemd,1,1) call copy_ondev(derived_divdp_proj,derived_divdp,product(shape(derived_divdp))) !$acc update host(derived_divdp,derived_divdp_proj) !$omp end master @@ -988,7 +1005,6 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp ) call t_stopf('derived PEU') end subroutine precompute_divdp -#endif -end module prim_advection_openacc_mod +end module prim_advection_mod diff --git a/components/homme/src/preqx_acc/solver_init_mod.F90 b/components/homme/src/preqx_acc/solver_init_mod.F90 new 
file mode 100644 index 00000000000..67ad782bacf --- /dev/null +++ b/components/homme/src/preqx_acc/solver_init_mod.F90 @@ -0,0 +1,39 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module solver_init_mod + use solver_init_mod_base + use solver_init_mod_base, only: solver_init2_base => solver_init2 + use dimensions_mod, only: nelemd + implicit none + + public :: solver_init2 + + +contains + + + subroutine solver_init2( elem , deriv ) + use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj + use derivative_mod, only: derivative_t + implicit none + type(element_t) , intent(in) :: elem(:) + type(derivative_t), intent(in) :: deriv + integer :: ie + !$omp barrier + !$omp master + + !$acc enter data pcreate(state_Qdp,derived_vn0,derived_divdp,derived_divdp_proj) + !$acc enter data pcopyin(elem(1:nelemd),deriv) + do ie = 1 , nelemd + !$acc enter data pcopyin(elem(ie)%desc) + !$acc enter data pcopyin(elem(ie)%desc%putmapP,elem(ie)%desc%getmapP,elem(ie)%desc%reverse) + enddo + + !$omp end master + !$omp barrier + end subroutine solver_init2 + + +end module solver_init_mod diff --git a/components/homme/src/preqx_acc/vertremap_mod.F90 b/components/homme/src/preqx_acc/vertremap_mod.F90 new file mode 100644 index 00000000000..b9e537a6dcf --- /dev/null +++ b/components/homme/src/preqx_acc/vertremap_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module vertremap_mod + use vertremap_mod_base + implicit none +end module vertremap_mod diff --git a/components/homme/src/share/openacc/viscosity_openacc_mod.F90 b/components/homme/src/preqx_acc/viscosity_mod.F90 similarity index 59% rename from components/homme/src/share/openacc/viscosity_openacc_mod.F90 rename to components/homme/src/preqx_acc/viscosity_mod.F90 index bf3cda132b5..2735430e907 100644 --- a/components/homme/src/share/openacc/viscosity_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/viscosity_mod.F90 @@ -3,28 +3,35 @@ #include "config.h" 
#endif -module viscosity_openacc_mod -#if USE_OPENACC - use kinds, only: real_kind - use dimensions_mod, only: np,nlev,qsize,nelemd +module viscosity_mod + use viscosity_mod_base +! use viscosity_mod_base, only: neighbor_minmax_base => neighbor_minmax, & +! biharmonic_wk_scalar_base => biharmonic_wk_scalar + use thread_mod, only : omp_get_num_threads + use kinds, only : real_kind, iulog + use dimensions_mod, only : np, nc, nlev,qsize,nelemd, ntrac + use hybrid_mod, only : hybrid_t, hybrid_create + use parallel_mod, only : parallel_t + use element_mod, only : element_t + use edgetype_mod, only : EdgeBuffer_t, EdgeDescriptor_t + use bndry_mod, only : bndry_exchangev, bndry_exchangeS, bndry_exchangeS_start,bndry_exchangeS_finish + use control_mod, only : hypervis_scaling, nu, nu_div + use perf_mod, only: t_startf, t_stopf implicit none - private - public :: neighbor_minmax - public :: biharmonic_wk_scalar contains - subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) + subroutine biharmonic_wk_scalar_openacc(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) use hybrid_mod , only: hybrid_t use element_mod , only: element_t use edgetype_mod , only: edgeBuffer_t use derivative_mod , only: derivative_t use control_mod , only: hypervis_scaling use perf_mod , only: t_startf, t_stopf - use derivative_openacc_mod, only: laplace_sphere_wk - use edge_openacc_mod , only: edgeVpack, edgeVunpack - use bndry_openacc_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap + use derivative_mod, only: laplace_sphere_wk_openacc + use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc + use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap implicit none !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! compute weak biharmonic operator @@ -47,9 +54,9 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) if(hypervis_scaling > 0) var_coef1 = .false. 
!$omp barrier !$omp master - call laplace_sphere_wk(qtens,grads,deriv,elem,var_coef1,qtens,nlev*qsize,nets,nete,1,1) + call laplace_sphere_wk_openacc(qtens,grads,deriv,elem,var_coef1,qtens,nlev*qsize,nets,nete,1,1) call t_startf('biwksc_PEU') - call edgeVpack(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1) + call edgeVpack_openacc(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1) !$omp end master !$omp barrier @@ -59,7 +66,7 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) !$omp barrier !$omp master - call edgeVunpack(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1) + call edgeVunpack_openacc(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1) call t_stopf('biwksc_PEU') !$acc parallel loop gang vector collapse(5) present(qtens,elem(:)) do ie = nets , nete @@ -74,18 +81,18 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) enddo enddo enddo - call laplace_sphere_wk(qtens,grads,deriv,elem,.true.,qtens,nlev*qsize,nets,nete,1,1) + call laplace_sphere_wk_openacc(qtens,grads,deriv,elem,.true.,qtens,nlev*qsize,nets,nete,1,1) !$omp end master !$omp barrier - end subroutine biharmonic_wk_scalar + end subroutine biharmonic_wk_scalar_openacc - subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh) + subroutine neighbor_minmax_openacc(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh) use hybrid_mod , only: hybrid_t use element_mod , only: element_t use perf_mod , only: t_startf, t_stopf use edgetype_mod , only: edgeBuffer_t - use edge_openacc_mod , only: edgeSpack, edgeSunpackMin, edgeSunpackMax - use bndry_openacc_mod, only: bndry_exchangeS => bndry_exchangeS_simple_overlap + use edge_mod , only: edgeSpack_openacc, edgeSunpackMin_openacc, edgeSunpackMax_openacc + use bndry_mod, only: bndry_exchangeS => bndry_exchangeS_simple_overlap implicit none ! 
compute Q min&max over the element and all its neighbors integer :: nets,nete @@ -100,8 +107,8 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh) !$omp barrier !$omp master call t_startf('nmm_PEU') - call edgeSpack(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1) - call edgeSpack(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1) + call edgeSpack_openacc(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1) + call edgeSpack_openacc(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1) !$omp end master !$omp barrier @@ -111,13 +118,12 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh) !$omp barrier !$omp master - call edgeSunpackMin(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1) - call edgeSunpackMax(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1) + call edgeSunpackMin_openacc(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1) + call edgeSunpackMax_openacc(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1) call t_stopf('nmm_PEU') !$omp end master !$omp barrier - end subroutine neighbor_minmax + end subroutine neighbor_minmax_openacc -#endif -end module viscosity_openacc_mod +end module viscosity_mod diff --git a/components/homme/src/share/bndry_mod.F90 b/components/homme/src/share/bndry_mod_base.F90 similarity index 99% rename from components/homme/src/share/bndry_mod.F90 rename to components/homme/src/share/bndry_mod_base.F90 index 3be3590cf5e..998c80d2952 100644 --- a/components/homme/src/share/bndry_mod.F90 +++ b/components/homme/src/share/bndry_mod_base.F90 @@ -3,7 +3,7 @@ #endif -module bndry_mod +module bndry_mod_base use parallel_mod, only : syncmp,parallel_t,abortmp,iam use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads @@ -1140,4 +1140,4 @@ subroutine 
sort_neighbor_buffer_mapping(par,elem,nets,nete) -end module bndry_mod +end module bndry_mod_base diff --git a/components/homme/src/share/derivative_mod.F90 b/components/homme/src/share/derivative_mod_base.F90 similarity index 99% rename from components/homme/src/share/derivative_mod.F90 rename to components/homme/src/share/derivative_mod_base.F90 index 7b2f1fce0a1..0c89625a739 100644 --- a/components/homme/src/share/derivative_mod.F90 +++ b/components/homme/src/share/derivative_mod_base.F90 @@ -3,7 +3,7 @@ #endif -module derivative_mod +module derivative_mod_base use kinds, only : real_kind, longdouble_kind use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi @@ -2810,4 +2810,4 @@ end subroutine limiter_optim_iter_full -end module derivative_mod +end module derivative_mod_base diff --git a/components/homme/src/share/edge_mod.F90 b/components/homme/src/share/edge_mod_base.F90 similarity index 99% rename from components/homme/src/share/edge_mod.F90 rename to components/homme/src/share/edge_mod_base.F90 index 2205a27d5eb..6cd64ca3f66 100644 --- a/components/homme/src/share/edge_mod.F90 +++ b/components/homme/src/share/edge_mod_base.F90 @@ -2,7 +2,7 @@ #include "config.h" #endif -module edge_mod +module edge_mod_base use kinds, only : int_kind, log_kind, real_kind use dimensions_mod, only : max_neigh_edges, nelemd @@ -4792,7 +4792,7 @@ subroutine ghostVunpack3d(g, v, vlyr, kptr, desc, sw, se, nw, ne, mult) end subroutine ghostVunpack3d -End module edge_mod +End module edge_mod_base #if 0 #ifndef HAVE_F2003_PTR_BND_REMAP diff --git a/components/homme/src/share/prim_advection_mod.F90 b/components/homme/src/share/prim_advection_mod_base.F90 similarity index 73% rename from components/homme/src/share/prim_advection_mod.F90 rename to components/homme/src/share/prim_advection_mod_base.F90 index 11369232d72..81620cd9784 100644 --- a/components/homme/src/share/prim_advection_mod.F90 +++ 
b/components/homme/src/share/prim_advection_mod_base.F90 @@ -4,827 +4,39 @@ #define NEWEULER_B4B 1 #define OVERLAP 1 - module EXTRAE_MODULE - - interface - - subroutine extrae_user_function (enter) - integer*4, intent(in) :: enter - end subroutine extrae_user_function - - end interface - - end module EXTRAE_MODULE - -#if 0 -SUBROUTINES: - prim_advec_tracers_remap_rk2() - SEM 2D RK2 + monotone remap + hyper viscosity - SEM 2D RK2 can use sign-preserving or monotone reconstruction - -Notes on Lagrange+REMAP advection -dynamics will compute mean fluxes, so that (i.e. for qsplit=3) - - dp(t+3)-dp(t) = -3dt div(Udp_sum/3) - 3dt d(eta_dot_dpdn_sum/3) + 3dt D(dpdiss_sum/3) - -Where the floating lagrangian component: - dp_star(t+3) = dp(t) -3dt div(Udp_sum/3) + 3dt D(dpdiss_sum/3) -OR: - dp_star(t+3) = dp(t+1) + 3dt d( eta_dot_dpdn_ave(t) ) - - -For RK2 advection of Q: (example of 2 stage RK for tracers): dtq = qsplit*dt -For consistency, if Q=1 - dp1 = dp(t)- dtq div[ U1 dp(t)] - dp2 = dp1 - dtq div[ U2 dp1 ] + 2*dtq D( dpdiss_ave ) - dp* = (dp(t) + dp2 )/2 - = dp(t) - dtq div[ U1 dp(t) + U2 dp1 ]/2 + dtq D( dpdiss_ave ) - -so we require: - U1 = Udp_ave / dp(t) - U2 = Udp_ave / dp1 - -For tracer advection: - Qdp1 = Qdp(t)- dtq div[ U1 Qdp(t)] - Qdp2 = Qdp1 - dtq div[ U2 Qdp1 ] + 2*dtq D( Q dpdiss_ave ) - Qdp* = (Qdp(t) + Qdp2 )/2 - = Qdp(t) - dtq div[ U1 Qdp(t) + U2 Qdp1 ] + dtq D( Q dpdiss_ave ) - -Qdp1: limit Q, with Q = Qdp1-before-DSS/(dp1-before-DSS) with dp1 as computed above -Qdp2: limit Q, with Q = Qdp2-before-DSS/(dp2-before-DSS) with dp2 as computed above - -For dissipation: Q = Qdp1-after-DSS / dp1-after-DSS - - -last step: - remap Qdp* to Qdp(t+1) [ dp_star(t+1) -> dp(t+1) ] - -#endif - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -!! Begin GPU remap module !! -!! by Rick Archibald, 2010 !! -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -module vertremap_mod - - !************************************************************************************** - ! - ! Purpose: - ! 
Construct sub-grid-scale polynomials using piecewise spline method with - ! monotone filters. - ! - ! References: PCM - Zerroukat et al., Q.J.R. Meteorol. Soc., 2005. (ZWS2005QJR) - ! PSM - Zerroukat et al., Int. J. Numer. Meth. Fluids, 2005. (ZWS2005IJMF) - ! - !************************************************************************************** - - use kinds, only : real_kind,int_kind - use dimensions_mod, only : np,nlev,qsize,nlevp,npsq,ntrac,nc - use hybvcoord_mod, only : hvcoord_t - use element_mod, only : element_t - use fvm_control_volume_mod, only : fvm_struct - use spelt_mod, only : spelt_struct - use perf_mod, only : t_startf, t_stopf ! _EXTERNAL - use parallel_mod, only : abortmp, parallel_t - use control_mod, only : vert_remap_q_alg - - public remap1 ! remap any field, splines, monotone - public remap1_nofilter ! remap any field, splines, no filter -! todo: tweak interface to match remap1 above, rename remap1_ppm: - public remap_q_ppm ! remap state%Q, PPM, monotone - - contains - -!=======================================================================================================! - -!remap_calc_grids computes the vertical pressures and pressure differences for one vertical column for the reference grid -!and for the deformed Lagrangian grid. This was pulled out of each routine since it was a repeated task. 
-subroutine remap_calc_grids( hvcoord , ps , dt , eta_dot_dpdn , p_lag , p_ref , dp_lag , dp_ref ) - implicit none - type(hvcoord_t) , intent(in ) :: hvcoord !Derived type to hold vertical sigma grid parameters - real(kind=real_kind) , intent(in ) :: ps !Surface pressure for this column - real(kind=real_kind) , intent(in ) :: dt !Time step - real(kind=real_kind) , intent(in ) :: eta_dot_dpdn(nlev+1) !Looks like a vertical pressure flux - !to compute deformed grid spacing - real(kind=real_kind) , intent( out) :: p_lag(nlev+1) !Pressures at interfaces of the Lagrangian deformed grid - real(kind=real_kind) , intent( out) :: p_ref(nlev+1) !Pressures at interfaces of the reference grid - real(kind=real_kind) , intent( out) :: dp_lag(nlev) !Pressure differences on Lagrangian deformed grid - real(kind=real_kind) , intent( out) :: dp_ref(nlev) !Pressure differences on reference grid - integer :: k !Iterator - p_ref(1) = 0 !Both grids have a model top pressure of zero - p_lag(1) = 0 !Both grids have a model top pressure of zero - do k = 1 , nlev - dp_ref(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + & - ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * ps !Reference pressure difference - ! Lagrangian pressure difference (flux in - flux out over the time step) - dp_lag(k) = dp_ref(k) + dt * ( eta_dot_dpdn(k+1) - eta_dot_dpdn(k) ) - p_ref(k+1) = p_ref(k) + dp_ref(k) !Pressure at interfaces accumulated using difference over each cell - p_lag(k+1) = p_lag(k) + dp_lag(k) !Pressure at interfaces accumulated using difference over each cell - enddo -end subroutine remap_calc_grids - -!=======================================================================================================! - - - -subroutine remap1(Qdp,nx,qsize,dp1,dp2) - ! remap 1 field - ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) - ! dp1 layer thickness (source) - ! dp2 layer thickness (target) - ! - ! output: remaped Qdp, conserving mass, monotone on Q=Qdp/dp - ! 
- implicit none - integer, intent(in) :: nx,qsize - real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) - real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) - ! ======================== - ! Local Variables - ! ======================== - - real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv - real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1 - real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, & - peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, & - zero = 0,one = 1,tiny = 1e-12,qmax = 1d50 - integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, & - lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q - logical :: abort=.false. - - if (vert_remap_q_alg == 1 .or. vert_remap_q_alg == 2) then - call remap_Q_ppm(qdp,nx,qsize,dp1,dp2) - return - endif - - call t_startf('remap_Q_noppm') -#if (defined COLUMN_OPENMP) -!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) & -!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) & -!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) & -!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2) -#endif - do q=1,qsize - do i=1,nx - do j=1,nx - - z1c(1)=0 ! source grid - z2c(1)=0 ! target grid - do k=1,nlev - z1c(k+1)=z1c(k)+dp1(i,j,k) - z2c(k+1)=z2c(k)+dp2(i,j,k) - enddo - - zv(1)=0 - do k=1,nlev - Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass - zv(k+1) = zv(k)+Qcol(k) - enddo - - if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then - write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME' - write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN ' - write(6,*) 'DATA FOR MODEL LEVELS' - write(6,*) 'PLEVMODEL=',z2c(nlev+1) - write(6,*) 'PLEV =',z1c(nlev+1) - write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1) - abort=.true. 
- endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! quadratic splies with UK met office monotonicity constraints !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - zkr = 99 - ilev = 2 - zkr(1) = 1 - zkr(nlev+1) = nlev - kloop: do k = 2,nlev - do jk = ilev,nlev+1 - if (z1c(jk).ge.z2c(k)) then - ilev = jk - zkr(k) = jk-1 - cycle kloop - endif - enddo - enddo kloop - - zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr)) - zgam(1) = 0.0 - zgam(nlev+1) = 1.0 - zhdp = z1c(2:nlev+1)-z1c(1:nlev) - - - h = 1/zhdp - zarg = Qcol * h - rhs = 0 - lower_diag = 0 - diag = 0 - upper_diag = 0 - - rhs(1)=3*zarg(1) - rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1)) - rhs(nlev+1)=3*zarg(nlev) - - lower_diag(1)=1 - lower_diag(2:nlev) = h(1:nlev-1) - lower_diag(nlev+1)=1 - - diag(1)=2 - diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1)) - diag(nlev+1)=2 - - upper_diag(1)=1 - upper_diag(2:nlev) = h(2:nlev) - upper_diag(nlev+1)=0 - - q_diag(1)=-upper_diag(1)/diag(1) - rhs(1)= rhs(1)/diag(1) - - do k=2,nlev+1 - tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1)) - q_diag(k) = -upper_diag(k)*tmp_cal - rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal - enddo - do k=nlev,1,-1 - rhs(k)=rhs(k)+q_diag(k)*rhs(k+1) - enddo - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! monotonicity modifications !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - filter_code = 0 - dy(1:nlev-1) = zarg(2:nlev)-zarg(1:nlev-1) - dy(nlev) = dy(nlev-1) - - dy = merge(zero, dy, abs(dy) < tiny ) - - do k=1,nlev - im1=MAX(1,k-1) - im2=MAX(1,k-2) - im3=MAX(1,k-3) - ip1=MIN(nlev,k+1) - t1 = merge(1,0,(zarg(k)-rhs(k))*(rhs(k)-zarg(im1)) >= 0) - t2 = merge(1,0,dy(im2)*(rhs(k)-zarg(im1)) > 0 .AND. dy(im2)*dy(im3) > 0 & - .AND. dy(k)*dy(ip1) > 0 .AND. 
dy(im2)*dy(k) < 0 ) - t3 = merge(1,0,ABS(rhs(k)-zarg(im1)) > ABS(rhs(k)-zarg(k))) - - filter_code(k) = merge(0,1,t1+t2 > 0) - rhs(k) = (1-filter_code(k))*rhs(k)+filter_code(k)*(t3*zarg(k)+(1-t3)*zarg(im1)) - filter_code(im1) = MAX(filter_code(im1),filter_code(k)) - enddo - - rhs = merge(qmax,rhs,rhs > qmax) - rhs = merge(zero,rhs,rhs < zero) - - za0 = rhs(1:nlev) - za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg - za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg - - dy(1:nlev) = rhs(2:nlev+1)-rhs(1:nlev) - dy = merge(zero, dy, abs(dy) < tiny ) - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! Compute the 3 quadratic spline coeffients {za0, za1, za2} !! - !! knowing the quadratic spline parameters {rho_left,rho_right,zarg} !! - !! Zerroukat et.al., Q.J.R. Meteorol. Soc., Vol. 128, pp. 2801-2820 (2002). !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - h = rhs(2:nlev+1) - - do k=1,nlev - xm_d = merge(one,2*za2(k),abs(za2(k)) < tiny) - xm = merge(zero,-za1(k)/xm_d, abs(za2(k)) < tiny) - f_xm = za0(k) + za1(k)*xm + za2(k)*xm**2 - - t1 = merge(1,0,ABS(za2(k)) > tiny) - t2 = merge(1,0,xm <= zero .OR. xm >= 1) - t3 = merge(1,0,za2(k) > zero) - t4 = merge(1,0,za2(k) < zero) - tm = merge(1,0,t1*((1-t2)+t3) .EQ. 2) - tp = merge(1,0,t1*((1-t2)+(1-t3)+t4) .EQ. 3) - - peaks=0 - peaks = merge(-1,peaks,tm .EQ. 1) - peaks = merge(+1,peaks,tp .EQ. 1) - peaks_min = merge(f_xm,MIN(za0(k),za0(k)+za1(k)+za2(k)),tm .EQ. 1) - peaks_max = merge(f_xm,MAX(za0(k),za0(k)+za1(k)+za2(k)),tp .EQ. 1) - - im1=MAX(1,k-1) - im2=MAX(1,k-2) - ip1=MIN(nlev,k+1) - ip2=MIN(nlev,k+2) - - t1 = merge(abs(peaks),0,(dy(im2)*dy(im1) <= tiny) .OR. & - (dy(ip1)*dy(ip2) <= tiny) .OR. (dy(im1)*dy(ip1) >= tiny) .OR. & - (dy(im1)*float(peaks) <= tiny)) - - filter_code(k) = merge(1,t1+(1-t1)*filter_code(k),(rhs(k) >= qmax) .OR. & - (rhs(k) <= zero) .OR. (peaks_max > qmax) .OR. 
(peaks_min < tiny)) - - if (filter_code(k) > 0) then - level1 = rhs(k) - level2 = (2*rhs(k)+h(k))/3 - level3 = 0.5*(rhs(k)+h(k)) - level4 = (1/3d0)*rhs(k)+2*(1/3d0)*h(k) - level5 = h(k) - - t1 = merge(1,0,h(k) >= rhs(k)) - t2 = merge(1,0,zarg(k) <= level1 .OR. zarg(k) >= level5) - t3 = merge(1,0,zarg(k) > level1 .AND. zarg(k) < level2) - t4 = merge(1,0,zarg(k) > level4 .AND. zarg(k) < level5) - - lt1 = t1*t2 - lt2 = t1*(1-t2+t3) - lt3 = t1*(1-t2+1-t3+t4) - - za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1) - za1(k) = merge(zero,za1(k),lt1 .EQ. 1) - za2(k) = merge(zero,za2(k),lt1 .EQ. 1) - - za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2) - za1(k) = merge(zero,za1(k),lt2 .EQ. 2) - za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2) - - za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3) - za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3) - za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3) - - t2 = merge(1,0,zarg(k) >= level1 .OR. zarg(k) <= level5) - t3 = merge(1,0,zarg(k) < level1 .AND. zarg(k) > level2) - t4 = merge(1,0,zarg(k) < level4 .AND. zarg(k) > level5) - - lt1 = (1-t1)*t2 - lt2 = (1-t1)*(1-t2+t3) - lt3 = (1-t1)*(1-t2+1-t3+t4) - - za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1) - za1(k) = merge(zero,za1(k),lt1 .EQ. 1) - za2(k) = merge(zero,za2(k),lt1 .EQ. 1) - - za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2) - za1(k) = merge(zero,za1(k),lt2 .EQ. 2) - za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2) - - za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3) - za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3) - za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3) - endif - enddo - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! start iteration from top to bottom of atmosphere !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - zv1 = 0 - do k=1,nlev - if (zgam(k+1)>1d0) then - WRITE(*,*) 'r not in [0:1]', zgam(k+1) - abort=.true. 
- endif - zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ & - (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1)) - Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio - zv1 = zv2 - enddo - enddo - enddo - enddo ! q loop - if (abort) call abortmp('Bad levels in remap1. usually CFL violatioin') - call t_stopf('remap_Q_noppm') - -end subroutine remap1 - -subroutine remap1_nofilter(Qdp,nx,qsize,dp1,dp2) - ! remap 1 field - ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) - ! dp1 layer thickness (source) - ! dp2 layer thickness (target) - ! - ! output: remaped Qdp, conserving mass - ! - implicit none - integer, intent(in) :: nx,qsize - real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) - real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) - ! ======================== - ! Local Variables - ! ======================== - - real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv - real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1 - real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, & - peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, & - zero = 0,one = 1,tiny = 1e-12,qmax = 1d50 - integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, & - lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q - logical :: abort=.false. -! call t_startf('remap1_nofilter') - -#if (defined COLUMN_OPENMP) -!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) & -!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) & -!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) & -!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2) -#endif - do q=1,qsize - do i=1,nx - do j=1,nx - - z1c(1)=0 ! source grid - z2c(1)=0 ! 
target grid - do k=1,nlev - z1c(k+1)=z1c(k)+dp1(i,j,k) - z2c(k+1)=z2c(k)+dp2(i,j,k) - enddo - - zv(1)=0 - do k=1,nlev - Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass - zv(k+1) = zv(k)+Qcol(k) - enddo - - if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then - write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME' - write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN ' - write(6,*) 'DATA FOR MODEL LEVELS' - write(6,*) 'PLEVMODEL=',z2c(nlev+1) - write(6,*) 'PLEV =',z1c(nlev+1) - write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1) - abort=.true. - endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! quadratic splies with UK met office monotonicity constraints !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - zkr = 99 - ilev = 2 - zkr(1) = 1 - zkr(nlev+1) = nlev - kloop: do k = 2,nlev - do jk = ilev,nlev+1 - if (z1c(jk).ge.z2c(k)) then - ilev = jk - zkr(k) = jk-1 - cycle kloop - endif - enddo - enddo kloop - - zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr)) - zgam(1) = 0.0 - zgam(nlev+1) = 1.0 - zhdp = z1c(2:nlev+1)-z1c(1:nlev) - - - h = 1/zhdp - zarg = Qcol * h - rhs = 0 - lower_diag = 0 - diag = 0 - upper_diag = 0 - - rhs(1)=3*zarg(1) - rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1)) - rhs(nlev+1)=3*zarg(nlev) - - lower_diag(1)=1 - lower_diag(2:nlev) = h(1:nlev-1) - lower_diag(nlev+1)=1 - - diag(1)=2 - diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1)) - diag(nlev+1)=2 - - upper_diag(1)=1 - upper_diag(2:nlev) = h(2:nlev) - upper_diag(nlev+1)=0 - - q_diag(1)=-upper_diag(1)/diag(1) - rhs(1)= rhs(1)/diag(1) - - do k=2,nlev+1 - tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1)) - q_diag(k) = -upper_diag(k)*tmp_cal - rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal - enddo - do k=nlev,1,-1 - rhs(k)=rhs(k)+q_diag(k)*rhs(k+1) - enddo - - za0 = rhs(1:nlev) - za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg - za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg - - - 
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! start iteration from top to bottom of atmosphere !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - zv1 = 0 - do k=1,nlev - if (zgam(k+1)>1d0) then - WRITE(*,*) 'r not in [0:1]', zgam(k+1) - abort=.true. - endif - zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ & - (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1)) - Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio - zv1 = zv2 - enddo - enddo - enddo - enddo ! q loop - if (abort) call abortmp('Bad levels in remap1_nofilter. usually CFL violatioin') -! call t_stopf('remap1_nofilter') -end subroutine remap1_nofilter - -!=======================================================================================================! - - -!This uses the exact same model and reference grids and data as remap_Q, but it interpolates -!using PPM instead of splines. -subroutine remap_Q_ppm(Qdp,nx,qsize,dp1,dp2) - ! remap 1 field - ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) - ! dp1 layer thickness (source) - ! dp2 layer thickness (target) - ! - ! output: remaped Qdp, conserving mass - ! - use control_mod, only : vert_remap_q_alg - implicit none - integer,intent(in) :: nx,qsize - real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) - real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) - ! 
Local Variables - integer, parameter :: gs = 2 !Number of cells to place in the ghost region - real(kind=real_kind), dimension( nlev+2 ) :: pio !Pressure at interfaces for old grid - real(kind=real_kind), dimension( nlev+1 ) :: pin !Pressure at interfaces for new grid - real(kind=real_kind), dimension( nlev+1 ) :: masso !Accumulate mass up to each interface - real(kind=real_kind), dimension( 1-gs:nlev+gs) :: ao !Tracer value on old grid - real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpo !change in pressure over a cell for old grid - real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpn !change in pressure over a cell for old grid - real(kind=real_kind), dimension(3, nlev ) :: coefs !PPM coefficients within each cell - real(kind=real_kind), dimension( nlev ) :: z1, z2 - real(kind=real_kind) :: ppmdx(10,0:nlev+1) !grid spacings - real(kind=real_kind) :: mymass, massn1, massn2 - integer :: i, j, k, q, kk, kid(nlev) - - call t_startf('remap_Q_ppm') - do j = 1 , nx - do i = 1 , nx - - pin(1)=0 - pio(1)=0 - do k=1,nlev - dpn(k)=dp2(i,j,k) - dpo(k)=dp1(i,j,k) - pin(k+1)=pin(k)+dpn(k) - pio(k+1)=pio(k)+dpo(k) - enddo - - - - pio(nlev+2) = pio(nlev+1) + 1. !This is here to allow an entire block of k threads to run in the remapping phase. - !It makes sure there's an old interface value below the domain that is larger. - pin(nlev+1) = pio(nlev+1) !The total mass in a column does not change. - !Therefore, the pressure of that mass cannot either. - !Fill in the ghost regions with mirrored values. if vert_remap_q_alg is defined, this is of no consequence. - do k = 1 , gs - dpo(1 -k) = dpo( k) - dpo(nlev+k) = dpo(nlev+1-k) - enddo - - !Compute remapping intervals once for all tracers. Find the old grid cell index in which the - !k-th new cell interface resides. Then integrate from the bottom of that old cell to the new - !interface location. In practice, the grid never deforms past one cell, so the search can be - !simplified by this. 
Also, the interval of integration is usually of magnitude close to zero - !or close to dpo because of minimial deformation. - !Numerous tests confirmed that the bottom and top of the grids match to machine precision, so - !I set them equal to each other. - do k = 1 , nlev - kk = k !Keep from an order n^2 search operation by assuming the old cell index is close. - !Find the index of the old grid cell in which this new cell's bottom interface resides. - do while ( pio(kk) <= pin(k+1) ) - kk = kk + 1 - enddo - kk = kk - 1 !kk is now the cell index we're integrating over. - if (kk == nlev+1) kk = nlev !This is to keep the indices in bounds. - !Top bounds match anyway, so doesn't matter what coefficients are used - kid(k) = kk !Save for reuse - z1(k) = -0.5D0 !This remapping assumes we're starting from the left interface of an old grid cell - !In fact, we're usually integrating very little or almost all of the cell in question - z2(k) = ( pin(k+1) - ( pio(kk) + pio(kk+1) ) * 0.5 ) / dpo(kk) !PPM interpolants are normalized to an independent - !coordinate domain [-0.5,0.5]. - enddo - - !This turned out a big optimization, remembering that only parts of the PPM algorithm depends on the data, namely the - !limiting. So anything that depends only on the grid is pre-computed outside the tracer loop. - ppmdx(:,:) = compute_ppm_grids( dpo ) - - !From here, we loop over tracers for only those portions which depend on tracer data, which includes PPM limiting and - !mass accumulation - do q = 1 , qsize - !Accumulate the old mass up to old grid cell interface locations to simplify integration - !during remapping. Also, divide out the grid spacing so we're working with actual tracer - !values and can conserve mass. The option for ifndef ZEROHORZ I believe is there to ensure - !tracer consistency for an initially uniform field. I copied it from the old remap routine. - masso(1) = 0. - do k = 1 , nlev - ao(k) = Qdp(i,j,k,q) - masso(k+1) = masso(k) + ao(k) !Accumulate the old mass. 
This will simplify the remapping - ao(k) = ao(k) / dpo(k) !Divide out the old grid spacing because we want the tracer mixing ratio, not mass. - enddo - !Fill in ghost values. Ignored if vert_remap_q_alg == 2 - do k = 1 , gs - ao(1 -k) = ao( k) - ao(nlev+k) = ao(nlev+1-k) - enddo - !Compute monotonic and conservative PPM reconstruction over every cell - coefs(:,:) = compute_ppm( ao , ppmdx ) - !Compute tracer values on the new grid by integrating from the old cell bottom to the new - !cell interface to form a new grid mass accumulation. Taking the difference between - !accumulation at successive interfaces gives the mass inside each cell. Since Qdp is - !supposed to hold the full mass this needs no normalization. - massn1 = 0. - do k = 1 , nlev - kk = kid(k) - massn2 = masso(kk) + integrate_parabola( coefs(:,kk) , z1(k) , z2(k) ) * dpo(kk) - Qdp(i,j,k,q) = massn2 - massn1 - massn1 = massn2 - enddo - enddo - enddo - enddo - call t_stopf('remap_Q_ppm') -end subroutine remap_Q_ppm - - -!=======================================================================================================! - - -!THis compute grid-based coefficients from Collela & Woodward 1984. 
-function compute_ppm_grids( dx ) result(rslt) - use control_mod, only: vert_remap_q_alg - implicit none - real(kind=real_kind), intent(in) :: dx(-1:nlev+2) !grid spacings - real(kind=real_kind) :: rslt(10,0:nlev+1) !grid spacings - integer :: j - integer :: indB, indE - - !Calculate grid-based coefficients for stage 1 of compute_ppm - if (vert_remap_q_alg == 2) then - indB = 2 - indE = nlev-1 - else - indB = 0 - indE = nlev+1 - endif - do j = indB , indE - rslt( 1,j) = dx(j) / ( dx(j-1) + dx(j) + dx(j+1) ) - rslt( 2,j) = ( 2.*dx(j-1) + dx(j) ) / ( dx(j+1) + dx(j) ) - rslt( 3,j) = ( dx(j) + 2.*dx(j+1) ) / ( dx(j-1) + dx(j) ) - enddo - - !Caculate grid-based coefficients for stage 2 of compute_ppm - if (vert_remap_q_alg == 2) then - indB = 2 - indE = nlev-2 - else - indB = 0 - indE = nlev - endif - do j = indB , indE - rslt( 4,j) = dx(j) / ( dx(j) + dx(j+1) ) - rslt( 5,j) = 1. / sum( dx(j-1:j+2) ) - rslt( 6,j) = ( 2. * dx(j+1) * dx(j) ) / ( dx(j) + dx(j+1 ) ) - rslt( 7,j) = ( dx(j-1) + dx(j ) ) / ( 2. * dx(j ) + dx(j+1) ) - rslt( 8,j) = ( dx(j+2) + dx(j+1) ) / ( 2. * dx(j+1) + dx(j ) ) - rslt( 9,j) = dx(j ) * ( dx(j-1) + dx(j ) ) / ( 2.*dx(j ) + dx(j+1) ) - rslt(10,j) = dx(j+1) * ( dx(j+1) + dx(j+2) ) / ( dx(j ) + 2.*dx(j+1) ) - enddo -end function compute_ppm_grids - -!=======================================================================================================! 
- - - -!This computes a limited parabolic interpolant using a net 5-cell stencil, but the stages of computation are broken up into 3 stages -function compute_ppm( a , dx ) result(coefs) - use control_mod, only: vert_remap_q_alg - implicit none - real(kind=real_kind), intent(in) :: a ( -1:nlev+2) !Cell-mean values - real(kind=real_kind), intent(in) :: dx (10, 0:nlev+1) !grid spacings - real(kind=real_kind) :: coefs(0:2, nlev ) !PPM coefficients (for parabola) - real(kind=real_kind) :: ai (0:nlev ) !fourth-order accurate, then limited interface values - real(kind=real_kind) :: dma(0:nlev+1) !An expression from Collela's '84 publication - real(kind=real_kind) :: da !Ditto - ! Hold expressions based on the grid (which are cumbersome). - real(kind=real_kind) :: dx1, dx2, dx3, dx4, dx5, dx6, dx7, dx8, dx9, dx10 - real(kind=real_kind) :: al, ar !Left and right interface values for cell-local limiting - integer :: j - integer :: indB, indE - - ! Stage 1: Compute dma for each cell, allowing a 1-cell ghost stencil below and above the domain - if (vert_remap_q_alg == 2) then - indB = 2 - indE = nlev-1 - else - indB = 0 - indE = nlev+1 - endif - do j = indB , indE - da = dx(1,j) * ( dx(2,j) * ( a(j+1) - a(j) ) + dx(3,j) * ( a(j) - a(j-1) ) ) - dma(j) = minval( (/ abs(da) , 2. * abs( a(j) - a(j-1) ) , 2. * abs( a(j+1) - a(j) ) /) ) * sign(1.D0,da) - if ( ( a(j+1) - a(j) ) * ( a(j) - a(j-1) ) <= 0. ) dma(j) = 0. - enddo - - ! Stage 2: Compute ai for each cell interface in the physical domain (dimension nlev+1) - if (vert_remap_q_alg == 2) then - indB = 2 - indE = nlev-2 - else - indB = 0 - indE = nlev - endif - do j = indB , indE - ai(j) = a(j) + dx(4,j) * ( a(j+1) - a(j) ) + dx(5,j) * ( dx(6,j) * ( dx(7,j) - dx(8,j) ) & - * ( a(j+1) - a(j) ) - dx(9,j) * dma(j+1) + dx(10,j) * dma(j) ) - enddo - - ! Stage 3: Compute limited PPM interpolant over each cell in the physical domain - ! (dimension nlev) using ai on either side and ao within the cell. 
- if (vert_remap_q_alg == 2) then - indB = 3 - indE = nlev-2 - else - indB = 1 - indE = nlev - endif - do j = indB , indE - al = ai(j-1) - ar = ai(j ) - if ( (ar - a(j)) * (a(j) - al) <= 0. ) then - al = a(j) - ar = a(j) - endif - if ( (ar - al) * (a(j) - (al + ar)/2.) > (ar - al)**2/6. ) al = 3.*a(j) - 2. * ar - if ( (ar - al) * (a(j) - (al + ar)/2.) < -(ar - al)**2/6. ) ar = 3.*a(j) - 2. * al - !Computed these coefficients from the edge values and cell mean in Maple. Assumes normalized coordinates: xi=(x-x0)/dx - coefs(0,j) = 1.5 * a(j) - ( al + ar ) / 4. - coefs(1,j) = ar - al - coefs(2,j) = -6. * a(j) + 3. * ( al + ar ) - enddo - - !If we're not using a mirrored boundary condition, then make the two cells bordering the top and bottom - !material boundaries piecewise constant. Zeroing out the first and second moments, and setting the zeroth - !moment to the cell mean is sufficient to maintain conservation. - if (vert_remap_q_alg == 2) then - coefs(0,1:2) = a(1:2) - coefs(1:2,1:2) = 0. - coefs(0,nlev-1:nlev) = a(nlev-1:nlev) - coefs(1:2,nlev-1:nlev) = 0.D0 - endif -end function compute_ppm - -!=======================================================================================================! - - -!Simple function computes the definite integral of a parabola in normalized coordinates, xi=(x-x0)/dx, -!given two bounds. Make sure this gets inlined during compilation. -function integrate_parabola( a , x1 , x2 ) result(mass) - implicit none - real(kind=real_kind), intent(in) :: a(0:2) !Coefficients of the parabola - real(kind=real_kind), intent(in) :: x1 !lower domain bound for integration - real(kind=real_kind), intent(in) :: x2 !upper domain bound for integration - real(kind=real_kind) :: mass - mass = a(0) * (x2 - x1) + a(1) * (x2 ** 2 - x1 ** 2) / 0.2D1 + a(2) * (x2 ** 3 - x1 ** 3) / 0.3D1 -end function integrate_parabola - - -!=============================================================================================! 
- - - -end module vertremap_mod - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -!! End GPU remap module !! -!! by Rick Archibald, 2010 !! -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - -!=======================================================================================================! - - +!SUBROUTINES: +! prim_advec_tracers_remap_rk2() +! SEM 2D RK2 + monotone remap + hyper viscosity +! SEM 2D RK2 can use sign-preserving or monotone reconstruction +! +!For RK2 advection of Q: (example of 2 stage RK for tracers): dtq = qsplit*dt +!For consistency, if Q=1 +! dp1 = dp(t)- dtq div[ U1 dp(t)] +! dp2 = dp1 - dtq div[ U2 dp1 ] + 2*dtq D( dpdiss_ave ) +! dp* = (dp(t) + dp2 )/2 +! = dp(t) - dtq div[ U1 dp(t) + U2 dp1 ]/2 + dtq D( dpdiss_ave ) +! +!so we require: +! U1 = Udp_ave / dp(t) +! U2 = Udp_ave / dp1 +! +!For tracer advection: +! Qdp1 = Qdp(t)- dtq div[ U1 Qdp(t)] +! Qdp2 = Qdp1 - dtq div[ U2 Qdp1 ] + 2*dtq D( Q dpdiss_ave ) +! Qdp* = (Qdp(t) + Qdp2 )/2 +! = Qdp(t) - dtq div[ U1 Qdp(t) + U2 Qdp1 ]/2 + dtq D( Q dpdiss_ave ) +! +!Qdp1: limit Q, with Q = Qdp1-before-DSS/(dp1-before-DSS) with dp1 as computed above +!Qdp2: limit Q, with Q = Qdp2-before-DSS/(dp2-before-DSS) with dp2 as computed above +! +!For dissipation: Q = Qdp1-after-DSS / dp1-after-DSS +! +! +!last step: +! remap Qdp* to Qdp(t+1) [ dp_star(t+1) -> dp(t+1) ] -module prim_advection_mod +module prim_advection_mod_base ! ! two formulations. both are conservative !
u grad Q formulation: @@ -3068,4 +2280,4 @@ subroutine vertical_remap(hybrid,elem,fvm,hvcoord,dt,np1,np1_qdp,np1_fvm,nets,ne call t_stopf('vertical_remap') end subroutine vertical_remap -end module prim_advection_mod +end module prim_advection_mod_base diff --git a/components/homme/src/share/prim_driver_mod.F90 b/components/homme/src/share/prim_driver_mod.F90 index 7313246b1b3..7b96be7414c 100644 --- a/components/homme/src/share/prim_driver_mod.F90 +++ b/components/homme/src/share/prim_driver_mod.F90 @@ -94,7 +94,7 @@ subroutine prim_init1(elem, fvm, par, dom_mt, Tl) ! -------------------------------- use schedule_mod, only : genEdgeSched, PrintSchedule ! -------------------------------- - use arch_switch_mod, only: prim_advec_init1 + use prim_advection_mod, only: prim_advec_init1 ! -------------------------------- use prim_advance_mod, only: prim_advance_init ! -------------------------------- @@ -609,7 +609,8 @@ subroutine prim_init2(elem, fvm, hybrid, nets, nete, tl, hvcoord) use derivative_mod, only : derivinit, interpolate_gll2fvm_points, interpolate_gll2spelt_points, v2pinit use global_norms_mod, only : test_global_integral, print_cfl use hybvcoord_mod, only : hvcoord_t - use arch_switch_mod, only: prim_advec_init2, prim_advec_init_deriv, deriv, arch_init2 + use prim_advection_mod, only: prim_advec_init2, prim_advec_init_deriv, deriv + use solver_init_mod, only: solver_init2 #ifdef CAM #else use column_model_mod, only : InitColumnModel @@ -1102,7 +1103,7 @@ end subroutine noxinit if (hybrid%masterthread) write(iulog,*) "initial state:" call prim_printstate(elem, tl, hybrid,hvcoord,nets,nete, fvm) - call arch_init2(elem(:), deriv(hybrid%ithr)) + call solver_init2(elem(:), deriv(hybrid%ithr)) call Prim_Advec_Init2(elem(:), hvcoord, hybrid) end subroutine prim_init2 @@ -1162,7 +1163,7 @@ subroutine prim_run(elem, hybrid,nets,nete, dt, tl, hvcoord, advance_name) use control_mod, only: statefreq, integration, ftype, qsplit, disable_diagnostics use 
prim_advance_mod, only : prim_advance_exp, prim_advance_si, preq_robert3 use prim_state_mod, only : prim_printstate, prim_diag_scalars, prim_energy_halftimes - use arch_switch_mod, only: deriv + use prim_advection_mod, only: deriv use parallel_mod, only : abortmp #ifndef CAM use column_model_mod, only : ApplyColumnModel @@ -1618,7 +1619,7 @@ subroutine prim_step(elem, fvm, hybrid,nets,nete, dt, tl, hvcoord, compute_diagn use fvm_bsp_mod, only : get_boomerang_velocities_gll, get_solidbody_velocities_gll use prim_advance_mod, only : prim_advance_exp, overwrite_SEdensity use prim_advection_mod, only : prim_advec_tracers_fvm - use arch_switch_mod, only : prim_advec_tracers_remap, deriv + use prim_advection_mod, only : prim_advec_tracers_remap, deriv use derivative_mod, only : subcell_integration #if defined(_SPELT) use prim_advection_mod, only : prim_advec_tracers_spelt @@ -2027,7 +2028,7 @@ subroutine smooth_topo_datasets(phis,sghdyn,sgh30dyn,elem,hybrid,nets,nete) use derivative_mod, only : derivative_t , laplace_sphere_wk use viscosity_mod, only : biharmonic_wk use prim_advance_mod, only : smooth_phis - use arch_switch_mod, only: deriv + use prim_advection_mod, only: deriv implicit none integer , intent(in) :: nets,nete diff --git a/components/homme/src/share/solver_init_mod_base.F90 b/components/homme/src/share/solver_init_mod_base.F90 new file mode 100644 index 00000000000..774d103a9c3 --- /dev/null +++ b/components/homme/src/share/solver_init_mod_base.F90 @@ -0,0 +1,25 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module solver_init_mod_base + implicit none + private + + public :: solver_init2 + + +contains + + + subroutine solver_init2( elem , deriv ) + use element_mod, only: element_t + use derivative_mod, only: derivative_t + implicit none + type(element_t) , intent(in) :: elem(:) + type(derivative_t), intent(in) :: deriv + !do nothing + end subroutine solver_init2 + + +end module solver_init_mod_base diff --git 
a/components/homme/src/share/vertremap_mod_base.F90 b/components/homme/src/share/vertremap_mod_base.F90 new file mode 100644 index 00000000000..c915acf7350 --- /dev/null +++ b/components/homme/src/share/vertremap_mod_base.F90 @@ -0,0 +1,767 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +!SUBROUTINES: +! +!Notes on Lagrange+REMAP advection +!dynamics will compute mean fluxes, so that (i.e. for qsplit=3) +! +! dp(t+3)-dp(t) = -3dt div(Udp_sum/3) - 3dt d(eta_dot_dpdn_sum/3) + 3dt D(dpdiss_sum/3) +! +!Where the floating lagrangian component is: +! dp_star(t+3) = dp(t) -3dt div(Udp_sum/3) + 3dt D(dpdiss_sum/3) +!OR: +! dp_star(t+3) = dp(t+3) + 3dt d( eta_dot_dpdn_ave(t) ) + + +module vertremap_mod_base + + !************************************************************************************** + ! + ! Purpose: + ! Construct sub-grid-scale polynomials using piecewise spline method with + ! monotone filters. + ! + ! References: PCM - Zerroukat et al., Q.J.R. Meteorol. Soc., 2005. (ZWS2005QJR) + ! PSM - Zerroukat et al., Int. J. Numer. Meth. Fluids, 2005. (ZWS2005IJMF) + ! + !************************************************************************************** + + use kinds, only : real_kind,int_kind + use dimensions_mod, only : np,nlev,qsize,nlevp,npsq,ntrac,nc + use hybvcoord_mod, only : hvcoord_t + use element_mod, only : element_t + use fvm_control_volume_mod, only : fvm_struct + use spelt_mod, only : spelt_struct + use perf_mod, only : t_startf, t_stopf ! _EXTERNAL + use parallel_mod, only : abortmp, parallel_t + use control_mod, only : vert_remap_q_alg + + public remap1 ! remap any field, splines, monotone + public remap1_nofilter ! remap any field, splines, no filter +! todo: tweak interface to match remap1 above, rename remap1_ppm: + public remap_q_ppm ! remap state%Q, PPM, monotone + + contains + +!=======================================================================================================!
+ +!remap_calc_grids computes the vertical pressures and pressure differences for one vertical column for the reference grid +!and for the deformed Lagrangian grid. This was pulled out of each routine since it was a repeated task. +subroutine remap_calc_grids( hvcoord , ps , dt , eta_dot_dpdn , p_lag , p_ref , dp_lag , dp_ref ) + implicit none + type(hvcoord_t) , intent(in ) :: hvcoord !Derived type to hold vertical sigma grid parameters + real(kind=real_kind) , intent(in ) :: ps !Surface pressure for this column + real(kind=real_kind) , intent(in ) :: dt !Time step + real(kind=real_kind) , intent(in ) :: eta_dot_dpdn(nlev+1) !Looks like a vertical pressure flux + !to compute deformed grid spacing + real(kind=real_kind) , intent( out) :: p_lag(nlev+1) !Pressures at interfaces of the Lagrangian deformed grid + real(kind=real_kind) , intent( out) :: p_ref(nlev+1) !Pressures at interfaces of the reference grid + real(kind=real_kind) , intent( out) :: dp_lag(nlev) !Pressure differences on Lagrangian deformed grid + real(kind=real_kind) , intent( out) :: dp_ref(nlev) !Pressure differences on reference grid + integer :: k !Iterator + p_ref(1) = 0 !Both grids have a model top pressure of zero + p_lag(1) = 0 !Both grids have a model top pressure of zero + do k = 1 , nlev + dp_ref(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + & + ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * ps !Reference pressure difference + ! Lagrangian pressure difference (flux in - flux out over the time step) + dp_lag(k) = dp_ref(k) + dt * ( eta_dot_dpdn(k+1) - eta_dot_dpdn(k) ) + p_ref(k+1) = p_ref(k) + dp_ref(k) !Pressure at interfaces accumulated using difference over each cell + p_lag(k+1) = p_lag(k) + dp_lag(k) !Pressure at interfaces accumulated using difference over each cell + enddo +end subroutine remap_calc_grids + +!=======================================================================================================! + + + +subroutine remap1(Qdp,nx,qsize,dp1,dp2) + ! 
remap 1 field + ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) + ! dp1 layer thickness (source) + ! dp2 layer thickness (target) + ! + ! output: remaped Qdp, conserving mass, monotone on Q=Qdp/dp + ! + implicit none + integer, intent(in) :: nx,qsize + real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) + real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) + ! ======================== + ! Local Variables + ! ======================== + + real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv + real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1 + real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, & + peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, & + zero = 0,one = 1,tiny = 1e-12,qmax = 1d50 + integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, & + lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q + logical :: abort=.false. + + if (vert_remap_q_alg == 1 .or. vert_remap_q_alg == 2) then + call remap_Q_ppm(qdp,nx,qsize,dp1,dp2) + return + endif + + call t_startf('remap_Q_noppm') +#if (defined COLUMN_OPENMP) +!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) & +!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) & +!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) & +!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2) +#endif + do q=1,qsize + do i=1,nx + do j=1,nx + + z1c(1)=0 ! source grid + z2c(1)=0 ! target grid + do k=1,nlev + z1c(k+1)=z1c(k)+dp1(i,j,k) + z2c(k+1)=z2c(k)+dp2(i,j,k) + enddo + + zv(1)=0 + do k=1,nlev + Qcol(k)=Qdp(i,j,k,q)! 
*(z1c(k+1)-z1c(k)) input is mass + zv(k+1) = zv(k)+Qcol(k) + enddo + + if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then + write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME' + write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN ' + write(6,*) 'DATA FOR MODEL LEVELS' + write(6,*) 'PLEVMODEL=',z2c(nlev+1) + write(6,*) 'PLEV =',z1c(nlev+1) + write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1) + abort=.true. + endif + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! quadratic splies with UK met office monotonicity constraints !! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + zkr = 99 + ilev = 2 + zkr(1) = 1 + zkr(nlev+1) = nlev + kloop: do k = 2,nlev + do jk = ilev,nlev+1 + if (z1c(jk).ge.z2c(k)) then + ilev = jk + zkr(k) = jk-1 + cycle kloop + endif + enddo + enddo kloop + + zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr)) + zgam(1) = 0.0 + zgam(nlev+1) = 1.0 + zhdp = z1c(2:nlev+1)-z1c(1:nlev) + + + h = 1/zhdp + zarg = Qcol * h + rhs = 0 + lower_diag = 0 + diag = 0 + upper_diag = 0 + + rhs(1)=3*zarg(1) + rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1)) + rhs(nlev+1)=3*zarg(nlev) + + lower_diag(1)=1 + lower_diag(2:nlev) = h(1:nlev-1) + lower_diag(nlev+1)=1 + + diag(1)=2 + diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1)) + diag(nlev+1)=2 + + upper_diag(1)=1 + upper_diag(2:nlev) = h(2:nlev) + upper_diag(nlev+1)=0 + + q_diag(1)=-upper_diag(1)/diag(1) + rhs(1)= rhs(1)/diag(1) + + do k=2,nlev+1 + tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1)) + q_diag(k) = -upper_diag(k)*tmp_cal + rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal + enddo + do k=nlev,1,-1 + rhs(k)=rhs(k)+q_diag(k)*rhs(k+1) + enddo + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! monotonicity modifications !! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + filter_code = 0 + dy(1:nlev-1) = zarg(2:nlev)-zarg(1:nlev-1) + dy(nlev) = dy(nlev-1) + + dy = merge(zero, dy, abs(dy) < tiny ) + + do k=1,nlev + im1=MAX(1,k-1) + im2=MAX(1,k-2) + im3=MAX(1,k-3) + ip1=MIN(nlev,k+1) + t1 = merge(1,0,(zarg(k)-rhs(k))*(rhs(k)-zarg(im1)) >= 0) + t2 = merge(1,0,dy(im2)*(rhs(k)-zarg(im1)) > 0 .AND. dy(im2)*dy(im3) > 0 & + .AND. dy(k)*dy(ip1) > 0 .AND. dy(im2)*dy(k) < 0 ) + t3 = merge(1,0,ABS(rhs(k)-zarg(im1)) > ABS(rhs(k)-zarg(k))) + + filter_code(k) = merge(0,1,t1+t2 > 0) + rhs(k) = (1-filter_code(k))*rhs(k)+filter_code(k)*(t3*zarg(k)+(1-t3)*zarg(im1)) + filter_code(im1) = MAX(filter_code(im1),filter_code(k)) + enddo + + rhs = merge(qmax,rhs,rhs > qmax) + rhs = merge(zero,rhs,rhs < zero) + + za0 = rhs(1:nlev) + za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg + za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg + + dy(1:nlev) = rhs(2:nlev+1)-rhs(1:nlev) + dy = merge(zero, dy, abs(dy) < tiny ) + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! Compute the 3 quadratic spline coeffients {za0, za1, za2} !! + !! knowing the quadratic spline parameters {rho_left,rho_right,zarg} !! + !! Zerroukat et.al., Q.J.R. Meteorol. Soc., Vol. 128, pp. 2801-2820 (2002). !! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + + h = rhs(2:nlev+1) + + do k=1,nlev + xm_d = merge(one,2*za2(k),abs(za2(k)) < tiny) + xm = merge(zero,-za1(k)/xm_d, abs(za2(k)) < tiny) + f_xm = za0(k) + za1(k)*xm + za2(k)*xm**2 + + t1 = merge(1,0,ABS(za2(k)) > tiny) + t2 = merge(1,0,xm <= zero .OR. xm >= 1) + t3 = merge(1,0,za2(k) > zero) + t4 = merge(1,0,za2(k) < zero) + tm = merge(1,0,t1*((1-t2)+t3) .EQ. 2) + tp = merge(1,0,t1*((1-t2)+(1-t3)+t4) .EQ. 3) + + peaks=0 + peaks = merge(-1,peaks,tm .EQ. 1) + peaks = merge(+1,peaks,tp .EQ. 1) + peaks_min = merge(f_xm,MIN(za0(k),za0(k)+za1(k)+za2(k)),tm .EQ. 1) + peaks_max = merge(f_xm,MAX(za0(k),za0(k)+za1(k)+za2(k)),tp .EQ. 
1) + + im1=MAX(1,k-1) + im2=MAX(1,k-2) + ip1=MIN(nlev,k+1) + ip2=MIN(nlev,k+2) + + t1 = merge(abs(peaks),0,(dy(im2)*dy(im1) <= tiny) .OR. & + (dy(ip1)*dy(ip2) <= tiny) .OR. (dy(im1)*dy(ip1) >= tiny) .OR. & + (dy(im1)*float(peaks) <= tiny)) + + filter_code(k) = merge(1,t1+(1-t1)*filter_code(k),(rhs(k) >= qmax) .OR. & + (rhs(k) <= zero) .OR. (peaks_max > qmax) .OR. (peaks_min < tiny)) + + if (filter_code(k) > 0) then + level1 = rhs(k) + level2 = (2*rhs(k)+h(k))/3 + level3 = 0.5*(rhs(k)+h(k)) + level4 = (1/3d0)*rhs(k)+2*(1/3d0)*h(k) + level5 = h(k) + + t1 = merge(1,0,h(k) >= rhs(k)) + t2 = merge(1,0,zarg(k) <= level1 .OR. zarg(k) >= level5) + t3 = merge(1,0,zarg(k) > level1 .AND. zarg(k) < level2) + t4 = merge(1,0,zarg(k) > level4 .AND. zarg(k) < level5) + + lt1 = t1*t2 + lt2 = t1*(1-t2+t3) + lt3 = t1*(1-t2+1-t3+t4) + + za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1) + za1(k) = merge(zero,za1(k),lt1 .EQ. 1) + za2(k) = merge(zero,za2(k),lt1 .EQ. 1) + + za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2) + za1(k) = merge(zero,za1(k),lt2 .EQ. 2) + za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2) + + za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3) + za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3) + za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3) + + t2 = merge(1,0,zarg(k) >= level1 .OR. zarg(k) <= level5) + t3 = merge(1,0,zarg(k) < level1 .AND. zarg(k) > level2) + t4 = merge(1,0,zarg(k) < level4 .AND. zarg(k) > level5) + + lt1 = (1-t1)*t2 + lt2 = (1-t1)*(1-t2+t3) + lt3 = (1-t1)*(1-t2+1-t3+t4) + + za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1) + za1(k) = merge(zero,za1(k),lt1 .EQ. 1) + za2(k) = merge(zero,za2(k),lt1 .EQ. 1) + + za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2) + za1(k) = merge(zero,za1(k),lt2 .EQ. 2) + za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2) + + za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3) + za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3) + za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 
3) + endif + enddo + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! start iteration from top to bottom of atmosphere !! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + zv1 = 0 + do k=1,nlev + if (zgam(k+1)>1d0) then + WRITE(*,*) 'r not in [0:1]', zgam(k+1) + abort=.true. + endif + zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ & + (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1)) + Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio + zv1 = zv2 + enddo + enddo + enddo + enddo ! q loop + if (abort) call abortmp('Bad levels in remap1. usually CFL violatioin') + call t_stopf('remap_Q_noppm') + +end subroutine remap1 + +subroutine remap1_nofilter(Qdp,nx,qsize,dp1,dp2) + ! remap 1 field + ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) + ! dp1 layer thickness (source) + ! dp2 layer thickness (target) + ! + ! output: remaped Qdp, conserving mass + ! + implicit none + integer, intent(in) :: nx,qsize + real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) + real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) + ! ======================== + ! Local Variables + ! ======================== + + real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv + real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1 + real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, & + peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, & + zero = 0,one = 1,tiny = 1e-12,qmax = 1d50 + integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, & + lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q + logical :: abort=.false. +! 
call t_startf('remap1_nofilter') + +#if (defined COLUMN_OPENMP) +!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) & +!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) & +!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) & +!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2) +#endif + do q=1,qsize + do i=1,nx + do j=1,nx + + z1c(1)=0 ! source grid + z2c(1)=0 ! target grid + do k=1,nlev + z1c(k+1)=z1c(k)+dp1(i,j,k) + z2c(k+1)=z2c(k)+dp2(i,j,k) + enddo + + zv(1)=0 + do k=1,nlev + Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass + zv(k+1) = zv(k)+Qcol(k) + enddo + + if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then + write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME' + write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN ' + write(6,*) 'DATA FOR MODEL LEVELS' + write(6,*) 'PLEVMODEL=',z2c(nlev+1) + write(6,*) 'PLEV =',z1c(nlev+1) + write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1) + abort=.true. + endif + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! quadratic splies with UK met office monotonicity constraints !! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + zkr = 99 + ilev = 2 + zkr(1) = 1 + zkr(nlev+1) = nlev + kloop: do k = 2,nlev + do jk = ilev,nlev+1 + if (z1c(jk).ge.z2c(k)) then + ilev = jk + zkr(k) = jk-1 + cycle kloop + endif + enddo + enddo kloop + + zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr)) + zgam(1) = 0.0 + zgam(nlev+1) = 1.0 + zhdp = z1c(2:nlev+1)-z1c(1:nlev) + + + h = 1/zhdp + zarg = Qcol * h + rhs = 0 + lower_diag = 0 + diag = 0 + upper_diag = 0 + + rhs(1)=3*zarg(1) + rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1)) + rhs(nlev+1)=3*zarg(nlev) + + lower_diag(1)=1 + lower_diag(2:nlev) = h(1:nlev-1) + lower_diag(nlev+1)=1 + + diag(1)=2 + diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1)) + diag(nlev+1)=2 + + upper_diag(1)=1 + upper_diag(2:nlev) = h(2:nlev) + upper_diag(nlev+1)=0 + + q_diag(1)=-upper_diag(1)/diag(1) + rhs(1)= rhs(1)/diag(1) + + do k=2,nlev+1 + tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1)) + q_diag(k) = -upper_diag(k)*tmp_cal + rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal + enddo + do k=nlev,1,-1 + rhs(k)=rhs(k)+q_diag(k)*rhs(k+1) + enddo + + za0 = rhs(1:nlev) + za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg + za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg + + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! start iteration from top to bottom of atmosphere !! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + zv1 = 0 + do k=1,nlev + if (zgam(k+1)>1d0) then + WRITE(*,*) 'r not in [0:1]', zgam(k+1) + abort=.true. + endif + zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ & + (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1)) + Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio + zv1 = zv2 + enddo + enddo + enddo + enddo ! q loop + if (abort) call abortmp('Bad levels in remap1_nofilter. usually CFL violatioin') +! 
call t_stopf('remap1_nofilter') +end subroutine remap1_nofilter + +!=======================================================================================================! + + +!This uses the exact same model and reference grids and data as remap_Q, but it interpolates +!using PPM instead of splines. +subroutine remap_Q_ppm(Qdp,nx,qsize,dp1,dp2) + ! remap 1 field + ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) + ! dp1 layer thickness (source) + ! dp2 layer thickness (target) + ! + ! output: remaped Qdp, conserving mass + ! + use control_mod, only : vert_remap_q_alg + implicit none + integer,intent(in) :: nx,qsize + real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) + real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) + ! Local Variables + integer, parameter :: gs = 2 !Number of cells to place in the ghost region + real(kind=real_kind), dimension( nlev+2 ) :: pio !Pressure at interfaces for old grid + real(kind=real_kind), dimension( nlev+1 ) :: pin !Pressure at interfaces for new grid + real(kind=real_kind), dimension( nlev+1 ) :: masso !Accumulate mass up to each interface + real(kind=real_kind), dimension( 1-gs:nlev+gs) :: ao !Tracer value on old grid + real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpo !change in pressure over a cell for old grid + real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpn !change in pressure over a cell for old grid + real(kind=real_kind), dimension(3, nlev ) :: coefs !PPM coefficients within each cell + real(kind=real_kind), dimension( nlev ) :: z1, z2 + real(kind=real_kind) :: ppmdx(10,0:nlev+1) !grid spacings + real(kind=real_kind) :: mymass, massn1, massn2 + integer :: i, j, k, q, kk, kid(nlev) + + call t_startf('remap_Q_ppm') + do j = 1 , nx + do i = 1 , nx + + pin(1)=0 + pio(1)=0 + do k=1,nlev + dpn(k)=dp2(i,j,k) + dpo(k)=dp1(i,j,k) + pin(k+1)=pin(k)+dpn(k) + pio(k+1)=pio(k)+dpo(k) + enddo + + + + pio(nlev+2) = pio(nlev+1) + 1. 
!This is here to allow an entire block of k threads to run in the remapping phase. + !It makes sure there's an old interface value below the domain that is larger. + pin(nlev+1) = pio(nlev+1) !The total mass in a column does not change. + !Therefore, the pressure of that mass cannot either. + !Fill in the ghost regions with mirrored values. if vert_remap_q_alg is defined, this is of no consequence. + do k = 1 , gs + dpo(1 -k) = dpo( k) + dpo(nlev+k) = dpo(nlev+1-k) + enddo + + !Compute remapping intervals once for all tracers. Find the old grid cell index in which the + !k-th new cell interface resides. Then integrate from the bottom of that old cell to the new + !interface location. In practice, the grid never deforms past one cell, so the search can be + !simplified by this. Also, the interval of integration is usually of magnitude close to zero + !or close to dpo because of minimial deformation. + !Numerous tests confirmed that the bottom and top of the grids match to machine precision, so + !I set them equal to each other. + do k = 1 , nlev + kk = k !Keep from an order n^2 search operation by assuming the old cell index is close. + !Find the index of the old grid cell in which this new cell's bottom interface resides. + do while ( pio(kk) <= pin(k+1) ) + kk = kk + 1 + enddo + kk = kk - 1 !kk is now the cell index we're integrating over. + if (kk == nlev+1) kk = nlev !This is to keep the indices in bounds. + !Top bounds match anyway, so doesn't matter what coefficients are used + kid(k) = kk !Save for reuse + z1(k) = -0.5D0 !This remapping assumes we're starting from the left interface of an old grid cell + !In fact, we're usually integrating very little or almost all of the cell in question + z2(k) = ( pin(k+1) - ( pio(kk) + pio(kk+1) ) * 0.5 ) / dpo(kk) !PPM interpolants are normalized to an independent + !coordinate domain [-0.5,0.5]. 
+ enddo + + !This turned out a big optimization, remembering that only parts of the PPM algorithm depends on the data, namely the + !limiting. So anything that depends only on the grid is pre-computed outside the tracer loop. + ppmdx(:,:) = compute_ppm_grids( dpo ) + + !From here, we loop over tracers for only those portions which depend on tracer data, which includes PPM limiting and + !mass accumulation + do q = 1 , qsize + !Accumulate the old mass up to old grid cell interface locations to simplify integration + !during remapping. Also, divide out the grid spacing so we're working with actual tracer + !values and can conserve mass. The option for ifndef ZEROHORZ I believe is there to ensure + !tracer consistency for an initially uniform field. I copied it from the old remap routine. + masso(1) = 0. + do k = 1 , nlev + ao(k) = Qdp(i,j,k,q) + masso(k+1) = masso(k) + ao(k) !Accumulate the old mass. This will simplify the remapping + ao(k) = ao(k) / dpo(k) !Divide out the old grid spacing because we want the tracer mixing ratio, not mass. + enddo + !Fill in ghost values. Ignored if vert_remap_q_alg == 2 + do k = 1 , gs + ao(1 -k) = ao( k) + ao(nlev+k) = ao(nlev+1-k) + enddo + !Compute monotonic and conservative PPM reconstruction over every cell + coefs(:,:) = compute_ppm( ao , ppmdx ) + !Compute tracer values on the new grid by integrating from the old cell bottom to the new + !cell interface to form a new grid mass accumulation. Taking the difference between + !accumulation at successive interfaces gives the mass inside each cell. Since Qdp is + !supposed to hold the full mass this needs no normalization. + massn1 = 0. 
+ do k = 1 , nlev + kk = kid(k) + massn2 = masso(kk) + integrate_parabola( coefs(:,kk) , z1(k) , z2(k) ) * dpo(kk) + Qdp(i,j,k,q) = massn2 - massn1 + massn1 = massn2 + enddo + enddo + enddo + enddo + call t_stopf('remap_Q_ppm') +end subroutine remap_Q_ppm + + +!=======================================================================================================! + + +!THis compute grid-based coefficients from Collela & Woodward 1984. +function compute_ppm_grids( dx ) result(rslt) + use control_mod, only: vert_remap_q_alg + implicit none + real(kind=real_kind), intent(in) :: dx(-1:nlev+2) !grid spacings + real(kind=real_kind) :: rslt(10,0:nlev+1) !grid spacings + integer :: j + integer :: indB, indE + + !Calculate grid-based coefficients for stage 1 of compute_ppm + if (vert_remap_q_alg == 2) then + indB = 2 + indE = nlev-1 + else + indB = 0 + indE = nlev+1 + endif + do j = indB , indE + rslt( 1,j) = dx(j) / ( dx(j-1) + dx(j) + dx(j+1) ) + rslt( 2,j) = ( 2.*dx(j-1) + dx(j) ) / ( dx(j+1) + dx(j) ) + rslt( 3,j) = ( dx(j) + 2.*dx(j+1) ) / ( dx(j-1) + dx(j) ) + enddo + + !Caculate grid-based coefficients for stage 2 of compute_ppm + if (vert_remap_q_alg == 2) then + indB = 2 + indE = nlev-2 + else + indB = 0 + indE = nlev + endif + do j = indB , indE + rslt( 4,j) = dx(j) / ( dx(j) + dx(j+1) ) + rslt( 5,j) = 1. / sum( dx(j-1:j+2) ) + rslt( 6,j) = ( 2. * dx(j+1) * dx(j) ) / ( dx(j) + dx(j+1 ) ) + rslt( 7,j) = ( dx(j-1) + dx(j ) ) / ( 2. * dx(j ) + dx(j+1) ) + rslt( 8,j) = ( dx(j+2) + dx(j+1) ) / ( 2. * dx(j+1) + dx(j ) ) + rslt( 9,j) = dx(j ) * ( dx(j-1) + dx(j ) ) / ( 2.*dx(j ) + dx(j+1) ) + rslt(10,j) = dx(j+1) * ( dx(j+1) + dx(j+2) ) / ( dx(j ) + 2.*dx(j+1) ) + enddo +end function compute_ppm_grids + +!=======================================================================================================! 
+ + + +!This computes a limited parabolic interpolant using a net 5-cell stencil, but the stages of computation are broken up into 3 stages +function compute_ppm( a , dx ) result(coefs) + use control_mod, only: vert_remap_q_alg + implicit none + real(kind=real_kind), intent(in) :: a ( -1:nlev+2) !Cell-mean values + real(kind=real_kind), intent(in) :: dx (10, 0:nlev+1) !grid spacings + real(kind=real_kind) :: coefs(0:2, nlev ) !PPM coefficients (for parabola) + real(kind=real_kind) :: ai (0:nlev ) !fourth-order accurate, then limited interface values + real(kind=real_kind) :: dma(0:nlev+1) !An expression from Collela's '84 publication + real(kind=real_kind) :: da !Ditto + ! Hold expressions based on the grid (which are cumbersome). + real(kind=real_kind) :: dx1, dx2, dx3, dx4, dx5, dx6, dx7, dx8, dx9, dx10 + real(kind=real_kind) :: al, ar !Left and right interface values for cell-local limiting + integer :: j + integer :: indB, indE + + ! Stage 1: Compute dma for each cell, allowing a 1-cell ghost stencil below and above the domain + if (vert_remap_q_alg == 2) then + indB = 2 + indE = nlev-1 + else + indB = 0 + indE = nlev+1 + endif + do j = indB , indE + da = dx(1,j) * ( dx(2,j) * ( a(j+1) - a(j) ) + dx(3,j) * ( a(j) - a(j-1) ) ) + dma(j) = minval( (/ abs(da) , 2. * abs( a(j) - a(j-1) ) , 2. * abs( a(j+1) - a(j) ) /) ) * sign(1.D0,da) + if ( ( a(j+1) - a(j) ) * ( a(j) - a(j-1) ) <= 0. ) dma(j) = 0. + enddo + + ! Stage 2: Compute ai for each cell interface in the physical domain (dimension nlev+1) + if (vert_remap_q_alg == 2) then + indB = 2 + indE = nlev-2 + else + indB = 0 + indE = nlev + endif + do j = indB , indE + ai(j) = a(j) + dx(4,j) * ( a(j+1) - a(j) ) + dx(5,j) * ( dx(6,j) * ( dx(7,j) - dx(8,j) ) & + * ( a(j+1) - a(j) ) - dx(9,j) * dma(j+1) + dx(10,j) * dma(j) ) + enddo + + ! Stage 3: Compute limited PPM interpolant over each cell in the physical domain + ! (dimension nlev) using ai on either side and ao within the cell. 
+ if (vert_remap_q_alg == 2) then + indB = 3 + indE = nlev-2 + else + indB = 1 + indE = nlev + endif + do j = indB , indE + al = ai(j-1) + ar = ai(j ) + if ( (ar - a(j)) * (a(j) - al) <= 0. ) then + al = a(j) + ar = a(j) + endif + if ( (ar - al) * (a(j) - (al + ar)/2.) > (ar - al)**2/6. ) al = 3.*a(j) - 2. * ar + if ( (ar - al) * (a(j) - (al + ar)/2.) < -(ar - al)**2/6. ) ar = 3.*a(j) - 2. * al + !Computed these coefficients from the edge values and cell mean in Maple. Assumes normalized coordinates: xi=(x-x0)/dx + coefs(0,j) = 1.5 * a(j) - ( al + ar ) / 4. + coefs(1,j) = ar - al + coefs(2,j) = -6. * a(j) + 3. * ( al + ar ) + enddo + + !If we're not using a mirrored boundary condition, then make the two cells bordering the top and bottom + !material boundaries piecewise constant. Zeroing out the first and second moments, and setting the zeroth + !moment to the cell mean is sufficient to maintain conservation. + if (vert_remap_q_alg == 2) then + coefs(0,1:2) = a(1:2) + coefs(1:2,1:2) = 0. + coefs(0,nlev-1:nlev) = a(nlev-1:nlev) + coefs(1:2,nlev-1:nlev) = 0.D0 + endif +end function compute_ppm + +!=======================================================================================================! + + +!Simple function computes the definite integral of a parabola in normalized coordinates, xi=(x-x0)/dx, +!given two bounds. Make sure this gets inlined during compilation. +function integrate_parabola( a , x1 , x2 ) result(mass) + implicit none + real(kind=real_kind), intent(in) :: a(0:2) !Coefficients of the parabola + real(kind=real_kind), intent(in) :: x1 !lower domain bound for integration + real(kind=real_kind), intent(in) :: x2 !upper domain bound for integration + real(kind=real_kind) :: mass + mass = a(0) * (x2 - x1) + a(1) * (x2 ** 2 - x1 ** 2) / 0.2D1 + a(2) * (x2 ** 3 - x1 ** 3) / 0.3D1 +end function integrate_parabola + + +!=============================================================================================! 
+ + + +end module vertremap_mod_base + + + + diff --git a/components/homme/src/share/viscosity_mod.F90 b/components/homme/src/share/viscosity_mod_base.F90 similarity index 99% rename from components/homme/src/share/viscosity_mod.F90 rename to components/homme/src/share/viscosity_mod_base.F90 index c005a3eff3b..c8cb824ded3 100644 --- a/components/homme/src/share/viscosity_mod.F90 +++ b/components/homme/src/share/viscosity_mod_base.F90 @@ -2,7 +2,7 @@ #include "config.h" #endif -module viscosity_mod +module viscosity_mod_base ! ! This module should be renamed "global_deriv_mod.F90" ! @@ -1032,4 +1032,4 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,nt,min_neigh,max_nei end subroutine #endif -end module +end module viscosity_mod_base diff --git a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job old mode 100644 new mode 100755 index 5dde0fbab77..7147e817eb8 --- a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job +++ b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job @@ -7,7 +7,7 @@ # set paths to source code, build directory and run directory set wdir = /proj/imn/HOMME_ACME # run directory -set HOMME = /home/imn/ACME/models/atm/homme # HOMME svn checkout +set HOMME = /home/imn/ACME/components/homme # HOMME svn checkout set input = $HOMME/test/jw_baroclinic/openacc_work # input files for test case set vdir = $HOMME/test/vcoord # vertical coordinate files set exedir = $HOMME/build/preqx diff --git a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job old mode 100644 new mode 100755 index 63846c35c5b..df7e9943c9c --- a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job +++ b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job @@ -7,7 +7,7 @@ # set paths to source code, build directory and run 
directory set wdir = /proj/imn/HOMME_ACME # run directory -set HOMME = /home/imn/ACME/models/atm/homme # HOMME svn checkout +set HOMME = /home/imn/ACME/components/homme # HOMME svn checkout set input = $HOMME/test/jw_baroclinic/openacc_work # input files for test case set vdir = $HOMME/test/vcoord # vertical coordinate files set exedir = $HOMME/build/preqx @@ -28,9 +28,9 @@ limit stacksize unlimited setenv MPSTKZ 64M setenv OMP_STACKSIZE 64M -setenv CUDA_PROFILE 1 -setenv CUDA_PROFILE_LOG "cuda_profile_%p.log" -setenv CUDA_PROFILE_CONFIG ./cuda_config +#setenv CUDA_PROFILE 1 +#setenv CUDA_PROFILE_LOG "cuda_profile_%p.log" +#setenv CUDA_PROFILE_CONFIG ./cuda_config #set i=0 #mkdir -f /tmp/mps_$i From e6e039fed5565e79e1f0c6c795bcdaff1c1fd3b6 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Wed, 24 Feb 2016 13:41:32 -0500 Subject: [PATCH 02/27] Placed hooks in ACME to work with HOMME targets New HOMME targets will create situations where only one target directory can be included in the source files at a time. Thus, a new ACME env_build variable is introduced called CAM_TARGET, which only activates when CAM_DYCORE = 'se'. This defaults to preqx, which is the default of CAM-SE already. There's also a preqx_acc option as well. I got a successful cesm_setup with the default preqx case, and I'm building with preqx now. I'll also test with preqx_acc to ensure that works before starting a PR. 
--- cime/scripts/Tools/config_definition.xml | 8 ++++++++ components/cam/bld/cam.buildnml | 3 ++- components/cam/bld/configure | 10 ++++++++-- .../compile_scripts/titan/compile.openacc | 12 +++++++----- .../titan/customFlags.cmake.openacc | 18 ------------------ .../titan/env_mach_specific.cpu | 2 +- .../titan/env_mach_specific.openacc | 2 +- components/homme/src/share/dimensions_mod.F90 | 4 ++-- 8 files changed, 29 insertions(+), 30 deletions(-) diff --git a/cime/scripts/Tools/config_definition.xml b/cime/scripts/Tools/config_definition.xml index 563ba319ef1..684510f937d 100644 --- a/cime/scripts/Tools/config_definition.xml +++ b/cime/scripts/Tools/config_definition.xml @@ -1363,6 +1363,14 @@ sdesc="CAM dynamical core" > + + \$opts{'rain_evap_to_coarse_aero'}, "defaults=s" => \$opts{'defaults'}, "dyn=s" => \$opts{'dyn'}, + "dyn_target=s" => \$opts{'dyn_target'}, "edit_chem_mech" => \$opts{'edit_chem_mech'}, "waccm_phys" => \$opts{'waccm_phys'}, "offline_dyn" => \$opts{'offline_dyn'}, @@ -2763,8 +2764,13 @@ sub write_filepath print $fh "$camsrcdir/cam/src/dynamics/$dyn\n"; if($dyn eq 'se') { - print $fh "$camsrcdir/homme/src/share\n"; - print $fh "$camsrcdir/homme/src/share/openacc\n"; + print $fh "$camsrcdir/homme/src/share\n"; + if ($opts{'dyn_target'} eq 'preqx') { + print $fh "$camsrcdir/homme/src/preqx\n"; + } + elsif($opts{'dyn_target'} eq 'preqx_acc') { + print $fh "$camsrcdir/homme/src/preqx_acc\n"; + } } # Parallelization utilies diff --git a/components/homme/compile_scripts/titan/compile.openacc b/components/homme/compile_scripts/titan/compile.openacc index 2e09b6529e5..ac2d06f16e0 100755 --- a/components/homme/compile_scripts/titan/compile.openacc +++ b/components/homme/compile_scripts/titan/compile.openacc @@ -1,7 +1,7 @@ #!/bin/bash -do_cmake=0 -do_clean=0 +do_cmake=1 +do_clean=1 do_make=1 HOMME_ROOT=/ccs/home/$USER/ACME/components/homme @@ -14,7 +14,6 @@ mkdir -p titan-openacc || exit -1 cd titan-openacc if [ $do_cmake -eq 1 ]; then -sed -i "s/ 
integer, parameter :: qsize_d=.*/ integer, parameter :: qsize_d=$NTRACERS/g" $HOMME_ROOT/src/share/dimensions_mod.F90 || exit -1 rm -rf CMakeFiles CMakeCache.txt cmake \ -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ @@ -27,10 +26,13 @@ cmake -DWITH_PNETCDF=FALSE \ -DHDF5_DIR=$HDF5_DIR \ -DPREQX_NP=4 \ + -DPREQX_QSIZE_D=$NTRACERS \ -DPREQX_PLEV=$NLEVELS \ -DBUILD_HOMME_SWDGX=FALSE \ -DBUILD_HOMME_SWEQX=FALSE \ -DBUILD_HOMME_PRIMDGX=FALSE \ + -DBUILD_HOMME_PREQX_ACC=TRUE \ + -DBUILD_HOMME_PREQX=FALSE \ -DENABLE_OPENMP=TRUE \ -DHOMME_PROJID=STF006 \ -DENABLE_OPENACC=TRUE \ @@ -49,7 +51,7 @@ make clean || exit -1 fi if [ $do_make -eq 1 ]; then -make -j16 preqx || exit -1 +make -j16 preqx_acc || exit -1 mkdir -p $HOMME_ROOT/build/preqx -cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 +cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 fi diff --git a/components/homme/compile_scripts/titan/customFlags.cmake.openacc b/components/homme/compile_scripts/titan/customFlags.cmake.openacc index 5bb9c6a1676..9d2729e62e0 100644 --- a/components/homme/compile_scripts/titan/customFlags.cmake.openacc +++ b/components/homme/compile_scripts/titan/customFlags.cmake.openacc @@ -1,12 +1,6 @@ SET(CUSTOM_FLAG_FILES ${HOMME_SOURCE_DIR}/physics/column_model_mod.F90 ${HOMME_SOURCE_DIR}/src/share/prim_driver_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90 ) @@ -16,15 +10,3 @@ SET_SOURCE_FILES_PROPERTIES( PROPERTIES GENERATED TRUE COMPILE_FLAGS " -Mpreprocess -Kieee -Mextend -Kieee -Mflushz -O0 -Kieee -Mvect=nosse -mp -acc -Mcuda=7.0,cc35,ptxinfo -Minfo=accel -ta=tesla,pin" ) - 
-SET_SOURCE_FILES_PROPERTIES( - ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90 - PROPERTIES GENERATED TRUE COMPILE_FLAGS - " -Mpreprocess -Kieee -Mextend -Kieee -Mflushz -O2 -Kieee -Mvect=nosse -mp -acc -Mcuda=7.0,cc35,ptxinfo -Minfo=accel -ta=tesla,pin -Minline=reshape" -) - diff --git a/components/homme/compile_scripts/titan/env_mach_specific.cpu b/components/homme/compile_scripts/titan/env_mach_specific.cpu index e3d54196c33..6799e8c6966 100644 --- a/components/homme/compile_scripts/titan/env_mach_specific.cpu +++ b/components/homme/compile_scripts/titan/env_mach_specific.cpu @@ -32,7 +32,7 @@ module rm cray-parallel-netcdf module load PrgEnv-pgi # module switch pgi pgi/14.2.0 - module switch pgi pgi/15.10.lustre + module switch pgi pgi/15.10.0 module switch cray-mpich cray-mpich/7.2.2 module switch cray-libsci cray-libsci/13.0.4 module load esmf/5.2.0rp2 diff --git a/components/homme/compile_scripts/titan/env_mach_specific.openacc b/components/homme/compile_scripts/titan/env_mach_specific.openacc index 966acd15154..10adc40f5c8 100644 --- a/components/homme/compile_scripts/titan/env_mach_specific.openacc +++ b/components/homme/compile_scripts/titan/env_mach_specific.openacc @@ -32,7 +32,7 @@ module rm cray-parallel-netcdf module load PrgEnv-pgi # module switch pgi pgi/14.2.0 - module switch pgi pgi/15.9.lustre + module switch pgi pgi/15.10.0 module switch cray-mpich cray-mpich/7.2.2 module switch cray-libsci cray-libsci/13.0.4 module load esmf/5.2.0rp2 diff --git a/components/homme/src/share/dimensions_mod.F90 b/components/homme/src/share/dimensions_mod.F90 index b204379c899..61d3138e996 100644 --- 
a/components/homme/src/share/dimensions_mod.F90 +++ b/components/homme/src/share/dimensions_mod.F90 @@ -27,9 +27,9 @@ module dimensions_mod #endif #else #ifdef QSIZE_D - integer, parameter :: qsize_d=QSIZE_D + integer, parameter :: qsize_d=50 #else - integer, parameter :: qsize_d=4 + integer, parameter :: qsize_d=50 #endif integer, parameter :: ntrac_d=4 ! fvm tracers #endif From 752f07cbe7e957e337f17460baa120d35f765acb Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 25 Feb 2016 13:11:20 -0500 Subject: [PATCH 03/27] pgi 15.9.lustre working again Also removed arch_switch_mod.F90 --- .../titan/env_mach_specific.openacc | 2 +- components/homme/src/preqx/CMakeLists.txt | 1 - components/homme/src/preqx_acc/CMakeLists.txt | 1 - .../homme/src/share/arch_switch_mod.F90 | 29 ------------------- 4 files changed, 1 insertion(+), 32 deletions(-) delete mode 100644 components/homme/src/share/arch_switch_mod.F90 diff --git a/components/homme/compile_scripts/titan/env_mach_specific.openacc b/components/homme/compile_scripts/titan/env_mach_specific.openacc index 10adc40f5c8..f0374679a7f 100644 --- a/components/homme/compile_scripts/titan/env_mach_specific.openacc +++ b/components/homme/compile_scripts/titan/env_mach_specific.openacc @@ -32,7 +32,7 @@ module rm cray-parallel-netcdf module load PrgEnv-pgi # module switch pgi pgi/14.2.0 - module switch pgi pgi/15.10.0 + module switch pgi pgi/15.9.home module switch cray-mpich cray-mpich/7.2.2 module switch cray-libsci cray-libsci/13.0.4 module load esmf/5.2.0rp2 diff --git a/components/homme/src/preqx/CMakeLists.txt b/components/homme/src/preqx/CMakeLists.txt index bb59fe33d9c..8e9a4fdc063 100644 --- a/components/homme/src/preqx/CMakeLists.txt +++ b/components/homme/src/preqx/CMakeLists.txt @@ -30,7 +30,6 @@ SET(PREQX_SRCS_F90 ${SRC_DIR}/preqx/solver_init_mod.F90 ${SRC_DIR}/preqx/vertremap_mod.F90 ${SRC_DIR}/preqx/viscosity_mod.F90 - ${SRC_SHARE_DIR}/arch_switch_mod.F90 ${SRC_SHARE_DIR}/kinds.F90 
${SRC_SHARE_DIR}/dimensions_mod.F90 ${SRC_SHARE_DIR}/control_mod.F90 diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt index ca97ca51e1b..39042e51430 100644 --- a/components/homme/src/preqx_acc/CMakeLists.txt +++ b/components/homme/src/preqx_acc/CMakeLists.txt @@ -31,7 +31,6 @@ SET(PREQX_SRCS_F90 ${SRC_DIR}/preqx_acc/solver_init_mod.F90 ${SRC_DIR}/preqx_acc/vertremap_mod.F90 ${SRC_DIR}/preqx_acc/viscosity_mod.F90 - ${SRC_SHARE_DIR}/arch_switch_mod.F90 ${SRC_SHARE_DIR}/kinds.F90 ${SRC_SHARE_DIR}/dimensions_mod.F90 ${SRC_SHARE_DIR}/control_mod.F90 diff --git a/components/homme/src/share/arch_switch_mod.F90 b/components/homme/src/share/arch_switch_mod.F90 deleted file mode 100644 index 8720652254f..00000000000 --- a/components/homme/src/share/arch_switch_mod.F90 +++ /dev/null @@ -1,29 +0,0 @@ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -module arch_switch_mod -#if USE_OPENACC - use prim_advection_openacc_mod, only: prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap - use openacc_utils_mod, only: arch_init2 -#else - use prim_advection_mod, only: prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap -#endif - implicit none - -contains - -#if (! 
USE_OPENACC) - subroutine arch_init2( elem , deriv ) - use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj - use derivative_mod, only: derivative_t - implicit none - type(element_t) , intent(in) :: elem(:) - type(derivative_t), intent(in) :: deriv - !CPU case, nothing to do - end subroutine arch_init2 -#endif - -end module arch_switch_mod - From febd664e96f3d3b58cf5e3c62ffb6b3231ac7b6c Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Fri, 26 Feb 2016 12:39:29 -0500 Subject: [PATCH 04/27] Convenient way to run OpenACC from create_newcase I put a new option flag in create_newcase called "-camse_target", which takes one option that specifies the HOMME cmake target to use. It defaults to 'preqx' if the flag isn't used. This option does two things in the create_newcase script: (1) It sets CAM_TARGET in env_build.xml (2) It sets the default compiler to "pgi_acc" only if -mach == titan. Otherwise it does not change the default compiler. Also, this is done in such a way that if -compiler is specified, then it will override pgi_acc.
So, to run OpenACC code on Titan, one simply needs to say: ./create_newcase -camse_target preqx_acc The current valid values for camse_target are 'preqx' and 'preqx_acc' --- cime/scripts/create_newcase | 73 +++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/cime/scripts/create_newcase b/cime/scripts/create_newcase index 396b4ac8761..b7d49cde766 100755 --- a/cime/scripts/create_newcase +++ b/cime/scripts/create_newcase @@ -96,6 +96,9 @@ OPTIONS or read from ~/.cesm_proj or ~/.ccsm_proj -compiler Specify a compiler for the target machine (optional) default: default compiler for the target machine + -camse_target Specify a target for the cam-se dycore (ignored for other atmospheric dycores) + default: preqx + options: preqx, preqx_acc -mpilib Specify a mpi library for the target machine (optional) default: default mpi library for the target machine allowed: openmpi, mpich, ibm, mpi-serial, etc @@ -166,6 +169,7 @@ GetOptions( "confopts=s" => \$opts{'confopts'}, "project=s" => \$opts{'project'}, "compiler=s" => \$opts{'compiler'}, + "camse_target=s" => \$opts{'camse_target'}, "mpilib=s" => \$opts{'mpilib'}, "res=s" => \$opts{'res'}, "h|help" => \$opts{'help'}, @@ -211,6 +215,7 @@ my $pecount; my $xmlmode; my $project; my $compiler; +my $camse_target; my $mpilib; if (!$opts{'list'} && !$opts{'testlist'}) { # Check for manditory case input @@ -260,6 +265,13 @@ if (!$opts{'list'} && !$opts{'testlist'}) { $compiler = $opts{'compiler'}; } + # Check if the camse_target option is given; default to preqx otherwise + if ($opts{'camse_target'}) { + $camse_target = $opts{'camse_target'}; + } else { + $camse_target = "preqx"; + } + # Check if machine mpilib option is given $mpilib = 'unset'; if ($opts{'mpilib'}) { @@ -502,36 +514,51 @@ if ($mach =~ /(.*)_(.*)/){ } $cfg_ref->set('CCSM_MACHDIR', "$machdir"); +# Handle the camse_target option +# Do this before handling the compiler so that default compiler can be overwritten +if ($camse_target eq "preqx") { +
$cfg_ref->set('CAM_TARGET', "preqx"); +} elsif ($camse_target eq "preqx_acc") { + $cfg_ref->set('CAM_TARGET', "preqx_acc"); +} else { + die "ERROR: option -camse_target must be preqx (default) or preqx_acc" +} + # Check that compiler request for target machine matches a supported value # Or set default compiler - if not provided compiler request my $compilers; if ($mach =~ /userdefined/){ - $cfg_ref->set('COMPILER', "USERDEFINED_required_build"); + $cfg_ref->set('COMPILER', "USERDEFINED_required_build"); } else { - $compilers = $cfg_ref->get('COMPILERS'); - my @compilers = split ",", $compilers, -1; - if ($compiler) { - if (! ($mach =~ "generic")){ - my $found = 0; - foreach my $comp (@compilers) { - if ($compiler eq $comp) { - $found = 1; - } - } - if (!$found) { - my $sysmod = "rm -rf $caseroot"; - system($sysmod) == 0 or die "ERROR: $sysmod failed: $?\n"; - die "ERROR: compiler setting of $compiler does not match supported values of $compilers \n"; - } - } - $cfg_ref->set('COMPILER', "$compiler"); - if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; } - } else { - $compiler = $compilers[0]; - $cfg_ref->set('COMPILER', "$compiler"); - if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; } + $compilers = $cfg_ref->get('COMPILERS'); + my @compilers = split ",", $compilers, -1; + if ($compiler) { + if (! 
($mach =~ "generic")){ + my $found = 0; + foreach my $comp (@compilers) { + if ($compiler eq $comp) { + $found = 1; + } + } + if (!$found) { + my $sysmod = "rm -rf $caseroot"; + system($sysmod) == 0 or die "ERROR: $sysmod failed: $?\n"; + die "ERROR: compiler setting of $compiler does not match supported values of $compilers \n"; + } } + $cfg_ref->set('COMPILER', "$compiler"); + if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; } + } else { + $compiler = $compilers[0]; + if ($camse_target eq "preqx_acc") { + if ($mach eq "titan") { + $compiler = "pgi_acc"; + } + } + $cfg_ref->set('COMPILER', "$compiler"); + if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; } + } } if ($print>=2) { print "Machine specifier: $mach.$eol"; } From 83279bc33230ddcda7598fa4714007d0983343fd Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Mon, 29 Feb 2016 09:22:59 -0500 Subject: [PATCH 05/27] Adding support for sweqx in HOMME cmake system -Added overloading modules to the src/sweqx directory -Changed CMakeLists.txt to include the _base files and new overloading files.
--- components/homme/src/sweqx/CMakeLists.txt | 14 ++++++++++---- components/homme/src/sweqx/bndry_mod.F90 | 8 ++++++++ components/homme/src/sweqx/derivative_mod.F90 | 8 ++++++++ components/homme/src/sweqx/edge_mod.F90 | 8 ++++++++ components/homme/src/sweqx/solver_init_mod.F90 | 8 ++++++++ components/homme/src/sweqx/viscosity_mod.F90 | 8 ++++++++ 6 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 components/homme/src/sweqx/bndry_mod.F90 create mode 100644 components/homme/src/sweqx/derivative_mod.F90 create mode 100644 components/homme/src/sweqx/edge_mod.F90 create mode 100644 components/homme/src/sweqx/solver_init_mod.F90 create mode 100644 components/homme/src/sweqx/viscosity_mod.F90 diff --git a/components/homme/src/sweqx/CMakeLists.txt b/components/homme/src/sweqx/CMakeLists.txt index 8409fd086be..56af9356ec7 100644 --- a/components/homme/src/sweqx/CMakeLists.txt +++ b/components/homme/src/sweqx/CMakeLists.txt @@ -13,6 +13,16 @@ SET (SWEQX_INCLUDE_DIRS ${PIO_INCLUDE_DIRS} ${UTILS_TIMING_DIR} CACHE INTERNAL "include directories for sweqx") SET(SWEQX_SRCS + ${SRC_SHARE}/bndry_mod_base.F90 + ${SRC_SHARE}/derivative_mod_base.F90 + ${SRC_SHARE}/edge_mod_base.F90 + ${SRC_SHARE}/solver_init_mod_base.F90 + ${SRC_SHARE}/viscosity_mod_base.F90 + ${SRC_BASE}/sweqx/bndry_mod.F90 + ${SRC_BASE}/sweqx/derivative_mod.F90 + ${SRC_BASE}/sweqx/edge_mod.F90 + ${SRC_BASE}/sweqx/solver_init_mod.F90 + ${SRC_BASE}/sweqx/viscosity_mod.F90 ${SRC_BASE}/advance_mod.F90 ${SRC_BASE}/checksum_mod.F90 ${SRC_BASE}/common_io_mod.F90 @@ -32,16 +42,13 @@ SET(SWEQX_SRCS ${SRC_BASE}/types_mod.F90 ${SRC_SHARE}/fvm_bsp_mod.F90 ${SRC_SHARE}/fvm_transformation_mod.F90 - ${SRC_SHARE}/bndry_mod.F90 ${SRC_SHARE}/cg_mod.F90 ${SRC_SHARE}/control_mod.F90 ${SRC_SHARE}/coordinate_systems_mod.F90 ${SRC_SHARE}/cube_mod.F90 - ${SRC_SHARE}/derivative_mod.F90 ${SRC_SHARE}/dimensions_mod.F90 ${SRC_SHARE}/dof_mod.F90 ${SRC_SHARE}/domain_mod.F90 - ${SRC_SHARE}/edge_mod.F90 
${SRC_SHARE}/element_mod.F90 ${SRC_SHARE}/filter_mod.F90 ${SRC_SHARE}/fvm_analytic_mod.F90 @@ -76,7 +83,6 @@ SET(SWEQX_SRCS ${SRC_SHARE}/spacecurve_mod.F90 ${SRC_SHARE}/thread_mod.F90 ${SRC_SHARE}/time_mod.F90 - ${SRC_SHARE}/viscosity_mod.F90 ${SRC_SHARE}/unit_tests_mod.F90 ${SRC_UTILS}/shr_file_mod.F90 ${SRC_UTILS}/shr_kind_mod.F90 diff --git a/components/homme/src/sweqx/bndry_mod.F90 b/components/homme/src/sweqx/bndry_mod.F90 new file mode 100644 index 00000000000..2ef89f27f70 --- /dev/null +++ b/components/homme/src/sweqx/bndry_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module bndry_mod + use bndry_mod_base + implicit none +end module bndry_mod diff --git a/components/homme/src/sweqx/derivative_mod.F90 b/components/homme/src/sweqx/derivative_mod.F90 new file mode 100644 index 00000000000..461fe9c8323 --- /dev/null +++ b/components/homme/src/sweqx/derivative_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module derivative_mod + use derivative_mod_base + implicit none +end module derivative_mod diff --git a/components/homme/src/sweqx/edge_mod.F90 b/components/homme/src/sweqx/edge_mod.F90 new file mode 100644 index 00000000000..c7f39fe20f6 --- /dev/null +++ b/components/homme/src/sweqx/edge_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module edge_mod + use edge_mod_base + implicit none +end module edge_mod diff --git a/components/homme/src/sweqx/solver_init_mod.F90 b/components/homme/src/sweqx/solver_init_mod.F90 new file mode 100644 index 00000000000..224928eeb25 --- /dev/null +++ b/components/homme/src/sweqx/solver_init_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module solver_init_mod + use solver_init_mod_base + implicit none +end module solver_init_mod diff --git a/components/homme/src/sweqx/viscosity_mod.F90 b/components/homme/src/sweqx/viscosity_mod.F90 new file mode 100644 index 00000000000..80e849ae94c --- /dev/null +++ 
b/components/homme/src/sweqx/viscosity_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module viscosity_mod + use viscosity_mod_base + implicit none +end module viscosity_mod From c247d2f89b09367a9a800871c367590af8123e43 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Mon, 29 Feb 2016 14:23:47 -0500 Subject: [PATCH 06/27] Enabling OpenACC code to run on the CPU --- components/homme/cmake/SetCompilerFlags.cmake | 21 +---- .../titan/compile.openacc.oncpu | 54 +++++++++++++ .../homme/compile_scripts/titan/compile.sweqx | 51 ++++++++++++ components/homme/src/preqx/CMakeLists.txt | 12 +-- components/homme/src/preqx_acc/CMakeLists.txt | 5 ++ .../homme/src/preqx_acc/openacc_utils_mod.F90 | 12 ++- .../src/preqx_acc/prim_advection_mod.F90 | 80 ++++++++++--------- .../jw_baroclinic/openacc_work/openacc.job | 8 +- 8 files changed, 167 insertions(+), 76 deletions(-) create mode 100755 components/homme/compile_scripts/titan/compile.openacc.oncpu create mode 100755 components/homme/compile_scripts/titan/compile.sweqx diff --git a/components/homme/cmake/SetCompilerFlags.cmake b/components/homme/cmake/SetCompilerFlags.cmake index b83bd3fecb7..54c2d46058d 100644 --- a/components/homme/cmake/SetCompilerFlags.cmake +++ b/components/homme/cmake/SetCompilerFlags.cmake @@ -222,26 +222,7 @@ ENDIF () ############################################################################## OPTION(ENABLE_OPENACC "Whether to build with OpenACC support" FALSE) IF (${ENABLE_OPENACC}) - IF (${CMAKE_Fortran_COMPILER_ID} STREQUAL PGI) - # Need to add -acc to the Fortran FLAGS to see if it will compile - # "call acc_init()" - SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -acc") - TRY_COMPILE(OPENACC_SUCCESS - ${CMAKE_BINARY_DIR}/tests/compilerTests/ - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compilerTests/openAccTest.f90 - OUTPUT_VARIABLE COMPILE_OUTPUT) - IF (${OPENACC_SUCCESS}) - MESSAGE(STATUS "Using OpenACC through PGI compiler") - SET(PREQX_USE_OPENACC TRUE) - ELSE() - 
MESSAGE(FATAL_ERROR "Unable to use OpenACC through the PGI compiler") - ENDIF() - ELSEIF (${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray) - SET(PREQX_USE_OPENACC TRUE) - MESSAGE(STATUS "Using OpenACC through Cray compiler") - ELSE () - MESSAGE(FATAL_ERROR "OpenACC only supported through the PGI and Cray compilers") - ENDIF() + SET(PREQX_USE_OPENACC TRUE) ENDIF () ############################################################################## diff --git a/components/homme/compile_scripts/titan/compile.openacc.oncpu b/components/homme/compile_scripts/titan/compile.openacc.oncpu new file mode 100755 index 00000000000..a6e3f9d381f --- /dev/null +++ b/components/homme/compile_scripts/titan/compile.openacc.oncpu @@ -0,0 +1,54 @@ +#!/bin/bash + +do_cmake=1 +do_clean=1 +do_make=1 + +HOMME_ROOT=/ccs/home/$USER/ACME/components/homme +NTRACERS=50 +NLEVELS=64 + +source ./env_mach_specific.cpu + +mkdir -p titan-openacc || exit -1 +cd titan-openacc + +if [ $do_cmake -eq 1 ]; then +rm -rf CMakeFiles CMakeCache.txt +cmake \ + -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ + -DCMAKE_Fortran_COMPILER=ftn \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \ + -DDEBUG_FLAGS=" " \ + -DNETCDF_DIR=$NETCDF_DIR \ + -DWITH_PNETCDF=FALSE \ + -DHDF5_DIR=$HDF5_DIR \ + -DPREQX_NP=4 \ + -DPREQX_QSIZE_D=$NTRACERS \ + -DPREQX_PLEV=$NLEVELS \ + -DBUILD_HOMME_SWDGX=FALSE \ + -DBUILD_HOMME_SWEQX=FALSE \ + -DBUILD_HOMME_PRIMDGX=FALSE \ + -DBUILD_HOMME_PREQX_ACC=TRUE \ + -DBUILD_HOMME_PREQX=FALSE \ + -DENABLE_OPENMP=TRUE \ + -DHOMME_PROJID=STF006 \ + -DENABLE_OPENACC=TRUE \ + -DENABLE_CUDA_FORTRAN=FALSE \ + -DHOMME_FIND_BLASLAPACK=TRUE \ + -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \ + -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \ + $HOMME_ROOT +fi + +if [ $do_clean -eq 1 ]; then +make clean || exit -1 +fi + +if [ $do_make -eq 1 ]; then +make -j preqx_acc || exit -1 +mkdir -p $HOMME_ROOT/build/preqx +cp 
./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 +fi diff --git a/components/homme/compile_scripts/titan/compile.sweqx b/components/homme/compile_scripts/titan/compile.sweqx new file mode 100755 index 00000000000..6cfca993f55 --- /dev/null +++ b/components/homme/compile_scripts/titan/compile.sweqx @@ -0,0 +1,51 @@ +#!/bin/bash + +do_cmake=1 +do_clean=1 +do_make=1 + +HOMME_ROOT=/ccs/home/$USER/ACME/components/homme +NLEVELS=64 + +source ./env_mach_specific.cpu + +mkdir -p titan-cpu || exit -1 +cd titan-cpu + +if [ $do_cmake -eq 1 ]; then +rm -rf CMakeFiles CMakeCache.txt +cmake \ + -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ + -DCMAKE_Fortran_COMPILER=ftn \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \ + -DDEBUG_FLAGS=" " \ + -DNETCDF_DIR=$NETCDF_DIR \ + -DWITH_PNETCDF=FALSE \ + -DHDF5_DIR=$HDF5_DIR \ + -DSWEQX_NP=4 \ + -DSWEQX_PLEV=$NLEVELS \ + -DBUILD_HOMME_SWDGX=FALSE \ + -DBUILD_HOMME_SWEQX=TRUE \ + -DBUILD_HOMME_PRIMDGX=FALSE \ + -DBUILD_HOMME_PREQX=FALSE \ + -DENABLE_OPENMP=TRUE \ + -DHOMME_PROJID=STF006 \ + -DENABLE_OPENACC=FALSE \ + -DENABLE_CUDA_FORTRAN=FALSE \ + -DHOMME_FIND_BLASLAPACK=TRUE \ + -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \ + $HOMME_ROOT +fi +# -DOPT_FLAGS="-fastsse -Mvect" \ + +if [ $do_clean -eq 1 ]; then +make clean || exit -1 +fi + +if [ $do_make -eq 1 ]; then +make -j16 sweqx || exit -1 +mkdir -p $HOMME_ROOT/build/sweqx +cp ./src/sweqx/sweqx $HOMME_ROOT/build/sweqx/sweqx.cpu || exit -1 +fi diff --git a/components/homme/src/preqx/CMakeLists.txt b/components/homme/src/preqx/CMakeLists.txt index 8e9a4fdc063..1a2324962f8 100644 --- a/components/homme/src/preqx/CMakeLists.txt +++ b/components/homme/src/preqx/CMakeLists.txt @@ -115,6 +115,7 @@ SET(PREQX_SRCS_F90 ${PHYSICS_DIR}/column_types_mod.F90 ) + SET(PREQX_SRCS_C ${UTILS_SHARE_DIR}/shr_vmath_fwrap.c ${SRC_DIR}/jrio.c @@ -155,11 +156,6 @@ MESSAGE(STATUS " PLEV = ${PREQX_PLEV}") MESSAGE(STATUS " PIO 
= ${PREQX_USE_PIO}") MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}") -IF (${PREQX_USE_CUDA_FORTRAN}) - SET(USE_CUDA_FORTRAN TRUE) - MESSAGE(STATUS " Using CUDA Fortran") -ENDIF () - IF (${PREQX_USE_OPENACC}) SET(USE_OPENACC TRUE) MESSAGE(STATUS " Using OpenACC") @@ -176,10 +172,4 @@ ENDIF () createTestExec(preqx preqx ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} ${PREQX_USE_PIO} ${PREQX_USE_ENERGY}) -IF (${BUILD_PREQX_SPELT}) - MESSAGE(STATUS " Also building preqx_spelt") - createTestExec(preqx_spelt preqx ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} - ${PREQX_USE_PIO} ${PREQX_USE_ENERGY}) - SET_TARGET_PROPERTIES(preqx_spelt PROPERTIES COMPILE_DEFINITIONS "_SPELT") -ENDIF () diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt index 39042e51430..512be357de8 100644 --- a/components/homme/src/preqx_acc/CMakeLists.txt +++ b/components/homme/src/preqx_acc/CMakeLists.txt @@ -156,6 +156,11 @@ MESSAGE(STATUS " PLEV = ${PREQX_PLEV}") MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}") MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}") +IF (${PREQX_USE_OPENACC}) + SET(USE_OPENACC TRUE) + MESSAGE(STATUS " Using OpenACC") +ENDIF () + IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP}) SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading") ENDIF () diff --git a/components/homme/src/preqx_acc/openacc_utils_mod.F90 b/components/homme/src/preqx_acc/openacc_utils_mod.F90 index 5e8542fb727..8e0acbb5f4d 100644 --- a/components/homme/src/preqx_acc/openacc_utils_mod.F90 +++ b/components/homme/src/preqx_acc/openacc_utils_mod.F90 @@ -20,12 +20,18 @@ module openacc_utils_mod contains function acc_async_test_wrap( asyncid ) result(rslt) - use openacc, only: acc_async_test +# ifdef _OPENACC + use openacc, only: acc_async_test +# endif implicit none integer, intent(in) :: asyncid logical :: rslt - rslt = .false. - rslt = acc_async_test(asyncid) +# ifdef _OPENACC + rslt = .false. + rslt = acc_async_test(asyncid) +# else + rslt = .true. 
+# endif end function acc_async_test_wrap subroutine copy_qdp_h2d( elem , tl ) diff --git a/components/homme/src/preqx_acc/prim_advection_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 index 629b76e613b..9bbf4b5831e 100644 --- a/components/homme/src/preqx_acc/prim_advection_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -63,28 +63,30 @@ subroutine copy_qdp1_h2d( elem , tl , nets , nete ) integer , intent(in) :: tl, nets , nete integer :: ie, k, j, i call t_startf('qdp1_pcie') - do ie = nets , nete - data_pack(:,:,:,ie) = state_qdp(:,:,:,1,tl,ie) - enddo - !$omp barrier - !$omp master -! do ie = 1 , nelemd -! !$acc update device(state_qdp(:,:,:,1,tl,ie)) -! enddo - !$acc update device(data_pack) async(1) - !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack) - do ie = 1 , nelemd - do k = 1 , nlev - do j = 1 , np - do i = 1 , np - state_qdp(i,j,k,1,tl,ie) = data_pack(i,j,k,ie) +# if USE_OPENACC + do ie = nets , nete + data_pack(:,:,:,ie) = state_qdp(:,:,:,1,tl,ie) + enddo + !$omp barrier + !$omp master +! do ie = 1 , nelemd +! !$acc update device(state_qdp(:,:,:,1,tl,ie)) +! 
enddo + !$acc update device(data_pack) async(1) + !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack) + do ie = 1 , nelemd + do k = 1 , nlev + do j = 1 , np + do i = 1 , np + state_qdp(i,j,k,1,tl,ie) = data_pack(i,j,k,ie) + enddo enddo enddo enddo - enddo - !$acc wait(1) - !$omp end master - !$omp barrier + !$acc wait(1) + !$omp end master + !$omp barrier +# endif call t_stopf('qdp1_pcie') end subroutine copy_qdp1_h2d @@ -96,28 +98,30 @@ subroutine copy_qdp1_d2h( elem , tl , nets , nete ) integer , intent(in) :: tl, nets , nete integer :: ie, k, j, i call t_startf('qdp1_pcie') - !$omp barrier - !$omp master - !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack) - do ie = 1 , nelemd - do k = 1 , nlev - do j = 1 , np - do i = 1 , np - data_pack(i,j,k,ie) = state_qdp(i,j,k,1,tl,ie) +# if USE_OPENACC + !$omp barrier + !$omp master + !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack) + do ie = 1 , nelemd + do k = 1 , nlev + do j = 1 , np + do i = 1 , np + data_pack(i,j,k,ie) = state_qdp(i,j,k,1,tl,ie) + enddo enddo enddo enddo - enddo - !$acc update host(data_pack) async(1) - !$acc wait(1) -! do ie = 1 , nelemd -! !$acc update host(state_qdp(:,:,:,1,tl,ie)) -! enddo - !$omp end master - !$omp barrier - do ie = nets , nete - state_qdp(:,:,:,1,tl,ie) = data_pack(:,:,:,ie) - enddo + !$acc update host(data_pack) async(1) + !$acc wait(1) +! do ie = 1 , nelemd +! !$acc update host(state_qdp(:,:,:,1,tl,ie)) +! 
enddo + !$omp end master + !$omp barrier + do ie = nets , nete + state_qdp(:,:,:,1,tl,ie) = data_pack(:,:,:,ie) + enddo +# endif call t_stopf('qdp1_pcie') end subroutine copy_qdp1_d2h diff --git a/components/homme/test/jw_baroclinic/openacc_work/openacc.job b/components/homme/test/jw_baroclinic/openacc_work/openacc.job index 8600dcee570..3193fcea868 100644 --- a/components/homme/test/jw_baroclinic/openacc_work/openacc.job +++ b/components/homme/test/jw_baroclinic/openacc_work/openacc.job @@ -1,6 +1,6 @@ #!/bin/tcsh -f #PBS -A STF006 -#PBS -l nodes=24 +#PBS -l nodes=3 #PBS -q batch #PBS -l walltime=0:45:00 #PBS -N baroclinic @@ -43,7 +43,7 @@ set exe = preqx.openacc module rm cray-netcdf-hdf5parallel module rm cray-parallel-netcdf module load PrgEnv-pgi - module switch pgi pgi/15.9.lustre + module switch pgi pgi/15.10.0 module switch cray-mpich cray-mpich/7.2.2 module switch cray-libsci cray-libsci/13.0.4 module load esmf/5.2.0rp2 @@ -71,7 +71,7 @@ setenv MPICH_CPUMASK_DISPLAY 1 #setenv CUDA_PROFILE_CONFIG ./cuda_config #setenv PGI_ACC_DEBUG 1 -setenv OMP_NUM_THREADS 4 +setenv OMP_NUM_THREADS 1 set NCPU = $PBS_NUM_NODES @ NCPU *= 16 @ NCPU /= $OMP_NUM_THREADS @@ -83,7 +83,7 @@ echo $mpirun set u_perturb = 1 set rotate = 0 -set ne = 30 # horizontal resolution 4,16,30,60,120,240 +set ne = 8 # horizontal resolution 4,16,30,60,120,240 set nlev = 64 # vertical resolution 26,30,60,64,96 set qsize = 50 # number of passive tracers set namelist = jw_baroclinic.nl # CAM-style, vertically lagrangian From 6c8f8e190ab640fd9e10ff03fd81513958db273b Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Mon, 29 Feb 2016 15:49:19 -0500 Subject: [PATCH 07/27] Adding Mark's CMake test changes Altering tests to test what we want to test for current HOMME development. 
--- .../test/jw_baroclinic/openacc_work/cpu.job | 4 +- .../homme/test/reg_test/namelists/swtc1-dg.nl | 69 ------------------ .../homme/test/reg_test/namelists/swtc2-dg.nl | 70 ------------------- .../homme/test/reg_test/namelists/swtc5-dg.nl | 69 ------------------ .../test/reg_test/run_tests/baro1a.cmake | 2 - .../test/reg_test/run_tests/baro1b.cmake | 2 - .../test/reg_test/run_tests/baro2a.cmake | 2 - .../test/reg_test/run_tests/baro2b.cmake | 2 - .../test/reg_test/run_tests/baro2c.cmake | 2 - .../test/reg_test/run_tests/baro2d-imp.cmake | 3 - .../test/reg_test/run_tests/baro2d.cmake | 2 - .../reg_test/run_tests/baroCamMoist-SL.cmake | 2 - .../reg_test/run_tests/baroCamMoist-acc.cmake | 51 ++++++++++++++ .../reg_test/run_tests/baroCamMoist.cmake | 2 - .../test/reg_test/run_tests/swimtc5.cmake | 2 - .../test/reg_test/run_tests/swtc1-dg.cmake | 19 ----- .../homme/test/reg_test/run_tests/swtc1.cmake | 3 +- .../test/reg_test/run_tests/swtc2-dg.cmake | 21 ------ .../homme/test/reg_test/run_tests/swtc2.cmake | 2 - .../test/reg_test/run_tests/swtc5-dg.cmake | 21 ------ .../test/reg_test/run_tests/swtc5-mr.cmake | 2 - .../homme/test/reg_test/run_tests/swtc5.cmake | 2 - .../homme/test/reg_test/run_tests/swtc6.cmake | 2 - .../test/reg_test/run_tests/templates.cmake | 2 - .../test/reg_test/run_tests/test-list.cmake | 7 ++ .../test_execs/baroCam-acc/CMakeLists.txt | 5 ++ 26 files changed, 66 insertions(+), 304 deletions(-) delete mode 100644 components/homme/test/reg_test/namelists/swtc1-dg.nl delete mode 100644 components/homme/test/reg_test/namelists/swtc2-dg.nl delete mode 100644 components/homme/test/reg_test/namelists/swtc5-dg.nl create mode 100644 components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake delete mode 100644 components/homme/test/reg_test/run_tests/swtc1-dg.cmake delete mode 100644 components/homme/test/reg_test/run_tests/swtc2-dg.cmake delete mode 100644 components/homme/test/reg_test/run_tests/swtc5-dg.cmake create mode 100644 
components/homme/test_execs/baroCam-acc/CMakeLists.txt diff --git a/components/homme/test/jw_baroclinic/openacc_work/cpu.job b/components/homme/test/jw_baroclinic/openacc_work/cpu.job index bebb6ea3fb5..7771f0f2628 100644 --- a/components/homme/test/jw_baroclinic/openacc_work/cpu.job +++ b/components/homme/test/jw_baroclinic/openacc_work/cpu.job @@ -1,6 +1,6 @@ #!/bin/tcsh -f #PBS -A STF006 -#PBS -l nodes=24 +#PBS -l nodes=3 #PBS -q batch #PBS -l walltime=0:45:00 #PBS -N baroclinic @@ -83,7 +83,7 @@ echo $mpirun set u_perturb = 1 set rotate = 0 -set ne = 30 # horizontal resolution 4,16,30,60,120,240 +set ne = 8 # horizontal resolution 4,16,30,60,120,240 set nlev = 64 # vertical resolution 26,30,60,64,96 set qsize = 50 # number of passive tracers set namelist = jw_baroclinic.nl # CAM-style, vertically lagrangian diff --git a/components/homme/test/reg_test/namelists/swtc1-dg.nl b/components/homme/test/reg_test/namelists/swtc1-dg.nl deleted file mode 100644 index 5bed81bde07..00000000000 --- a/components/homme/test/reg_test/namelists/swtc1-dg.nl +++ /dev/null @@ -1,69 +0,0 @@ -!=======================================================! -! 1 day = 1 * 24 * 3600 = 86400 sec ! -! nmax = ndays * 86400 / tstep = 100 -! 12 days at 120.0 stepsize: nmax= 8640 ! -!=======================================================! -&ctl_nl -NThreads = 1 -partmethod = 4 -topology = "cube" -test_case = 'swtc1' -ne = 10 -ndays = 12 -statefreq = 864 -tasknum = 0 -restartfreq = -1 -restartfile = "./restart/R000000050" -runtype = 0 -tstep = 100 -integration = "explicit" -smooth = 0.05 -nu = 0 -/ -&solver_nl -precon_method = "block_jacobi" -maxits = 100 -tol = 1.e-12 -/ -&filter_nl -transfer_type = "bv" -filter_type = "taylor" -filter_freq = 1 -filter_mu = 0.05D0 -p_bv = 12.0D0 -s_bv = .666666666666666666D0 -wght_fm = 0.10D0 -kcut_fm = 2 -/ -&analysis_nl -!=======================================================! -! currently up to 5 streams are allowed ! -! output_stream_count=1 ! -! ! -! 
timunits: 0= steps, 1=days, 2=hours ! -! output_timeunits=1,2 ! -! output_start_time=0,1176 ! -! output_end_time=-1,-1 ! -! output_frequency=1,1 ! -! output_dir ="./movies/" ! -! ! -! allowed variables: 'ps ','geop ','u ','v ', ! -! 'latp ','lonp ','latv ','lonv ', ! -! 'elem ','Time ' ! -! ! -! output_varnames1-5 ! -!=======================================================! -output_start_time = 0 -output_end_time = -1 -output_frequency = 1 -output_timeunits = 1 -output_varnames1 = 'u', 'v', 'ps', 'geop' -output_type = 'netcdf' -interp_nlon = 720 -interp_nlat = 360 -/ -&dg_nl -riemanntype= 0 -alphatype= 4 -alpha_dg = 0.0D0 -/ diff --git a/components/homme/test/reg_test/namelists/swtc2-dg.nl b/components/homme/test/reg_test/namelists/swtc2-dg.nl deleted file mode 100644 index eca5a773ca5..00000000000 --- a/components/homme/test/reg_test/namelists/swtc2-dg.nl +++ /dev/null @@ -1,70 +0,0 @@ -!=======================================================! -! 1 day = 1 * 24 * 3600 = 86400 sec ! -! nmax = ndays * 86400 / tstep ! -! 12 days at 30.0 stepsize: nmax= 34560 ! -!=======================================================! -&ctl_nl -NThreads = 1 -partmethod = 4 -topology = "cube" -test_case = "swtc2" -ne = 6 -ndays = 5 -statefreq = 2880 -tasknum = 0 -restartfreq = -1 -restartfile = "./restart/R000000050" -runtype = 0 -tstep = 30.0 -integration = "explicit" -smooth = 0.05 -nu = 7.0e5 -/ -&solver_nl -precon_method = "block_jacobi" -maxits = 100 -tol = 1.e-12 -/ -&filter_nl -transfer_type = "bv" -filter_type = "taylor" -filter_freq = 1 -filter_mu = 0.05D0 -p_bv = 12.0D0 -s_bv = .666666666666666666D0 -wght_fm = 0.10D0 -kcut_fm = 2 -/ -&analysis_nl -!=======================================================! -! currently up to 5 streams are allowed ! -! output_stream_count=1 ! -! ! -! timunits: 0= steps, 1=days, 2=hours ! -! output_timeunits=1,2 ! -! output_start_time=0,1176 ! -! output_end_time=-1,-1 ! -! output_frequency=1,1 ! -! output_dir ="./movies/" ! -! ! -! 
allowed variables: 'ps ','geop ','u ','v ', ! -! 'latp ','lonp ','latv ','lonv ', ! -! 'elem ','Time ' ! -! ! -! output_varnames1-5 ! -!=======================================================! -output_start_time = 0 -output_end_time = -1 -output_frequency = 1 -output_timeunits = 1 -output_varnames1 = 'u', 'v', 'ps', 'geop', 'zeta' -output_type = 'netcdf' -interp_nlon = 720 -interp_nlat = 360 -/ -&dg_nl -riemanntype= 0 -alphatype= 4 -alpha_dg = 0.0D0 -/ - diff --git a/components/homme/test/reg_test/namelists/swtc5-dg.nl b/components/homme/test/reg_test/namelists/swtc5-dg.nl deleted file mode 100644 index 75324e58b89..00000000000 --- a/components/homme/test/reg_test/namelists/swtc5-dg.nl +++ /dev/null @@ -1,69 +0,0 @@ -!=======================================================! -! 1 day = 1 * 24 * 3600 = 86400 sec ! -! nmax = ndays * 86400 / tstep ! -! 12 days at 30.0 stepsize: nmax= 34560 ! -!=======================================================! -&ctl_nl -NThreads = 1 -partmethod = 4 -topology = "cube" -test_case = "swtc5" -ne = 6 -ndays = 15 -statefreq = 2880 -tasknum = 0 -restartfreq = -1 -restartfile = "./restart/R000000050" -runtype = 0 -tstep = 30.0 -integration = "explicit" -smooth = 0.05 -nu = 7.0e5 -/ -&solver_nl -precon_method = "block_jacobi" -maxits = 100 -tol = 1.e-12 -/ -&filter_nl -transfer_type = "bv" -filter_type = "taylor" -filter_freq = 1 -filter_mu = 0.05D0 -p_bv = 12.0D0 -s_bv = .666666666666666666D0 -wght_fm = 0.10D0 -kcut_fm = 2 -/ -&analysis_nl -!=======================================================! -! currently up to 5 streams are allowed ! -! output_stream_count=1 ! -! ! -! timunits: 0= steps, 1=days, 2=hours ! -! output_timeunits=1,2 ! -! output_start_time=0,1176 ! -! output_end_time=-1,-1 ! -! output_frequency=1,1 ! -! output_dir ="./movies/" ! -! ! -! allowed variables: 'ps ','geop ','u ','v ', ! -! 'latp ','lonp ','latv ','lonv ', ! -! 'elem ','Time ' ! -! ! -! output_varnames1-5 ! 
-!=======================================================! -output_start_time = 0 -output_end_time = -1 -output_frequency = 1 -output_timeunits = 1 -output_varnames1 = 'u', 'v', 'ps', 'geop', 'zeta' -output_type = 'netcdf' -interp_nlon = 720 -interp_nlat = 360 -/ -&dg_nl -riemanntype= 0 -alphatype= 0 -alpha_dg = 0.0D0 -/ diff --git a/components/homme/test/reg_test/run_tests/baro1a.cmake b/components/homme/test/reg_test/run_tests/baro1a.cmake index 89b79b23c36..973a7ceab6e 100644 --- a/components/homme/test/reg_test/run_tests/baro1a.cmake +++ b/components/homme/test/reg_test/run_tests/baro1a.cmake @@ -10,8 +10,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro1a) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroA) diff --git a/components/homme/test/reg_test/run_tests/baro1b.cmake b/components/homme/test/reg_test/run_tests/baro1b.cmake index 42bf345f79a..8c6f9125397 100644 --- a/components/homme/test/reg_test/run_tests/baro1b.cmake +++ b/components/homme/test/reg_test/run_tests/baro1b.cmake @@ -10,8 +10,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro1b) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroB) diff --git a/components/homme/test/reg_test/run_tests/baro2a.cmake b/components/homme/test/reg_test/run_tests/baro2a.cmake index 0eee75a6c69..f7bfbfcce9a 100644 --- a/components/homme/test/reg_test/run_tests/baro2a.cmake +++ b/components/homme/test/reg_test/run_tests/baro2a.cmake @@ -9,8 +9,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2a) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroC) diff --git a/components/homme/test/reg_test/run_tests/baro2b.cmake b/components/homme/test/reg_test/run_tests/baro2b.cmake index cfeb4ae313b..cc88c15c1e6 100644 --- a/components/homme/test/reg_test/run_tests/baro2b.cmake +++ b/components/homme/test/reg_test/run_tests/baro2b.cmake @@ -11,8 +11,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2b) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroC) diff --git a/components/homme/test/reg_test/run_tests/baro2c.cmake b/components/homme/test/reg_test/run_tests/baro2c.cmake index 82101c0e9af..425da22414f 100644 --- a/components/homme/test/reg_test/run_tests/baro2c.cmake +++ b/components/homme/test/reg_test/run_tests/baro2c.cmake @@ -4,8 +4,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2c) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroC) diff --git a/components/homme/test/reg_test/run_tests/baro2d-imp.cmake b/components/homme/test/reg_test/run_tests/baro2d-imp.cmake index 66881f565d9..f7082997f4d 100644 --- a/components/homme/test/reg_test/run_tests/baro2d-imp.cmake +++ b/components/homme/test/reg_test/run_tests/baro2d-imp.cmake @@ -10,9 +10,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2d-imp) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE prim) - # The specifically compiled executable that this test uses SET(EXEC_NAME primA) diff --git a/components/homme/test/reg_test/run_tests/baro2d.cmake b/components/homme/test/reg_test/run_tests/baro2d.cmake index bfc76e55de7..f51a7955525 100644 --- a/components/homme/test/reg_test/run_tests/baro2d.cmake +++ b/components/homme/test/reg_test/run_tests/baro2d.cmake @@ -11,8 +11,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2d) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroC) diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake index 5a240184657..688ff6644e9 100644 --- a/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake +++ b/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake @@ -11,8 +11,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baroCamMoistSL) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroCam) diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake new file mode 100644 index 00000000000..9767cc947a8 --- /dev/null +++ b/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake @@ -0,0 +1,51 @@ +############################################################### +# RK + PIO_INTERP +############################################################### +# +# Spectral Element -- 9 days of ASP baroclinic test +# (Jablonowski and Williamson test + 4 tracers) +# NE=15, dt=150, nu=1e16, filter_freq=0, NV=4, PLEV=26 +# (explicit RK with subcycling) +# +############################################################### + +# The name of this test (should be the basename of this file) +SET(TEST_NAME baroCamMoist-acc) + +# The specifically compiled executable that this test uses +SET(EXEC_NAME baroCam-acc) + +SET(NUM_CPUS 16) + +SET(NAMELIST_FILES +${HOMME_ROOT}/test/reg_test/namelists/baroCamMoist.nl +) +SET(VCOORD_FILES ${HOMME_ROOT}/test/vcoord/*26*) +SET(REFSOLN_FILES ${HOMME_ROOT}/test/reg_test/ref_sol/T340ref.nc) + +SET(NC_OUTPUT_FILES + camBaroMoist-asp_baroclinic1.nc + camBaroMoist-asp_baroclinic2.nc +) + + +# OMP tests (disabled for now) +#SET(OMP_SUB_TESTS true) +#SET(OMP_NUM_THREADS 4) +#SET(OMP_NAMELIST_FILES +#${HOMME_ROOT}/test/reg_test/namelists/baroCamMoist-omp4.nl +#) + + +# compare output with CPU-only baroCamMoist test: +SET(TESTCASE_REF_TOL 1E-11) +SET(NC_OUTPUT_REF + ../../baroCamMoist/movies/camBaroMoist-asp_baroclinic1.nc + ../../baroCamMoist/movies/camBaroMoist-asp_baroclinic2.nc +) +SET(NC_OUTPUT_CHECKREF + camBaroMoist-asp_baroclinic1.nc + camBaroMoist-asp_baroclinic2.nc +) + + diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist.cmake index 904ee7f6696..3989967caee 100644 --- 
a/components/homme/test/reg_test/run_tests/baroCamMoist.cmake +++ b/components/homme/test/reg_test/run_tests/baroCamMoist.cmake @@ -11,8 +11,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baroCamMoist) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroCam) diff --git a/components/homme/test/reg_test/run_tests/swimtc5.cmake b/components/homme/test/reg_test/run_tests/swimtc5.cmake index a2ba5e84b9f..e5398816556 100644 --- a/components/homme/test/reg_test/run_tests/swimtc5.cmake +++ b/components/homme/test/reg_test/run_tests/swimtc5.cmake @@ -5,8 +5,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swimtc5) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE swim) # The specifically compiled executable that this test uses SET(EXEC_NAME swim5) diff --git a/components/homme/test/reg_test/run_tests/swtc1-dg.cmake b/components/homme/test/reg_test/run_tests/swtc1-dg.cmake deleted file mode 100644 index 7dc8bc37c9a..00000000000 --- a/components/homme/test/reg_test/run_tests/swtc1-dg.cmake +++ /dev/null @@ -1,19 +0,0 @@ -############################################################### -# -# Discontinuous Galerkin -- swtc1 -# NE=10, dt=100, nu=0, limiter=0, filter_freq=1, NP=6 -# -############################################################### - -# The name of this test (should be the basename of this file) -SET(TEST_NAME swtc1-dg) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE swdgx) -# The specifically compiled executable that this test uses -SET(EXEC_NAME swtc-dgA) - -SET(NUM_CPUS 16) - -SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl) - -SET(NC_OUTPUT_FILES swtc11.nc) diff --git a/components/homme/test/reg_test/run_tests/swtc1.cmake b/components/homme/test/reg_test/run_tests/swtc1.cmake index de170ab52b3..ce3ddaf3590 100644 --- a/components/homme/test/reg_test/run_tests/swtc1.cmake +++ b/components/homme/test/reg_test/run_tests/swtc1.cmake @@ -9,8 +9,7 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc1) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE sweqx) + # The specifically compiled executable that this test uses SET(EXEC_NAME swtcA) diff --git a/components/homme/test/reg_test/run_tests/swtc2-dg.cmake b/components/homme/test/reg_test/run_tests/swtc2-dg.cmake deleted file mode 100644 index dcbc5b9d6ee..00000000000 --- a/components/homme/test/reg_test/run_tests/swtc2-dg.cmake +++ /dev/null @@ -1,21 +0,0 @@ -############################################################### -# RKSSP default benchmark (used to check nothing is broken) -############################################################### -# -# Discontinous Galerkin swtc2 -# NE=6, dt=30, nu=0, filter_freq=1, NP=6 -# -############################################################### - -# The name of this test (should be the basename of this file) -SET(TEST_NAME swtc2-dg) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE swdgx) -# The specifically compiled executable that this test uses -SET(EXEC_NAME swtc-dgA) - -SET(NUM_CPUS 16) - -SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl) - -SET(NC_OUTPUT_FILES swtc21.nc) diff --git a/components/homme/test/reg_test/run_tests/swtc2.cmake b/components/homme/test/reg_test/run_tests/swtc2.cmake index eac2e7bfdd0..1659361a0b9 100644 --- a/components/homme/test/reg_test/run_tests/swtc2.cmake +++ b/components/homme/test/reg_test/run_tests/swtc2.cmake @@ -9,8 +9,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc2) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE sweqx) # The specifically compiled executable that this test uses SET(EXEC_NAME swtcA) diff --git a/components/homme/test/reg_test/run_tests/swtc5-dg.cmake b/components/homme/test/reg_test/run_tests/swtc5-dg.cmake deleted file mode 100644 index 8d482b38d7d..00000000000 --- a/components/homme/test/reg_test/run_tests/swtc5-dg.cmake +++ /dev/null @@ -1,21 +0,0 @@ -############################################################### -# -# Discontinuous Galerkin -- swtc5 -# NE=6, dt=30, nu=7e5, limiter=0, filter_freq=1, NP=6 -# -############################################################### - -# The name of this test (should be the basename of this file) -SET(TEST_NAME swtc5-dg) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE swdgx) -# The specifically compiled executable that this test uses -SET(EXEC_NAME swtc-dgA) - -SET(NUM_CPUS 16) - -SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl) -SET(NCL_FILES ${HOMME_ROOT}/test/reg_test/ncl/swtc5ref.ncl) - -SET(NC_OUTPUT_FILES swtc51.nc) - diff --git a/components/homme/test/reg_test/run_tests/swtc5-mr.cmake b/components/homme/test/reg_test/run_tests/swtc5-mr.cmake index 166bd7e3893..16155506e38 100644 --- a/components/homme/test/reg_test/run_tests/swtc5-mr.cmake +++ b/components/homme/test/reg_test/run_tests/swtc5-mr.cmake @@ -9,8 +9,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc5-mr) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE sweqx) # The specifically compiled executable that this test uses SET(EXEC_NAME swtcB) diff --git a/components/homme/test/reg_test/run_tests/swtc5.cmake b/components/homme/test/reg_test/run_tests/swtc5.cmake index 9407f6c62ec..35de2a7b160 100644 --- a/components/homme/test/reg_test/run_tests/swtc5.cmake +++ b/components/homme/test/reg_test/run_tests/swtc5.cmake @@ -9,8 +9,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc5) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE sweqx) # The specifically compiled executable that this test uses SET(EXEC_NAME swtcB) diff --git a/components/homme/test/reg_test/run_tests/swtc6.cmake b/components/homme/test/reg_test/run_tests/swtc6.cmake index 894eb9b578f..d0a8bef306e 100644 --- a/components/homme/test/reg_test/run_tests/swtc6.cmake +++ b/components/homme/test/reg_test/run_tests/swtc6.cmake @@ -7,8 +7,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc6) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE sweqx) # The specifically compiled executable that this test uses SET(EXEC_NAME swtcC) diff --git a/components/homme/test/reg_test/run_tests/templates.cmake b/components/homme/test/reg_test/run_tests/templates.cmake index 2b413d73ccb..d50f27d9de9 100644 --- a/components/homme/test/reg_test/run_tests/templates.cmake +++ b/components/homme/test/reg_test/run_tests/templates.cmake @@ -15,8 +15,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME templates) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroCam) diff --git a/components/homme/test/reg_test/run_tests/test-list.cmake b/components/homme/test/reg_test/run_tests/test-list.cmake index c66831ccebb..12fbf8a3367 100644 --- a/components/homme/test/reg_test/run_tests/test-list.cmake +++ b/components/homme/test/reg_test/run_tests/test-list.cmake @@ -10,6 +10,13 @@ SET(HOMME_TESTS baro2d.cmake baroCamMoist.cmake baroCamMoist-SL.cmake + baroCamMoist-acc.cmake baro2d-imp.cmake templates.cmake ) + + +#set_test_properties( +# baroCamMoist-acc.cmake PROPERTY DEPENDS baroCamMoist.cmake +#) + diff --git a/components/homme/test_execs/baroCam-acc/CMakeLists.txt b/components/homme/test_execs/baroCam-acc/CMakeLists.txt new file mode 100644 index 00000000000..339c886a75a --- /dev/null +++ b/components/homme/test_execs/baroCam-acc/CMakeLists.txt @@ -0,0 +1,5 @@ +# Set the qsize_d in dimensions_mod.F90 +SET(QSIZE_D 25) +# Set the variables for this test executable +# NP NC PLEV USE_PIO WITH_ENERGY +createTestExec(baroCam-acc preqx-acc 4 4 26 TRUE TRUE) From 8bf2fd033d59a145f195d1905bc2cd8c33fbf694 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Mon, 29 Feb 2016 15:54:44 -0500 Subject: [PATCH 08/27] Turning OpenACC on for preqx_acc and off for preqx --- components/homme/src/preqx/CMakeLists.txt | 5 +---- components/homme/src/preqx_acc/CMakeLists.txt | 5 +---- 2 files changed, 2 insertions(+), 8 
deletions(-) diff --git a/components/homme/src/preqx/CMakeLists.txt b/components/homme/src/preqx/CMakeLists.txt index 1a2324962f8..77d8844f80c 100644 --- a/components/homme/src/preqx/CMakeLists.txt +++ b/components/homme/src/preqx/CMakeLists.txt @@ -156,10 +156,7 @@ MESSAGE(STATUS " PLEV = ${PREQX_PLEV}") MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}") MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}") -IF (${PREQX_USE_OPENACC}) - SET(USE_OPENACC TRUE) - MESSAGE(STATUS " Using OpenACC") -ENDIF () +SET(USE_OPENACC FALSE) IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP}) SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading") diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt index 512be357de8..5f80ab184f5 100644 --- a/components/homme/src/preqx_acc/CMakeLists.txt +++ b/components/homme/src/preqx_acc/CMakeLists.txt @@ -156,10 +156,7 @@ MESSAGE(STATUS " PLEV = ${PREQX_PLEV}") MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}") MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}") -IF (${PREQX_USE_OPENACC}) - SET(USE_OPENACC TRUE) - MESSAGE(STATUS " Using OpenACC") -ENDIF () +SET(USE_OPENACC TRUE) IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP}) SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading") From fe3b49589b581a88413b6c82c0a989113c7a0938 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Wed, 2 Mar 2016 12:36:16 -0500 Subject: [PATCH 09/27] Fixing preqx_acc dependencies in HOMME cmake --- .../homme/compile_scripts/titan/compile.cpu | 17 ++++++----------- components/homme/src/preqx_acc/CMakeLists.txt | 2 +- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/components/homme/compile_scripts/titan/compile.cpu b/components/homme/compile_scripts/titan/compile.cpu index 1a21f327622..2803047f277 100755 --- a/components/homme/compile_scripts/titan/compile.cpu +++ b/components/homme/compile_scripts/titan/compile.cpu @@ -14,32 +14,27 @@ mkdir -p titan-cpu || exit -1 cd titan-cpu if 
[ $do_cmake -eq 1 ]; then -sed -i "s/ integer, parameter :: qsize_d=.*/ integer, parameter :: qsize_d=$NTRACERS/g" $HOMME_ROOT/src/share/dimensions_mod.F90 || exit -1 rm -rf CMakeFiles CMakeCache.txt cmake \ -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ -DCMAKE_Fortran_COMPILER=ftn \ -DCMAKE_C_COMPILER=cc \ -DCMAKE_CXX_COMPILER=CC \ - -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \ + -DOPT_FLAGS="-O0 -Kieee -Mvect=nosse" \ -DDEBUG_FLAGS=" " \ -DNETCDF_DIR=$NETCDF_DIR \ -DWITH_PNETCDF=FALSE \ -DHDF5_DIR=$HDF5_DIR \ -DPREQX_NP=4 \ -DPREQX_PLEV=$NLEVELS \ - -DBUILD_HOMME_SWDGX=FALSE \ - -DBUILD_HOMME_SWEQX=FALSE \ - -DBUILD_HOMME_PRIMDGX=FALSE \ + -DPREQX_QSIZE_D=$TRACERS \ + -DBUILD_HOMME_SWEQX=TRUE \ + -DBUILD_HOMME_PREQX=TRUE \ + -DBUILD_HOMME_PREQX_ACC=TRUE \ -DENABLE_OPENMP=TRUE \ -DHOMME_PROJID=STF006 \ - -DENABLE_OPENACC=FALSE \ - -DENABLE_CUDA_FORTRAN=FALSE \ -DHOMME_FIND_BLASLAPACK=TRUE \ -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \ - \ - \ - \ -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \ $HOMME_ROOT fi @@ -50,7 +45,7 @@ make clean || exit -1 fi if [ $do_make -eq 1 ]; then -make -j16 preqx || exit -1 +make -j32 || exit -1 mkdir -p $HOMME_ROOT/build/preqx cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.cpu || exit -1 fi diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt index 5f80ab184f5..84c94b211ed 100644 --- a/components/homme/src/preqx_acc/CMakeLists.txt +++ b/components/homme/src/preqx_acc/CMakeLists.txt @@ -127,7 +127,7 @@ IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) ENDIF () # Make SRCS global so the tests can access it -SET(PREQX_ACC_SRCS ${PREQX_SRCS} ${PREQX_SRCS_C} ${PREQX_SRCS_F90} CACHE INTERNAL "Sources for preqx") +SET(PREQX_ACC_SRCS ${PREQX_ACC_SRCS} ${PREQX_SRCS_C} ${PREQX_SRCS_F90} CACHE INTERNAL "Sources for preqx_acc") # Set up defaults IF (NOT PREQX_NP) From 342bf0166e953f8f6bff051210e27b917cda49f8 Mon Sep 17 00:00:00 2001 From: Matt Norman 
Date: Wed, 2 Mar 2016 13:21:49 -0500 Subject: [PATCH 10/27] CMake testing changes from Mark in target2 --- components/homme/test_execs/CMakeLists.txt | 1 + components/homme/test_execs/baroCam-acc/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/components/homme/test_execs/CMakeLists.txt b/components/homme/test_execs/CMakeLists.txt index e2e6b4abda1..5017b92ceea 100644 --- a/components/homme/test_execs/CMakeLists.txt +++ b/components/homme/test_execs/CMakeLists.txt @@ -167,6 +167,7 @@ IF(${BUILD_HOMME_PREQX}) # ADD_SUBDIRECTORY(baroB) ADD_SUBDIRECTORY(baroC) ADD_SUBDIRECTORY(baroCam) + ADD_SUBDIRECTORY(baroCam-acc) ENDIF() # Add the test exec subdirs for the prim executable diff --git a/components/homme/test_execs/baroCam-acc/CMakeLists.txt b/components/homme/test_execs/baroCam-acc/CMakeLists.txt index 339c886a75a..c6c9f487d30 100644 --- a/components/homme/test_execs/baroCam-acc/CMakeLists.txt +++ b/components/homme/test_execs/baroCam-acc/CMakeLists.txt @@ -2,4 +2,4 @@ SET(QSIZE_D 25) # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY -createTestExec(baroCam-acc preqx-acc 4 4 26 TRUE TRUE) +createTestExec(baroCam-acc preqx_acc 4 4 26 TRUE TRUE) From 47a0ec7d8d196feabe5c3c1cddcb010a1f7ee2d0 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Wed, 2 Mar 2016 14:52:11 -0500 Subject: [PATCH 11/27] changing inheriting of _base modules in HOMME It was requested that everything public in the _base modules be explicitly listed in the inheriting modules of each target. So I have changed preqx to list them all as a template for new solvers to use. I've also made the new modules private so they need to re-declare public everything in the base module and the new solver module inheriting it. 
--- components/homme/src/preqx/bndry_mod.F90 | 2 +- components/homme/src/preqx/derivative_mod.F90 | 6 ++++- components/homme/src/preqx/edge_mod.F90 | 6 ++++- .../homme/src/preqx/prim_advection_mod.F90 | 3 ++- .../homme/src/preqx/solver_init_mod.F90 | 2 +- components/homme/src/preqx/vertremap_mod.F90 | 2 +- components/homme/src/preqx/viscosity_mod.F90 | 5 +++- components/homme/src/preqx_acc/bndry_mod.F90 | 9 +++++++- .../homme/src/preqx_acc/derivative_mod.F90 | 21 +++++++++++++---- components/homme/src/preqx_acc/edge_mod.F90 | 23 +++++++++++++++---- .../src/preqx_acc/prim_advection_mod.F90 | 18 +++++++++------ .../homme/src/preqx_acc/solver_init_mod.F90 | 5 ++-- .../homme/src/preqx_acc/vertremap_mod.F90 | 5 +++- .../homme/src/preqx_acc/viscosity_mod.F90 | 16 ++++++++++--- components/homme/src/sweqx/bndry_mod.F90 | 2 +- components/homme/src/sweqx/derivative_mod.F90 | 6 ++++- components/homme/src/sweqx/edge_mod.F90 | 6 ++++- .../homme/src/sweqx/solver_init_mod.F90 | 2 +- components/homme/src/sweqx/viscosity_mod.F90 | 5 +++- 19 files changed, 109 insertions(+), 35 deletions(-) diff --git a/components/homme/src/preqx/bndry_mod.F90 b/components/homme/src/preqx/bndry_mod.F90 index 2ef89f27f70..5ad1125cbef 100644 --- a/components/homme/src/preqx/bndry_mod.F90 +++ b/components/homme/src/preqx/bndry_mod.F90 @@ -3,6 +3,6 @@ #endif module bndry_mod - use bndry_mod_base + use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping implicit none end module bndry_mod diff --git a/components/homme/src/preqx/derivative_mod.F90 b/components/homme/src/preqx/derivative_mod.F90 index 461fe9c8323..e9c3f81be3f 100644 --- a/components/homme/src/preqx/derivative_mod.F90 +++ b/components/homme/src/preqx/derivative_mod.F90 @@ -3,6 +3,10 @@ #endif module derivative_mod - use derivative_mod_base + use derivative_mod_base, only: derivative_t, 
derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere implicit none end module derivative_mod diff --git a/components/homme/src/preqx/edge_mod.F90 b/components/homme/src/preqx/edge_mod.F90 index c7f39fe20f6..902503b36cb 100644 --- a/components/homme/src/preqx/edge_mod.F90 +++ b/components/homme/src/preqx/edge_mod.F90 @@ -3,6 +3,10 @@ #endif module edge_mod - use edge_mod_base + use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax implicit none end module edge_mod diff --git a/components/homme/src/preqx/prim_advection_mod.F90 b/components/homme/src/preqx/prim_advection_mod.F90 index 948ec0ad3fc..a3e235c41aa 100644 --- a/components/homme/src/preqx/prim_advection_mod.F90 +++ 
b/components/homme/src/preqx/prim_advection_mod.F90 @@ -3,6 +3,7 @@ #endif module prim_advection_mod - use prim_advection_mod_base + use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap, Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, & + prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 implicit none end module prim_advection_mod diff --git a/components/homme/src/preqx/solver_init_mod.F90 b/components/homme/src/preqx/solver_init_mod.F90 index 224928eeb25..90efb25eadf 100644 --- a/components/homme/src/preqx/solver_init_mod.F90 +++ b/components/homme/src/preqx/solver_init_mod.F90 @@ -3,6 +3,6 @@ #endif module solver_init_mod - use solver_init_mod_base + use solver_init_mod_base, only: solver_init2 implicit none end module solver_init_mod diff --git a/components/homme/src/preqx/vertremap_mod.F90 b/components/homme/src/preqx/vertremap_mod.F90 index b9e537a6dcf..63617bf66e6 100644 --- a/components/homme/src/preqx/vertremap_mod.F90 +++ b/components/homme/src/preqx/vertremap_mod.F90 @@ -3,6 +3,6 @@ #endif module vertremap_mod - use vertremap_mod_base + use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm implicit none end module vertremap_mod diff --git a/components/homme/src/preqx/viscosity_mod.F90 b/components/homme/src/preqx/viscosity_mod.F90 index 80e849ae94c..f78ae69a8f3 100644 --- a/components/homme/src/preqx/viscosity_mod.F90 +++ b/components/homme/src/preqx/viscosity_mod.F90 @@ -3,6 +3,9 @@ #endif module viscosity_mod - use viscosity_mod_base + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif implicit none end module viscosity_mod diff --git a/components/homme/src/preqx_acc/bndry_mod.F90 
b/components/homme/src/preqx_acc/bndry_mod.F90 index b0d86e73ec2..d4a83d342e1 100644 --- a/components/homme/src/preqx_acc/bndry_mod.F90 +++ b/components/homme/src/preqx_acc/bndry_mod.F90 @@ -4,12 +4,13 @@ #endif module bndry_mod - use bndry_mod_base + use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping use parallel_mod, only : syncmp,parallel_t,abortmp,iam use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads use kinds, only: real_kind implicit none + private integer, parameter, private :: maxCycles = 20 integer, parameter, private :: maxChunks = 64 real(kind=real_kind), parameter, private :: chunk_denom = 1.e5 @@ -29,6 +30,12 @@ module bndry_mod type(send_stager_t), private :: stg_send(maxCycles) type(recv_stager_t), private :: stg_recv(maxCycles) + public :: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping + public :: bndry_exchangeS_simple_overlap + public :: bndry_exchangeV_timing + public :: bndry_exchangeV_simple_overlap + public :: bndry_exchangeV_finer_overlap + contains subroutine bndry_exchangeS_simple_overlap(hybrid,buffer) diff --git a/components/homme/src/preqx_acc/derivative_mod.F90 b/components/homme/src/preqx_acc/derivative_mod.F90 index 3833b48bfc8..97dea44734d 100644 --- a/components/homme/src/preqx_acc/derivative_mod.F90 +++ b/components/homme/src/preqx_acc/derivative_mod.F90 @@ -4,11 +4,11 @@ #endif module derivative_mod - use derivative_mod_base - use derivative_mod_base, only: laplace_sphere_wk_base => laplace_sphere_wk, & - divergence_sphere_wk_base => divergence_sphere_wk, & - gradient_sphere_base => gradient_sphere, & - divergence_sphere_base => 
divergence_sphere + use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere use kinds, only : real_kind, longdouble_kind use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi @@ -18,6 +18,17 @@ module derivative_mod use element_mod, only : element_t use control_mod, only : hypervis_scaling, hypervis_power implicit none + private + + public :: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + public :: laplace_sphere_wk_openacc + public :: divergence_sphere_wk_openacc + public :: gradient_sphere_openacc + public :: divergence_sphere_openacc contains diff --git 
a/components/homme/src/preqx_acc/edge_mod.F90 b/components/homme/src/preqx_acc/edge_mod.F90 index 43d054b0ae7..dfb42939114 100644 --- a/components/homme/src/preqx_acc/edge_mod.F90 +++ b/components/homme/src/preqx_acc/edge_mod.F90 @@ -4,10 +4,11 @@ #endif module edge_mod - use edge_mod_base - use edge_mod_base, only: edgeSpack_base => edgeSpack, & - edgeSunpackMin_base => edgeSunpackMin, & - edgeSunpackMax_base => edgeSunpackMax + use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax use kinds, only : int_kind, log_kind, real_kind use dimensions_mod, only : max_neigh_edges, nelemd, np use perf_mod, only: t_startf, t_stopf, t_adj_detailf ! 
_EXTERNAL @@ -20,6 +21,20 @@ module edge_mod Longedgebuffer_t, Ghostbuffertr_t, Ghostbuffer3d_t, initedgebuffer_callid use element_mod, only : element_t implicit none + private + + public :: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + public :: edgeSpack_openacc + public :: edgeSunpackMin_openacc + public :: edgeSunpackMax_openacc + public :: edgeVpack_openacc + public :: edgeVunpack_openacc + public :: edgeVunpackMin_openacc + public :: edgeVunpackMax_openacc contains diff --git a/components/homme/src/preqx_acc/prim_advection_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 index 9bbf4b5831e..d645ee16502 100644 --- a/components/homme/src/preqx_acc/prim_advection_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -7,13 +7,8 @@ #endif module prim_advection_mod - use prim_advection_mod_base - use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_base => Prim_Advec_Tracers_remap, & - prim_advec_init1_base => prim_advec_init1, & - prim_advec_init2_base => prim_advec_init2, & - prim_advec_init_deriv_base => prim_advec_init_deriv, & - deriv_base => deriv, & - Prim_Advec_Tracers_remap_rk2_base => Prim_Advec_Tracers_remap_rk2 + !OVERWRITING: Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 + use prim_advection_mod_base, only: 
Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap use kinds, only : real_kind use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp @@ -38,6 +33,7 @@ module prim_advection_mod use parallel_mod, only : abortmp use derivative_mod, only: derivative_t implicit none + private type (derivative_t), allocatable :: deriv(:) ! derivative struct (nthreads) real(kind=real_kind), private, allocatable :: qmin(:,:,:), qmax(:,:,:) real(kind=real_kind), private, allocatable :: dp0(:) @@ -53,6 +49,14 @@ module prim_advection_mod real(kind=real_kind), allocatable, private :: data_pack(:,:,:,:), data_pack2(:,:,:,:) logical, private :: first_time = .true. + public :: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap + public :: Prim_Advec_Tracers_remap + public :: prim_advec_init1 + public :: prim_advec_init2 + public :: prim_advec_init_deriv + public :: deriv + public :: Prim_Advec_Tracers_remap_rk2 + contains subroutine copy_qdp1_h2d( elem , tl , nets , nete ) diff --git a/components/homme/src/preqx_acc/solver_init_mod.F90 b/components/homme/src/preqx_acc/solver_init_mod.F90 index 67ad782bacf..0b2027af701 100644 --- a/components/homme/src/preqx_acc/solver_init_mod.F90 +++ b/components/homme/src/preqx_acc/solver_init_mod.F90 @@ -3,10 +3,11 @@ #endif module solver_init_mod - use solver_init_mod_base - use solver_init_mod_base, only: solver_init2_base => solver_init2 + !OVERWRITING: solver_init2 + use solver_init_mod_base, only: use dimensions_mod, only: nelemd implicit none + private public :: solver_init2 diff --git a/components/homme/src/preqx_acc/vertremap_mod.F90 b/components/homme/src/preqx_acc/vertremap_mod.F90 index b9e537a6dcf..4c3471721bb 100644 --- a/components/homme/src/preqx_acc/vertremap_mod.F90 +++ b/components/homme/src/preqx_acc/vertremap_mod.F90 @@ -3,6 +3,9 @@ #endif module vertremap_mod - use vertremap_mod_base + use 
vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm implicit none + private + + public :: remap1, remap1_nofilter, remap_q_ppm end module vertremap_mod diff --git a/components/homme/src/preqx_acc/viscosity_mod.F90 b/components/homme/src/preqx_acc/viscosity_mod.F90 index 2735430e907..4bc0c10960f 100644 --- a/components/homme/src/preqx_acc/viscosity_mod.F90 +++ b/components/homme/src/preqx_acc/viscosity_mod.F90 @@ -4,9 +4,10 @@ #endif module viscosity_mod - use viscosity_mod_base -! use viscosity_mod_base, only: neighbor_minmax_base => neighbor_minmax, & -! biharmonic_wk_scalar_base => biharmonic_wk_scalar + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0 +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif use thread_mod, only : omp_get_num_threads use kinds, only : real_kind, iulog use dimensions_mod, only : np, nc, nlev,qsize,nelemd, ntrac @@ -18,6 +19,15 @@ module viscosity_mod use control_mod, only : hypervis_scaling, nu, nu_div use perf_mod, only: t_startf, t_stopf implicit none + private + + public :: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0 +#ifdef _PRIM + public :: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif + public :: biharmonic_wk_scalar_openacc + public :: neighbor_minmax_openacc + contains diff --git a/components/homme/src/sweqx/bndry_mod.F90 b/components/homme/src/sweqx/bndry_mod.F90 index 2ef89f27f70..5ad1125cbef 100644 --- a/components/homme/src/sweqx/bndry_mod.F90 +++ b/components/homme/src/sweqx/bndry_mod.F90 @@ -3,6 +3,6 @@ #endif module bndry_mod - use bndry_mod_base + use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, 
ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping implicit none end module bndry_mod diff --git a/components/homme/src/sweqx/derivative_mod.F90 b/components/homme/src/sweqx/derivative_mod.F90 index 461fe9c8323..e9c3f81be3f 100644 --- a/components/homme/src/sweqx/derivative_mod.F90 +++ b/components/homme/src/sweqx/derivative_mod.F90 @@ -3,6 +3,10 @@ #endif module derivative_mod - use derivative_mod_base + use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere implicit none end module derivative_mod diff --git a/components/homme/src/sweqx/edge_mod.F90 b/components/homme/src/sweqx/edge_mod.F90 index c7f39fe20f6..902503b36cb 100644 --- a/components/homme/src/sweqx/edge_mod.F90 +++ b/components/homme/src/sweqx/edge_mod.F90 @@ -3,6 +3,10 @@ #endif module edge_mod - use edge_mod_base + use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, 
FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax implicit none end module edge_mod diff --git a/components/homme/src/sweqx/solver_init_mod.F90 b/components/homme/src/sweqx/solver_init_mod.F90 index 224928eeb25..90efb25eadf 100644 --- a/components/homme/src/sweqx/solver_init_mod.F90 +++ b/components/homme/src/sweqx/solver_init_mod.F90 @@ -3,6 +3,6 @@ #endif module solver_init_mod - use solver_init_mod_base + use solver_init_mod_base, only: solver_init2 implicit none end module solver_init_mod diff --git a/components/homme/src/sweqx/viscosity_mod.F90 b/components/homme/src/sweqx/viscosity_mod.F90 index 80e849ae94c..f78ae69a8f3 100644 --- a/components/homme/src/sweqx/viscosity_mod.F90 +++ b/components/homme/src/sweqx/viscosity_mod.F90 @@ -3,6 +3,9 @@ #endif module viscosity_mod - use viscosity_mod_base + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif implicit none end module viscosity_mod From fc2f42ff88b72c5a4386caf227887aff7c070d9f Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Wed, 2 Mar 2016 15:00:12 -0500 Subject: [PATCH 12/27] Rearranging dummy params for Intel Intel needs integer array size params to be declared before the arrays using them. It's an intel bug, but this is the workaround. 
--- components/homme/src/preqx_acc/bndry_mod.F90 | 4 ++-- components/homme/src/preqx_acc/edge_mod.F90 | 14 +++++++------- .../homme/src/preqx_acc/prim_advection_mod.F90 | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/components/homme/src/preqx_acc/bndry_mod.F90 b/components/homme/src/preqx_acc/bndry_mod.F90 index d4a83d342e1..75fd0ddd909 100644 --- a/components/homme/src/preqx_acc/bndry_mod.F90 +++ b/components/homme/src/preqx_acc/bndry_mod.F90 @@ -518,8 +518,8 @@ function mpi_isend_openacc_stage(buf, count, dest, tag_root, comm, ierror, nchun use parallel_mod , only: mpireal_t use openacc_utils_mod, only: update_host_async, acc_async_test_wrap implicit none - real(kind=real_kind), intent(in ) :: buf(count) !buffer from which to send data integer , intent(in ) :: count !number of elements in buffer + real(kind=real_kind), intent(in ) :: buf(count) !buffer from which to send data integer , intent(in ) :: dest !the MPI rank I'm sending data to integer , intent(in ) :: tag_root !tag of the original send (I'll alter this for internal mpi_isend calls) integer , intent(in ) :: comm !Communicator to use @@ -580,8 +580,8 @@ function mpi_irecv_openacc_stage(buf, count, source, tag_root, comm, ierror, nch use parallel_mod , only: mpireal_t use openacc_utils_mod, only: update_device_async, copy_ondev_async implicit none - real(kind=real_kind), intent(in ) :: buf(count) !buffer in which to receive data integer , intent(in ) :: count !number of elements in buffer + real(kind=real_kind), intent(in ) :: buf(count) !buffer in which to receive data integer , intent(in ) :: source !the MPI rank I'm receiving data from integer , intent(in ) :: tag_root !tag of the original send (I'll alter this for internal mpi_isend calls) integer , intent(in ) :: comm !Communicator to use diff --git a/components/homme/src/preqx_acc/edge_mod.F90 b/components/homme/src/preqx_acc/edge_mod.F90 index dfb42939114..645fe05a66a 100644 --- a/components/homme/src/preqx_acc/edge_mod.F90 
+++ b/components/homme/src/preqx_acc/edge_mod.F90 @@ -48,10 +48,10 @@ subroutine edgeSpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) ,intent(inout) :: edge integer ,intent(in ) :: vlyr - real (kind=real_kind) ,intent(in ) :: v(vlyr,tdim,nelemd) integer ,intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real (kind=real_kind) ,intent(in ) :: v(vlyr,tdim,nelemd) ! Local variables integer :: i,k,ir,ll,is,ie,in,iw,el,kc,kk integer, parameter :: kchunk = 64 @@ -95,10 +95,10 @@ subroutine edgeSunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd) ! Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk integer, parameter :: kchunk = 64 @@ -154,10 +154,10 @@ subroutine edgeSunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd) ! 
Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk integer, parameter :: kchunk = 64 @@ -214,10 +214,10 @@ subroutine edgeVpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) ,intent(inout) :: edge integer ,intent(in ) :: vlyr - real (kind=real_kind) ,intent(in ) :: v(np,np,vlyr,tdim,nelemd) integer ,intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real (kind=real_kind) ,intent(in ) :: v(np,np,vlyr,tdim,nelemd) ! Local variables integer :: i,k,ir,ll,is,ie,in,iw,el,kc,kk integer, parameter :: kchunk = 32 @@ -276,10 +276,10 @@ subroutine edgeVunpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) ! Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j integer, parameter :: kchunk = 32 @@ -353,10 +353,10 @@ subroutine edgeVunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) ! 
Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j integer, parameter :: kchunk = 32 @@ -430,10 +430,10 @@ subroutine edgeVunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) ! Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j integer, parameter :: kchunk = 32 diff --git a/components/homme/src/preqx_acc/prim_advection_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 index d645ee16502..7af261cd0a2 100644 --- a/components/homme/src/preqx_acc/prim_advection_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -825,9 +825,9 @@ subroutine limiter2d_zero(Qdp,tdim,tl) ! ps is only used when advecting Q instead of Qdp ! so ps should be at one timelevel behind Q implicit none - real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd) integer , intent(in ) :: tdim integer , intent(in ) :: tl + real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd) ! local real (kind=real_kind) :: mass,mass_new real (kind=real_kind) :: qtmp(np,np) From af7e4a1ca0d93582270d38645409b01dcc1f49ea Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 3 Mar 2016 08:19:18 -0500 Subject: [PATCH 13/27] Revert "changing inheriting of _base modules in HOMME" This reverts commit 47a0ec7d8d196feabe5c3c1cddcb010a1f7ee2d0. 
--- components/homme/src/preqx/bndry_mod.F90 | 2 +- components/homme/src/preqx/derivative_mod.F90 | 6 +---- components/homme/src/preqx/edge_mod.F90 | 6 +---- .../homme/src/preqx/prim_advection_mod.F90 | 3 +-- .../homme/src/preqx/solver_init_mod.F90 | 2 +- components/homme/src/preqx/vertremap_mod.F90 | 2 +- components/homme/src/preqx/viscosity_mod.F90 | 5 +--- components/homme/src/preqx_acc/bndry_mod.F90 | 9 +------- .../homme/src/preqx_acc/derivative_mod.F90 | 21 ++++------------- components/homme/src/preqx_acc/edge_mod.F90 | 23 ++++--------------- .../src/preqx_acc/prim_advection_mod.F90 | 18 ++++++--------- .../homme/src/preqx_acc/solver_init_mod.F90 | 5 ++-- .../homme/src/preqx_acc/vertremap_mod.F90 | 5 +--- .../homme/src/preqx_acc/viscosity_mod.F90 | 16 +++---------- components/homme/src/sweqx/bndry_mod.F90 | 2 +- components/homme/src/sweqx/derivative_mod.F90 | 6 +---- components/homme/src/sweqx/edge_mod.F90 | 6 +---- .../homme/src/sweqx/solver_init_mod.F90 | 2 +- components/homme/src/sweqx/viscosity_mod.F90 | 5 +--- 19 files changed, 35 insertions(+), 109 deletions(-) diff --git a/components/homme/src/preqx/bndry_mod.F90 b/components/homme/src/preqx/bndry_mod.F90 index 5ad1125cbef..2ef89f27f70 100644 --- a/components/homme/src/preqx/bndry_mod.F90 +++ b/components/homme/src/preqx/bndry_mod.F90 @@ -3,6 +3,6 @@ #endif module bndry_mod - use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping + use bndry_mod_base implicit none end module bndry_mod diff --git a/components/homme/src/preqx/derivative_mod.F90 b/components/homme/src/preqx/derivative_mod.F90 index e9c3f81be3f..461fe9c8323 100644 --- a/components/homme/src/preqx/derivative_mod.F90 +++ b/components/homme/src/preqx/derivative_mod.F90 @@ -3,10 +3,6 @@ #endif module derivative_mod - use derivative_mod_base, only: derivative_t, derivative_stag_t, 
subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & - derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & - remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & - curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & - laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + use derivative_mod_base implicit none end module derivative_mod diff --git a/components/homme/src/preqx/edge_mod.F90 b/components/homme/src/preqx/edge_mod.F90 index 902503b36cb..c7f39fe20f6 100644 --- a/components/homme/src/preqx/edge_mod.F90 +++ b/components/homme/src/preqx/edge_mod.F90 @@ -3,10 +3,6 @@ #endif module edge_mod - use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & - edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & - ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & - ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & - ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + use edge_mod_base implicit none end module edge_mod diff --git a/components/homme/src/preqx/prim_advection_mod.F90 b/components/homme/src/preqx/prim_advection_mod.F90 index a3e235c41aa..948ec0ad3fc 100644 --- a/components/homme/src/preqx/prim_advection_mod.F90 +++ 
b/components/homme/src/preqx/prim_advection_mod.F90 @@ -3,7 +3,6 @@ #endif module prim_advection_mod - use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap, Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, & - prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 + use prim_advection_mod_base implicit none end module prim_advection_mod diff --git a/components/homme/src/preqx/solver_init_mod.F90 b/components/homme/src/preqx/solver_init_mod.F90 index 90efb25eadf..224928eeb25 100644 --- a/components/homme/src/preqx/solver_init_mod.F90 +++ b/components/homme/src/preqx/solver_init_mod.F90 @@ -3,6 +3,6 @@ #endif module solver_init_mod - use solver_init_mod_base, only: solver_init2 + use solver_init_mod_base implicit none end module solver_init_mod diff --git a/components/homme/src/preqx/vertremap_mod.F90 b/components/homme/src/preqx/vertremap_mod.F90 index 63617bf66e6..b9e537a6dcf 100644 --- a/components/homme/src/preqx/vertremap_mod.F90 +++ b/components/homme/src/preqx/vertremap_mod.F90 @@ -3,6 +3,6 @@ #endif module vertremap_mod - use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm + use vertremap_mod_base implicit none end module vertremap_mod diff --git a/components/homme/src/preqx/viscosity_mod.F90 b/components/homme/src/preqx/viscosity_mod.F90 index f78ae69a8f3..80e849ae94c 100644 --- a/components/homme/src/preqx/viscosity_mod.F90 +++ b/components/homme/src/preqx/viscosity_mod.F90 @@ -3,9 +3,6 @@ #endif module viscosity_mod - use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax -#ifdef _PRIM - use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d -#endif + use viscosity_mod_base implicit none end module viscosity_mod diff --git a/components/homme/src/preqx_acc/bndry_mod.F90 
b/components/homme/src/preqx_acc/bndry_mod.F90 index 75fd0ddd909..751a4143c4b 100644 --- a/components/homme/src/preqx_acc/bndry_mod.F90 +++ b/components/homme/src/preqx_acc/bndry_mod.F90 @@ -4,13 +4,12 @@ #endif module bndry_mod - use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping + use bndry_mod_base use parallel_mod, only : syncmp,parallel_t,abortmp,iam use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads use kinds, only: real_kind implicit none - private integer, parameter, private :: maxCycles = 20 integer, parameter, private :: maxChunks = 64 real(kind=real_kind), parameter, private :: chunk_denom = 1.e5 @@ -30,12 +29,6 @@ module bndry_mod type(send_stager_t), private :: stg_send(maxCycles) type(recv_stager_t), private :: stg_recv(maxCycles) - public :: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping - public :: bndry_exchangeS_simple_overlap - public :: bndry_exchangeV_timing - public :: bndry_exchangeV_simple_overlap - public :: bndry_exchangeV_finer_overlap - contains subroutine bndry_exchangeS_simple_overlap(hybrid,buffer) diff --git a/components/homme/src/preqx_acc/derivative_mod.F90 b/components/homme/src/preqx_acc/derivative_mod.F90 index 97dea44734d..3833b48bfc8 100644 --- a/components/homme/src/preqx_acc/derivative_mod.F90 +++ b/components/homme/src/preqx_acc/derivative_mod.F90 @@ -4,11 +4,11 @@ #endif module derivative_mod - use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & - derivinit, deriv_print, gradient, 
gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & - remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & - curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & - laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + use derivative_mod_base + use derivative_mod_base, only: laplace_sphere_wk_base => laplace_sphere_wk, & + divergence_sphere_wk_base => divergence_sphere_wk, & + gradient_sphere_base => gradient_sphere, & + divergence_sphere_base => divergence_sphere use kinds, only : real_kind, longdouble_kind use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi @@ -18,17 +18,6 @@ module derivative_mod use element_mod, only : element_t use control_mod, only : hypervis_scaling, hypervis_power implicit none - private - - public :: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & - derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & - remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & - curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & - laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere - public :: laplace_sphere_wk_openacc - public :: divergence_sphere_wk_openacc - public :: gradient_sphere_openacc - public :: divergence_sphere_openacc contains diff --git 
a/components/homme/src/preqx_acc/edge_mod.F90 b/components/homme/src/preqx_acc/edge_mod.F90 index 645fe05a66a..06af6cd38a2 100644 --- a/components/homme/src/preqx_acc/edge_mod.F90 +++ b/components/homme/src/preqx_acc/edge_mod.F90 @@ -4,11 +4,10 @@ #endif module edge_mod - use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & - edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & - ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & - ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & - ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + use edge_mod_base + use edge_mod_base, only: edgeSpack_base => edgeSpack, & + edgeSunpackMin_base => edgeSunpackMin, & + edgeSunpackMax_base => edgeSunpackMax use kinds, only : int_kind, log_kind, real_kind use dimensions_mod, only : max_neigh_edges, nelemd, np use perf_mod, only: t_startf, t_stopf, t_adj_detailf ! 
_EXTERNAL @@ -21,20 +20,6 @@ module edge_mod Longedgebuffer_t, Ghostbuffertr_t, Ghostbuffer3d_t, initedgebuffer_callid use element_mod, only : element_t implicit none - private - - public :: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & - edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & - ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & - ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & - ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax - public :: edgeSpack_openacc - public :: edgeSunpackMin_openacc - public :: edgeSunpackMax_openacc - public :: edgeVpack_openacc - public :: edgeVunpack_openacc - public :: edgeVunpackMin_openacc - public :: edgeVunpackMax_openacc contains diff --git a/components/homme/src/preqx_acc/prim_advection_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 index 7af261cd0a2..4eacbf04ff4 100644 --- a/components/homme/src/preqx_acc/prim_advection_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -7,8 +7,13 @@ #endif module prim_advection_mod - !OVERWRITING: Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 - use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap + use prim_advection_mod_base + use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_base => Prim_Advec_Tracers_remap, & + prim_advec_init1_base => prim_advec_init1, & + prim_advec_init2_base => prim_advec_init2, & + prim_advec_init_deriv_base => prim_advec_init_deriv, & + deriv_base => deriv, & + 
Prim_Advec_Tracers_remap_rk2_base => Prim_Advec_Tracers_remap_rk2 use kinds, only : real_kind use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp @@ -33,7 +38,6 @@ module prim_advection_mod use parallel_mod, only : abortmp use derivative_mod, only: derivative_t implicit none - private type (derivative_t), allocatable :: deriv(:) ! derivative struct (nthreads) real(kind=real_kind), private, allocatable :: qmin(:,:,:), qmax(:,:,:) real(kind=real_kind), private, allocatable :: dp0(:) @@ -49,14 +53,6 @@ module prim_advection_mod real(kind=real_kind), allocatable, private :: data_pack(:,:,:,:), data_pack2(:,:,:,:) logical, private :: first_time = .true. - public :: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap - public :: Prim_Advec_Tracers_remap - public :: prim_advec_init1 - public :: prim_advec_init2 - public :: prim_advec_init_deriv - public :: deriv - public :: Prim_Advec_Tracers_remap_rk2 - contains subroutine copy_qdp1_h2d( elem , tl , nets , nete ) diff --git a/components/homme/src/preqx_acc/solver_init_mod.F90 b/components/homme/src/preqx_acc/solver_init_mod.F90 index 0b2027af701..67ad782bacf 100644 --- a/components/homme/src/preqx_acc/solver_init_mod.F90 +++ b/components/homme/src/preqx_acc/solver_init_mod.F90 @@ -3,11 +3,10 @@ #endif module solver_init_mod - !OVERWRITING: solver_init2 - use solver_init_mod_base, only: + use solver_init_mod_base + use solver_init_mod_base, only: solver_init2_base => solver_init2 use dimensions_mod, only: nelemd implicit none - private public :: solver_init2 diff --git a/components/homme/src/preqx_acc/vertremap_mod.F90 b/components/homme/src/preqx_acc/vertremap_mod.F90 index 4c3471721bb..b9e537a6dcf 100644 --- a/components/homme/src/preqx_acc/vertremap_mod.F90 +++ b/components/homme/src/preqx_acc/vertremap_mod.F90 @@ -3,9 +3,6 @@ #endif module vertremap_mod - use vertremap_mod_base, only: remap1, 
remap1_nofilter, remap_q_ppm + use vertremap_mod_base implicit none - private - - public :: remap1, remap1_nofilter, remap_q_ppm end module vertremap_mod diff --git a/components/homme/src/preqx_acc/viscosity_mod.F90 b/components/homme/src/preqx_acc/viscosity_mod.F90 index 4bc0c10960f..2735430e907 100644 --- a/components/homme/src/preqx_acc/viscosity_mod.F90 +++ b/components/homme/src/preqx_acc/viscosity_mod.F90 @@ -4,10 +4,9 @@ #endif module viscosity_mod - use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0 -#ifdef _PRIM - use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d -#endif + use viscosity_mod_base +! use viscosity_mod_base, only: neighbor_minmax_base => neighbor_minmax, & +! biharmonic_wk_scalar_base => biharmonic_wk_scalar use thread_mod, only : omp_get_num_threads use kinds, only : real_kind, iulog use dimensions_mod, only : np, nc, nlev,qsize,nelemd, ntrac @@ -19,15 +18,6 @@ module viscosity_mod use control_mod, only : hypervis_scaling, nu, nu_div use perf_mod, only: t_startf, t_stopf implicit none - private - - public :: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0 -#ifdef _PRIM - public :: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d -#endif - public :: biharmonic_wk_scalar_openacc - public :: neighbor_minmax_openacc - contains diff --git a/components/homme/src/sweqx/bndry_mod.F90 b/components/homme/src/sweqx/bndry_mod.F90 index 5ad1125cbef..2ef89f27f70 100644 --- a/components/homme/src/sweqx/bndry_mod.F90 +++ b/components/homme/src/sweqx/bndry_mod.F90 @@ -3,6 +3,6 @@ #endif module bndry_mod - use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, 
bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping + use bndry_mod_base implicit none end module bndry_mod diff --git a/components/homme/src/sweqx/derivative_mod.F90 b/components/homme/src/sweqx/derivative_mod.F90 index e9c3f81be3f..461fe9c8323 100644 --- a/components/homme/src/sweqx/derivative_mod.F90 +++ b/components/homme/src/sweqx/derivative_mod.F90 @@ -3,10 +3,6 @@ #endif module derivative_mod - use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & - derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & - remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & - curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & - laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + use derivative_mod_base implicit none end module derivative_mod diff --git a/components/homme/src/sweqx/edge_mod.F90 b/components/homme/src/sweqx/edge_mod.F90 index 902503b36cb..c7f39fe20f6 100644 --- a/components/homme/src/sweqx/edge_mod.F90 +++ b/components/homme/src/sweqx/edge_mod.F90 @@ -3,10 +3,6 @@ #endif module edge_mod - use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & - edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & - ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & - 
ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & - ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + use edge_mod_base implicit none end module edge_mod diff --git a/components/homme/src/sweqx/solver_init_mod.F90 b/components/homme/src/sweqx/solver_init_mod.F90 index 90efb25eadf..224928eeb25 100644 --- a/components/homme/src/sweqx/solver_init_mod.F90 +++ b/components/homme/src/sweqx/solver_init_mod.F90 @@ -3,6 +3,6 @@ #endif module solver_init_mod - use solver_init_mod_base, only: solver_init2 + use solver_init_mod_base implicit none end module solver_init_mod diff --git a/components/homme/src/sweqx/viscosity_mod.F90 b/components/homme/src/sweqx/viscosity_mod.F90 index f78ae69a8f3..80e849ae94c 100644 --- a/components/homme/src/sweqx/viscosity_mod.F90 +++ b/components/homme/src/sweqx/viscosity_mod.F90 @@ -3,9 +3,6 @@ #endif module viscosity_mod - use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax -#ifdef _PRIM - use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d -#endif + use viscosity_mod_base implicit none end module viscosity_mod From 5789ef7c5d2345a51663445db7442670074c0f52 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 3 Mar 2016 08:53:01 -0500 Subject: [PATCH 14/27] Removing CUDA FORTRAN from cmake compiler flags --- components/homme/cmake/SetCompilerFlags.cmake | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/components/homme/cmake/SetCompilerFlags.cmake b/components/homme/cmake/SetCompilerFlags.cmake index 54c2d46058d..1d42d29c2cf 100644 --- a/components/homme/cmake/SetCompilerFlags.cmake +++ b/components/homme/cmake/SetCompilerFlags.cmake @@ -225,48 +225,6 @@ IF (${ENABLE_OPENACC}) SET(PREQX_USE_OPENACC TRUE) ENDIF () 
-############################################################################## -# CUDA Fortran specific flags - only supporting PGI compiler -############################################################################## -OPTION(ENABLE_CUDA_FORTRAN "Whether to build with CUDA Fortran support" FALSE) -IF (${ENABLE_CUDA_FORTRAN}) - IF (NOT ${CMAKE_Fortran_COMPILER_ID} STREQUAL PGI) - MESSAGE(FATAL_ERROR "CUDA Fortran only supported through the PGI compiler") - ELSE () - # Set PGI CUDA Fortran flags - - # Set defaults as lowest version of CUDA and device capability allowed - # To do: determine a way to generalize this - IF (NOT CUDA_VERSION) - SET(CUDA_VERSION "4.1") - ENDIF () - - # Compute capability: cc2x is for devices with compute capability >= 2.0 - IF (NOT CUDA_DEVICE_CAPABILITY) - SET(CUDA_DEVICE_CAPABILITY "cc2x") - ENDIF () - - SET(CMAKE_Fortran_FLAGS - "${CMAKE_Fortran_FLAGS} -ta=nvidia -Mcuda=${CUDA_VERSION},${CUDA_DEVICE_CAPABILITY},ptxinfo,keepgpu") - - MESSAGE(STATUS "Testing PGI CUDA Fortran Compilation with flags: ${CMAKE_Fortran_FLAGS}") - - TRY_COMPILE(CUDAFOR - ${CMAKE_BINARY_DIR}/tests/compilerTests/ - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compilerTests/cudaFortranTest.f90 - OUTPUT_VARIABLE COMPILE_OUTPUT) - IF (${CUDAFOR}) - SET(PREQX_USE_CUDA_FORTRAN TRUE) - MESSAGE(STATUS "Succeeded. Using CUDA Fortran through PGI compiler") - ELSE() - SET(PREQX_USE_CUDA_FORTRAN FALSE) - MESSAGE(FATAL_ERROR "Unable to use CUDA Fortran through the PGI " - "compiler. 
Compilation failed with the following " - "output.\n${COMPILE_OUTPUT}") - ENDIF() - ENDIF() -ENDIF () - ############################################################################## # Intel Phi (MIC) specific flags - only supporting the Intel compiler ############################################################################## From 84cd106e4aacb83bd1be728d68bcb21aaedbd353 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 3 Mar 2016 09:46:50 -0500 Subject: [PATCH 15/27] Fixing baroCam-acc USE_OPENACC had to be set in baroCam-acc to fix the element structure. Evidently setting it in preqx_acc does not set it for every executable that uses preqx_acc --- components/homme/test_execs/baroCam-acc/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/components/homme/test_execs/baroCam-acc/CMakeLists.txt b/components/homme/test_execs/baroCam-acc/CMakeLists.txt index c6c9f487d30..529c5db680b 100644 --- a/components/homme/test_execs/baroCam-acc/CMakeLists.txt +++ b/components/homme/test_execs/baroCam-acc/CMakeLists.txt @@ -1,5 +1,6 @@ # Set the qsize_d in dimensions_mod.F90 SET(QSIZE_D 25) +SET(USE_OPENACC TRUE) # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroCam-acc preqx_acc 4 4 26 TRUE TRUE) From abee170bf9dd199723208b9223487ba244019344 Mon Sep 17 00:00:00 2001 From: Mark Taylor Date: Thu, 3 Mar 2016 08:05:35 -0700 Subject: [PATCH 16/27] Edgebuffer_t is in edgetype_mod.F90, not edge_mod.F90 --- components/homme/src/preqx_acc/prim_advection_mod.F90 | 1 - 1 file changed, 1 deletion(-) diff --git a/components/homme/src/preqx_acc/prim_advection_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 index 4eacbf04ff4..a2a14c90cd2 100644 --- a/components/homme/src/preqx_acc/prim_advection_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -343,7 +343,6 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , use hybrid_mod , only: hybrid_t use element_mod , 
only: element_t, derived_divdp_proj, state_qdp use derivative_mod , only: derivative_t - use edge_mod , only: EdgeBuffer_t use perf_mod , only: t_startf, t_stopf ! _EXTERNAL use hybvcoord_mod , only: hvcoord_t use control_mod , only: nu_q, hypervis_order, hypervis_subcycle_q, nu_p From 95f07bb820813b4e6bdacfcf96c4753d8f7cc80c Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 3 Mar 2016 12:27:37 -0500 Subject: [PATCH 17/27] Revert "Revert "changing inheriting of _base modules in HOMME"" This reverts commit af7e4a1ca0d93582270d38645409b01dcc1f49ea. --- components/homme/src/preqx/bndry_mod.F90 | 2 +- components/homme/src/preqx/derivative_mod.F90 | 6 ++++- components/homme/src/preqx/edge_mod.F90 | 6 ++++- .../homme/src/preqx/prim_advection_mod.F90 | 3 ++- .../homme/src/preqx/solver_init_mod.F90 | 2 +- components/homme/src/preqx/vertremap_mod.F90 | 2 +- components/homme/src/preqx/viscosity_mod.F90 | 5 +++- components/homme/src/preqx_acc/bndry_mod.F90 | 9 +++++++- .../homme/src/preqx_acc/derivative_mod.F90 | 21 +++++++++++++---- components/homme/src/preqx_acc/edge_mod.F90 | 23 +++++++++++++++---- .../src/preqx_acc/prim_advection_mod.F90 | 18 +++++++++------ .../homme/src/preqx_acc/solver_init_mod.F90 | 5 ++-- .../homme/src/preqx_acc/vertremap_mod.F90 | 5 +++- .../homme/src/preqx_acc/viscosity_mod.F90 | 16 ++++++++++--- components/homme/src/sweqx/bndry_mod.F90 | 2 +- components/homme/src/sweqx/derivative_mod.F90 | 6 ++++- components/homme/src/sweqx/edge_mod.F90 | 6 ++++- .../homme/src/sweqx/solver_init_mod.F90 | 2 +- components/homme/src/sweqx/viscosity_mod.F90 | 5 +++- 19 files changed, 109 insertions(+), 35 deletions(-) diff --git a/components/homme/src/preqx/bndry_mod.F90 b/components/homme/src/preqx/bndry_mod.F90 index 2ef89f27f70..5ad1125cbef 100644 --- a/components/homme/src/preqx/bndry_mod.F90 +++ b/components/homme/src/preqx/bndry_mod.F90 @@ -3,6 +3,6 @@ #endif module bndry_mod - use bndry_mod_base + use bndry_mod_base, only: bndry_exchangeV, 
ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping implicit none end module bndry_mod diff --git a/components/homme/src/preqx/derivative_mod.F90 b/components/homme/src/preqx/derivative_mod.F90 index 461fe9c8323..e9c3f81be3f 100644 --- a/components/homme/src/preqx/derivative_mod.F90 +++ b/components/homme/src/preqx/derivative_mod.F90 @@ -3,6 +3,10 @@ #endif module derivative_mod - use derivative_mod_base + use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere implicit none end module derivative_mod diff --git a/components/homme/src/preqx/edge_mod.F90 b/components/homme/src/preqx/edge_mod.F90 index c7f39fe20f6..902503b36cb 100644 --- a/components/homme/src/preqx/edge_mod.F90 +++ b/components/homme/src/preqx/edge_mod.F90 @@ -3,6 +3,10 @@ #endif module edge_mod - use edge_mod_base + use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, 
ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax implicit none end module edge_mod diff --git a/components/homme/src/preqx/prim_advection_mod.F90 b/components/homme/src/preqx/prim_advection_mod.F90 index 948ec0ad3fc..a3e235c41aa 100644 --- a/components/homme/src/preqx/prim_advection_mod.F90 +++ b/components/homme/src/preqx/prim_advection_mod.F90 @@ -3,6 +3,7 @@ #endif module prim_advection_mod - use prim_advection_mod_base + use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap, Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, & + prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 implicit none end module prim_advection_mod diff --git a/components/homme/src/preqx/solver_init_mod.F90 b/components/homme/src/preqx/solver_init_mod.F90 index 224928eeb25..90efb25eadf 100644 --- a/components/homme/src/preqx/solver_init_mod.F90 +++ b/components/homme/src/preqx/solver_init_mod.F90 @@ -3,6 +3,6 @@ #endif module solver_init_mod - use solver_init_mod_base + use solver_init_mod_base, only: solver_init2 implicit none end module solver_init_mod diff --git a/components/homme/src/preqx/vertremap_mod.F90 b/components/homme/src/preqx/vertremap_mod.F90 index b9e537a6dcf..63617bf66e6 100644 --- a/components/homme/src/preqx/vertremap_mod.F90 +++ b/components/homme/src/preqx/vertremap_mod.F90 @@ -3,6 +3,6 @@ #endif module vertremap_mod - use vertremap_mod_base + use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm implicit none end module vertremap_mod diff --git a/components/homme/src/preqx/viscosity_mod.F90 b/components/homme/src/preqx/viscosity_mod.F90 index 80e849ae94c..f78ae69a8f3 100644 --- a/components/homme/src/preqx/viscosity_mod.F90 +++ 
b/components/homme/src/preqx/viscosity_mod.F90 @@ -3,6 +3,9 @@ #endif module viscosity_mod - use viscosity_mod_base + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif implicit none end module viscosity_mod diff --git a/components/homme/src/preqx_acc/bndry_mod.F90 b/components/homme/src/preqx_acc/bndry_mod.F90 index 751a4143c4b..75fd0ddd909 100644 --- a/components/homme/src/preqx_acc/bndry_mod.F90 +++ b/components/homme/src/preqx_acc/bndry_mod.F90 @@ -4,12 +4,13 @@ #endif module bndry_mod - use bndry_mod_base + use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping use parallel_mod, only : syncmp,parallel_t,abortmp,iam use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads use kinds, only: real_kind implicit none + private integer, parameter, private :: maxCycles = 20 integer, parameter, private :: maxChunks = 64 real(kind=real_kind), parameter, private :: chunk_denom = 1.e5 @@ -29,6 +30,12 @@ module bndry_mod type(send_stager_t), private :: stg_send(maxCycles) type(recv_stager_t), private :: stg_recv(maxCycles) + public :: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping + public :: bndry_exchangeS_simple_overlap + public :: bndry_exchangeV_timing + public :: bndry_exchangeV_simple_overlap + public :: bndry_exchangeV_finer_overlap + contains subroutine bndry_exchangeS_simple_overlap(hybrid,buffer) diff --git 
a/components/homme/src/preqx_acc/derivative_mod.F90 b/components/homme/src/preqx_acc/derivative_mod.F90 index 3833b48bfc8..97dea44734d 100644 --- a/components/homme/src/preqx_acc/derivative_mod.F90 +++ b/components/homme/src/preqx_acc/derivative_mod.F90 @@ -4,11 +4,11 @@ #endif module derivative_mod - use derivative_mod_base - use derivative_mod_base, only: laplace_sphere_wk_base => laplace_sphere_wk, & - divergence_sphere_wk_base => divergence_sphere_wk, & - gradient_sphere_base => gradient_sphere, & - divergence_sphere_base => divergence_sphere + use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere use kinds, only : real_kind, longdouble_kind use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi @@ -18,6 +18,17 @@ module derivative_mod use element_mod, only : element_t use control_mod, only : hypervis_scaling, hypervis_power implicit none + private + + public :: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, 
gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + public :: laplace_sphere_wk_openacc + public :: divergence_sphere_wk_openacc + public :: gradient_sphere_openacc + public :: divergence_sphere_openacc contains diff --git a/components/homme/src/preqx_acc/edge_mod.F90 b/components/homme/src/preqx_acc/edge_mod.F90 index 06af6cd38a2..645fe05a66a 100644 --- a/components/homme/src/preqx_acc/edge_mod.F90 +++ b/components/homme/src/preqx_acc/edge_mod.F90 @@ -4,10 +4,11 @@ #endif module edge_mod - use edge_mod_base - use edge_mod_base, only: edgeSpack_base => edgeSpack, & - edgeSunpackMin_base => edgeSunpackMin, & - edgeSunpackMax_base => edgeSunpackMax + use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax use kinds, only : int_kind, log_kind, real_kind use dimensions_mod, only : max_neigh_edges, nelemd, np use perf_mod, only: t_startf, t_stopf, t_adj_detailf ! 
_EXTERNAL @@ -20,6 +21,20 @@ module edge_mod Longedgebuffer_t, Ghostbuffertr_t, Ghostbuffer3d_t, initedgebuffer_callid use element_mod, only : element_t implicit none + private + + public :: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + public :: edgeSpack_openacc + public :: edgeSunpackMin_openacc + public :: edgeSunpackMax_openacc + public :: edgeVpack_openacc + public :: edgeVunpack_openacc + public :: edgeVunpackMin_openacc + public :: edgeVunpackMax_openacc contains diff --git a/components/homme/src/preqx_acc/prim_advection_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 index a2a14c90cd2..b9ed14c295c 100644 --- a/components/homme/src/preqx_acc/prim_advection_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -7,13 +7,8 @@ #endif module prim_advection_mod - use prim_advection_mod_base - use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_base => Prim_Advec_Tracers_remap, & - prim_advec_init1_base => prim_advec_init1, & - prim_advec_init2_base => prim_advec_init2, & - prim_advec_init_deriv_base => prim_advec_init_deriv, & - deriv_base => deriv, & - Prim_Advec_Tracers_remap_rk2_base => Prim_Advec_Tracers_remap_rk2 + !OVERWRITING: Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 + use prim_advection_mod_base, only: 
Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap use kinds, only : real_kind use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp @@ -38,6 +33,7 @@ module prim_advection_mod use parallel_mod, only : abortmp use derivative_mod, only: derivative_t implicit none + private type (derivative_t), allocatable :: deriv(:) ! derivative struct (nthreads) real(kind=real_kind), private, allocatable :: qmin(:,:,:), qmax(:,:,:) real(kind=real_kind), private, allocatable :: dp0(:) @@ -53,6 +49,14 @@ module prim_advection_mod real(kind=real_kind), allocatable, private :: data_pack(:,:,:,:), data_pack2(:,:,:,:) logical, private :: first_time = .true. + public :: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap + public :: Prim_Advec_Tracers_remap + public :: prim_advec_init1 + public :: prim_advec_init2 + public :: prim_advec_init_deriv + public :: deriv + public :: Prim_Advec_Tracers_remap_rk2 + contains subroutine copy_qdp1_h2d( elem , tl , nets , nete ) diff --git a/components/homme/src/preqx_acc/solver_init_mod.F90 b/components/homme/src/preqx_acc/solver_init_mod.F90 index 67ad782bacf..0b2027af701 100644 --- a/components/homme/src/preqx_acc/solver_init_mod.F90 +++ b/components/homme/src/preqx_acc/solver_init_mod.F90 @@ -3,10 +3,11 @@ #endif module solver_init_mod - use solver_init_mod_base - use solver_init_mod_base, only: solver_init2_base => solver_init2 + !OVERWRITING: solver_init2 + use solver_init_mod_base, only: use dimensions_mod, only: nelemd implicit none + private public :: solver_init2 diff --git a/components/homme/src/preqx_acc/vertremap_mod.F90 b/components/homme/src/preqx_acc/vertremap_mod.F90 index b9e537a6dcf..4c3471721bb 100644 --- a/components/homme/src/preqx_acc/vertremap_mod.F90 +++ b/components/homme/src/preqx_acc/vertremap_mod.F90 @@ -3,6 +3,9 @@ #endif module vertremap_mod - use vertremap_mod_base + use 
vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm implicit none + private + + public :: remap1, remap1_nofilter, remap_q_ppm end module vertremap_mod diff --git a/components/homme/src/preqx_acc/viscosity_mod.F90 b/components/homme/src/preqx_acc/viscosity_mod.F90 index 2735430e907..4bc0c10960f 100644 --- a/components/homme/src/preqx_acc/viscosity_mod.F90 +++ b/components/homme/src/preqx_acc/viscosity_mod.F90 @@ -4,9 +4,10 @@ #endif module viscosity_mod - use viscosity_mod_base -! use viscosity_mod_base, only: neighbor_minmax_base => neighbor_minmax, & -! biharmonic_wk_scalar_base => biharmonic_wk_scalar + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0 +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif use thread_mod, only : omp_get_num_threads use kinds, only : real_kind, iulog use dimensions_mod, only : np, nc, nlev,qsize,nelemd, ntrac @@ -18,6 +19,15 @@ module viscosity_mod use control_mod, only : hypervis_scaling, nu, nu_div use perf_mod, only: t_startf, t_stopf implicit none + private + + public :: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0 +#ifdef _PRIM + public :: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif + public :: biharmonic_wk_scalar_openacc + public :: neighbor_minmax_openacc + contains diff --git a/components/homme/src/sweqx/bndry_mod.F90 b/components/homme/src/sweqx/bndry_mod.F90 index 2ef89f27f70..5ad1125cbef 100644 --- a/components/homme/src/sweqx/bndry_mod.F90 +++ b/components/homme/src/sweqx/bndry_mod.F90 @@ -3,6 +3,6 @@ #endif module bndry_mod - use bndry_mod_base + use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, 
ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping implicit none end module bndry_mod diff --git a/components/homme/src/sweqx/derivative_mod.F90 b/components/homme/src/sweqx/derivative_mod.F90 index 461fe9c8323..e9c3f81be3f 100644 --- a/components/homme/src/sweqx/derivative_mod.F90 +++ b/components/homme/src/sweqx/derivative_mod.F90 @@ -3,6 +3,10 @@ #endif module derivative_mod - use derivative_mod_base + use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere implicit none end module derivative_mod diff --git a/components/homme/src/sweqx/edge_mod.F90 b/components/homme/src/sweqx/edge_mod.F90 index c7f39fe20f6..902503b36cb 100644 --- a/components/homme/src/sweqx/edge_mod.F90 +++ b/components/homme/src/sweqx/edge_mod.F90 @@ -3,6 +3,10 @@ #endif module edge_mod - use edge_mod_base + use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, 
FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax implicit none end module edge_mod diff --git a/components/homme/src/sweqx/solver_init_mod.F90 b/components/homme/src/sweqx/solver_init_mod.F90 index 224928eeb25..90efb25eadf 100644 --- a/components/homme/src/sweqx/solver_init_mod.F90 +++ b/components/homme/src/sweqx/solver_init_mod.F90 @@ -3,6 +3,6 @@ #endif module solver_init_mod - use solver_init_mod_base + use solver_init_mod_base, only: solver_init2 implicit none end module solver_init_mod diff --git a/components/homme/src/sweqx/viscosity_mod.F90 b/components/homme/src/sweqx/viscosity_mod.F90 index 80e849ae94c..f78ae69a8f3 100644 --- a/components/homme/src/sweqx/viscosity_mod.F90 +++ b/components/homme/src/sweqx/viscosity_mod.F90 @@ -3,6 +3,9 @@ #endif module viscosity_mod - use viscosity_mod_base + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif implicit none end module viscosity_mod From 2443b33fdb1dfa260fac826b1dd94b0fee4a5c30 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 3 Mar 2016 12:57:58 -0500 Subject: [PATCH 18/27] Fixing dimensions_mod error I ran a bad script by accident that changed dimensions_mod.F90 at some point. This is fixing it. 
--- components/homme/src/share/dimensions_mod.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/homme/src/share/dimensions_mod.F90 b/components/homme/src/share/dimensions_mod.F90 index 61d3138e996..b204379c899 100644 --- a/components/homme/src/share/dimensions_mod.F90 +++ b/components/homme/src/share/dimensions_mod.F90 @@ -27,9 +27,9 @@ module dimensions_mod #endif #else #ifdef QSIZE_D - integer, parameter :: qsize_d=50 + integer, parameter :: qsize_d=QSIZE_D #else - integer, parameter :: qsize_d=50 + integer, parameter :: qsize_d=4 #endif integer, parameter :: ntrac_d=4 ! fvm tracers #endif From d9de5726416a678c149db58667b36331615894e0 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 3 Mar 2016 14:28:02 -0500 Subject: [PATCH 19/27] Fixing qsize_d inconsistency in preqx_acc preqx_acc/prim_advection_mod.F90 had an inconsistency in the qsize dimension for state_qdp. It was allocated with dimension qsize_d for consistency with state%Qdp, but in prim_advection_mod routines, it was being re-sized to qsize. This presented a problem when qsize < qsize_d, giving wrong answers. This commit fixes that bug. 
--- .../homme/src/preqx_acc/prim_advection_mod.F90 | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/components/homme/src/preqx_acc/prim_advection_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 index b9ed14c295c..acc0c2161f1 100644 --- a/components/homme/src/preqx_acc/prim_advection_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -10,7 +10,7 @@ module prim_advection_mod !OVERWRITING: Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap use kinds, only : real_kind - use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd + use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd, qsize_d use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp use element_mod, only : element_t use fvm_control_volume_mod, only : fvm_struct @@ -269,7 +269,7 @@ subroutine prim_advec_init1(par, elem, n_domains) type (element_t), intent(in) :: elem(:) call initEdgeBuffer (par,edgeAdvQ3 ,elem(:),max(nlev,qsize*nlev*3)) call initEdgeBuffer (par,edgeAdv1 ,elem(:),nlev ) - call initEdgeBuffer (par,edgeAdv ,elem(:),qsize*nlev ) + call initEdgeBuffer (par,edgeAdv ,elem(:),qsize_d*nlev ) call initEdgeBuffer (par,edgeAdv_p1,elem(:),qsize*nlev + nlev ) call initEdgeBuffer (par,edgeAdvQ2 ,elem(:),qsize*nlev*2 ) call initEdgeBuffer (par,edgeAdv3 ,elem(:),nlev*3 ) @@ -432,7 +432,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , enddo call limiter2d_zero(state_Qdp,2,nt_qdp) call t_startf('ah_scalar_PEU') - call edgeVpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) + call edgeVpack_openacc(edgeAdv,state_qdp,qsize_d*nlev,0,elem(:),1,nelemd,2,nt_qdp) !$omp end master !$omp barrier @@ -442,7 +442,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse 
, elem , hvcoord , hybrid , !$omp barrier !$omp master - call edgeVunpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) + call edgeVunpack_openacc(edgeAdv,state_qdp,qsize_d*nlev,0,elem(:),1,nelemd,2,nt_qdp) call t_stopf('ah_scalar_PEU') !$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:)) do ie = 1 , nelemd @@ -785,7 +785,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , ! note: eta_dot_dpdn is actually dimension nlev+1, but nlev+1 data is ! all zero so we only have to DSS 1:nlev call t_startf('eus_PEU') - call edgeVpack_openacc(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) + call edgeVpack_openacc(edgeAdv , state_Qdp , nlev*qsize_d , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) !$omp end master !$omp barrier @@ -795,7 +795,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , !$omp barrier !$omp master - call edgeVunpack_openacc( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) + call edgeVunpack_openacc( edgeAdv , state_Qdp , nlev*qsize_d , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) call t_stopf('eus_PEU') !$acc parallel loop gang vector collapse(4) present(state_Qdp,elem(:)) do ie = 1 , nelemd @@ -826,7 +826,7 @@ subroutine limiter2d_zero(Qdp,tdim,tl) implicit none integer , intent(in ) :: tdim integer , intent(in ) :: tl - real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd) + real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize_d,tdim,nelemd) ! local real (kind=real_kind) :: mass,mass_new real (kind=real_kind) :: qtmp(np,np) From 8f8eb4f46ed25ef9c9ab742df9a48a11e790eea3 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 3 Mar 2016 15:03:52 -0500 Subject: [PATCH 20/27] Revert "Fixing qsize_d inconsistency in preqx_acc" This reverts commit d9de5726416a678c149db58667b36331615894e0. 
--- .../homme/src/preqx_acc/prim_advection_mod.F90 | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/components/homme/src/preqx_acc/prim_advection_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 index acc0c2161f1..b9ed14c295c 100644 --- a/components/homme/src/preqx_acc/prim_advection_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -10,7 +10,7 @@ module prim_advection_mod !OVERWRITING: Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap use kinds, only : real_kind - use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd, qsize_d + use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp use element_mod, only : element_t use fvm_control_volume_mod, only : fvm_struct @@ -269,7 +269,7 @@ subroutine prim_advec_init1(par, elem, n_domains) type (element_t), intent(in) :: elem(:) call initEdgeBuffer (par,edgeAdvQ3 ,elem(:),max(nlev,qsize*nlev*3)) call initEdgeBuffer (par,edgeAdv1 ,elem(:),nlev ) - call initEdgeBuffer (par,edgeAdv ,elem(:),qsize_d*nlev ) + call initEdgeBuffer (par,edgeAdv ,elem(:),qsize*nlev ) call initEdgeBuffer (par,edgeAdv_p1,elem(:),qsize*nlev + nlev ) call initEdgeBuffer (par,edgeAdvQ2 ,elem(:),qsize*nlev*2 ) call initEdgeBuffer (par,edgeAdv3 ,elem(:),nlev*3 ) @@ -432,7 +432,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , enddo call limiter2d_zero(state_Qdp,2,nt_qdp) call t_startf('ah_scalar_PEU') - call edgeVpack_openacc(edgeAdv,state_qdp,qsize_d*nlev,0,elem(:),1,nelemd,2,nt_qdp) + call edgeVpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) !$omp end master !$omp barrier @@ -442,7 +442,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse 
, elem , hvcoord , hybrid , !$omp barrier !$omp master - call edgeVunpack_openacc(edgeAdv,state_qdp,qsize_d*nlev,0,elem(:),1,nelemd,2,nt_qdp) + call edgeVunpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) call t_stopf('ah_scalar_PEU') !$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:)) do ie = 1 , nelemd @@ -785,7 +785,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , ! note: eta_dot_dpdn is actually dimension nlev+1, but nlev+1 data is ! all zero so we only have to DSS 1:nlev call t_startf('eus_PEU') - call edgeVpack_openacc(edgeAdv , state_Qdp , nlev*qsize_d , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) + call edgeVpack_openacc(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) !$omp end master !$omp barrier @@ -795,7 +795,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , !$omp barrier !$omp master - call edgeVunpack_openacc( edgeAdv , state_Qdp , nlev*qsize_d , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) + call edgeVunpack_openacc( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) call t_stopf('eus_PEU') !$acc parallel loop gang vector collapse(4) present(state_Qdp,elem(:)) do ie = 1 , nelemd @@ -826,7 +826,7 @@ subroutine limiter2d_zero(Qdp,tdim,tl) implicit none integer , intent(in ) :: tdim integer , intent(in ) :: tl - real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize_d,tdim,nelemd) + real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd) ! 
local real (kind=real_kind) :: mass,mass_new real (kind=real_kind) :: qtmp(np,np) From 2fc14bb28f92346e32f091a518ac8810dd12c54c Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Thu, 3 Mar 2016 15:20:35 -0500 Subject: [PATCH 21/27] Fixing state_qdp qsize / qsize_d discrepancy --- components/homme/src/share/element_mod.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/homme/src/share/element_mod.F90 b/components/homme/src/share/element_mod.F90 index 84811e50676..d6ce91490ac 100644 --- a/components/homme/src/share/element_mod.F90 +++ b/components/homme/src/share/element_mod.F90 @@ -36,7 +36,7 @@ module element_mod real (kind=real_kind) :: ps_v(np,np,timelevels) ! surface pressure 4 real (kind=real_kind) :: phis(np,np) ! surface geopotential (prescribed) 5 real (kind=real_kind) :: Q (np,np,nlev,qsize_d) ! Tracer concentration 6 - real (kind=real_kind), pointer :: Qdp (:,:,:,:,:) ! Tracer mass 7 (np,np,nlev,qsize_d,2) + real (kind=real_kind), pointer :: Qdp (:,:,:,:,:) ! Tracer mass 7 (np,np,nlev,qsize,2) end type elem_state_t integer(kind=int_kind),public,parameter::StateComponents=8 ! 
num prognistics variables (for prim_restart_mod.F90) @@ -564,12 +564,12 @@ end subroutine allocate_element_desc !___________________________________________________________________ subroutine setup_element_pointers(elem) - use dimensions_mod, only: nelemd + use dimensions_mod, only: nelemd, qsize implicit none type(element_t), intent(inout) :: elem(:) #if USE_OPENACC integer :: ie - allocate( state_Qdp (np,np,nlev,qsize_d,2,nelemd) ) + allocate( state_Qdp (np,np,nlev,qsize,2,nelemd) ) allocate( derived_vn0 (np,np,2,nlev,nelemd) ) allocate( derived_divdp (np,np,nlev,nelemd) ) allocate( derived_divdp_proj (np,np,nlev,nelemd) ) From 1a437cb904def0d887961d3ab4aa676a8a60db37 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Fri, 4 Mar 2016 09:23:19 -0500 Subject: [PATCH 22/27] Custom flags for baro* & Titan cmake cache -Beefing up the titan.cmake initial cache file -Caching the PREQX_SRCS_F90 variable for custom flags application outside of preqx and preqx_acc -Adding the -d option to aprun for regression tests -Adding custom flags capabilities to baro* executables for faster compilation on Titan --- .../homme/cmake/machineFiles/titan.cmake | 8 +++++++ .../homme/compile_scripts/titan/compile.cpu | 23 +++++++------------ components/homme/src/preqx/CMakeLists.txt | 2 +- components/homme/src/preqx_acc/CMakeLists.txt | 2 +- .../test/reg_test/run_tests/testing-utils.sh | 6 ++++- .../homme/test_execs/baroA/CMakeLists.txt | 4 ++++ .../homme/test_execs/baroB/CMakeLists.txt | 4 ++++ .../homme/test_execs/baroC/CMakeLists.txt | 4 ++++ .../test_execs/baroCam-acc/CMakeLists.txt | 4 ++++ .../homme/test_execs/baroCam/CMakeLists.txt | 4 ++++ 10 files changed, 43 insertions(+), 18 deletions(-) diff --git a/components/homme/cmake/machineFiles/titan.cmake b/components/homme/cmake/machineFiles/titan.cmake index 2a73ce474f6..30e77efe3de 100644 --- a/components/homme/cmake/machineFiles/titan.cmake +++ b/components/homme/cmake/machineFiles/titan.cmake @@ -5,7 +5,15 @@ SET (CMAKE_CXX_COMPILER 
CC CACHE FILEPATH "") SET (NETCDF_DIR $ENV{NETCDF_DIR} CACHE FILEPATH "") SET (PNETCDF_DIR $ENV{PARALLEL_NETCDF_DIR} CACHE FILEPATH "") SET (HDF5_DIR $ENV{HDF5_DIR} CACHE FILEPATH "") +SET (DEBUG_FLAGS " " CACHE STRING "") +SET (Netcdf_NC_CONFIG_BIN "/opt/cray/netcdf/4.3.3.1/bin" CACHE FILEPATH "") +SET (HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "") # The following is required for cross compilation SET (CMAKE_SYSTEM_NAME Catamount CACHE FILEPATH "") + +#Regression test parameters SET (USE_QUEUING FALSE CACHE BOOL "") +SET (USE_NUM_PROCS 64 CACHE STRING "") + + diff --git a/components/homme/compile_scripts/titan/compile.cpu b/components/homme/compile_scripts/titan/compile.cpu index 2803047f277..76e345a4818 100755 --- a/components/homme/compile_scripts/titan/compile.cpu +++ b/components/homme/compile_scripts/titan/compile.cpu @@ -5,26 +5,21 @@ do_clean=1 do_make=1 HOMME_ROOT=/ccs/home/$USER/ACME/components/homme -NTRACERS=50 -NLEVELS=64 +BUILD_ROOT=/lustre/atlas/scratch/imn/stf006/homme_tests +NTRACERS=4 +NLEVELS=26 source ./env_mach_specific.cpu -mkdir -p titan-cpu || exit -1 -cd titan-cpu +mkdir -p $BUILD_ROOT/titan-cpu || exit -1 +cd $BUILD_ROOT/titan-cpu if [ $do_cmake -eq 1 ]; then rm -rf CMakeFiles CMakeCache.txt cmake \ -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ - -DCMAKE_Fortran_COMPILER=ftn \ - -DCMAKE_C_COMPILER=cc \ - -DCMAKE_CXX_COMPILER=CC \ - -DOPT_FLAGS="-O0 -Kieee -Mvect=nosse" \ - -DDEBUG_FLAGS=" " \ - -DNETCDF_DIR=$NETCDF_DIR \ - -DWITH_PNETCDF=FALSE \ - -DHDF5_DIR=$HDF5_DIR \ + -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \ + -DWITH_PNETCDF=TRUE \ -DPREQX_NP=4 \ -DPREQX_PLEV=$NLEVELS \ -DPREQX_QSIZE_D=$TRACERS \ @@ -33,12 +28,9 @@ cmake \ -DBUILD_HOMME_PREQX_ACC=TRUE \ -DENABLE_OPENMP=TRUE \ -DHOMME_PROJID=STF006 \ - -DHOMME_FIND_BLASLAPACK=TRUE \ - -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \ -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \ $HOMME_ROOT fi -# -DOPT_FLAGS="-fastsse -Mvect" \ if [ $do_clean 
-eq 1 ]; then make clean || exit -1 @@ -48,4 +40,5 @@ if [ $do_make -eq 1 ]; then make -j32 || exit -1 mkdir -p $HOMME_ROOT/build/preqx cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.cpu || exit -1 +#cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 fi diff --git a/components/homme/src/preqx/CMakeLists.txt b/components/homme/src/preqx/CMakeLists.txt index 77d8844f80c..710b9c0a964 100644 --- a/components/homme/src/preqx/CMakeLists.txt +++ b/components/homme/src/preqx/CMakeLists.txt @@ -113,7 +113,7 @@ SET(PREQX_SRCS_F90 ${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90 ${PHYSICS_DIR}/column_model_mod.F90 ${PHYSICS_DIR}/column_types_mod.F90 -) + CACHE INTERNAL "") SET(PREQX_SRCS_C diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt index 84c94b211ed..4a4217c5e08 100644 --- a/components/homme/src/preqx_acc/CMakeLists.txt +++ b/components/homme/src/preqx_acc/CMakeLists.txt @@ -114,7 +114,7 @@ SET(PREQX_SRCS_F90 ${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90 ${PHYSICS_DIR}/column_model_mod.F90 ${PHYSICS_DIR}/column_types_mod.F90 -) + CACHE INTERNAL "") SET(PREQX_SRCS_C ${UTILS_SHARE_DIR}/shr_vmath_fwrap.c diff --git a/components/homme/test/reg_test/run_tests/testing-utils.sh b/components/homme/test/reg_test/run_tests/testing-utils.sh index d6af5974388..45656fd1f61 100644 --- a/components/homme/test/reg_test/run_tests/testing-utils.sh +++ b/components/homme/test/reg_test/run_tests/testing-utils.sh @@ -488,7 +488,11 @@ execLine() { echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT elif [ "$HOMME_Submission_Type" = pbs ]; then - echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT + if [ -n "${OMP_NUMBER_THREADS}" ]; then + echo "aprun -n ${NUM_MPI_PROCS} -d ${OMP_NUMBER_THREADS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT + else + echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT + fi else echo "mpiexec -n 
${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT diff --git a/components/homme/test_execs/baroA/CMakeLists.txt b/components/homme/test_execs/baroA/CMakeLists.txt index cf504ddc027..a378d446ec0 100644 --- a/components/homme/test_execs/baroA/CMakeLists.txt +++ b/components/homme/test_execs/baroA/CMakeLists.txt @@ -1,4 +1,8 @@ +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroA preqx 8 4 20 FALSE TRUE) diff --git a/components/homme/test_execs/baroB/CMakeLists.txt b/components/homme/test_execs/baroB/CMakeLists.txt index ef96e91490e..1d4ba9cbd5f 100644 --- a/components/homme/test_execs/baroB/CMakeLists.txt +++ b/components/homme/test_execs/baroB/CMakeLists.txt @@ -1,4 +1,8 @@ +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroB preqx 8 4 20 TRUE FALSE) diff --git a/components/homme/test_execs/baroC/CMakeLists.txt b/components/homme/test_execs/baroC/CMakeLists.txt index b1500f3abe1..c4118fa1298 100644 --- a/components/homme/test_execs/baroC/CMakeLists.txt +++ b/components/homme/test_execs/baroC/CMakeLists.txt @@ -1,4 +1,8 @@ +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroC preqx 4 4 26 FALSE TRUE) diff --git a/components/homme/test_execs/baroCam-acc/CMakeLists.txt b/components/homme/test_execs/baroCam-acc/CMakeLists.txt index 529c5db680b..cc4e07b4393 100644 --- 
a/components/homme/test_execs/baroCam-acc/CMakeLists.txt +++ b/components/homme/test_execs/baroCam-acc/CMakeLists.txt @@ -1,6 +1,10 @@ # Set the qsize_d in dimensions_mod.F90 SET(QSIZE_D 25) SET(USE_OPENACC TRUE) +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroCam-acc preqx_acc 4 4 26 TRUE TRUE) diff --git a/components/homme/test_execs/baroCam/CMakeLists.txt b/components/homme/test_execs/baroCam/CMakeLists.txt index 21c7160c5c5..a56151eefb0 100644 --- a/components/homme/test_execs/baroCam/CMakeLists.txt +++ b/components/homme/test_execs/baroCam/CMakeLists.txt @@ -1,5 +1,9 @@ # Set the qsize_d in dimensions_mod.F90 SET(QSIZE_D 25) +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroCam preqx 4 4 26 TRUE TRUE) From f011bd58f13aa37e8ca8f7af2815126670a8efc0 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Fri, 4 Mar 2016 10:17:25 -0500 Subject: [PATCH 23/27] Titan aprun and threading working in cmake --- components/homme/cmake/machineFiles/titan.cmake | 1 + .../homme/test/reg_test/run_tests/testing-utils.sh | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/components/homme/cmake/machineFiles/titan.cmake b/components/homme/cmake/machineFiles/titan.cmake index 30e77efe3de..aa94a807240 100644 --- a/components/homme/cmake/machineFiles/titan.cmake +++ b/components/homme/cmake/machineFiles/titan.cmake @@ -8,6 +8,7 @@ SET (HDF5_DIR $ENV{HDF5_DIR} CACHE FILEPATH "") SET (DEBUG_FLAGS " " CACHE STRING "") SET (Netcdf_NC_CONFIG_BIN "/opt/cray/netcdf/4.3.3.1/bin" CACHE FILEPATH "") SET 
(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "") +SET (USE_MPIEXEC "aprun" CACHE STRING "") # The following is required for cross compilation SET (CMAKE_SYSTEM_NAME Catamount CACHE FILEPATH "") diff --git a/components/homme/test/reg_test/run_tests/testing-utils.sh b/components/homme/test/reg_test/run_tests/testing-utils.sh index 45656fd1f61..3bb9555b193 100644 --- a/components/homme/test/reg_test/run_tests/testing-utils.sh +++ b/components/homme/test/reg_test/run_tests/testing-utils.sh @@ -480,20 +480,20 @@ execLine() { # mpirun.lsf is a special case if [ "${MPI_EXEC}" = "mpirun.lsf" ] ; then echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT + elif [ "${MPI_EXEC}" = "aprun" ] ; then + if [ -n "${OMP_NUMBER_THREADS}" ]; then + echo "aprun -n ${NUM_MPI_PROCS} -d ${OMP_NUMBER_THREADS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT + else + echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT + fi else echo "${MPI_EXEC} -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT fi else if [ "$HOMME_Submission_Type" = lsf ]; then echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT - elif [ "$HOMME_Submission_Type" = pbs ]; then - if [ -n "${OMP_NUMBER_THREADS}" ]; then - echo "aprun -n ${NUM_MPI_PROCS} -d ${OMP_NUMBER_THREADS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT - else echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT - fi - else echo "mpiexec -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT From 5a8bc56e83f018c78e41249157d13d3c9c21f325 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Fri, 4 Mar 2016 13:58:50 -0500 Subject: [PATCH 24/27] Almost done enabling GPU OpenACC code with BaroCamMoist New variables to specify OpenACC compiler and linker flags. Fixed the threading line "-d ..." in aprun for cmake regression tests for Titan. 
--- components/homme/compile_scripts/titan/compile.cpu | 2 ++ components/homme/src/preqx_acc/CMakeLists.txt | 2 ++ components/homme/test/reg_test/run_tests/testing-utils.sh | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/components/homme/compile_scripts/titan/compile.cpu b/components/homme/compile_scripts/titan/compile.cpu index 76e345a4818..a88be66966f 100755 --- a/components/homme/compile_scripts/titan/compile.cpu +++ b/components/homme/compile_scripts/titan/compile.cpu @@ -28,6 +28,8 @@ cmake \ -DBUILD_HOMME_PREQX_ACC=TRUE \ -DENABLE_OPENMP=TRUE \ -DHOMME_PROJID=STF006 \ + -DOPENACC_Fortran_FLAGS="-acc -ta=tesla,pin,cc35,cuda7.0 -Minfo=accel" \ + -DOPENACC_Linker_FLAGS="-acc -ta=tesla,pin,cc35,cuda7.0 -Mcuda" \ -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \ $HOMME_ROOT fi diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt index 4a4217c5e08..83e28c4d87b 100644 --- a/components/homme/src/preqx_acc/CMakeLists.txt +++ b/components/homme/src/preqx_acc/CMakeLists.txt @@ -157,6 +157,8 @@ MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}") MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}") SET(USE_OPENACC TRUE) +SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OPENACC_Fortran_FLAGS}") +SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENACC_Linker_FLAGS}") IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP}) SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading") diff --git a/components/homme/test/reg_test/run_tests/testing-utils.sh b/components/homme/test/reg_test/run_tests/testing-utils.sh index 3bb9555b193..be4989a1f01 100644 --- a/components/homme/test/reg_test/run_tests/testing-utils.sh +++ b/components/homme/test/reg_test/run_tests/testing-utils.sh @@ -481,7 +481,7 @@ execLine() { if [ "${MPI_EXEC}" = "mpirun.lsf" ] ; then echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT elif [ "${MPI_EXEC}" = 
"aprun" ] ; then - if [ -n "${OMP_NUMBER_THREADS}" ]; then + if [[ $4 == *"_OMP"* ]]; then echo "aprun -n ${NUM_MPI_PROCS} -d ${OMP_NUMBER_THREADS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT else echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT From 8529a0005b04776b2c159d71e30eca4027828f35 Mon Sep 17 00:00:00 2001 From: Matt Norman Date: Tue, 8 Mar 2016 13:50:02 -0500 Subject: [PATCH 25/27] Adding OpenACC flags to baroCam-acc --- components/homme/compile_scripts/titan/env_mach_specific.cpu | 3 ++- components/homme/test_execs/baroCam-acc/CMakeLists.txt | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/components/homme/compile_scripts/titan/env_mach_specific.cpu b/components/homme/compile_scripts/titan/env_mach_specific.cpu index 6799e8c6966..41aee75a4e7 100644 --- a/components/homme/compile_scripts/titan/env_mach_specific.cpu +++ b/components/homme/compile_scripts/titan/env_mach_specific.cpu @@ -37,7 +37,8 @@ module switch cray-libsci cray-libsci/13.0.4 module load esmf/5.2.0rp2 module switch atp atp/1.7.5 - + module add cudatoolkit + export CRAY_CUDA_MPS=1 export CRAY_CPU_TARGET=istanbul module load cray-netcdf-hdf5parallel/4.3.3.1 diff --git a/components/homme/test_execs/baroCam-acc/CMakeLists.txt b/components/homme/test_execs/baroCam-acc/CMakeLists.txt index cc4e07b4393..f48313201f8 100644 --- a/components/homme/test_execs/baroCam-acc/CMakeLists.txt +++ b/components/homme/test_execs/baroCam-acc/CMakeLists.txt @@ -1,6 +1,8 @@ # Set the qsize_d in dimensions_mod.F90 SET(QSIZE_D 25) SET(USE_OPENACC TRUE) +SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OPENACC_Fortran_FLAGS}") +SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENACC_Linker_FLAGS}") # If the user specified a file for custom compiler options use those IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) From 63b41f45623a28326be131278a08cbccaf693705 Mon Sep 17 00:00:00 2001 From: Mark Taylor Date: Wed, 16 
Mar 2016 08:26:55 -0600 Subject: [PATCH 26/27] Tweaks to Depends file to reflect renamed source files [BFB][ --- cime/machines-acme/Depends.intel | 9 ++--- cime/machines-acme/Depends.intel14 | 34 +++++++++---------- cime/machines-acme/Depends.intelmic | 4 +-- cime/machines-acme/Depends.intelmic14 | 4 +-- cime/machines-acme/Depends.mira | 13 ++++--- .../test/reg_test/run_tests/testing-utils.sh | 4 +++ 6 files changed, 38 insertions(+), 30 deletions(-) diff --git a/cime/machines-acme/Depends.intel b/cime/machines-acme/Depends.intel index c213b908ff3..577f7aa320c 100644 --- a/cime/machines-acme/Depends.intel +++ b/cime/machines-acme/Depends.intel @@ -1,9 +1,10 @@ # PERFOBJS=\ -prim_advection_mod.o \ -edge_mod.o \ -derivative_mod.o \ -bndry_mod.o \ +prim_advection_mod_base.o \ +vertremap_mod_base.o \ +edge_mod_base.o \ +derivative_mod_base.o \ +bndry_mod_base.o \ prim_advance_mod.o \ uwshcu.o diff --git a/cime/machines-acme/Depends.intel14 b/cime/machines-acme/Depends.intel14 index 0c913270b2b..32e4747d7a3 100644 --- a/cime/machines-acme/Depends.intel14 +++ b/cime/machines-acme/Depends.intel14 @@ -6,23 +6,23 @@ #ecosys_mod.o: ecosys_mod.F90 # $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -CU $< -prim_advection_mod.o: prim_advection_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< -edge_mod.o: edge_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< -derivative_mod.o: derivative_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< +PERFOBJS=\ +prim_advection_mod_base.o \ +vertremap_mod_base.o \ +edge_mod_base.o \ +derivative_mod_base.o \ +bndry_mod_base.o \ +prim_advance_mod.o \ +uwshcu.o \ +wetdep.o -bndry_mod.o: bndry_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< - -prim_advance_mod.o: prim_advance_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< - -uwshcu.o: uwshcu.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) 
$(FREEFLAGS) -O3 -no-prec-div $< - -wetdep.o: wetdep.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< +ifeq ($(DEBUG),FALSE) + $(PERFOBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< + $(REDUCED_OPT_OBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O1 $< + $(REDUCED_PRECISION_OBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -fimf-precision=low -fp-model fast $< +endif diff --git a/cime/machines-acme/Depends.intelmic b/cime/machines-acme/Depends.intelmic index 0f84949daa3..6f05aee44a7 100644 --- a/cime/machines-acme/Depends.intelmic +++ b/cime/machines-acme/Depends.intelmic @@ -1,6 +1,6 @@ -derivative_mod.o: derivative_mod.F90 - $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< +#derivative_mod_base.o: derivative_mod_base.F90 +# $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< shr_ncread_mod.o: shr_ncread_mod.F90 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) $< diff --git a/cime/machines-acme/Depends.intelmic14 b/cime/machines-acme/Depends.intelmic14 index 0f84949daa3..6f05aee44a7 100644 --- a/cime/machines-acme/Depends.intelmic14 +++ b/cime/machines-acme/Depends.intelmic14 @@ -1,6 +1,6 @@ -derivative_mod.o: derivative_mod.F90 - $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< +#derivative_mod_base.o: derivative_mod_base.F90 +# $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< shr_ncread_mod.o: shr_ncread_mod.F90 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) $< diff --git a/cime/machines-acme/Depends.mira b/cime/machines-acme/Depends.mira index 85453dd931a..7f9c05b6299 100644 --- a/cime/machines-acme/Depends.mira +++ b/cime/machines-acme/Depends.mira @@ -13,13 +13,16 @@ shr_reprosum_mod.o: shr_reprosum_mod.F90 # These routines benefit from -qnostrict without violating the bfb test PERFOBJS=\ 
-prim_advection_mod.o \ -edge_mod.o \ -derivative_mod.o \ -bndry_mod.o \ +prim_advection_mod_base.o \ +vertremap_mod_base.o \ +edge_mod_base.o \ +derivative_mod_base.o \ +bndry_mod_base.o \ prim_advance_mod.o \ uwshcu.o \ -wetdep.o +wetdep.o + + ifeq ($(DEBUG),FALSE) $(PERFOBJS): %.o: %.F90 $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -qnostrict $< diff --git a/components/homme/test/reg_test/run_tests/testing-utils.sh b/components/homme/test/reg_test/run_tests/testing-utils.sh index be4989a1f01..8ce21fcb1f9 100644 --- a/components/homme/test/reg_test/run_tests/testing-utils.sh +++ b/components/homme/test/reg_test/run_tests/testing-utils.sh @@ -480,6 +480,8 @@ execLine() { # mpirun.lsf is a special case if [ "${MPI_EXEC}" = "mpirun.lsf" ] ; then echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT + elif [ "${MPI_EXEC}" = "runjob" ]; then + echo "runjob -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} --block \$COBALT_PARTNAME --verbose=INFO : $EXEC $OPT" >> $RUN_SCRIPT elif [ "${MPI_EXEC}" = "aprun" ] ; then if [[ $4 == *"_OMP"* ]]; then echo "aprun -n ${NUM_MPI_PROCS} -d ${OMP_NUMBER_THREADS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT @@ -509,6 +511,8 @@ serExecLine() { # mpirun.lsf is a special case if [ "${MPI_EXEC}" = "mpirun.lsf" ] ; then echo "$EXEC" >> $RUN_SCRIPT + elif [ "${MPI_EXEC}" = "runjob" ]; then + echo "runjob -n 1 ${MPI_OPTIONS} --block \$COBALT_PARTNAME --verbose=INFO : $EXEC" >> $RUN_SCRIPT else echo "${MPI_EXEC} -n 1 ${MPI_OPTIONS} $EXEC" >> $RUN_SCRIPT fi From f5e6c9acf269f256f7be4cc3e8cb4f12f0f3b81b Mon Sep 17 00:00:00 2001 From: Mark Taylor Date: Thu, 17 Mar 2016 17:06:47 -0600 Subject: [PATCH 27/27] enable ACC executable by default in cmake [BFB] --- components/homme/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt index e226bad7043..0a39c868c57 100644 --- a/components/homme/CMakeLists.txt +++ 
b/components/homme/CMakeLists.txt @@ -56,7 +56,7 @@ ENDIF () # Some options for conditional builds OPTION(BUILD_HOMME_SWEQX "Shallow water equations FEM" ON) OPTION(BUILD_HOMME_PREQX "Primitive equations FEM" ON) -OPTION(BUILD_HOMME_PREQX_ACC "Primitive equations FEM with OpenACC" OFF) +OPTION(BUILD_HOMME_PREQX_ACC "Primitive equations FEM with OpenACC" ON) OPTION(BUILD_HOMME_SWDGX "Shallow water equations DG" OFF) OPTION(BUILD_HOMME_PRIMDGX "Primitive equations DG" OFF) OPTION(BUILD_HOMME_SWIM "Shallow water equations implicit" OFF)