Skip to content

Commit

Permalink
Merge branch 'mt5555/nggps-nh' into next (PR #2708)
Browse files Browse the repository at this point in the history
namelists only, skipping integration testing

[BFB]
  • Loading branch information
mt5555 committed Feb 2, 2019
2 parents 44d5be1 + b72f231 commit 5fc2505
Show file tree
Hide file tree
Showing 9 changed files with 297 additions and 13 deletions.
2 changes: 2 additions & 0 deletions components/homme/cmake/machineFiles/cori-knl.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ SET (CMAKE_Fortran_COMPILER ftn CACHE FILEPATH "")
SET (CMAKE_C_COMPILER cc CACHE FILEPATH "")
SET (CMAKE_CXX_COMPILER CC CACHE FILEPATH "")

#SET (PIO_FILESYSTEM_HINTS lustre CACHE FILEPATH "")

SET (NETCDF_DIR $ENV{NETCDF_DIR} CACHE FILEPATH "")
SET (PNETCDF_DIR $ENV{PARALLEL_NETCDF_DIR} CACHE FILEPATH "")
SET (HDF5_DIR $ENV{HDF5_DIR} CACHE FILEPATH "")
Expand Down
162 changes: 162 additions & 0 deletions components/homme/test/benchmarks/NGGPS/coriknl-nh.job
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/bin/tcsh
#
# This script will configure, build and run HOMME using the
# NGGPS benchmark problem
#
# 128 levels
# 10 tracers
#
# ne4 ultra-low-res for testing
# ne30 (1 degree)
# ne256 15 days, 2h, 512 nodes
# ne1024 15 days, 32h, 2048 nodes
#
#
#SBATCH --job-name nhbench
#SBATCH -N 512
#SBATCH -C knl
#SBATCH -q debug
#SBATCH --time=0:30:00
#
#
# Paths to the source code, the cmake machine file and the working directory.
#
set wdir = ~/scratch2/knl/nggpsbench # working directory; the build and run directories are created below it
set HOMME = `pwd`/../../.. # /path/to/acme/components/homme (three levels above the current directory)
set MACH = $HOMME/cmake/machineFiles/cori-knl.cmake

#
# Which problem? Uncomment exactly one namelist/name pair:
# tiny, nh-ne256, nh-ne256-io or nh-ne1024.
# "name" only labels the timing and summary files written after the run.
#
# use 4 nodes
#set namelist = nggps-tiny.nl ; set name = tiny
#set namelist = nggps-nh-ne256.nl ; set name = nh-ne256
#set namelist = nggps-nh-ne256-io.nl ; set name = nh-ne256
set namelist = nggps-nh-ne1024.nl ; set name = nh-ne1024

# good Edison nodes: 64 128 256 512 1024 2048 4096 5462
# elem/node 6144 3072 1536 768 384 192 96 71/72
#
# good KNL nodes: 9145 6144 3072 2048 1024 512 256 128 64
# ele/node 42/43 64 128 192
# elem/core 0/1 1 2 3 6 12 24 48 96 ( using 64 cores/node )



#
# MPI / OpenMP layout used to build the mpi run command
#
setenv PER_NODE 64 # MPI tasks per node
setenv OMP_NUM_THREADS 2
# Cori has 96GB per node. This had to be lowered to 8M on 3K nodes.
setenv OMP_STACKSIZE 16M
#setenv OMP_STACKSIZE 32M # needed for 256 nodes.

# number of virtual cores per MPI task = virtual cores per node / MPI tasks per node.
# Use 272 instead of 256 when PER_NODE divides 272 rather than 256.
@ VC_PER_MPI = 256 / $PER_NODE

# thread placement: bind to whole cores (switch both settings together
# to use the per-thread alternative)
set bind = --cpu_bind=core
setenv KMP_AFFINITY granularity=core,scatter
#set bind = --cpu_bind=thread
#setenv KMP_AFFINITY granularity=thread,scatter


# Compute the number of nodes and MPI tasks, then assemble the srun command.
#
# Node count precedence: first script argument, else SLURM_NNODES, else 1.
if (! ${?SLURM_NNODES} ) then
    # not running in batch system. set some defaults so this script
    # will work on a workstation
    set SLURM_NNODES=1
endif
set NNODES = $SLURM_NNODES

if ($#argv >= 1) then
    # override number of nodes with $1
    set NNODES = $1
endif

# total MPI tasks = nodes * MPI tasks per node
set NMPI = $NNODES
@ NMPI *= $PER_NODE
#if ( $NMPI > 393216 ) set NMPI = 393216 # max number of elements in NE=256 mesh



echo NODES = $NNODES
echo NMPI_PER_NODE = $PER_NODE
echo NTHREADS_PER_MPI = $OMP_NUM_THREADS
# note: in tests on 4K nodes, the --bcast and --compress options were much slower. DONT USE:
#set mpirun = "srun --bcast=/tmp/${SLURM_JOB_ID} --compress=lz4 -n $NMPI -N $NNODES -c $VC_PER_MPI $bind"
set mpirun = "srun -n $NMPI -N $NNODES -c $VC_PER_MPI $bind"
echo mpi command:
echo $mpirun




# Input, build and run locations plus the compile-time problem size.
set input = $HOMME/test/benchmarks/NGGPS # input files for test case
set vdir = $HOMME/test/vcoord # vertical coordinate files
set bld = $wdir/bld
set run = $wdir/run-$NNODES-$OMP_NUM_THREADS-$$ # one directory per node count, thread count and shell PID
set nlev = 128 # passed to cmake as PREQX_PLEV
set qsize = 10 # passed to cmake as QSIZE_D

#
# BUILD THETA-L
# rm $bld/CMakeCache.txt to force re-configure
#
# default build is haswell. switch to knl:
module unload craype-haswell ; module load craype-mic-knl

mkdir -p $bld
cd $bld
set exe = $bld/src/theta-l/theta-l
# Set to 1 to run make even when the executable already exists.
# The default of 0 keeps the previous effective behavior (build only when
# the executable is missing): the flag used to be set but was never tested.
set build = 0

if (! -f CMakeCache.txt) then
    # no cached configuration: remove leftovers and configure from scratch
    rm -rf CMakeFiles CMakeCache.txt src
    echo "running CMAKE to configure the model"

    cmake -C $MACH -DQSIZE_D=$qsize -DPREQX_PLEV=$nlev -DPREQX_NP=4 \
        -DBUILD_HOMME_SWEQX=FALSE -DPREQX_USE_PIO=TRUE \
        -DPREQX_USE_ENERGY=FALSE $HOMME

    if ($status) exit
    make -j4 clean
endif
if ( $build == 1 || ! -f $exe ) then
    make -j4 theta-l
    if ($status) exit
endif

#
# Run the benchmark from its own run directory
#
mkdir -p $run/movies $run/restart
cd $run

# stage the 128-level vertical coordinate files in the run directory
rsync -a $vdir/sab?-128.ascii $run

# the namelist must be named input.nl, otherwise the perf settings are not read
\rm -f input.nl
\cp -f $input/$namelist input.nl

date
$mpirun $exe < input.nl
date

if ( -f HommeTime ) then
    # keep the timings under a name that records the problem and node count
    set label = $name.nodes${NNODES}
    set timingfile = $label.HommeTime
    set summary = $label.summary
    mv HommeTime $timingfile

    # total run time (not counting init); this first entry (re)creates the summary
    grep -a prim_main_loop $timingfile | head -1 | tee $summary

    # breakdown dyn, tracers, remap. about 97% of the cost:
    foreach timer ( prim_step_dyn PAT_remap vertical_remap )
        grep -a $timer $timingfile | head -1 | tee -a $summary
    end

    # record the namelist that produced these timings
    echo "run parameters:" >> $summary
    cat input.nl >> $summary
endif

4 changes: 0 additions & 4 deletions components/homme/test/benchmarks/NGGPS/coriknl.job
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,6 @@ mkdir -p $run/movies
cd $run


# default: assume pure sigma levels:
set vfile_mid = "./acme-72m.ascii"
set vfile_int = "./acme-72i.ascii"

# copy all vertical levels to run directory
rsync -a $vdir/sab?-128.ascii $run

Expand Down
4 changes: 0 additions & 4 deletions components/homme/test/benchmarks/NGGPS/edison.job
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,6 @@ mkdir -p $run/movies
cd $run


# default: assume pure sigma levels:
set vfile_mid = "./acme-72m.ascii"
set vfile_int = "./acme-72i.ascii"

# copy all vertical levels to run directory
rsync -a $vdir/sab?-128.ascii $run

Expand Down
9 changes: 4 additions & 5 deletions components/homme/test/benchmarks/NGGPS/nggps-ne256-sl.nl
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@ disable_diagnostics = .true.
restartfreq = 43200
restartfile = "./R0001"
runtype = 0
theta_hydrostatic_mode=.true.
theta_advect_form = 1
tstep=40

!rsplit=3
!qsplit = 1

semi_lagrange_nearest_point_lev=100
cubed_sphere_map=2
rsplit = 1
qsplit = 8
Expand All @@ -32,7 +31,7 @@ integration = "explicit"
nu=7e11
nu_div=7e11
nu_p=7e11
nu_q=7e11
nu_q=0
nu_s=7e11
nu_top = 0e5
se_ftype = 0
Expand Down
2 changes: 2 additions & 0 deletions components/homme/test/benchmarks/NGGPS/nggps-ne256.nl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ disable_diagnostics = .true.
restartfreq = 43200
restartfile = "./R0001"
runtype = 0
theta_hydrostatic_mode=.true.
theta_advect_form = 1
tstep=40
rsplit=3
qsplit = 1
Expand Down
61 changes: 61 additions & 0 deletions components/homme/test/benchmarks/NGGPS/nggps-nh-ne1024.nl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
&ctl_nl
NThreads=-1
partmethod = 4
topology = "cube"
test_case = "jw_baroclinic"
u_perturb = 1
rotate_grid = 0
ne=1024
qsize = 10
! tstep=9, 200 steps = 30min (Benchmark reports 30min time)
nmax = 200
statefreq=200
disable_diagnostics = .true.
theta_hydrostatic_mode=.false.
theta_advect_form = 1
runtype = 0
tstep=9
rsplit=8
qsplit = 1
tstep_type = 7
integration = "explicit"
nu=1e10
nu_div=1e10
nu_p=1e10
nu_q=1e10
nu_s=1e10
nu_top = 0e5
se_ftype = 0
limiter_option = 9
vert_remap_q_alg = 1
hypervis_order = 2
hypervis_subcycle=1
hypervis_subcycle_q=1
/
&vert_nl
vform = "ccm"
vfile_mid = './sabm-128.ascii'
vfile_int = './sabi-128.ascii'
/

&prof_inparm
profile_outpe_num = 100
profile_single_file = .true.
/

&analysis_nl
! to compare with EUL ref solution:
! interp_nlat = 32
! interp_nlon = 64
! interp_gridtype=2

output_timeunits=1,1
output_frequency=0,0
output_start_time=0,0
output_end_time=30000,30000
output_varnames1='ps','zeta'
output_varnames2='Q','Q2','Q3','Q4','Q5'
io_stride=8
output_type = 'pnetcdf64'
/

65 changes: 65 additions & 0 deletions components/homme/test/benchmarks/NGGPS/nggps-nh-ne256.nl
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
&ctl_nl
NThreads=-1
partmethod = 4
topology = "cube"
test_case = "jw_baroclinic"
u_perturb = 1
rotate_grid = 0
ne=256
qsize = 10
! tstep=36, 360 steps = 2h (Benchmark reports 2h time)
! NOTE(review): 36 x 360 = 12960 s = 3.6h, not 2h; 200 steps would give 2h -- confirm nmax/tstep
nmax = 360
!ndays=2
statefreq=200
disable_diagnostics = .true.
!disable_diagnostics = .false.
restartfreq = 1
restartfile = "./R0001"
runtype = 0
theta_hydrostatic_mode=.false.
theta_advect_form = 1
tstep=36
rsplit=8
qsplit = 1
tstep_type = 7
integration = "explicit"
nu=1e12
nu_div=1e12
nu_p=1e12
nu_q=1e12
nu_s=1e12
nu_top = 0e5
se_ftype = 0
limiter_option = 9
vert_remap_q_alg = 1
hypervis_order = 2
hypervis_subcycle=1
hypervis_subcycle_q=1
/
&vert_nl
vform = "ccm"
vfile_mid = './sabm-128.ascii'
vfile_int = './sabi-128.ascii'
/

&prof_inparm
profile_outpe_num = 100
profile_single_file = .true.
/

&analysis_nl
! to compare with EUL ref solution:
! interp_nlat = 32
! interp_nlon = 64
! interp_gridtype=2

output_timeunits=1,1
output_frequency=0,0
output_start_time=0,0
output_end_time=30000,30000
output_varnames1='ps','zeta'
output_varnames2='Q','Q2','Q3','Q4','Q5'
io_stride=8
output_type = 'pnetcdf'
/

1 change: 1 addition & 0 deletions components/homme/test/benchmarks/NGGPS/nggps-tiny.nl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ nmax = 360
statefreq=360
restartfreq = 43200
restartfile = "./R0001"
theta_hydrostatic_mode=.true.
runtype = 0
tstep=40
rsplit=3
Expand Down

0 comments on commit 5fc2505

Please sign in to comment.