-
Notifications
You must be signed in to change notification settings - Fork 383
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'mt5555/nggps-nh' into next (PR #2708)
namelists only, skipping integration testing [BFB]
- Loading branch information
Showing
9 changed files
with
297 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
#!/bin/tcsh | ||
# | ||
# This script will configure, build and run HOMME using the | ||
# NGGPS benchmark problem | ||
# | ||
# 128 levels | ||
# 10 tracers | ||
# | ||
# ne4 ultra-low-res for testing | ||
# ne30 (1 degree) | ||
# ne256 15 days, 2h, 512 nodes | ||
# ne1024 15 days, 32h, 2048 nodes | ||
# | ||
# | ||
#SBATCH --job-name nhbench | ||
#SBATCH -N 512 | ||
#SBATCH -C knl | ||
#SBATCH -q debug | ||
#SBATCH --time=0:30:00 | ||
# | ||
# set paths to source code, build directory and run directory | ||
# | ||
set wdir = ~/scratch2/knl/nggpsbench # run directory | ||
set HOMME = `pwd`/../../.. # /path/to/acme/components/homme | ||
set MACH = $HOMME/cmake/machineFiles/cori-knl.cmake | ||
|
||
# | ||
# Which problem? tiny, ne256 or ne1024 configuration | ||
# | ||
# use 4 nodes | ||
#set namelist = nggps-tiny.nl ; set name = tiny | ||
#set namelist = nggps-nh-ne256.nl ; set name = nh-ne256 | ||
#set namelist = nggps-nh-ne256-io.nl ; set name = nh-ne256 | ||
set namelist = nggps-nh-ne1024.nl ; set name = nh-ne1024 | ||
|
||
# good Edison nodes: 64 128 256 512 1024 2048 4096 5462 | ||
# elem/node 6144 3072 1536 768 384 192 96 71/72 | ||
# | ||
# good KNL nodes: 9145 6144 3072 2048 1024 512 256 128 64 | ||
# ele/node 42/43 64 128 192 | ||
# elem/core 0/1 1 2 3 6 12 24 48 96 ( using 64 cores/node ) | ||
|
||
|
||
|
||
# | ||
# mpi run command | ||
# | ||
setenv OMP_STACKSIZE 16M # Cori has 96GB per node. had to lower to 8M on 3K nodes | ||
#setenv OMP_STACKSIZE 32M # needed for 256 nodes. | ||
setenv PER_NODE 64 # MPI per node | ||
setenv OMP_NUM_THREADS 2 | ||
|
||
# number of virtual cores per MPI task | ||
# Starts as the per-node virtual core total and is then divided by PER_NODE. | ||
set VC_PER_MPI = 256 # Set this to 272 if PER_NODE evenly divides 272 rather than 256 | ||
@ VC_PER_MPI /= $PER_NODE | ||
|
||
setenv KMP_AFFINITY granularity=core,scatter | ||
set bind = --cpu_bind=core | ||
#setenv KMP_AFFINITY granularity=thread,scatter | ||
#set bind = --cpu_bind=thread | ||
|
||
|
||
# compute number of MPI tasks | ||
# The node count is taken from SLURM_NNODES; when that variable is not set it defaults to 1. | ||
if (! ${?SLURM_NNODES} ) then | ||
# not running in batch system. set some defaults so this script | ||
# will work on a workstation | ||
set SLURM_NNODES=1 | ||
endif | ||
set NNODES = $SLURM_NNODES | ||
|
||
# A first command-line argument, when present, replaces the node count chosen above. | ||
if ($#argv >= 1) then | ||
# override number of nodes with $1 | ||
set NNODES = $1 | ||
endif | ||
|
||
# Total MPI tasks = number of nodes multiplied by PER_NODE (MPI tasks per node). | ||
set NMPI = $NNODES | ||
@ NMPI *= $PER_NODE | ||
#if ( $NMPI > 393216 ) set NMPI = 393216 # max number of elements in NE=256 mesh | ||
|
||
|
||
|
||
# Print the job geometry, then assemble and print the srun launch command. | ||
echo NODES = $NNODES | ||
echo NMPI_PER_NODE = $PER_NODE | ||
echo NTHREADS_PER_MPI = $OMP_NUM_THREADS | ||
# note: in tests on 4K nodes, the --bcast and --compress options were much slower. DONT USE: | ||
#set mpirun = "srun --bcast=/tmp/${SLURM_JOB_ID} --compress=lz4 -n $NMPI -N $NNODES -c $VC_PER_MPI $bind" | ||
# -n total MPI tasks, -N nodes, -c virtual cores per task, plus the CPU binding chosen in $bind | ||
set mpirun = "srun -n $NMPI -N $NNODES -c $VC_PER_MPI $bind" | ||
echo mpi command: | ||
echo $mpirun | ||
|
||
|
||
|
||
|
||
set input = $HOMME/test/benchmarks/NGGPS # input files for test case | ||
set vdir = $HOMME/test/vcoord # vertical coordinate files | ||
set bld = $wdir/bld | ||
set run = $wdir/run-$NNODES-$OMP_NUM_THREADS-$$ | ||
set nlev = 128 | ||
set qsize = 10 | ||
|
||
# | ||
# BUILD THETA-L | ||
# rm $bld/CMakeCache.txt to force re-configure | ||
# | ||
# default build is haswell. switch to knl: | ||
module unload craype-haswell ; module load craype-mic-knl | ||
|
||
# Configure (only when no CMake cache exists) and build theta-l inside $bld. | ||
mkdir -p $bld | ||
cd $bld | ||
set exe = $bld/src/theta-l/theta-l | ||
set build = 1 # set to 1 to force build (make runs even when $exe already exists) | ||
# rm $bld/CMakeCache.txt # remove this file to force re-configure | ||
if (! -f CMakeCache.txt) then | ||
rm -rf CMakeFiles CMakeCache.txt src | ||
echo "running CMAKE to configure the model" | ||
|
||
cmake -C $MACH -DQSIZE_D=$qsize -DPREQX_PLEV=$nlev -DPREQX_NP=4 \ | ||
-DBUILD_HOMME_SWEQX=FALSE -DPREQX_USE_PIO=TRUE \ | ||
-DPREQX_USE_ENERGY=FALSE $HOMME | ||
|
||
if ($status) exit | ||
make -j4 clean | ||
endif | ||
# Build when forced through $build or when the executable is missing. | ||
# Previously $build was set but never tested, so the flag had no effect. | ||
if ( $build == 1 || ! -f $exe ) then | ||
make -j4 theta-l | ||
if ($status) exit | ||
endif | ||
|
||
# | ||
# Run the code | ||
# | ||
mkdir -p $run/movies | ||
mkdir -p $run/restart | ||
cd $run | ||
|
||
|
||
# copy all vertical levels to run directory | ||
rsync -a $vdir/sab?-128.ascii $run | ||
|
||
# namelist has to be called input.nl for perf settings to be read | ||
\rm -f input.nl | ||
\cp -f $input/$namelist input.nl | ||
|
||
# Run the executable with input.nl on stdin; date is printed before and after the run. | ||
date | ||
$mpirun $exe < input.nl | ||
date | ||
# The summary below is produced only when a HommeTime file exists in the run directory. | ||
if (-f HommeTime ) then | ||
# save timings from run | ||
set timingfile = $name.nodes${NNODES}.HommeTime | ||
set summary = $name.nodes${NNODES}.summary | ||
mv HommeTime $timingfile | ||
# total run time (not counting init) | ||
# the first tee (without -a) creates/overwrites $summary; the later ones append to it | ||
grep -a prim_main_loop $timingfile | head -1 | tee $summary | ||
|
||
# breakdown dyn, tracers, remap. about 97% of the cost: | ||
grep -a prim_step_dyn $timingfile | head -1 | tee -a $summary | ||
grep -a PAT_remap $timingfile | head -1 | tee -a $summary | ||
grep -a vertical_remap $timingfile | head -1 | tee -a $summary | ||
# append the namelist used for this run so the summary file records its parameters | ||
echo "run parameters:" >> $summary | ||
cat input.nl >> $summary | ||
endif | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
&ctl_nl | ||
NThreads=-1 | ||
partmethod = 4 | ||
topology = "cube" | ||
test_case = "jw_baroclinic" | ||
u_perturb = 1 | ||
rotate_grid = 0 | ||
ne=1024 | ||
qsize = 10 | ||
! tstep=9 200 steps = 30min (Benchmark reports 30min time) | ||
nmax = 200 | ||
statefreq=200 | ||
disable_diagnostics = .true. | ||
theta_hydrostatic_mode=.false. | ||
theta_advect_form = 1 | ||
runtype = 0 | ||
tstep=9 | ||
rsplit=8 | ||
qsplit = 1 | ||
tstep_type = 7 | ||
integration = "explicit" | ||
nu=1e10 | ||
nu_div=1e10 | ||
nu_p=1e10 | ||
nu_q=1e10 | ||
nu_s=1e10 | ||
nu_top = 0e5 | ||
se_ftype = 0 | ||
limiter_option = 9 | ||
vert_remap_q_alg = 1 | ||
hypervis_order = 2 | ||
hypervis_subcycle=1 | ||
hypervis_subcycle_q=1 | ||
/ | ||
&vert_nl | ||
vform = "ccm" | ||
vfile_mid = './sabm-128.ascii' | ||
vfile_int = './sabi-128.ascii' | ||
/ | ||
|
||
&prof_inparm | ||
profile_outpe_num = 100 | ||
profile_single_file = .true. | ||
/ | ||
|
||
&analysis_nl | ||
! to compare with EUL ref solution: | ||
! interp_nlat = 32 | ||
! interp_nlon = 64 | ||
! interp_gridtype=2 | ||
|
||
output_timeunits=1,1 | ||
output_frequency=0,0 | ||
output_start_time=0,0 | ||
output_end_time=30000,30000 | ||
output_varnames1='ps','zeta' | ||
output_varnames2='Q','Q2','Q3','Q4','Q5' | ||
io_stride=8 | ||
output_type = 'pnetcdf64' | ||
/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
&ctl_nl | ||
NThreads=-1 | ||
partmethod = 4 | ||
topology = "cube" | ||
test_case = "jw_baroclinic" | ||
u_perturb = 1 | ||
rotate_grid = 0 | ||
ne=256 | ||
qsize = 10 | ||
! tstep=36 360 steps = 2h (Benchmark reports 2h time) | ||
nmax = 360 | ||
!ndays=2 | ||
statefreq=200 | ||
disable_diagnostics = .true. | ||
!disable_diagnostics = .false. | ||
restartfreq = 1 | ||
restartfile = "./R0001" | ||
runtype = 0 | ||
theta_hydrostatic_mode=.false. | ||
theta_advect_form = 1 | ||
tstep=36 | ||
rsplit=8 | ||
qsplit = 1 | ||
tstep_type = 7 | ||
integration = "explicit" | ||
nu=1e12 | ||
nu_div=1e12 | ||
nu_p=1e12 | ||
nu_q=1e12 | ||
nu_s=1e12 | ||
nu_top = 0e5 | ||
se_ftype = 0 | ||
limiter_option = 9 | ||
vert_remap_q_alg = 1 | ||
hypervis_order = 2 | ||
hypervis_subcycle=1 | ||
hypervis_subcycle_q=1 | ||
/ | ||
&vert_nl | ||
vform = "ccm" | ||
vfile_mid = './sabm-128.ascii' | ||
vfile_int = './sabi-128.ascii' | ||
/ | ||
|
||
&prof_inparm | ||
profile_outpe_num = 100 | ||
profile_single_file = .true. | ||
/ | ||
|
||
&analysis_nl | ||
! to compare with EUL ref solution: | ||
! interp_nlat = 32 | ||
! interp_nlon = 64 | ||
! interp_gridtype=2 | ||
|
||
output_timeunits=1,1 | ||
output_frequency=0,0 | ||
output_start_time=0,0 | ||
output_end_time=30000,30000 | ||
output_varnames1='ps','zeta' | ||
output_varnames2='Q','Q2','Q3','Q4','Q5' | ||
io_stride=8 | ||
output_type = 'pnetcdf' | ||
/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters