Skip to content

Commit

Permalink
Modified batch system to slurm and fixed an environment issue.
Browse files Browse the repository at this point in the history
[BFB] - Bit-For-Bit
  • Loading branch information
singhbalwinder committed Apr 14, 2017
1 parent 515c55d commit 963cdae
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 22 deletions.
10 changes: 10 additions & 0 deletions cime_config/acme/machines/config_batch.xml
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,16 @@
<queue walltimemax="01:00:00" jobmin="1" jobmax="65536" default="true">default</queue>
</queues>
</batch_system>

<!-- Batch settings for the "cascade" machine; the scheduler type is slurm. -->
<batch_system MACH="cascade" type="slurm">
  <directives>
    <!-- Request a notification e-mail when the job ends.
         No mail-user directive is given on purpose: Slurm then sends the
         mail to the submitting user instead of to a shared placeholder
         address that no individual user reads. -->
    <directive>--mail-type=END</directive>
  </directives>
  <queues>
    <!-- The only queue declared for this machine; marked as the default. -->
    <queue walltimemax="00:30:00" jobmin="1" jobmax="9999" default="true">small</queue>
  </queues>
</batch_system>

<batch_system MACH="constance" type="slurm">
<directives>
Expand Down
91 changes: 69 additions & 22 deletions cime_config/acme/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1266,28 +1266,75 @@
</environment_variables>
</machine>

<!-- "cascade" machine entry as it stood before this commit (the removed
     side of the diff hunk). Element order and values are unchanged.
     NOTE(review): DESC mentions pgi, but COMPILERS lists intel,nag. -->
<machine MACH="cascade">
  <DESC>PNL cluster, os is Linux (pgi), batch system is SLURM</DESC>
  <NODENAME_REGEX>cascade</NODENAME_REGEX>
  <TESTS>acme_developer</TESTS>
  <OS>LINUX</OS>
  <COMPILERS>intel,nag</COMPILERS>
  <MPILIBS>mpich</MPILIBS>

  <!-- Per-user run and build directories. -->
  <RUNDIR>/dtemp/$USER/csmruns/$CASE/run</RUNDIR>
  <EXEROOT>/dtemp/$USER/csmruns/$CASE/bld</EXEROOT>

  <!-- Input data kept under a single account's directory. -->
  <DIN_LOC_ROOT>/dtemp/sing201/inputdata/CAM/CSMDATA_CAM/aerocom/csmdata</DIN_LOC_ROOT>
  <DIN_LOC_ROOT_CLMFORC>/dtemp/sing201/inputdata/CAM/CSMDATA_CAM/aerocom/csmdata/atm/datm7</DIN_LOC_ROOT_CLMFORC>

  <!-- Archive locations. -->
  <DOUT_S_ROOT>/dtemp/$USER/archive/$CASE</DOUT_S_ROOT>
  <DOUT_L_MSROOT>csm/$CASE</DOUT_L_MSROOT>

  <!-- Baselines, scratch root and the cprnc executable path. -->
  <CCSM_BASELINE>/dtemp/sing201/acme_testing/acme_baselines/</CCSM_BASELINE>
  <CESMSCRATCHROOT>/dtemp/$USER/csmruns</CESMSCRATCHROOT>
  <CCSM_CPRNC>/home/sing201/CAM/cprnc/cprnc</CCSM_CPRNC>

  <BATCH_SYSTEM>slurm</BATCH_SYSTEM>
  <SUPPORTED_BY>balwinder.singh at pnnl dot gov</SUPPORTED_BY>
  <GMAKE_J>8</GMAKE_J>
  <MAX_TASKS_PER_NODE>16</MAX_TASKS_PER_NODE>
</machine>

<!-- "cascade" machine entry: Intel compiler, mvapich2 MPI, Slurm batch
     system. Data shared between users lives under /dtemp/sing201; every
     per-user location is rooted at /dtemp/$USER. -->
<machine MACH="cascade">
  <DESC>PNL Intel KNC cluster, OS is Linux, batch system is SLURM</DESC>
  <OS>LINUX</OS>
  <COMPILERS>intel</COMPILERS>
  <MPILIBS>mvapich2</MPILIBS>
  <!-- presumably matched against the host name of the login nodes; confirm -->
  <NODENAME_REGEX>glogin</NODENAME_REGEX>

  <!-- Per-user scratch, run and build directories. -->
  <CESMSCRATCHROOT>/dtemp/$USER</CESMSCRATCHROOT>
  <RUNDIR>/dtemp/$USER/csmruns/$CASE/run</RUNDIR>
  <EXEROOT>/dtemp/$USER/csmruns/$CASE/bld</EXEROOT>
  <CIME_OUTPUT_ROOT>/dtemp/$USER</CIME_OUTPUT_ROOT>

  <!-- Shared input data. -->
  <DIN_LOC_ROOT>/dtemp/sing201/acme/inputdata/</DIN_LOC_ROOT>
  <DIN_LOC_ROOT_CLMFORC>/dtemp/sing201/acme/inputdata/atm/datm7</DIN_LOC_ROOT_CLMFORC>

  <!-- Short-term archive: kept under the same /dtemp/$USER root as RUNDIR
       and EXEROOT, so each user archives into their own directory rather
       than into a sub-directory of another account. -->
  <DOUT_S_ROOT>/dtemp/$USER/archive/$CASE</DOUT_S_ROOT>
  <DOUT_L_MSROOT>UNSET</DOUT_L_MSROOT>

  <!-- Shared baselines, cprnc executable and timing directory. -->
  <CCSM_BASELINE>/dtemp/sing201/acme/acme_baselines</CCSM_BASELINE>
  <CCSM_CPRNC>/dtemp/sing201/acme/acme_baselines/cprnc/cprnc</CCSM_CPRNC>
  <SAVE_TIMING_DIR>/dtemp/sing201/acme/timing_acme</SAVE_TIMING_DIR>

  <BATCH_SYSTEM>slurm</BATCH_SYSTEM>
  <SUPPORTED_BY>balwinder.singh -at- pnnl.gov</SUPPORTED_BY>
  <GMAKE_J>8</GMAKE_J>
  <MAX_TASKS_PER_NODE>16</MAX_TASKS_PER_NODE>
  <PES_PER_NODE>16</PES_PER_NODE>
  <PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>

  <!-- mpi-serial: the launcher executable is left empty. -->
  <mpirun mpilib="mpi-serial">
    <executable></executable>
  </mpirun>

  <!-- mvapich2: jobs are launched with srun. -->
  <mpirun mpilib="mvapich2">
    <executable>srun</executable>
    <arguments>
      <arg name="mpi">--mpi=none</arg>
      <arg name="num_tasks">--ntasks=$TOTALPES</arg>
      <!-- NOTE(review): the cpu_bind option is passed twice in one argument;
           confirm srun combines both settings instead of keeping only the
           last one. -->
      <arg name="cpu_bind">--cpu_bind=sockets --cpu_bind=verbose</arg>
      <arg name="kill-on-bad-exit">--kill-on-bad-exit</arg>
    </arguments>
  </mpirun>

  <module_system type="module">
    <!-- Initialisation script and module command for each supported language. -->
    <init_path lang="python">/opt/lmod/5.0.1/init/env_modules_python.py</init_path>
    <init_path lang="csh">/etc/profile.d/modules.csh</init_path>
    <init_path lang="sh">/etc/profile.d/modules.sh</init_path>
    <cmd_path lang="python">/opt/lmod/lmod/libexec/lmod python</cmd_path>
    <cmd_path lang="sh">module</cmd_path>
    <cmd_path lang="csh">module</cmd_path>

    <!-- Start from a clean module environment. -->
    <modules>
      <command name="purge"></command>
    </modules>
    <modules>
      <command name="load">python/2.7.9</command>
    </modules>
    <!-- Loaded only when the compiler is intel.
         NOTE(review): intel/15.0.090 is paired with mkl/14.0; confirm the
         version combination is intended. -->
    <modules compiler="intel">
      <command name="load">intel/15.0.090</command>
      <command name="load">mkl/14.0</command>
    </modules>
    <!-- Loaded only when the MPI library is mvapich2. -->
    <modules mpilib="mvapich2">
      <command name="load">mvapich2/1.9</command>
    </modules>
    <modules>
      <command name="load">netcdf/4.3.0</command>
    </modules>
  </module_system>

  <!-- Environment for every build and run.
       NETCDF_HOME is taken from NETCDF_ROOT, presumably set by the netcdf
       module loaded above; confirm. -->
  <environment_variables>
    <env name="OMP_STACKSIZE">64M</env>
    <env name="NETCDF_HOME">$ENV{NETCDF_ROOT}</env>
  </environment_variables>
  <!-- Extra environment applied only when the compiler is intel. -->
  <environment_variables compiler="intel">
    <env name="MKL_PATH">$ENV{MLIBHOME}</env>
    <env name="COMPILER">intel</env>
  </environment_variables>
</machine>

<machine MACH="constance">
<DESC>PNL Haswell cluster, OS is Linux, batch system is SLURM</DESC>
<OS>LINUX</OS>
Expand Down
1 change: 1 addition & 0 deletions cime_config/acme/machines/config_pio.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
<!-- Per-machine values: each line assigns "netcdf" to the machine named in mach, optionally narrowed by compiler. NOTE(review): mach="pleiades.*" suggests the attribute is matched as a pattern; confirm. -->
<value mach="userdefined">netcdf</value>
<value mach="eastwind">netcdf</value>
<value mach="constance">netcdf</value>
<value mach="cascade">netcdf</value>
<value mach="sooty">netcdf</value>
<value mach="pleiades.*">netcdf</value>
<value mach="hobart" compiler="pgi">netcdf</value>
Expand Down

0 comments on commit 963cdae

Please sign in to comment.