Commit 03520b1: merge to trunk
jedwards4b committed Jul 19, 2016
2 parents 367d47f + 84ebf2d
Showing 12 changed files with 93 additions and 74 deletions.
11 changes: 6 additions & 5 deletions cime_config/acme/machines/config_batch.xml
@@ -39,14 +39,15 @@
<batch_directive></batch_directive>
<jobid_pattern>(\d+)</jobid_pattern>
<depend_string> --dependencies</depend_string>
<walltime_format>%H:%M:%S</walltime_format>
<walltime_format>%M</walltime_format>
<submit_args>
<arg flag="--cwd" name="$CASEROOT"/>
<arg flag="-A" name="$PROJECT"/>
<arg flag="-t" name="$JOB_WALLCLOCK_TIME"/>
<arg flag="--cwd" name="CASEROOT"/>
<arg flag="-A" name="PROJECT"/>
<arg flag="-t" name="JOB_WALLCLOCK_TIME"/>
<arg flag="-n" name="$TOTALPES/$PES_PER_NODE"/>
<arg flag="-q" name="$JOB_QUEUE"/>
<arg flag="-q" name="JOB_QUEUE"/>
<arg flag="--mode script"/>
<arg flag="--env" name='"CIMEROOT=$CIMEROOT"'/>
</submit_args>
</batch_system>

1 change: 0 additions & 1 deletion cime_config/acme/machines/config_machines.xml
@@ -820,7 +820,6 @@
<arg name="bg_threadlayout"> --envs BG_THREADLAYOUT=1</arg>
<arg name="omp_stacksize"> --envs OMP_STACKSIZE=64M</arg>
<arg name="thread_count"> --envs OMP_NUM_THREADS={{ thread_count }}</arg>
<arg name="colon">:</arg>
</arguments>
</mpirun>
<module_system type="soft">
57 changes: 18 additions & 39 deletions cime_config/cesm/machines/config_batch.xml
@@ -63,13 +63,14 @@
<depend_string> --dependencies</depend_string>
<walltime_format>%M</walltime_format>
<submit_args>
<arg flag="--cwd" name="$CASEROOT"/>
<arg flag="-A" name="$PROJECT"/>
<arg flag="-t" name="$JOB_WALLCLOCK_TIME"/>
<arg flag="--cwd" name="CASEROOT"/>
<arg flag="-A" name="PROJECT"/>
<arg flag="-t" name="JOB_WALLCLOCK_TIME"/>
<arg flag="-n" name="$TOTALPES/$PES_PER_NODE"/>
<arg flag="-q" name="$JOB_QUEUE"/>
<arg flag="-q" name="JOB_QUEUE"/>
<arg flag="--mode script"/>
</submit_args>
<arg flag="--env" name='"CIMEROOT=$CIMEROOT"'/>
</submit_args>
</batch_system>

<batch_system type="lsf" version="9.1">
@@ -142,21 +143,21 @@
</batch_system>

<!-- babbage is PBS -->
<batch_system MACH="babbage" version="x.y">
<batch_system MACH="babbage" type="pbs">
<directives>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
</batch_system>

<!-- babbageKnc is PBS -->
<batch_system MACH="babbageKnc" version="x.y">
<batch_system MACH="babbageKnc" type="pbs">
<directives>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
</batch_system>

<!-- brutus is PBS -->
<batch_system type="pbs" MACH="brutus" version="x.y">
<batch_system type="pbs" MACH="brutus" >
<directives>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
@@ -186,25 +187,25 @@
</batch_system>

<!-- brutus is PBS -->
<batch_system MACH="brutus" version="x.y">
<batch_system MACH="brutus" type="pbs">
<directives>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
</batch_system>

<!-- eos is PBS -->
<batch_system MACH="eos" version="x.y">
<!-- eos is PBS -->
<batch_system MACH="eos" type="pbs">
<jobid_pattern>^(\d+)</jobid_pattern>
<directives>
<directive>-A $PROJECT</directive>
<directive>-l mppwidth={{ mppwidth }}</directive>
<directive>-l nodes={{ num_nodes }}</directive>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
</batch_system>
</batch_system>

<!-- erebus is PBS -->
<batch_system MACH="erebus" version="x.y">
<batch_system MACH="erebus" type="pbs">
<directives>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
@@ -232,14 +233,6 @@
</walltimes>
</batch_system>

<!-- goldbach is PBS -->
<batch_system MACH="goldbach" version="x.y">
<directives>
<directive>-l nodes={{ num_nodes }}:ppn={{ tasks_per_node }}</directive>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
</batch_system>

<!-- hobart is PBS -->
<batch_system type="pbs" MACH="hobart" version="x.y">
<directives>
@@ -253,28 +246,14 @@
</batch_system>

<!-- hera is SLURM -->
<batch_system MACH="hera">
<batch_system MACH="hera" type="slurm">
<batch_directive>#MSUB</batch_directive>
<directives>
<directive>-A ees</directive>
<directive>-l gres=lscratchd</directive>
</directives>
</batch_system>

<!-- hopper is PBS -->
<batch_system MACH="hopper" version="x.y">
<directives>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
</batch_system>

<!-- janus is PBS -->
<batch_system MACH="janus" version="x.y">
<directives>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
</batch_system>

<batch_system MACH="mira" type="cobalt">
<queues>
<queue waltimemin="0" walltimemax="360" jobmin="512" jobmax="4096" default="true">default</queue>
@@ -361,7 +340,7 @@
</batch_system>

<!-- all pleiades machines are PBS -->
<batch_system MACH="pleiades-wes" version="x.y">
<batch_system MACH="pleiades-wes" version="x.y" type="pbs">
<jobid_pattern>^(\S+)</jobid_pattern>
<directives>
<directive>-W group_list=$PROJECT </directive>
@@ -372,7 +351,7 @@
</batch_system>

<!-- sierra is SLURM -->
<batch_system MACH="sierra">
<batch_system MACH="sierra" type="slurm">
<batch_directive>#MSUB</batch_directive>
<directives>
<directive> </directive>
@@ -431,7 +410,7 @@
</batch_system>

<!-- titan is PBS -->
<batch_system MACH="titan" version="x.y">
<batch_system MACH="titan" type="pbs">
<directives>
<directive default="/bin/bash" > -S {{ shell }} </directive>
</directives>
9 changes: 4 additions & 5 deletions cime_config/cesm/machines/config_machines.xml
@@ -789,18 +789,17 @@
<PES_PER_NODE>8</PES_PER_NODE>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<mpirun mpilib="default">
<executable>/usr/bin/runjob</executable>
<executable>/usr/bin/runjob</executable>
<arguments>
<arg name="label"> --label short</arg>
<!-- Ranks per node!! -->
<arg name="tasks_per_node"> -p {{ tasks_per_node }}</arg>
<arg name="tasks_per_node"> --ranks-per-node $PES_PER_NODE</arg>
<!-- Total MPI Tasks -->
<arg name="num_tasks"> -n {{ num_tasks }}</arg>
<arg name="locargs"> $LOCARGS</arg>
<arg name="num_tasks"> --np $TOTALPES</arg>
<arg name="locargs">--block $COBALT_PARTNAME --envs OMP_WAIT_POLICY=active --envs BG_SMP_FAST_WAKEUP=yes $LOCARGS</arg>
<arg name="bg_threadlayout"> --envs BG_THREADLAYOUT=1</arg>
<arg name="omp_stacksize"> --envs OMP_STACKSIZE=32M</arg>
<arg name="thread_count"> --envs OMP_NUM_THREADS={{ thread_count }}</arg>
<arg name="colon">:</arg>
</arguments>
</mpirun>
<module_system type="soft">
8 changes: 6 additions & 2 deletions utils/python/CIME/SystemTests/system_tests_common.py
@@ -202,8 +202,12 @@ def _check_for_memleak(self, cpllog):
originaldate = int(memlist[0][0])
finalmem = float(memlist[-1][1])
originalmem = float(memlist[0][1])
memdiff = (finalmem - originalmem)/originalmem
if memdiff < 0.1:
memdiff = -1
if originalmem > 0:
memdiff = (finalmem - originalmem)/originalmem
if memdiff < 0:
append_status("COMMENT: insuffiencient data for memleak test",sfile="TestStatus")
elif memdiff < 0.1:
append_status("PASS %s memleak"%(self._case.get_value("CASEBASEID")),
sfile="TestStatus")
else:
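This hunk guards the relative-growth computation against a missing or zero baseline: memdiff starts at -1 and is only computed when originalmem is positive, so the check reports insufficient data rather than raising a ZeroDivisionError. A minimal standalone sketch of the same comparison (hypothetical helper, not the CIME code):

```python
def classify_memleak(originalmem, finalmem, threshold=0.1):
    """Return 'insufficient', 'pass', or 'fail' for a memory-growth check.

    If the baseline is not positive the relative growth cannot be computed,
    so the check reports insufficient data instead of dividing by zero.
    """
    if originalmem <= 0:
        return "insufficient"
    memdiff = (finalmem - originalmem) / originalmem
    return "pass" if memdiff < threshold else "fail"

print(classify_memleak(1000.0, 1050.0))  # 5% growth -> "pass"
print(classify_memleak(0.0, 1050.0))     # no usable baseline -> "insufficient"
```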
14 changes: 13 additions & 1 deletion utils/python/CIME/XML/env_batch.py
@@ -315,6 +315,12 @@ def get_submit_args(self, case, job):
for arg in submit_arg_nodes:
flag = arg.get("flag")
name = arg.get("name")
if self.batchtype == "cobalt" and job == "case.st_archive":
if flag == "-n":
name = 'task_count'
if flag == "--mode":
continue

if name is None:
submitargs+=" %s"%flag
else:
@@ -346,16 +352,18 @@ def submit_jobs(self, case, no_batch=False, job=None):
startindex = alljobs.index(job)

for index, job in enumerate(alljobs):
logger.debug( "Index %d job %s startindex %d"%(index, job, startindex))
if index < startindex:
continue
logger.debug( "Index %d job %s"%(index, job))
try:
prereq = case.get_resolved_value(self.get_value('prereq', subgroup=job))
prereq = eval(prereq)
except:
expect(False,"Unable to evaluate prereq expression '%s' for job '%s'"%(self.get_value('prereq',subgroup=job), job))
if prereq:
jobs.append((job,self.get_value('dependency', subgroup=job)))
if self.batchtype == "cobalt":
break
depid = {}
for job, dependency in jobs:
if dependency is not None:
Expand All @@ -379,9 +387,13 @@ def submit_jobs(self, case, no_batch=False, job=None):
if slen == 0:
jobid = None

logger.warn("job is %s"%job)
depid[job] = self.submit_single_job(case, job, jobid, no_batch=no_batch)
if self.batchtype == "cobalt":
break

def submit_single_job(self, case, job, depid=None, no_batch=False):
logger.warn("Submit job %s"%job)
caseroot = case.get_value("CASEROOT")
batch_system = self.get_value("BATCH_SYSTEM", subgroup=None)
if batch_system is None or batch_system == "none" or no_batch:
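Two Cobalt-specific adjustments appear here: get_submit_args rewrites the "-n" argument to use 'task_count' and drops the "--mode" flag when the job being submitted is case.st_archive, and submit_jobs stops walking the job list after the first submission when the batch system is Cobalt. A simplified stand-in for the argument filtering, with the XML <arg> nodes replaced by plain (flag, name) tuples (assumed simplification, not the CIME implementation):

```python
def filter_submit_args(batchtype, job, arg_nodes):
    """Build the submit-argument string, with the Cobalt special case above.

    arg_nodes is a list of (flag, name) tuples standing in for the XML
    <arg flag=... name=...> entries read from the batch configuration.
    """
    submitargs = ""
    for flag, name in arg_nodes:
        if batchtype == "cobalt" and job == "case.st_archive":
            if flag == "-n":
                name = "task_count"   # archive job uses its own task count
            if flag == "--mode":
                continue              # '--mode script' is not passed through
        if name is None:
            submitargs += " %s" % flag
        else:
            submitargs += " %s %s" % (flag, name)
    return submitargs

print(filter_submit_args("cobalt", "case.st_archive",
                         [("--mode", "script"), ("-n", "TOTALPES"), ("-q", "JOB_QUEUE")]))
# ->  -n task_count -q JOB_QUEUE
```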
5 changes: 4 additions & 1 deletion utils/python/CIME/XML/machines.py
@@ -337,7 +337,10 @@ def get_full_mpirun(self, check_members, case, job):
executable, args = self.get_mpirun(mpi_attribs, check_members, case, job)

mpi_arg_string = " ".join(args.values())

batch_system = self.get_value("BATCH_SYSTEM")
if batch_system == "cobalt":
mpi_arg_string += " : "

return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, default_run_suffix)

def print_values(self):
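get_full_mpirun now appends " : " when the batch system is Cobalt; on Blue Gene's runjob the colon separates runjob's own options from the launched executable and its arguments. A simplified sketch of the command assembly (placeholder run suffix, not the actual CIME default_run_suffix):

```python
def full_mpirun(executable, mpi_args, run_suffix, batch_system):
    """Assemble the launch command, adding Cobalt's ' : ' separator."""
    mpi_arg_string = " ".join(mpi_args)
    if batch_system == "cobalt":
        mpi_arg_string += " : "
    return "%s %s %s" % (executable if executable is not None else "",
                         mpi_arg_string, run_suffix)

print(full_mpirun("/usr/bin/runjob",
                  ["--label short", "--ranks-per-node 16", "--np 64"],
                  "./cesm.exe >> cesm.log 2>&1",   # placeholder suffix
                  "cobalt"))
```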
33 changes: 18 additions & 15 deletions utils/python/CIME/build.py
@@ -5,8 +5,7 @@
from CIME.utils import get_model, append_status
from CIME.preview_namelists import preview_namelists
from CIME.check_input_data import check_input_data

import glob, shutil, time, threading, gzip
import glob, shutil, time, threading, gzip, subprocess

logger = logging.getLogger(__name__)

@@ -92,9 +91,12 @@ def build_model(build_threaded, exeroot, clm_config_opts, incroot, complist,
file_build = os.path.join(exeroot, "%s.bldlog.%s" % (cime_model, lid))

config_dir = os.path.join(cimeroot, "driver_cpl", "cime_config")
stat = run_cmd("%s/buildexe %s %s %s >> %s 2>&1" %
(config_dir, caseroot, bldroot, libroot, file_build),
from_dir=bldroot, verbose=True)[0]
f = open(file_build, "w")
stat = run_cmd("%s/buildexe %s %s %s" %
(config_dir, caseroot, bldroot, libroot),
from_dir=bldroot, verbose=True, arg_stdout=f,
arg_stderr=subprocess.STDOUT)[0]
f.close()

expect(stat == 0, "ERROR: buildexe failed, cat %s" % file_build)

@@ -490,14 +492,14 @@ def build_libraries(case, exeroot, caseroot, cimeroot, libroot, mpilib, lid, mac
os.makedirs(full_lib_path)

file_build = os.path.join(sharedpath, "%s.bldlog.%s" % (lib, lid))
my_file = os.path.join(os.path.dirname(machines_file), "buildlib.%s" % lib)
with open(file_build, "w") as fd:
fd.write("Current env:\n%s" % "\n".join([" %s = %s" % (env, os.environ[env]) for env in sorted(os.environ)]))

my_file = os.path.join(os.path.dirname(machines_file), "buildlib.%s" % lib)
stat = run_cmd("%s %s %s >> %s 2>&1" %
(my_file, sharedpath, caseroot, file_build),
from_dir=exeroot,
verbose=True)[0]
stat = run_cmd("%s %s %s" %
(my_file, sharedpath, caseroot),
from_dir=exeroot,
verbose=True, arg_stdout=fd,
arg_stderr=subprocess.STDOUT)[0]
expect(stat == 0, "ERROR: buildlib.%s failed, cat %s" % (lib, file_build))
logs.append(file_build)

@@ -525,10 +527,11 @@ def build_libraries(case, exeroot, caseroot, cimeroot, libroot, mpilib, lid, mac
def _build_model_thread(config_dir, compclass, caseroot, bldroot, libroot, incroot, file_build,
thread_bad_results):
###############################################################################

stat = run_cmd("%s/buildlib %s %s %s >> %s 2>&1" %
(config_dir, caseroot, bldroot, libroot, file_build),
from_dir=bldroot, verbose=True)[0]
with open(file_build, "w") as fd:
stat = run_cmd("%s/buildlib %s %s %s " %
(config_dir, caseroot, bldroot, libroot),
from_dir=bldroot, verbose=True, arg_stdout=fd,
arg_stderr=subprocess.STDOUT)[0]
if (stat != 0):
thread_bad_results.append("ERROR: %s.buildlib failed, see %s" % (compclass, file_build))

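The build steps stop appending ">> logfile 2>&1" to the command string and instead pass an open file handle to run_cmd via arg_stdout, with arg_stderr=subprocess.STDOUT merging both streams into the build log. The same pattern using the subprocess module directly (a sketch, not CIME's run_cmd wrapper):

```python
import subprocess

def run_logged(cmd_args, logfile, cwd=None):
    """Run a command and capture stdout and stderr in one log file."""
    with open(logfile, "w") as fd:
        # stderr=subprocess.STDOUT merges stderr into the log, matching the
        # old shell-level '>> logfile 2>&1' redirection.
        return subprocess.call(cmd_args, cwd=cwd, stdout=fd,
                               stderr=subprocess.STDOUT)

# Hypothetical usage:
#   stat = run_logged(["./buildlib.gptl", sharedpath, caseroot], file_build)
#   expect(stat == 0, "ERROR: buildlib.gptl failed, cat %s" % file_build)
```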
12 changes: 9 additions & 3 deletions utils/python/CIME/case_run.py
@@ -355,17 +355,22 @@ def resubmit_check(case):
# Note that Mira requires special logic

dout_s = case.get_value("DOUT_S")
logger.warn("dout_s %s "%(dout_s))
mach = case.get_value("MACH")
logger.warn("mach %s "%(mach))
testcase = case.get_value("TESTCASE")
resubmit_num = case.get_value("RESUBMIT")

logger.warn("resubmit_num %s"%(resubmit_num))
# If dout_s is True than short-term archiving handles the resubmit
# that is not the case on Mira
# If dout_s is True and machine is mira submit the st_archive script
resubmit = False
if not dout_s and resubmit_num > 0:
resubmit = True
elif dout_s and mach == 'mira':
resubmit = True
caseroot = case.get_value("CASEROOT")
cimeroot = case.get_value("CIMEROOT")
cmd = "ssh cooleylogin1 'cd %s; CIMEROOT=%s ./case.submit %s --job case.st_archive' "%(caseroot, cimeroot, caseroot)
run_cmd(cmd, verbose=True)

if resubmit:
if testcase is not None and testcase in ['ERR']:
@@ -412,6 +417,7 @@ def case_run(case):
if data_assimilation:
do_data_assimilation(data_assimilation_script, lid)

logger.warn("check for resubmit")
resubmit_check(case)

return True
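resubmit_check now handles Mira specially: with short-term archiving off, the run resubmits itself while RESUBMIT is positive, while with archiving on the st_archive job normally handles the resubmit, except on mira, where case.st_archive is submitted explicitly over ssh to cooleylogin1. A reduced sketch of the branch structure (stand-in function, not the CIME code):

```python
def plan_resubmit(dout_s, resubmit_num, mach):
    """Return (resubmit_now, remote_cmd) following the branches above."""
    if not dout_s and resubmit_num > 0:
        # short-term archiving is off: the run script resubmits itself
        return True, None
    if dout_s and mach == "mira":
        # archiving on mira: hand off to the st_archive job via the login node
        remote_cmd = ("ssh cooleylogin1 'cd <caseroot>; "
                      "CIMEROOT=<cimeroot> ./case.submit --job case.st_archive'")
        return True, remote_cmd
    return False, None

print(plan_resubmit(False, 2, "yellowstone"))  # (True, None)
print(plan_resubmit(True, 2, "mira"))          # (True, "ssh cooleylogin1 ...")
```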
9 changes: 8 additions & 1 deletion utils/python/CIME/case_st_archive.py
@@ -373,6 +373,13 @@ def case_st_archive(case):
append_status("resubmitting from st_archive",
caseroot=caseroot, sfile="CaseStatus")
logger.info("resubmitting from st_archive, resubmit=%d"%resubmit)
submit(case, resubmit=True)
if case.get_value("MACH") == "mira":
expect(os.path.isfile(".original_host"), "ERROR alcf host file not found")
with open(".original_host", "r") as fd:
sshhost = fd.read()
run_cmd("ssh %s `cd %s; CIMEROOT=%s ./case.submit --resubmit' "\
%(sshhost, caseroot, case.get_value("CIMEROOT")))
else:
submit(case, resubmit=True)

return True
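case_st_archive resubmits differently on mira: the host that originally submitted the case is read from a ".original_host" file (assumed to be written at submit time) and case.submit --resubmit is rerun there over ssh; on other machines the existing submit(case, resubmit=True) path is kept. A self-contained sketch of that branch (stand-in names, not the CIME implementation):

```python
import os
import subprocess

def resubmit_after_archive(mach, caseroot, cimeroot, resubmit_locally):
    """Resubmit the case after short-term archiving, with the mira branch above."""
    if mach == "mira":
        # The submitting host was recorded at submit time (assumed), so the
        # archive job can hand the resubmit back to it over ssh.
        assert os.path.isfile(".original_host"), "ERROR alcf host file not found"
        with open(".original_host") as fd:
            sshhost = fd.read().strip()
        subprocess.call("ssh %s 'cd %s; CIMEROOT=%s ./case.submit --resubmit'"
                        % (sshhost, caseroot, cimeroot), shell=True)
    else:
        resubmit_locally()
```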
(The remaining 2 changed files are not shown.)