Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Give users more info and control over key run commands #1359

Merged
merged 8 commits into from
Apr 27, 2017
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions scripts/Tools/preview_run
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python

"""
Script to query key CIME shell commands (mpirun and batch submission).

To force a certain mpirun command, use:
./xmlchange MPI_RUN_COMMAND $your_cmd

To override the arguments passed to the batch submit command (e.g. qsub), use:
./xmlchange --subgroup=case.run BATCH_COMMAND $your_cmd
"""

from standard_script_setup import *

from CIME.case import Case

###############################################################################
def parse_command_line(args, description):
###############################################################################
    """
    Parse the command line of this script and return the case directory.

    args is the full argument vector (args[0] is the program name and is
    only used to build the usage message); description is the text shown
    by --help. The single optional positional argument is the case
    directory, which defaults to the current working directory.

    Returns the case directory as a string.
    """
    program = os.path.basename(args[0])

    # The usage text only lists what this parser really accepts: an optional
    # case directory and --help. --test is intercepted by the caller before
    # the arguments ever reach argparse.
    parser = argparse.ArgumentParser(
        usage="""\n%s [<caseroot>]
OR
%s --help
OR
%s --test

\033[1mEXAMPLES:\033[0m
\033[1;32m# Run the tool \033[0m
> %s
""" % ((program, ) * 4),
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument("caseroot", nargs="?", default=os.getcwd(),
                        help="Case directory to query")

    parsed = parser.parse_args(args[1:])

    return parsed.caseroot

###############################################################################
def _main_func(description):
###############################################################################
    """
    Print the batch submission and mpirun commands for a case.

    With --test anywhere on the command line, run this module's doctests
    and exit (status 1 if any failed, 0 otherwise). Otherwise open the case
    named on the command line (default: current directory), ask it for a
    dry-run job submission and print each job with its resolved submit
    command, followed by the resolved mpirun command.

    description is forwarded to the argument parser as the --help text.
    """
    if "--test" in sys.argv:
        test_results = doctest.testmod(verbose=True)
        sys.exit(1 if test_results.failed > 0 else 0)

    caseroot = parse_command_line(sys.argv, description)

    # Disable logging output entirely so that only the commands printed
    # below reach the user.
    logging.disable(logging.CRITICAL)

    # NOTE(review): the case is opened writable even though this tool only
    # queries it - confirm whether submit_jobs(dry_run=True) needs that.
    with Case(caseroot, read_only=False) as case:
        # Single-argument print(...) calls with explicit formatting emit the
        # same text under both Python 2 and Python 3.
        print("BATCH SUBMIT:")
        job = "case.test" if case.get_value("TEST") else "case.run"
        job_id_to_cmd = case.submit_jobs(dry_run=True, job=job)
        for job_id, cmd in job_id_to_cmd:
            print("  %s -> %s" % (job_id, case.get_resolved_value(cmd)))
        print("")
        print("MPIRUN: %s" % case.get_resolved_value(case.get_mpirun_cmd()))

if __name__ == "__main__":
    _main_func(__doc__)
54 changes: 33 additions & 21 deletions scripts/lib/CIME/XML/env_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def get_submit_args(self, case, job):

return submitargs

def submit_jobs(self, case, no_batch=False, job=None, batch_args=None):
def submit_jobs(self, case, no_batch=False, job=None, batch_args=None, dry_run=False):
alljobs = self.get_jobs()
startindex = 0
jobs = []
Expand All @@ -299,16 +299,23 @@ def submit_jobs(self, case, no_batch=False, job=None, batch_args=None):
if prereq is None or job == firstjob:
prereq = True
else:
if dry_run:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just add dry_run to the condition on line 299?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dry_run is the only case where we want to fake-out the BUILD_COMPLETE prereq.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see what you mean. We want to preserve the other prereqs besides the BUILD_COMPLETE one so that we get an accurate list of which jobs will be run.

# Assume build is complete
prereq = prereq.replace("$BUILD_COMPLETE", "True")
prereq = case.get_resolved_value(prereq)
prereq = eval(prereq)
except:
expect(False,"Unable to evaluate prereq expression '%s' for job '%s'"%(self.get_value('prereq',subgroup=job), job))

if prereq:
jobs.append((job,self.get_value('dependency', subgroup=job)))
jobs.append((job, self.get_value('dependency', subgroup=job)))

if self.batchtype == "cobalt":
break

depid = {}
jobcmds = []

for job, dependency in jobs:
if dependency is not None:
deps = dependency.split()
Expand All @@ -331,16 +338,21 @@ def submit_jobs(self, case, no_batch=False, job=None, batch_args=None):
if slen == 0:
jobid = None

logger.warn("job is %s"%job)
depid[job] = self.submit_single_job(case, job, jobid, no_batch=no_batch, batch_args=batch_args)
logger.warn("job is %s" % job)
result = self._submit_single_job(case, job, jobid, no_batch=no_batch, batch_args=batch_args, dry_run=dry_run)
batch_job_id = str(alljobs.index(job)) if dry_run else result
depid[job] = batch_job_id
jobcmds.append( (job, result) )
if self.batchtype == "cobalt":
break

return sorted(list(depid.values()))
if dry_run:
return jobcmds
else:
return sorted(list(depid.values()))

def submit_single_job(self, case, job, depid=None, no_batch=False, batch_args=None):
def _submit_single_job(self, case, job, depid=None, no_batch=False, batch_args=None, dry_run=False):
logger.warn("Submit job %s"%job)
caseroot = case.get_value("CASEROOT")
batch_system = self.get_value("BATCH_SYSTEM", subgroup=None)
if batch_system is None or batch_system == "none" or no_batch:
# Import here to avoid circular include
Expand All @@ -351,20 +363,17 @@ def submit_single_job(self, case, job, depid=None, no_batch=False, batch_args=No

logger.info("Starting job script %s" % job)

# Hack until all testcases are ported to python
testcase = case.get_value("TESTCASE")
cimeroot = get_cime_root()
testscript = os.path.join(cimeroot, "scripts", "Testing", "Testcases", "%s_script" % testcase)
if job == "case.test" and testcase is not None and os.path.exists(testscript):
run_cmd_no_fail("%s --caseroot %s" % (os.path.join(".", job), caseroot))
else:
# This is what we want longterm
function_name = job.replace(".", "_")
if not dry_run:
function_name = job.replace(".", "_")
locals()[function_name](case)

return

submitargs = self.get_submit_args(case, job)
args_override = self.get_value("BATCH_COMMAND", subgroup=job)
if args_override:
submitargs = args_override

if depid is not None:
dep_string = self.get_value("depend_string", subgroup=None)
Expand All @@ -383,11 +392,14 @@ def submit_single_job(self, case, job, depid=None, no_batch=False, batch_args=No
if string is not None:
submitcmd += string + " "

logger.info("Submitting job script %s"%submitcmd)
output = run_cmd_no_fail(submitcmd, combine_output=True)
jobid = self.get_job_id(output)
logger.info("Submitted job id is %s"%jobid)
return jobid
if dry_run:
return submitcmd
else:
logger.info("Submitting job script %s"%submitcmd)
output = run_cmd_no_fail(submitcmd, combine_output=True)
jobid = self.get_job_id(output)
logger.info("Submitted job id is %s"%jobid)
return jobid

def get_batch_system_type(self):
nodes = self.get_nodes("batch_system")
Expand Down Expand Up @@ -445,7 +457,7 @@ def get_all_queues(self):

def get_nodes(self, nodename, attributes=None, root=None, xpath=None):
if nodename in ("JOB_WALLCLOCK_TIME", "PROJECT", "PROJECT_REQUIRED",
"JOB_QUEUE"):
"JOB_QUEUE", "BATCH_COMMAND"):
nodes = EnvBase.get_nodes(self, "entry", attributes={"id":nodename},
root=root, xpath=xpath)
else:
Expand Down
46 changes: 26 additions & 20 deletions scripts/lib/CIME/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,7 @@ def _create_caseroot_tools(self):
os.path.join(toolsdir, "lt_archive.sh"),
os.path.join(toolsdir, "getTiming"),
os.path.join(toolsdir, "save_provenance"),
os.path.join(toolsdir, "preview_run"),
os.path.join(machines_dir,"Makefile"),
os.path.join(machines_dir,"mkSrcfiles"),
os.path.join(machines_dir,"mkDepends")]
Expand Down Expand Up @@ -1062,9 +1063,9 @@ def create_clone(self, newcase, keepexe=False, mach_dir=None, project=None, cime

return newcase

def submit_jobs(self, no_batch=False, job=None, batch_args=None):
def submit_jobs(self, no_batch=False, job=None, batch_args=None, dry_run=False):
env_batch = self.get_env('batch')
return env_batch.submit_jobs(self, no_batch=no_batch, job=job, batch_args=batch_args)
return env_batch.submit_jobs(self, no_batch=no_batch, job=job, batch_args=batch_args, dry_run=dry_run)

def get_mpirun_cmd(self, job="case.run"):
env_mach_specific = self.get_env('mach_specific')
Expand All @@ -1073,28 +1074,33 @@ def get_mpirun_cmd(self, job="case.run"):
run_misc_suffix = "" if run_misc_suffix is None else run_misc_suffix
run_suffix = run_exe + run_misc_suffix

# Things that will have to be matched against mpirun element attributes
mpi_attribs = {
"compiler" : self.get_value("COMPILER"),
"mpilib" : self.get_value("MPILIB"),
"threaded" : self.get_build_threaded(),
"unit_testing" : False
}

executable, args = env_mach_specific.get_mpirun(self, mpi_attribs, job=job)
mpirun_cmd_override = self.get_value("MPI_RUN_COMMAND")

# special case for aprun
if executable == "aprun":
aprun_cmd, num_nodes = get_aprun_cmd_for_case(self, run_exe)
expect(num_nodes == self.num_nodes, "Not using optimized num nodes")
return aprun_cmd + " " + run_misc_suffix
if mpirun_cmd_override not in ["", None, "UNSET"]:
return mpirun_cmd_override + " " + run_exe + " " + run_misc_suffix
else:
mpi_arg_string = " ".join(args.values())
# Things that will have to be matched against mpirun element attributes
mpi_attribs = {
"compiler" : self.get_value("COMPILER"),
"mpilib" : self.get_value("MPILIB"),
"threaded" : self.get_build_threaded(),
"unit_testing" : False
}

executable, args = env_mach_specific.get_mpirun(self, mpi_attribs, job=job)

# special case for aprun
if executable == "aprun":
aprun_cmd, num_nodes = get_aprun_cmd_for_case(self, run_exe)
expect(num_nodes == self.num_nodes, "Not using optimized num nodes")
return aprun_cmd + " " + run_misc_suffix
else:
mpi_arg_string = " ".join(args.values())

if self.get_value("BATCH_SYSTEM") == "cobalt":
mpi_arg_string += " : "
if self.get_value("BATCH_SYSTEM") == "cobalt":
mpi_arg_string += " : "

return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix)
return "%s %s %s" % (executable if executable is not None else "", mpi_arg_string, run_suffix)

def set_model_version(self, model):
version = "unknown"
Expand Down
13 changes: 1 addition & 12 deletions scripts/lib/CIME/case_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,7 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False,
input_batch_script = os.path.join(case.get_value("MACHDIR"), env_batch.get_value('template', subgroup=job))
if job == "case.test" and testcase is not None and not test_mode:
logger.info("Writing %s script" % job)
testscript = os.path.join(cimeroot, "scripts", "Testing", "Testcases", "%s_script" % testcase)
# Short term fix to be removed when csh tests are removed
if not os.path.exists(testscript):
env_batch.make_batch_script(input_batch_script, job, case, pestot, tasks_per_node, num_nodes, thread_count)
env_batch.make_batch_script(input_batch_script, job, case, pestot, tasks_per_node, num_nodes, thread_count)
elif job != "case.test":
logger.info("Writing %s script from input template %s" % (job, input_batch_script))
env_batch.make_batch_script(input_batch_script, job, case, pestot, tasks_per_node, num_nodes, thread_count)
Expand Down Expand Up @@ -203,14 +200,6 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False,

logger.info("If an old case build already exists, might want to run \'case.build --clean\' before building")

# Create test script if appropriate
# Short term fix to be removed when csh tests are removed
if os.path.exists("env_test.xml"):
if not os.path.exists("case.test"):
logger.info("Starting testcase.setup")
run_cmd_no_fail("./testcase.setup -caseroot %s" % caseroot)
logger.info("Finished testcase.setup")

# Some tests need namelists created here (ERP) - so do this if are in test mode
if test_mode or get_model() == "acme":
logger.info("Generating component namelists as part of setup")
Expand Down
4 changes: 2 additions & 2 deletions scripts/lib/CIME/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def run_cmd_no_fail(cmd, input_str=None, from_dir=None, verbose=None,
if stat != 0:
# If command produced no errput, put output in the exception since we
# have nothing else to go on.
errput = output if errput == "" else errput
errput = output if not errput else errput
expect(False, "Command: '%s' failed with error '%s'%s" %
(cmd, errput, "" if from_dir is None else " from dir '%s'" % from_dir))

Expand Down Expand Up @@ -1172,7 +1172,7 @@ def run_and_log_case_status(func, phase, caseroot='.'):
try:
rv = func()
except:
e = sys.exc_info()[0]
e = sys.exc_info()[1]
append_case_status(phase, "error", msg=("\n%s" % e), caseroot=caseroot)
raise
else:
Expand Down
11 changes: 10 additions & 1 deletion src/drivers/mct/cime_config/config_component.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1899,7 +1899,7 @@
<default_value>UNSET</default_value>
<group>run_mpi</group>
<file>env_run.xml</file>
<desc>mpi run command</desc>
<desc>override the mpi run command, do not include model executable</desc>
</entry>

<!-- ===================================================================== -->
Expand Down Expand Up @@ -2602,6 +2602,15 @@
<desc>The machine wallclock setting. Default determined in config_machines.xml can be overwritten by testing</desc>
</entry>

<entry id="BATCH_COMMAND">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename to BATCH_COMMAND_FLAGS

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

<type>char</type>
<valid_values></valid_values>
<default_value></default_value>
<group>job_submission</group>
<file>env_batch.xml</file>
<desc>Override the batch submit command for this job. Do not include executable or dependencies</desc>
</entry>

<entry id="PROJECT">
<type>char</type>
<default_value></default_value>
Expand Down