Skip to content

Commit

Permalink
Merge pull request #1317 from ESMCI/jgfouca/better_testlog_content
Browse files: browse the repository at this point in the history
Better content in TestStatus.log file.

    There is no reason to print a full stacktrace when the model
    fails. That just clutters the log file and provides no debugging value.
    In some cases, create_test was losing stderr output. Now we redirect
    stderr to stdout so it's never lost.
    In some cases, TestStatus.log claimed that the RUN phase had both passed
    and failed. What actually happened was that the submission passed and the
    run failed. A successful submission is now logged as SUBMIT rather than
    RUN, so the two outcomes are no longer conflated.

Test suite: scripts-regression-tests --fast
Test baseline:
Test namelist changes:
Test status: bit for bit

Fixes #1291

User interface changes?: Improved formatting/content of TestStatus.log

Code review: @jedwards4b
  • Loading branch information
jgfouca authored Apr 7, 2017
2 parents 7fb258b + ebcccaf commit cb7bd10
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 12 deletions.
8 changes: 7 additions & 1 deletion scripts/lib/CIME/SystemTests/system_tests_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,13 @@ def run(self):

except:
success = False
excmsg = "Exception during run:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
msg = sys.exc_info()[1].message
if "RUN FAIL" in msg:
# Don't want to print stacktrace for a model failure since that
# is not a CIME/infrastructure problem.
excmsg = msg
else:
excmsg = "Exception during run:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
logger.warning(excmsg)
append_testlog(excmsg)

Expand Down
Empty file modified scripts/lib/CIME/aprun.py
100755 → 100644
Empty file.
3 changes: 2 additions & 1 deletion scripts/lib/CIME/case_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def _run_model_impl(case, lid):
while loop:
loop = False
stat = run_cmd(cmd, from_dir=rundir)[0]
model_logfile = os.path.join(rundir, model + ".log." + lid)
# Determine if failure was due to a failed node, if so, try to restart
if stat != 0:
node_fail_re = case.get_value("NODE_FAIL_REGEX")
Expand Down Expand Up @@ -130,7 +131,7 @@ def _run_model_impl(case, lid):

if not loop:
# We failed and we're not restarting
expect(False, "Command '%s' failed" % cmd)
expect(False, "RUN FAIL: Command '%s' failed\nSee log file for details: %s" % (cmd, model_logfile))

logger.info("%s MODEL EXECUTION HAS FINISHED" %(time.strftime("%Y-%m-%d %H:%M:%S")))

Expand Down
Empty file modified scripts/lib/CIME/case_st_archive.py
100755 → 100644
Empty file.
20 changes: 10 additions & 10 deletions scripts/lib/CIME/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,25 +325,25 @@ def _update_test_status(self, test, phase, status):
def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
###########################################################################
while True:
rc, output, errput = run_cmd(cmd, from_dir=from_dir)
rc, output, _ = run_cmd(cmd + " 2>&1", from_dir=from_dir)
if rc != 0:
self._log_output(test,
"%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
(phase, test, cmd, output, errput))
"%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n" %
(phase, test, cmd, output))
# Temporary hack to get around odd file descriptor use by
# buildnml scripts.
if "bad interpreter" in errput:
if "bad interpreter" in output:
time.sleep(1)
continue
else:
break
else:
# We don't want "RUN PASSED" in the TestStatus.log if the only thing that
# succeeded was the submission.
if phase != RUN_PHASE or self._no_batch:
self._log_output(test,
"%s PASSED for test '%s'.\nCommand: %s\nOutput: %s" %
(phase, test, cmd, output))
phase = "SUBMIT" if phase == RUN_PHASE else phase
self._log_output(test,
"%s PASSED for test '%s'.\nCommand: %s\nOutput: %s\n" %
(phase, test, cmd, output))
break

return rc == 0
Expand Down Expand Up @@ -545,8 +545,8 @@ def _setup_phase(self, test):
rv = self._shell_cmd_for_phase(test, "./case.setup", SETUP_PHASE, from_dir=test_dir)

# It's OK for this command to fail with baseline diffs but not catastrophically
cmdstat, output, errput = run_cmd("./case.cmpgen_namelists", from_dir=test_dir)
expect(cmdstat in [0, TESTS_FAILED_ERR_CODE], "Fatal error in case.cmpgen_namelists: %s" % (output + "\n" + errput))
cmdstat, output, _ = run_cmd("./case.cmpgen_namelists 2>&1", from_dir=test_dir)
expect(cmdstat in [0, TESTS_FAILED_ERR_CODE], "Fatal error in case.cmpgen_namelists: %s" % output)

return rv

Expand Down

0 comments on commit cb7bd10

Please sign in to comment.