From ebcccaf8f9c4d8a17e3af927cd3085762f75b7eb Mon Sep 17 00:00:00 2001
From: James Foucar <jgfouca@sandia.gov>
Date: Thu, 6 Apr 2017 16:58:31 -0600
Subject: [PATCH] Better content in TestStatus.log file.

1) There is no reason to print a full stacktrace when the model
fails. That just clutters the log file and provides no debugging value.
2) In some cases, create_test was losing stderr output. Now we redirect
stderr to stdout so it's never lost.
3) In some cases, the TestStatus claimed the RUN phase both passed
and failed. What actually happened was that the submit passed and the
run failed. The situation is now more clear.
---
 .../CIME/SystemTests/system_tests_common.py   |  8 +++++++-
 scripts/lib/CIME/aprun.py                     |  0
 scripts/lib/CIME/case_run.py                  |  3 ++-
 scripts/lib/CIME/case_st_archive.py           |  0
 scripts/lib/CIME/test_scheduler.py            | 20 +++++++++----------
 5 files changed, 19 insertions(+), 12 deletions(-)
 mode change 100755 => 100644 scripts/lib/CIME/aprun.py
 mode change 100755 => 100644 scripts/lib/CIME/case_st_archive.py

diff --git a/scripts/lib/CIME/SystemTests/system_tests_common.py b/scripts/lib/CIME/SystemTests/system_tests_common.py
index 051f3f1f5dd4..d0eba6383a58 100644
--- a/scripts/lib/CIME/SystemTests/system_tests_common.py
+++ b/scripts/lib/CIME/SystemTests/system_tests_common.py
@@ -146,7 +146,13 @@ def run(self):
 
         except:
             success = False
-            excmsg = "Exception during run:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
+            msg = sys.exc_info()[1].message
+            if "RUN FAIL" in msg:
+                # Don't want to print stacktrace for a model failure since that
+                # is not a CIME/infrastructure problem.
+                excmsg = msg
+            else:
+                excmsg = "Exception during run:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
             logger.warning(excmsg)
             append_testlog(excmsg)
 
diff --git a/scripts/lib/CIME/aprun.py b/scripts/lib/CIME/aprun.py
old mode 100755
new mode 100644
diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py
index 16a787e379f0..0b4728b2f466 100644
--- a/scripts/lib/CIME/case_run.py
+++ b/scripts/lib/CIME/case_run.py
@@ -102,6 +102,7 @@ def _run_model_impl(case, lid):
     while loop:
         loop = False
         stat = run_cmd(cmd, from_dir=rundir)[0]
+        model_logfile = os.path.join(rundir, model + ".log." + lid)
         # Determine if failure was due to a failed node, if so, try to restart
         if stat != 0:
             node_fail_re = case.get_value("NODE_FAIL_REGEX")
@@ -130,7 +131,7 @@ def _run_model_impl(case, lid):
 
             if not loop:
                 # We failed and we're not restarting
-                expect(False, "Command '%s' failed" % cmd)
+                expect(False, "RUN FAIL: Command '%s' failed\nSee log file for details: %s" % (cmd, model_logfile))
 
     logger.info("%s MODEL EXECUTION HAS FINISHED" %(time.strftime("%Y-%m-%d %H:%M:%S")))
 
diff --git a/scripts/lib/CIME/case_st_archive.py b/scripts/lib/CIME/case_st_archive.py
old mode 100755
new mode 100644
diff --git a/scripts/lib/CIME/test_scheduler.py b/scripts/lib/CIME/test_scheduler.py
index 5f472806cf2d..b0b4c2316267 100644
--- a/scripts/lib/CIME/test_scheduler.py
+++ b/scripts/lib/CIME/test_scheduler.py
@@ -325,14 +325,14 @@ def _update_test_status(self, test, phase, status):
     def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
     ###########################################################################
         while True:
-            rc, output, errput = run_cmd(cmd, from_dir=from_dir)
+            rc, output, _ = run_cmd(cmd + " 2>&1", from_dir=from_dir)
             if rc != 0:
                 self._log_output(test,
-                                 "%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
-                                 (phase, test, cmd, output, errput))
+                                 "%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n" %
+                                 (phase, test, cmd, output))
                 # Temporary hack to get around odd file descriptor use by
                 # buildnml scripts.
-                if "bad interpreter" in errput:
+                if "bad interpreter" in output:
                     time.sleep(1)
                     continue
                 else:
@@ -340,10 +340,10 @@ def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
             else:
                 # We don't want "RUN PASSED" in the TestStatus.log if the only thing that
                 # succeeded was the submission.
-                if phase != RUN_PHASE or self._no_batch:
-                    self._log_output(test,
-                                     "%s PASSED for test '%s'.\nCommand: %s\nOutput: %s" %
-                                     (phase, test, cmd, output))
+                phase = "SUBMIT" if phase == RUN_PHASE else phase
+                self._log_output(test,
+                                 "%s PASSED for test '%s'.\nCommand: %s\nOutput: %s\n" %
+                                 (phase, test, cmd, output))
                 break
 
         return rc == 0
@@ -545,8 +545,8 @@ def _setup_phase(self, test):
         rv = self._shell_cmd_for_phase(test, "./case.setup", SETUP_PHASE, from_dir=test_dir)
 
         # It's OK for this command to fail with baseline diffs but not catastrophically
-        cmdstat, output, errput = run_cmd("./case.cmpgen_namelists", from_dir=test_dir)
-        expect(cmdstat in [0, TESTS_FAILED_ERR_CODE], "Fatal error in case.cmpgen_namelists: %s" % (output + "\n" + errput))
+        cmdstat, output, _ = run_cmd("./case.cmpgen_namelists 2>&1", from_dir=test_dir)
+        expect(cmdstat in [0, TESTS_FAILED_ERR_CODE], "Fatal error in case.cmpgen_namelists: %s" % output)
 
         return rv