Merge pull request #1317 from ESMCI/jgfouca/better_testlog_content

Better content in TestStatus.log file.

There is no reason to print a full stacktrace when the model fails; that just clutters the log file and provides no debugging value.

In some cases, create_test was losing stderr output. Now we redirect stderr to stdout so it is never lost.

In some cases, the TestStatus claimed the RUN phase both passed and failed. What actually happened was that the submit passed and the run failed. The situation is now clearer.

Test suite: scripts-regression-tests --fast
Test baseline:
Test namelist changes:
Test status: bit for bit

Fixes #1291

User interface changes?: Improved formatting/content of TestStatus.log

Code review: @jedwards4b
E3SM-Project · Apr 7, 2017 · cb7bd10 · cb7bd10
2 parents 7fb258b + ebcccaf
commit cb7bd10
Show file tree

Hide file tree

Showing 5 changed files with 19 additions and 12 deletions.
diff --git a/scripts/lib/CIME/SystemTests/system_tests_common.py b/scripts/lib/CIME/SystemTests/system_tests_common.py
@@ -146,7 +146,13 @@ def run(self):
 
         except:
             success = False
-            excmsg = "Exception during run:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
+            msg = sys.exc_info()[1].message
+            if "RUN FAIL" in msg:
+                # Don't want to print stacktrace for a model failure since that
+                # is not a CIME/infrastructure problem.
+                excmsg = msg
+            else:
+                excmsg = "Exception during run:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
             logger.warning(excmsg)
             append_testlog(excmsg)
 

diff --git a/scripts/lib/CIME/aprun.py b/scripts/lib/CIME/aprun.py
diff --git a/scripts/lib/CIME/case_run.py b/scripts/lib/CIME/case_run.py
@@ -102,6 +102,7 @@ def _run_model_impl(case, lid):
     while loop:
         loop = False
         stat = run_cmd(cmd, from_dir=rundir)[0]
+        model_logfile = os.path.join(rundir, model + ".log." + lid)
         # Determine if failure was due to a failed node, if so, try to restart
         if stat != 0:
             node_fail_re = case.get_value("NODE_FAIL_REGEX")
@@ -130,7 +131,7 @@ def _run_model_impl(case, lid):
 
             if not loop:
                 # We failed and we're not restarting
-                expect(False, "Command '%s' failed" % cmd)
+                expect(False, "RUN FAIL: Command '%s' failed\nSee log file for details: %s" % (cmd, model_logfile))
 
     logger.info("%s MODEL EXECUTION HAS FINISHED" %(time.strftime("%Y-%m-%d %H:%M:%S")))
 

diff --git a/scripts/lib/CIME/case_st_archive.py b/scripts/lib/CIME/case_st_archive.py
diff --git a/scripts/lib/CIME/test_scheduler.py b/scripts/lib/CIME/test_scheduler.py
@@ -325,25 +325,25 @@ def _update_test_status(self, test, phase, status):
     def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
     ###########################################################################
         while True:
-            rc, output, errput = run_cmd(cmd, from_dir=from_dir)
+            rc, output, _ = run_cmd(cmd + " 2>&1", from_dir=from_dir)
             if rc != 0:
                 self._log_output(test,
-                                 "%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
-                                 (phase, test, cmd, output, errput))
+                                 "%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n" %
+                                 (phase, test, cmd, output))
                 # Temporary hack to get around odd file descriptor use by
                 # buildnml scripts.
-                if "bad interpreter" in errput:
+                if "bad interpreter" in output:
                     time.sleep(1)
                     continue
                 else:
                     break
             else:
                 # We don't want "RUN PASSED" in the TestStatus.log if the only thing that
                 # succeeded was the submission.
-                if phase != RUN_PHASE or self._no_batch:
-                    self._log_output(test,
-                                     "%s PASSED for test '%s'.\nCommand: %s\nOutput: %s" %
-                                     (phase, test, cmd, output))
+                phase = "SUBMIT" if phase == RUN_PHASE else phase
+                self._log_output(test,
+                                 "%s PASSED for test '%s'.\nCommand: %s\nOutput: %s\n" %
+                                 (phase, test, cmd, output))
                 break
 
         return rc == 0
@@ -545,8 +545,8 @@ def _setup_phase(self, test):
         rv = self._shell_cmd_for_phase(test, "./case.setup", SETUP_PHASE, from_dir=test_dir)
 
         # It's OK for this command to fail with baseline diffs but not catastrophically
-        cmdstat, output, errput = run_cmd("./case.cmpgen_namelists", from_dir=test_dir)
-        expect(cmdstat in [0, TESTS_FAILED_ERR_CODE], "Fatal error in case.cmpgen_namelists: %s" % (output + "\n" + errput))
+        cmdstat, output, _ = run_cmd("./case.cmpgen_namelists 2>&1", from_dir=test_dir)
+        expect(cmdstat in [0, TESTS_FAILED_ERR_CODE], "Fatal error in case.cmpgen_namelists: %s" % output)
 
         return rv