From d342b296fbaa94776cbbe3df1db131b3e10edb54 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Thu, 17 Aug 2023 16:12:00 -0400
Subject: [PATCH] CI: Run test (and test.summary) locally

The test.summary rule was causing errors in our Gitlab testing due to
multiple runs (concurrent or otherwise) in the same workspace directory.
This patch removes the WORKSPACE directory variable, and each .testing
run happens in its own directory.

Other minor changes:

- The script to generate the summary was moved out of the Makefile and
  into a separate script.

- Unrelated to these changes, error output was extended from 20 to 40
  lines, to provide more readable backtrace output.
---
 .gitlab-ci.yml                        | 13 ++++-----
 .testing/Makefile                     | 33 +++++----------------
 .testing/tools/report_test_results.sh | 42 +++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 33 deletions(-)
 create mode 100755 .testing/tools/report_test_results.sh

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6be281c8cd..5bc90daca4 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -10,7 +10,6 @@ stages:
 # We use the "fetch" strategy to speed up the startup of stages
 variables:
   JOB_DIR: "/lustre/f2/scratch/oar.gfdl.ogrp-account/runner/builds/$CI_PIPELINE_ID"
-  WORKSPACE: "/lustre/f2/scratch/oar.gfdl.ogrp-account/runner/$CI_RUNNER_ID"
   GIT_STRATEGY: fetch
 
 # Always eport value of $JOB_DIR
@@ -185,9 +184,9 @@ actions:gnu:
     - make -s -j
     - MPIRUN= make preproc -s -j
     - echo -e "\e[0Ksection_end:`date +%s`:compile\r\e[0K"
-    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" WORKSPACE=$WORKSPACE test -s -j') > job.sh
-    - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.gnu.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make WORKSPACE=$WORKSPACE test -s
-    - make WORKSPACE=$WORKSPACE test.summary
+    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" test -s -j') > job.sh
+    - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.gnu.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make test -s
+    - make test.summary
 
 actions:intel:
   stage: tests
@@ -205,9 +204,9 @@ actions:intel:
     - make -s -j
     - MPIRUN= make preproc -s -j
     - echo -e "\e[0Ksection_end:`date +%s`:compile\r\e[0K"
-    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" WORKSPACE=$WORKSPACE test -s -j') > job.sh
-    - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.intel.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make WORKSPACE=$WORKSPACE test -s
-    - make WORKSPACE=$WORKSPACE test.summary
+    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" test -s -j') > job.sh
+    - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.intel.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make test -s
+    - make test.summary
 
 # Tests
 #
diff --git a/.testing/Makefile b/.testing/Makefile
index b877ecb5f2..d6b06893fe 100644
--- a/.testing/Makefile
+++ b/.testing/Makefile
@@ -554,8 +554,8 @@ $(WORKSPACE)/work/%/$(1)/ocean.stats $(WORKSPACE)/work/%/$(1)/chksum_diag: build
 	  && $(TIME) $(5) $(MPIRUN) -n $(6) $(abspath $$<) 2> std.err > std.out \
 	  || !( \
 	    mkdir -p ../../../results/$$*/ ; \
-	    cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 20 ; \
-	    cat std.err | tee ../../../results/$$*/std.$(1).err | tail -n 20 ; \
+	    cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 40 ; \
+	    cat std.err | tee ../../../results/$$*/std.$(1).err | tail -n 40 ; \
 	    rm ocean.stats chksum_diag ; \
 	    echo -e "$(FAIL): $$*.$(1) failed at runtime." \
 	  )
@@ -630,8 +630,8 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	# Run the first half-period
 	cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std1.err > std1.out \
 	  || !( \
-	    cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 20 ; \
-	    cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 20 ; \
+	    cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 40 ; \
+	    cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 40 ; \
 	    echo -e "$(FAIL): $*.restart failed at runtime." \
 	  )
 	# Setup the next inputs
@@ -641,8 +641,8 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	# Run the second half-period
 	cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std2.err > std2.out \
 	  || !( \
-	    cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 20 ; \
-	    cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 20 ; \
+	    cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 40 ; \
+	    cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 40 ; \
 	    echo -e "$(FAIL): $*.restart failed at runtime." \
 	  )
 
@@ -652,26 +652,7 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 # Not a true rule; only call this after `make test` to summarize test results.
 .PHONY: test.summary
 test.summary:
-	@if ls $(WORKSPACE)/results/*/* &> /dev/null; then \
-	  if ls $(WORKSPACE)/results/*/std.*.err &> /dev/null; then \
-	    echo "The following tests failed to complete:" ; \
-	    ls $(WORKSPACE)/results/*/std.*.out \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
-	  fi; \
-	  if ls $(WORKSPACE)/results/*/ocean.stats.*.diff &> /dev/null; then \
-	    echo "The following tests report solution regressions:" ; \
-	    ls $(WORKSPACE)/results/*/ocean.stats.*.diff \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[3]; if(length(t)>4) v=v"."t[4]; print a[2],":",v}'; \
-	  fi; \
-	  if ls $(WORKSPACE)/results/*/chksum_diag.*.diff &> /dev/null; then \
-	    echo "The following tests report diagnostic regressions:" ; \
-	    ls $(WORKSPACE)/results/*/chksum_diag.*.diff \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
-	  fi; \
-	  false ; \
-	else \
-	  echo -e "$(PASS): All tests passed!"; \
-	fi
+	@./tools/report_test_results.sh $(WORKSPACE)/results
 
 
 #---
diff --git a/.testing/tools/report_test_results.sh b/.testing/tools/report_test_results.sh
new file mode 100755
index 0000000000..24bab45507
--- /dev/null
+++ b/.testing/tools/report_test_results.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+RESULTS=${1:-${PWD}/results}
+
+GREEN="\033[0;32m"
+RESET="\033[0m"
+PASS="${GREEN}PASS${RESET}"
+
+if [ -d ${RESULTS} ]; then
+  if ls ${RESULTS}/*/std.*.err &> /dev/null; then
+    echo "The following tests failed to complete:"
+	ls ${RESULTS}/*/std.*.out \
+      | awk '{ \
+        split($$0,a,"/"); \
+        split(a[length(a)],t,"."); \
+        v=t[2]; \
+        if(length(t)>4) v=v"."t[4]; print a[length(a)-1],":",v}'
+  fi
+
+  if ls ${RESULTS}/*/ocean.stats.*.diff &> /dev/null; then
+    echo "The following tests report solution regressions:"
+    ls ${RESULTS}/*/ocean.stats.*.diff \
+      | awk '{ \
+        split($$0,a,"/"); \
+        split(a[length(a)],t,"."); \
+        v=t[3]; \
+        if(length(t)>4) v=v"."t[4]; print a[length(a)-1],":",v}'
+  fi
+
+  if ls ${RESULTS}/*/chksum_diag.*.diff &> /dev/null; then
+    echo "The following tests report diagnostic regressions:"
+    ls ${RESULTS}/*/chksum_diag.*.diff \
+      | awk '{ \
+        split($$0,a,"/"); \
+        split(a[length(a)],t,"."); \
+        v=t[2]; \
+        if(length(t)>4) v=v"."t[4]; print a[length(a)-1],":",v}'
+  fi
+
+  exit 1
+else
+  printf "${PASS}: All tests passed!\n"
+fi