From d342b296fbaa94776cbbe3df1db131b3e10edb54 Mon Sep 17 00:00:00 2001 From: Marshall Ward Date: Thu, 17 Aug 2023 16:12:00 -0400 Subject: [PATCH] CI: Run test (and test.summary) locally The test.summary rule was causing errors in our Gitlab testing due to multiple runs (concurrent or otherwise) in the same workspace directory. This patch removes the WORKSPACE directory variable, and each .testing run happens in its own directory. Other minor changes: - The script to generate the summary was moved out of the Makefile and into a separate script. - Unrelated to these changes, error output was extended from 20 to 40 lines, to provide more readable backtrace output. --- .gitlab-ci.yml | 13 ++++----- .testing/Makefile | 33 +++++---------------- .testing/tools/report_test_results.sh | 42 +++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 33 deletions(-) create mode 100755 .testing/tools/report_test_results.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6be281c8cd..5bc90daca4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,7 +10,6 @@ stages: # We use the "fetch" strategy to speed up the startup of stages variables: JOB_DIR: "/lustre/f2/scratch/oar.gfdl.ogrp-account/runner/builds/$CI_PIPELINE_ID" - WORKSPACE: "/lustre/f2/scratch/oar.gfdl.ogrp-account/runner/$CI_RUNNER_ID" GIT_STRATEGY: fetch # Always eport value of $JOB_DIR @@ -185,9 +184,9 @@ actions:gnu: - make -s -j - MPIRUN= make preproc -s -j - echo -e "\e[0Ksection_end:`date +%s`:compile\r\e[0K" - - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" WORKSPACE=$WORKSPACE test -s -j') > job.sh - - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.gnu.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make WORKSPACE=$WORKSPACE test -s - - make WORKSPACE=$WORKSPACE test.summary + - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" test -s -j') > job.sh + - sbatch --clusters=c5 --nodes=2 
--time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.gnu.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make test -s + - make test.summary actions:intel: stage: tests @@ -205,9 +204,9 @@ actions:intel: - make -s -j - MPIRUN= make preproc -s -j - echo -e "\e[0Ksection_end:`date +%s`:compile\r\e[0K" - - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" WORKSPACE=$WORKSPACE test -s -j') > job.sh - - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.intel.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make WORKSPACE=$WORKSPACE test -s - - make WORKSPACE=$WORKSPACE test.summary + - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" test -s -j') > job.sh + - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.intel.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make test -s + - make test.summary # Tests # diff --git a/.testing/Makefile b/.testing/Makefile index b877ecb5f2..d6b06893fe 100644 --- a/.testing/Makefile +++ b/.testing/Makefile @@ -554,8 +554,8 @@ $(WORKSPACE)/work/%/$(1)/ocean.stats $(WORKSPACE)/work/%/$(1)/chksum_diag: build && $(TIME) $(5) $(MPIRUN) -n $(6) $(abspath $$<) 2> std.err > std.out \ || !( \ mkdir -p ../../../results/$$*/ ; \ - cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 20 ; \ - cat std.err | tee ../../../results/$$*/std.$(1).err | tail -n 20 ; \ + cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 40 ; \ + cat std.err | tee ../../../results/$$*/std.$(1).err | tail -n 40 ; \ rm ocean.stats chksum_diag ; \ echo -e "$(FAIL): $$*.$(1) failed at runtime." 
\ ) @@ -630,8 +630,8 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc # Run the first half-period cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std1.err > std1.out \ || !( \ - cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 20 ; \ - cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 20 ; \ + cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 40 ; \ + cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 40 ; \ echo -e "$(FAIL): $*.restart failed at runtime." \ ) # Setup the next inputs @@ -641,8 +641,8 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc # Run the second half-period cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std2.err > std2.out \ || !( \ - cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 20 ; \ - cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 20 ; \ + cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 40 ; \ + cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 40 ; \ echo -e "$(FAIL): $*.restart failed at runtime." \ ) @@ -652,26 +652,7 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc # Not a true rule; only call this after `make test` to summarize test results. 
.PHONY: test.summary
 test.summary:
-	@if ls $(WORKSPACE)/results/*/* &> /dev/null; then \
-	  if ls $(WORKSPACE)/results/*/std.*.err &> /dev/null; then \
-	    echo "The following tests failed to complete:" ; \
-	    ls $(WORKSPACE)/results/*/std.*.out \
-	    | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
-	  fi; \
-	  if ls $(WORKSPACE)/results/*/ocean.stats.*.diff &> /dev/null; then \
-	    echo "The following tests report solution regressions:" ; \
-	    ls $(WORKSPACE)/results/*/ocean.stats.*.diff \
-	    | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[3]; if(length(t)>4) v=v"."t[4]; print a[2],":",v}'; \
-	  fi; \
-	  if ls $(WORKSPACE)/results/*/chksum_diag.*.diff &> /dev/null; then \
-	    echo "The following tests report diagnostic regressions:" ; \
-	    ls $(WORKSPACE)/results/*/chksum_diag.*.diff \
-	    | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
-	  fi; \
-	  false ; \
-	else \
-	  echo -e "$(PASS): All tests passed!"; \
-	fi
+	@./tools/report_test_results.sh $(WORKSPACE)/results
 
 
 #---
diff --git a/.testing/tools/report_test_results.sh b/.testing/tools/report_test_results.sh
new file mode 100755
index 0000000000..24bab45507
--- /dev/null
+++ b/.testing/tools/report_test_results.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# Summarize failed tests found under a results directory (arg 1, default ./results).
+RESULTS=${1:-${PWD}/results}
+GREEN="\033[0;32m"
+RESET="\033[0m"
+PASS="${GREEN}PASS${RESET}"
+
+if [ -d "${RESULTS}" ]; then
+  if ls "${RESULTS}"/*/std.*.err > /dev/null 2>&1; then  # POSIX redirect; sh has no &>
+    echo "The following tests failed to complete:"
+    ls "${RESULTS}"/*/std.*.out \
+    | awk '{
+        na = split($0, a, "/")      # a[na-1]: test directory, a[na]: file name
+        nt = split(a[na], t, ".")   # std.<config>.out, plus one optional dotted field
+        v = t[2]
+        if (nt > 3) v = v "." t[3]; print a[na-1], ":", v }'
+  fi
+
+  if ls "${RESULTS}"/*/ocean.stats.*.diff > /dev/null 2>&1; then
+    echo "The following tests report solution regressions:"
+    ls "${RESULTS}"/*/ocean.stats.*.diff \
+    | awk '{
+        na = split($0, a, "/")      # a[na-1]: test directory, a[na]: file name
+        nt = split(a[na], t, ".")   # ocean.stats.<config>.diff, plus one optional dotted field
+        v = t[3]
+        if (nt > 4) v = v "." t[4]; print a[na-1], ":", v }'
+  fi
+
+  if ls "${RESULTS}"/*/chksum_diag.*.diff > /dev/null 2>&1; then
+    echo "The following tests report diagnostic regressions:"
+    ls "${RESULTS}"/*/chksum_diag.*.diff \
+    | awk '{
+        na = split($0, a, "/")      # a[na-1]: test directory, a[na]: file name
+        nt = split(a[na], t, ".")   # chksum_diag.<config>.diff, plus one optional dotted field
+        v = t[2]
+        if (nt > 3) v = v "." t[3]; print a[na-1], ":", v }'
+  fi
+  # An existing results directory is treated as a failed run.
+  exit 1
+else
+  printf "${PASS}: All tests passed!\n"
+fi