nextstrain · corneliusroemer · Nov 9, 2024 · Nov 9, 2024 · Nov 9, 2024 · Nov 9, 2024
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -84,7 +84,7 @@ jobs:
     - run: conda info
     - run: conda list
     - run: pytest --cov=augur
-    - run: cram tests/
+    - run: ./parallel_cram.sh tests/
       env:
         AUGUR: coverage run -a ${{ github.workspace }}/bin/augur
     # Only upload coverage for one job

diff --git a/CHANGES.md b/CHANGES.md
@@ -9,9 +9,11 @@
 
 ### Bug Fixes
 
+* tests: Add `./parallel-cram.sh` wrapper that allows running cram tests in parallel. This allows nearly 2x speedup in CI and 5x or more speedup on developer machines with many cores. Total CI runner minutes are reduced by around a third. [#1667][] (@corneliusroemer)
 * index: Previously specifying a directory that does not exist in the path to `--output` would result in an incorrect error stating that the input file does not exist. It now shows the correct path responsible for the error. [#1644][] (@victorlin)
 * curate format-dates: Update help docs and improve failure messages to show use of `--expected-date-formats`. [#1653][] (@joverlee521)
 
+[#1667]: https://github.com/nextstrain/augur/pull/1667
 [#1644]: https://github.com/nextstrain/augur/issues/1644
 [#1653]: https://github.com/nextstrain/augur/pull/1653
 [#1656]: https://github.com/nextstrain/augur/pull/1656

diff --git a/parallel_cram.sh b/parallel_cram.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+# Default values
+QUIET=0
+VERBOSE=0
+KEEP_TMPDIR=0
+SHELL_PATH="/bin/bash"
+SHELL_OPTS=""
+INDENT=2
+TEST_DIR="tests"
+PRIORITY_PATTERNS="merge|tree"  # Slow tests that should start first
+# Get default number of processors
+NCPU=$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)
+PARALLEL_JOBS=$NCPU  # Default to CPU count if not specified
+
+# Function to show usage
+usage() {
+    cat << EOF
+Usage: $(basename "$0") [OPTIONS] [TESTS...]
+Wrapper script to run cram tests in parallel.
+Options:
+  -h, --help          show this help message and exit
+  -q, --quiet         don't print diffs
+  -v, --verbose       show filenames and test status
+  -j, --jobs=N        number of tests to run in parallel (default: number of CPUs)
+  --keep-tmpdir       keep temporary directories
+  --shell=PATH        shell to use for running tests (default: /bin/bash)
+  --shell-opts=OPTS   arguments to invoke shell with
+  --indent=NUM        number of spaces to use for indentation (default: 2)
+EOF
+    exit 1
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -h|--help)
+            usage
+            ;;
+        -q|--quiet)
+            QUIET=1
+            shift
+            ;;
+        -v|--verbose)
+            VERBOSE=1
+            shift
+            ;;
+        -j|--jobs=*)
+            if [[ "$1" == *"="* ]]; then
+                PARALLEL_JOBS="${1#*=}"
+            else
+                shift
+                PARALLEL_JOBS="$1"
+            fi
+            if ! [[ "$PARALLEL_JOBS" =~ ^[0-9]+$ ]]; then
+                echo "Error: -j/--jobs requires a numeric argument"
+                exit 1
+            fi
+            if [ "$PARALLEL_JOBS" -lt 1 ]; then
+                echo "Error: Number of jobs must be at least 1"
+                exit 1
+            fi
+            shift
+            ;;
+        -j*)  # Handle -j4 format
+            PARALLEL_JOBS="${1#-j}"
+            if ! [[ "$PARALLEL_JOBS" =~ ^[0-9]+$ ]]; then
+                echo "Error: -j requires a numeric argument"
+                exit 1
+            fi
+            if [ "$PARALLEL_JOBS" -lt 1 ]; then
+                echo "Error: Number of jobs must be at least 1"
+                exit 1
+            fi
+            shift
+            ;;
+        --keep-tmpdir)
+            KEEP_TMPDIR=1
+            shift
+            ;;
+        --shell=*)
+            SHELL_PATH="${1#*=}"
+            shift
+            ;;
+        --shell-opts=*)
+            SHELL_OPTS="${1#*=}"
+            shift
+            ;;
+        --indent=*)
+            INDENT="${1#*=}"
+            shift
+            ;;
+        -*)
+            echo "Unknown option: $1"
+            usage
+            ;;
+        *)
+            TEST_DIR="$1"
+            shift
+            ;;
+    esac
+done
+
+# Validate test directory
+if [ ! -d "$TEST_DIR" ]; then
+    echo "Error: Directory $TEST_DIR does not exist"
+    exit 1
+fi
+
+# Build cram command with options
+CRAM_CMD="cram"
+[ $QUIET -eq 1 ] && CRAM_CMD="$CRAM_CMD -q"
+[ $VERBOSE -eq 1 ] && CRAM_CMD="$CRAM_CMD -v"
+[ $KEEP_TMPDIR -eq 1 ] && CRAM_CMD="$CRAM_CMD --keep-tmpdir"
+[ -n "$SHELL_PATH" ] && CRAM_CMD="$CRAM_CMD --shell=$SHELL_PATH"
+[ -n "$SHELL_OPTS" ] && CRAM_CMD="$CRAM_CMD --shell-opts=$SHELL_OPTS"
+[ -n "$INDENT" ] && CRAM_CMD="$CRAM_CMD --indent=$INDENT"
+
+# Find all .t files and order with priority files first
+TEST_FILES=$(find "$TEST_DIR" -name "*.t" | sort)
+PRIORITY_FILES=$(echo "$TEST_FILES" | grep -E "$PRIORITY_PATTERNS" || true)
+OTHER_FILES=$(echo "$TEST_FILES" | grep -vE "$PRIORITY_PATTERNS" || true)
+ALL_FILES=$(echo -e "${PRIORITY_FILES}\n${OTHER_FILES}")
+
+# Create a lock directory for synchronized output
+LOCK_DIR="/tmp/cram_lock_$$"
+mkdir "$LOCK_DIR"
+trap 'rm -rf "$LOCK_DIR"' EXIT
+
+# Create files to store temporary directories and test results
+TMPDIR_FILE="$LOCK_DIR/tmpdirs"
+RESULTS_FILE="$LOCK_DIR/results"
+touch "$TMPDIR_FILE" "$RESULTS_FILE"
+
+# Export variables for subshell
+export CRAM_CMD LOCK_DIR TMPDIR_FILE RESULTS_FILE KEEP_TMPDIR
+
+# Run tests in parallel with specified number of jobs
+echo "$ALL_FILES" | xargs -P "$PARALLEL_JOBS" -I {} sh -c '
+    file="$1"
+    lock_dir="$LOCK_DIR"
+    output=$($CRAM_CMD "$file" 2>&1)
+    status=$?
+
+    while ! mkdir "$lock_dir/lock" 2>/dev/null; do
+        sleep 0.05
+    done
+
+    # Record test status
+    echo "$status" >> "$RESULTS_FILE"
+
+    if [ $status -eq 0 ]; then
+        printf "."
+    else
+        echo "$output" | grep -v "^# Ran "
+    fi
+
+    # If --keep-tmpdir is enabled, capture the temporary directory
+    if [ $KEEP_TMPDIR -eq 1 ]; then
+        echo "$output" | grep "Kept temporary directory:" >> "$TMPDIR_FILE"
+    fi
+
+    rm -rf "$lock_dir/lock"
+' - {}
+echo
+
+# Display captured temporary directories if --keep-tmpdir was used
+if [ $KEEP_TMPDIR -eq 1 ]; then
+    sort -u "$TMPDIR_FILE" | while read -r line; do
+        echo "$line"
+    done
+fi
+
+# Check if any tests failed, i.e. non-zero exit status
+# Count the number of failed tests
+# Count the number of passed tests
+failed_tests=$(grep -c -v "^0$" "$RESULTS_FILE")
+passed_tests=$(grep -c "^0$" "$RESULTS_FILE")
+
+# Print summary of test results
+echo "Passed $passed_tests, failed $failed_tests tests"
+if [ "$failed_tests" -gt 0 ]; then
+    exit 1
+fi
+
+exit 0
diff --git a/run_tests.sh b/run_tests.sh
@@ -22,7 +22,7 @@ python3 -m pytest $coverage_arg "$@"
 # Only run functional tests if we are not running a subset of tests for pytest.
 if [ "$partial_test" = 0 ]; then
     echo "Running functional tests with cram"
-    cram tests/
+    ./parallel_cram.sh tests/
 else
     echo "Skipping functional tests when running a subset of unit tests"
 fi

diff --git a/tests/functional/tree/cram/_setup.sh b/tests/functional/tree/cram/_setup.sh
@@ -1,2 +1,7 @@
 export AUGUR="${AUGUR:-$TESTDIR/../../../../bin/augur}"
 set -o pipefail
+
+# IQ-Tree writes to the input file directory, so we need to copy the data
+# to allow running tests in parallel.
+# This also avoids the data directory being polluted with output files.
+cp -r "$TESTDIR/../data" "$TMP"
diff --git a/tests/functional/tree/cram/iqtree-compressed-input.t b/tests/functional/tree/cram/iqtree-compressed-input.t
@@ -5,6 +5,6 @@ Setup
 Build a tree with excluded sites using a compressed input file.
 
   $ ${AUGUR} tree \
-  >  --alignment "$TESTDIR/../data/aligned.fasta.xz" \
-  >  --exclude-sites "$TESTDIR/../data/excluded_sites.txt" \
+  >  --alignment "$TMP/data/aligned.fasta.xz" \
+  >  --exclude-sites "$TMP/data/excluded_sites.txt" \
   >  --output tree_raw.nwk &> /dev/null
diff --git a/tests/functional/tree/cram/iqtree-extend-args.t b/tests/functional/tree/cram/iqtree-extend-args.t
@@ -6,6 +6,6 @@ Build a tree, augmenting existing default arguments with custom arguments.
 
   $ ${AUGUR} tree \
   >  --method iqtree \
-  >  --alignment "$TESTDIR/../data/aligned.fasta" \
+  >  --alignment "$TMP/data/aligned.fasta" \
   >  --tree-builder-args="-czb" \
   >  --output tree_raw.nwk > /dev/null
diff --git a/tests/functional/tree/cram/iqtree-model-auto.t b/tests/functional/tree/cram/iqtree-model-auto.t
@@ -5,7 +5,7 @@ Setup
 Try building a tree with IQ-TREE using its ModelTest functionality, by supplying a substitution model of "auto".
 
   $ ${AUGUR} tree \
-  >  --alignment "$TESTDIR/../data/aligned.fasta" \
+  >  --alignment "$TMP/data/aligned.fasta" \
   >  --method iqtree \
   >  --substitution-model auto \
   >  --output tree_raw.nwk \

diff --git a/tests/functional/tree/cram/iqtree-more-threads.t b/tests/functional/tree/cram/iqtree-more-threads.t
@@ -5,7 +5,7 @@ Setup
 Try building a tree with IQ-TREE with more threads (4) than there are input sequences (3).
 
   $ ${AUGUR} tree \
-  >  --alignment "$TESTDIR/../data/aligned.fasta" \
+  >  --alignment "$TMP/data/aligned.fasta" \
   >  --method iqtree \
   >  --output tree_raw.nwk \
   >  --nthreads 4 > /dev/null
diff --git a/tests/functional/tree/cram/iqtree-override-args.t b/tests/functional/tree/cram/iqtree-override-args.t
@@ -7,7 +7,7 @@ Since the following custom arguments are incompatible with the default IQ-TREE a
 
   $ ${AUGUR} tree \
   >  --method iqtree \
-  >  --alignment "$TESTDIR/../data/full_aligned.fasta" \
+  >  --alignment "$TMP/data/full_aligned.fasta" \
   >  --tree-builder-args="-czb -bb 1000 -bnni" \
   >  --override-default-args \
   >  --output tree_raw.nwk > /dev/null
diff --git a/tests/functional/tree/cram/iqtree-preserve-fa.t b/tests/functional/tree/cram/iqtree-preserve-fa.t
@@ -5,10 +5,10 @@ Setup
 Build a tree with an input file that doesn't end in .fasta, and ensure it's not overwritten.
 
   $ ${AUGUR} tree \
-  >  --alignment "$TESTDIR/../data/aligned.fa" \
+  >  --alignment "$TMP/data/aligned.fa" \
   >  --method iqtree \
   >  --output tree_raw.nwk \
   >  --nthreads 1 > /dev/null
 
-  $ sha256sum "$TESTDIR/../data/aligned.fa" | awk '{print $1}'
+  $ sha256sum "$TMP/data/aligned.fa" | awk '{print $1}'
   169a9f5f70b94e26a2c4ab2b3180d4b463112581438515557a9797adc834863d
diff --git a/tests/functional/tree/cram/iqtree.t b/tests/functional/tree/cram/iqtree.t
@@ -5,7 +5,7 @@ Setup
 Try building a tree with IQ-TREE.
 
   $ ${AUGUR} tree \
-  >  --alignment "$TESTDIR/../data/aligned.fasta" \
+  >  --alignment "$TMP/data/aligned.fasta" \
   >  --method iqtree \
   >  --output tree_raw.nwk \
   >  --nthreads 1 > /dev/null