From a34f5d67b9bde99bc39cfd976832d4fa579f58e9 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Thu, 23 Jun 2022 08:05:00 +1000
Subject: [PATCH 1/4] Split full sync into checkpoint and full validation

---
 .github/workflows/deploy-gcp-tests.yml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml
index 6c1777e5b11..48d8e32c724 100644
--- a/.github/workflows/deploy-gcp-tests.yml
+++ b/.github/workflows/deploy-gcp-tests.yml
@@ -67,6 +67,8 @@ env:
   MACHINE_TYPE: c2d-standard-16
 
 jobs:
+  # run the test without any cached state
+  # each test runs one of these test-* jobs, and skips the other
   test-without-cached-state:
     name: Run ${{ inputs.test_id }} test
     if: ${{ !inputs.needs_zebra_state }}
@@ -132,6 +134,10 @@ jobs:
           --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
           ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
 
+  # run the test using cached state
+  # each test runs one of these test-* jobs, and skips the other
+  #
+  # TODO: if we're running full syncs with cached state, split this job into checkpoint and full validation
   test-with-cached-state:
     name: Run ${{ inputs.test_id }} test
     if: ${{ inputs.needs_zebra_state }}
@@ -316,6 +322,7 @@ jobs:
           --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
           ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
 
+  # create a state image from the instance's state disk, if requested by the caller
   create-state-image:
     name: Create ${{ inputs.test_id }} cached state image
     runs-on: ubuntu-latest
@@ -402,6 +409,7 @@ jobs:
           --storage-location=us \
           --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"
 
+  # delete the Google Cloud instance for this test
   delete-instance:
     name: Delete ${{ inputs.test_id }} instance
     runs-on: ubuntu-latest

From 8924ed79da73d2e555afbe72397980b453c04772 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Thu, 23 Jun 2022 16:15:59 +1000
Subject: [PATCH 2/4] Sort workflow variables into categories and add
 descriptions

---
 .github/workflows/deploy-gcp-tests.yml | 40 +++++++++++++++++---------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml
index 48d8e32c724..c5adb97fb1e 100644
--- a/.github/workflows/deploy-gcp-tests.yml
+++ b/.github/workflows/deploy-gcp-tests.yml
@@ -3,62 +3,76 @@ name: Deploy GCP tests
 on:
   workflow_call:
     inputs:
-      network:
-        required: false
-        type: string
-        default: Mainnet
-      app_name:
-        required: false
-        type: string
-        default: 'zebra'
+      # Status and logging
       test_id:
         required: true
         type: string
+        description: 'Unique identifier for the test'
       test_description:
         required: true
         type: string
+        description: 'Explains what the test does'
+      # Test selection and parameters
       test_variables:
         required: true
         type: string
+        description: 'Environment variables used to select and configure the test'
+      network:
+        required: false
+        type: string
+        default: Mainnet
+        description: 'Zcash network to test against'
+      # Cached state
+      #
       # TODO: find a better name
       root_state_path:
         required: false
         type: string
         default: '/zebrad-cache'
+        description: 'Cached state base directory path'
       # TODO: find a better name
       zebra_state_dir:
         required: false
         type: string
         default: ''
-        description: 'Name of the Zebra cached state directory and input image prefix to search in GCP'
+        description: 'Zebra cached state directory and input image prefix to search in GCP'
       # TODO: find a better name
       lwd_state_dir:
         required: false
         type: string
         default: ''
-        description: 'Name of the Lightwalletd cached state directory and input image prefix to search in GCP'
+        description: 'Lightwalletd cached state directory and input image prefix to search in GCP'
       disk_prefix:
         required: false
         type: string
         default: 'zebrad-cache'
-        description: 'Used to name the image, and for tests that do not use a `zebra_state_dir` to work, but builds a cached state'
+        description: 'Image name prefix, and `zebra_state_dir` name for newly created cached states'
       disk_suffix:
         required: false
         type: string
+        description: 'Image name suffix'
       needs_zebra_state:
         required: true
         type: boolean
-        description: 'Indicates if a test needs a disk with a Zebra cached state to run'
+        description: 'Does the test use Zebra cached state?'
       needs_lwd_state:
         required: false
         type: boolean
-        description: 'Indicates if a test needs a disk with Lightwalletd cached state to run (which also includes a Zebra cached state)'
+        description: 'Does the test use Lightwalletd and Zebra cached state?'
       saves_to_disk:
         required: true
         type: boolean
+        description: 'Does the test create a new cached state disk?'
+      # Metadata
       height_grep_text:
         required: false
         type: string
+        description: 'Regular expression to find the tip height in test logs, and add it to newly created cached state image metadata'
+      app_name:
+        required: false
+        type: string
+        default: 'zebra'
+        description: 'Application name for Google Cloud instance metadata'
 
 env:
   IMAGE_NAME: zebrad-test

From 6ab13334093ebe7905651d2449c2f7b4cd6f6ee5 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Thu, 23 Jun 2022 16:17:14 +1000
Subject: [PATCH 3/4] Split Create instance/volume and Run test into separate
 jobs

---
 .github/workflows/deploy-gcp-tests.yml | 174 +++++++++++++++++++------
 1 file changed, 133 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml
index c5adb97fb1e..fae24b1e266 100644
--- a/.github/workflows/deploy-gcp-tests.yml
+++ b/.github/workflows/deploy-gcp-tests.yml
@@ -81,10 +81,10 @@ env:
   MACHINE_TYPE: c2d-standard-16
 
 jobs:
-  # run the test without any cached state
-  # each test runs one of these test-* jobs, and skips the other
-  test-without-cached-state:
-    name: Run ${{ inputs.test_id }} test
+  # set up the test without any cached state
+  # each test runs one of the *-with/without-cached-state job series, and skips the other
+  setup-without-cached-state:
+    name: Setup ${{ inputs.test_id }} test
     if: ${{ !inputs.needs_zebra_state }}
     runs-on: ubuntu-latest
     permissions:
@@ -114,7 +114,8 @@ jobs:
           service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
           token_format: 'access_token'
 
-      - name: Create GCP compute instance
+      # Create a Compute Engine virtual machine
+      - name: Create ${{ inputs.test_id }} GCP compute instance
         id: create-instance
         run: |
           gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
@@ -130,7 +131,7 @@ jobs:
           --zone ${{ env.ZONE }}
           sleep 60
 
-      - name: Run ${{ inputs.test_id }} test
+      - name: Create ${{ inputs.test_id }} Docker volume
         run: |
           gcloud compute ssh \
           ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
@@ -143,17 +144,60 @@ jobs:
           && \
           docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
           ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
-          && \
+          "
+
+  test-without-cached-state:
+    name: Run ${{ inputs.test_id }} test
+    needs: [ setup-without-cached-state ]
+    # if the previous step fails, we also want to run and fail this step,
+    # so that the branch protection rule fails in Mergify and GitHub
+    if: ${{ !cancelled() }}
+    runs-on: ubuntu-latest
+    permissions:
+      contents: 'read'
+      id-token: 'write'
+    steps:
+      - uses: actions/checkout@v3.0.2
+        with:
+          persist-credentials: false
+
+      - name: Inject slug/short variables
+        uses: rlespinasse/github-slug-action@v4
+        with:
+          short-length: 7
+
+      - name: Downcase network name for disks
+        run: |
+          NETWORK_CAPS=${{ inputs.network }}
+          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
+
+      # Setup gcloud CLI
+      - name: Authenticate to Google Cloud
+        id: auth
+        uses: google-github-actions/auth@v0.8.0
+        with:
+          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
+          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
+          token_format: 'access_token'
+
+      - name: Run ${{ inputs.test_id }} test
+        run: |
+          gcloud compute ssh \
+          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
+          --zone ${{ env.ZONE }} \
+          --quiet \
+          --ssh-flag="-o ServerAliveInterval=5" \
+          --command \
+          "\
           docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \
           --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
-          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
+          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
+          "
 
-  # run the test using cached state
-  # each test runs one of these test-* jobs, and skips the other
-  #
-  # TODO: if we're running full syncs with cached state, split this job into checkpoint and full validation
-  test-with-cached-state:
-    name: Run ${{ inputs.test_id }} test
+  # set up the test using cached state
+  # each test runs one of the *-with/without-cached-state job series, and skips the other
+  setup-with-cached-state:
+    name: Setup ${{ inputs.test_id }} test
     if: ${{ inputs.needs_zebra_state }}
     runs-on: ubuntu-latest
     permissions:
@@ -200,7 +244,7 @@ jobs:
       #
       # Passes the disk name to subsequent steps using $CACHED_DISK_NAME env variable
       # Passes the state version to subsequent steps using $STATE_VERSION env variable
-      - name: Find cached state disk
+      - name: Find ${{ inputs.test_id }} cached state disk
         id: get-disk-name
         run: |
           LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
@@ -235,9 +279,9 @@ jobs:
           echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
           echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> $GITHUB_ENV
 
-      # Creates Compute Engine virtual machine and attach a cached state disk using the
+      # Create a Compute Engine virtual machine and attach a cached state disk using the
       # $CACHED_DISK_NAME variable as the source image to populate the disk cached state
-      - name: Create GCP compute instance
+      - name: Create ${{ inputs.test_id }} GCP compute instance
         id: create-instance
         run: |
           gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
@@ -253,9 +297,62 @@ jobs:
           --zone ${{ env.ZONE }}
           sleep 60
 
-      # SSH into the just created VM, and create a Docker container to run the incoming test 
-      # from ${{ inputs.test_id }}, then create a docker volume with the recently attached disk.
-      # The disk will be mounted in ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }}.
+      # Create a docker volume with the selected cached state.
+      #
+      # SSH into the just created VM, and create a docker volume with the recently attached disk.
+      - name: Create ${{ inputs.test_id }} Docker volume
+        run: |
+          gcloud compute ssh \
+          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
+          --zone ${{ env.ZONE }} \
+          --quiet \
+          --ssh-flag="-o ServerAliveInterval=5" \
+          --command \
+          "\
+          docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
+          ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
+          "
+
+  test-with-cached-state:
+    name: Run ${{ inputs.test_id }} test
+    needs: [ setup-with-cached-state ]
+    # if the previous step fails, we also want to run and fail this step,
+    # so that the branch protection rule fails in Mergify and GitHub
+    if: ${{ !cancelled() }}
+    runs-on: ubuntu-latest
+    permissions:
+      contents: 'read'
+      id-token: 'write'
+    steps:
+      - uses: actions/checkout@v3.0.2
+        with:
+          persist-credentials: false
+          fetch-depth: '2'
+
+      - name: Inject slug/short variables
+        uses: rlespinasse/github-slug-action@v4
+        with:
+          short-length: 7
+
+      - name: Downcase network name for disks
+        run: |
+          NETWORK_CAPS=${{ inputs.network }}
+          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
+
+      # Setup gcloud CLI
+      - name: Authenticate to Google Cloud
+        id: auth
+        uses: google-github-actions/auth@v0.8.0
+        with:
+          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
+          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
+          token_format: 'access_token'
+
+      # Run the test with the previously created Zebra-only cached state.
+      # Each test runs one of the "Run test" steps, and skips the other.
+      #
+      # SSH into the VM created by the setup job, and create a Docker container to run the incoming test
+      # from ${{ inputs.test_id }}, then mount the docker volume created in the previous job.
       #
       # The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
       # container in one path:
@@ -264,14 +361,13 @@ jobs:
       # This path must match the variable used by the tests in Rust, which are also set in
       # `continous-integration-docker.yml` to be able to run this tests.
       #
-      # Although we're mounting the disk root, Zebra will only respect the values from 
+      # Although we're mounting the disk root, Zebra will only respect the values from
       # $ZEBRA_CACHED_STATE_DIR. The inputs like ${{ inputs.zebra_state_dir }} are only used
       # to match that variable paths.
       - name: Run ${{ inputs.test_id }} test
-        # This step mounts the volume only when a single cached state is needed, in this case
-        # the cached state from Zebra.
-        # lightwalletd-full-sync test is an exception to this rule, as it does not need a lwd cached state,
-        # but it does saves a lwd cached state
+        # This step only runs for tests that just read or write a Zebra state.
+        #
+        # The lwd-full-sync test reads Zebra state and writes lwd state, so it is handled specially.
         # TODO: we should find a better logic for this use cases
         if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }}
         run: |
@@ -282,17 +378,16 @@ jobs:
           --ssh-flag="-o ServerAliveInterval=5" \
           --command \
           "\
-          docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
-          ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
-          && \
           docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \
           --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
-          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
+          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
+          "
 
-      # SSH into the just created VM, and create a Docker container to run the incoming test 
-      # from ${{ inputs.test_id }}, then create a docker volume with the recently attached disk.
-      # The disk will be mounted in ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }},
-      # and ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }}
+      # Run the test with the previously created Lightwalletd and Zebra cached state.
+      # Each test runs one of the "Run test" steps, and skips the other.
+      #
+      # SSH into the VM created by the setup job, and create a Docker container to run the incoming test
+      # from ${{ inputs.test_id }}, then mount the docker volume created in the previous job.
       #
       # In this step we're using the same disk for simplicity, as mounting multiple disks to the
       # VM and to the container might require more steps in this workflow, and additional
@@ -314,10 +409,9 @@ jobs:
       # will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR,
       # the inputs like ${{ inputs.lwd_state_dir }} are only used to match those variables paths.
       - name: Run ${{ inputs.test_id }} test
-        # This step mounts the volume only when both cached states are needed, in this case
-        # the cached state from Zebra and Lightwalletd
-        # lightwalletd-full-sync test is an exception to this rule, as it does not need a lwd cached state,
-        # but it does saves a lwd cached state
+        # This step only runs for tests that read or write Lightwalletd and Zebra states.
+        #
+        # The lwd-full-sync test reads Zebra state and writes lwd state, so it is handled specially.
         # TODO: we should find a better logic for this use cases
         if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }}
         run: |
@@ -328,13 +422,11 @@ jobs:
           --ssh-flag="-o ServerAliveInterval=5" \
           --command \
           "\
-          docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
-          ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
-          && \
           docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \
           --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
           --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
-          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
+          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
+          "
 
   # create a state image from the instance's state disk, if requested by the caller
   create-state-image:

From e4f7268289bc6c11900df8d046b03b383e9ffc45 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Thu, 23 Jun 2022 16:25:11 +1000
Subject: [PATCH 4/4] Copy initial conditions to all jobs in the series

---
 .github/workflows/deploy-gcp-tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml
index fae24b1e266..7a57cd8796d 100644
--- a/.github/workflows/deploy-gcp-tests.yml
+++ b/.github/workflows/deploy-gcp-tests.yml
@@ -151,7 +151,7 @@ jobs:
     needs: [ setup-without-cached-state ]
     # if the previous step fails, we also want to run and fail this step,
     # so that the branch protection rule fails in Mergify and GitHub
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && !inputs.needs_zebra_state }}
     runs-on: ubuntu-latest
     permissions:
       contents: 'read'
@@ -318,7 +318,7 @@ jobs:
     needs: [ setup-with-cached-state ]
     # if the previous step fails, we also want to run and fail this step,
     # so that the branch protection rule fails in Mergify and GitHub
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.needs_zebra_state }}
     runs-on: ubuntu-latest
     permissions:
       contents: 'read'