From a34f5d67b9bde99bc39cfd976832d4fa579f58e9 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 23 Jun 2022 08:05:00 +1000 Subject: [PATCH 1/4] Split full sync into checkpoint and full validation --- .github/workflows/deploy-gcp-tests.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 6c1777e5b11..48d8e32c724 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -67,6 +67,8 @@ env: MACHINE_TYPE: c2d-standard-16 jobs: + # run the test without any cached state + # each test runs one of these test-* jobs, and skips the other test-without-cached-state: name: Run ${{ inputs.test_id }} test if: ${{ !inputs.needs_zebra_state }} @@ -132,6 +134,10 @@ jobs: --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}" + # run the test using cached state + # each test runs one of these test-* jobs, and skips the other + # + # TODO: if we're running full syncs with cached state, split this job into checkpoint and full validation test-with-cached-state: name: Run ${{ inputs.test_id }} test if: ${{ inputs.needs_zebra_state }} @@ -316,6 +322,7 @@ jobs: --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \ ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}" + # create a state image from the instance's state disk, if requested by the caller create-state-image: name: Create ${{ inputs.test_id }} cached state image runs-on: ubuntu-latest @@ -402,6 +409,7 @@ jobs: --storage-location=us \ --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" + # delete the Google Cloud instance for this test delete-instance: name: Delete ${{ inputs.test_id }} instance runs-on: ubuntu-latest From 8924ed79da73d2e555afbe72397980b453c04772 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 23 Jun 2022 16:15:59 +1000 Subject: [PATCH 2/4] Sort workflow variables into categories and add descriptions --- .github/workflows/deploy-gcp-tests.yml | 40 +++++++++++++++++--------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 48d8e32c724..c5adb97fb1e 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -3,62 +3,76 @@ name: Deploy GCP tests on: workflow_call: inputs: - network: - required: false - type: string - default: Mainnet - app_name: - required: false - type: string - default: 'zebra' + # Status and logging test_id: required: true type: string + description: 'Unique identifier for the test' test_description: required: true type: string + description: 'Explains what the test does' + # Test selection and parameters test_variables: required: true type: string + description: 'Environmental variables used to select and configure the test' + network: + required: false + type: string + default: Mainnet + description: 'Zcash network to test against' + # Cached state + # # TODO: find a better name root_state_path: required: false type: string default: '/zebrad-cache' + description: 'Cached state base directory path' # TODO: find a better name zebra_state_dir: required: false type: string default: '' - description: 'Name of the Zebra cached state directory and input image prefix to search in GCP' + description: 'Zebra cached state directory and input image prefix to search in GCP' # TODO: find a better name lwd_state_dir: required: false type: string default: '' - description: 'Name of the Lightwalletd cached state directory and input image prefix to search in GCP' + description: 'Lightwalletd cached state directory and input image prefix to search in GCP' disk_prefix: required: false type: string default: 'zebrad-cache' - description: 'Used to name the image, and for tests that do not use a `zebra_state_dir` to work, but builds a cached state' + description: 'Image name prefix, and `zebra_state_dir` name for newly created cached states' disk_suffix: required: false type: string + description: 'Image name suffix' needs_zebra_state: required: true type: boolean - description: 'Indicates if a test needs a disk with a Zebra cached state to run' + description: 'Does the test use Zebra cached state?' needs_lwd_state: required: false type: boolean - description: 'Indicates if a test needs a disk with Lightwalletd cached state to run (which also includes a Zebra cached state)' + description: 'Does the test use Lightwalletd and Zebra cached state?' saves_to_disk: required: true type: boolean + description: 'Does the test create a new cached state disk?' + # Metadata height_grep_text: required: false type: string + description: 'Regular expression to find the tip height in test logs, and add it to newly created cached state image metadata' + app_name: + required: false + type: string + default: 'zebra' + description: 'Application name for Google Cloud instance metadata' env: IMAGE_NAME: zebrad-test From 6ab13334093ebe7905651d2449c2f7b4cd6f6ee5 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 23 Jun 2022 16:17:14 +1000 Subject: [PATCH 3/4] Split Create instance/volume and Run test into separate jobs --- .github/workflows/deploy-gcp-tests.yml | 174 +++++++++++++++++++------ 1 file changed, 133 insertions(+), 41 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index c5adb97fb1e..fae24b1e266 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -81,10 +81,10 @@ env: MACHINE_TYPE: c2d-standard-16 jobs: - # run the test without any cached state - # each test runs one of these test-* jobs, and skips the other - test-without-cached-state: - name: Run ${{ inputs.test_id }} test + # set up the test without any cached state + # each test runs one of the *-with/without-cached-state job series, and skips the other + setup-without-cached-state: + name: Setup ${{ inputs.test_id }} test if: ${{ !inputs.needs_zebra_state }} runs-on: ubuntu-latest permissions: @@ -114,7 +114,8 @@ jobs: service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' token_format: 'access_token' - - name: Create GCP compute instance + # Create a Compute Engine virtual machine + - name: Create ${{ inputs.test_id }} GCP compute instance id: create-instance run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ @@ -130,7 +131,7 @@ jobs: --zone ${{ env.ZONE }} sleep 60 - - name: Run ${{ inputs.test_id }} test + - name: Create ${{ inputs.test_id }} Docker volume run: | gcloud compute ssh \ ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ @@ -143,17 +144,60 @@ jobs: && \ docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \ ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \ - && \ + " + + test-without-cached-state: + name: Run ${{ inputs.test_id }} test + needs: [ setup-without-cached-state ] + # if the previous step fails, we also want to run and fail this step, + # so that the branch protection rule fails in Mergify and GitHub + if: ${{ !cancelled() }} + runs-on: ubuntu-latest + permissions: + contents: 'read' + id-token: 'write' + steps: + - uses: actions/checkout@v3.0.2 + with: + persist-credentials: false + + - name: Inject slug/short variables + uses: rlespinasse/github-slug-action@v4 + with: + short-length: 7 + + - name: Downcase network name for disks + run: | + NETWORK_CAPS=${{ inputs.network }} + echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV + + # Setup gcloud CLI + - name: Authenticate to Google Cloud + id: auth + uses: google-github-actions/auth@v0.8.0 + with: + workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc' + service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' + token_format: 'access_token' + + - name: Run ${{ inputs.test_id }} test + run: | + gcloud compute ssh \ + ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ env.ZONE }} \ + --quiet \ + --ssh-flag="-o ServerAliveInterval=5" \ + --command \ + "\ docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \ --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ - ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}" + ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ + " - # run the test using cached state - # each test runs one of these test-* jobs, and skips the other - # - # TODO: if we're running full syncs with cached state, split this job into checkpoint and full validation - test-with-cached-state: - name: Run ${{ inputs.test_id }} test + # set up the test using cached state + # each test runs one of the *-with/without-cached-state job series, and skips the other + setup-with-cached-state: + name: Setup ${{ inputs.test_id }} test if: ${{ inputs.needs_zebra_state }} runs-on: ubuntu-latest permissions: @@ -200,7 +244,7 @@ jobs: # # Passes the disk name to subsequent steps using $CACHED_DISK_NAME env variable # Passes the state version to subsequent steps using $STATE_VERSION env variable - - name: Find cached state disk + - name: Find ${{ inputs.test_id }} cached state disk id: get-disk-name run: | LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) @@ -235,9 +279,9 @@ jobs: echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> $GITHUB_ENV - # Creates Compute Engine virtual machine and attach a cached state disk using the + # Create a Compute Engine virtual machine and attach a cached state disk using the # $CACHED_DISK_NAME variable as the source image to populate the disk cached state - - name: Create GCP compute instance + - name: Create ${{ inputs.test_id }} GCP compute instance id: create-instance run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ @@ -253,9 +297,62 @@ jobs: --zone ${{ env.ZONE }} sleep 60 - # SSH into the just created VM, and create a Docker container to run the incoming test - # from ${{ inputs.test_id }}, then create a docker volume with the recently attached disk. - # The disk will be mounted in ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }}. + # Create a docker volume with the selected cached state. + # + # SSH into the just created VM, and create a docker volume with the recently attached disk. + - name: Create ${{ inputs.test_id }} Docker volume + run: | + gcloud compute ssh \ + ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ env.ZONE }} \ + --quiet \ + --ssh-flag="-o ServerAliveInterval=5" \ + --command \ + "\ + docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \ + ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \ + " + + test-with-cached-state: + name: Run ${{ inputs.test_id }} test + needs: [ setup-with-cached-state ] + # if the previous step fails, we also want to run and fail this step, + # so that the branch protection rule fails in Mergify and GitHub + if: ${{ !cancelled() }} + runs-on: ubuntu-latest + permissions: + contents: 'read' + id-token: 'write' + steps: + - uses: actions/checkout@v3.0.2 + with: + persist-credentials: false + fetch-depth: '2' + + - name: Inject slug/short variables + uses: rlespinasse/github-slug-action@v4 + with: + short-length: 7 + + - name: Downcase network name for disks + run: | + NETWORK_CAPS=${{ inputs.network }} + echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV + + # Setup gcloud CLI + - name: Authenticate to Google Cloud + id: auth + uses: google-github-actions/auth@v0.8.0 + with: + workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc' + service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' + token_format: 'access_token' + + # Run the test with the previously created Zebra-only cached state. + # Each test runs one of the "Run test" steps, and skips the other. + # + # SSH into the just created VM, and create a Docker container to run the incoming test + # from ${{ inputs.test_id }}, then mount the docker volume created in the previous job. # # The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker # container in one path: @@ -264,14 +361,13 @@ jobs: # This path must match the variable used by the tests in Rust, which are also set in # `continous-integration-docker.yml` to be able to run this tests. # - # Although we're mounting the disk root, Zebra will only respect the values from + # Although we're mounting the disk root, Zebra will only respect the values from # $ZEBRA_CACHED_STATE_DIR. The inputs like ${{ inputs.zebra_state_dir }} are only used # to match that variable paths. - name: Run ${{ inputs.test_id }} test - # This step mounts the volume only when a single cached state is needed, in this case - # the cached state from Zebra. - # lightwalletd-full-sync test is an exception to this rule, as it does not need a lwd cached state, - # but it does saves a lwd cached state + # This step only runs for tests that just read or write a Zebra state. + # + # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially. # TODO: we should find a better logic for this use cases if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }} run: | @@ -282,17 +378,16 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --command \ "\ - docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \ - ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \ - && \ docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \ --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ - ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}" + ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ + " - # SSH into the just created VM, and create a Docker container to run the incoming test - # from ${{ inputs.test_id }}, then create a docker volume with the recently attached disk. - # The disk will be mounted in ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }}, - # and ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} + # Run the test with the previously created Lightwalletd and Zebra cached state. + # Each test runs one of the "Run test" steps, and skips the other. + # + # SSH into the just created VM, and create a Docker container to run the incoming test + # from ${{ inputs.test_id }}, then mount the docker volume created in the previous job. # # In this step we're using the same disk for simplicity, as mounting multiple disks to the # VM and to the container might require more steps in this workflow, and additional @@ -314,10 +409,9 @@ jobs: # will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR, # the inputs like ${{ inputs.lwd_state_dir }} are only used to match those variables paths. - name: Run ${{ inputs.test_id }} test - # This step mounts the volume only when both cached states are needed, in this case - # the cached state from Zebra and Lightwalletd - # lightwalletd-full-sync test is an exception to this rule, as it does not need a lwd cached state, - # but it does saves a lwd cached state + # This step only runs for tests that read or write Lightwalletd and Zebra states. + # + # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially. # TODO: we should find a better logic for this use cases if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }} run: | @@ -328,13 +422,11 @@ jobs: --ssh-flag="-o ServerAliveInterval=5" \ --command \ "\ - docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \ - ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \ - && \ docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \ --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \ - ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}" + ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ + " # create a state image from the instance's state disk, if requested by the caller create-state-image: From e4f7268289bc6c11900df8d046b03b383e9ffc45 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 23 Jun 2022 16:25:11 +1000 Subject: [PATCH 4/4] Copy initial conditions to all jobs in the series --- .github/workflows/deploy-gcp-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index fae24b1e266..7a57cd8796d 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -151,7 +151,7 @@ jobs: needs: [ setup-without-cached-state ] # if the previous step fails, we also want to run and fail this step, # so that the branch protection rule fails in Mergify and GitHub - if: ${{ !cancelled() }} + if: ${{ !cancelled() && !inputs.needs_zebra_state }} runs-on: ubuntu-latest permissions: contents: 'read' @@ -318,7 +318,7 @@ jobs: needs: [ setup-with-cached-state ] # if the previous step fails, we also want to run and fail this step, # so that the branch protection rule fails in Mergify and GitHub - if: ${{ !cancelled() }} + if: ${{ !cancelled() && inputs.needs_zebra_state }} runs-on: ubuntu-latest permissions: contents: 'read'