From 8dcaa7e8abca4cd25acc6af39e682c991d4fd7dc Mon Sep 17 00:00:00 2001 From: Harry Date: Wed, 31 Jul 2024 09:16:12 -0700 Subject: [PATCH] Remove docker runner container image (#149) *Issue description:* To stop transient dependency-download failures from breaking E2E workflows, we began running the E2E workflows in a Docker container image with those dependencies preinstalled (see this [PR](https://github.com/aws-observability/aws-application-signals-test-framework/pull/33)). However, as we scaled our canary to cover more platforms and regions, the public ECR storing the image started throttling. Despite efforts to mitigate the throttling by increasing the API limit and distributing the image across multiple public ECRs, the throttling still occurs and we have not yet been able to determine its precise cause. After discussion, we have decided to stop using container images and to explore other ways to mitigate these transient issues, such as caching. *Description of changes:* Stop using container images in the GitHub runners and add back the retry logic for installing Terraform and other dependencies. Test run: https://github.com/aws-observability/aws-application-signals-test-framework/actions/runs/10171443800 By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .github/workflows/java-ec2-asg-e2e-test.yml | 10 ++++-- .../workflows/java-ec2-default-e2e-test.yml | 10 ++++-- .github/workflows/java-eks-e2e-test.yml | 25 +++++++++++++-- .github/workflows/java-k8s-e2e-test.yml | 10 ++++-- .../java-metric-limiter-e2e-test.yml | 25 +++++++++++++-- .github/workflows/python-ec2-asg-e2e-test.yml | 10 ++++-- .../workflows/python-ec2-default-e2e-test.yml | 10 ++++-- .github/workflows/python-eks-e2e-test.yml | 31 ++++++++++++++++--- .github/workflows/python-k8s-e2e-test.yml | 10 ++++-- 9 files changed, 120 insertions(+), 21 deletions(-) diff --git a/.github/workflows/java-ec2-asg-e2e-test.yml b/.github/workflows/java-ec2-asg-e2e-test.yml index 5927fee05..0d8abc293 100644 --- a/.github/workflows/java-ec2-asg-e2e-test.yml +++ b/.github/workflows/java-ec2-asg-e2e-test.yml @@ -33,8 +33,6 @@ env: jobs: java-ec2-asg: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: - name: Generate testing id run: echo TESTING_ID="${{ github.job }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}" >> $GITHUB_ENV @@ -95,6 +93,14 @@ jobs: echo GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ env.E2E_TEST_AWS_REGION }}.s3.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV fi + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + - name: Initiate Terraform uses: ./.github/workflows/actions/execute_and_retry with: diff 
--git a/.github/workflows/java-ec2-default-e2e-test.yml b/.github/workflows/java-ec2-default-e2e-test.yml index 36f49730b..0c1e16104 100644 --- a/.github/workflows/java-ec2-default-e2e-test.yml +++ b/.github/workflows/java-ec2-default-e2e-test.yml @@ -33,8 +33,6 @@ env: jobs: java-ec2-default: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: - name: Generate testing id run: echo TESTING_ID="${{ github.job }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}" >> $GITHUB_ENV @@ -95,6 +93,14 @@ jobs: echo GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ inputs.aws-region }}.s3.${{ inputs.aws-region }}.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV fi + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + - name: Initiate Terraform uses: ./.github/workflows/actions/execute_and_retry with: diff --git a/.github/workflows/java-eks-e2e-test.yml b/.github/workflows/java-eks-e2e-test.yml index a66616823..4c835c649 100644 --- a/.github/workflows/java-eks-e2e-test.yml +++ b/.github/workflows/java-eks-e2e-test.yml @@ -47,8 +47,6 @@ env: jobs: java-eks: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: - name: Generate testing id and sample app namespace run: | @@ -111,9 +109,24 @@ jobs: role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }} 
aws-region: ${{ env.E2E_TEST_AWS_REGION }} + # local directory to store the kubernetes config + - name: Create kubeconfig directory + run: mkdir -p ${{ github.workspace }}/.kube + + - name: Set KUBECONFIG environment variable + run: echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV + - name: Set up kubeconfig run: aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }} --region ${{ env.E2E_TEST_AWS_REGION }} + - name: Download and install eksctl + uses: ./.github/workflows/actions/execute_and_retry + with: + pre-command: 'mkdir ${{ github.workspace }}/eksctl' + command: 'curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" + && tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz' + cleanup: 'rm -f eksctl_Linux_amd64.tar.gz' + - name: Add eksctl to Github Path run: | echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH @@ -137,6 +150,14 @@ jobs: --region ${{ env.E2E_TEST_AWS_REGION }}" sleep_time: 60 + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + - name: Get RDS database cluster metadata continue-on-error: true run: | diff --git a/.github/workflows/java-k8s-e2e-test.yml b/.github/workflows/java-k8s-e2e-test.yml index 76a61fd00..af8b6c531 100644 --- a/.github/workflows/java-k8s-e2e-test.yml +++ b/.github/workflows/java-k8s-e2e-test.yml @@ -44,8 +44,6 @@ env: jobs: java-k8s: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: 
- name: Generate testing id run: echo TESTING_ID="${{ env.E2E_TEST_AWS_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV @@ -95,6 +93,14 @@ jobs: aws s3api put-object --bucket aws-appsignals-sample-app-prod-${{ env.E2E_TEST_AWS_REGION }} --key frontend-service-depl-${{ github.event.repository.name }}.yaml --body frontend-service-depl.yaml aws s3api put-object --bucket aws-appsignals-sample-app-prod-${{ env.E2E_TEST_AWS_REGION }} --key remote-service-depl-${{ github.event.repository.name }}.yaml --body remote-service-depl.yaml + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + - name: Initiate Terraform uses: ./.github/workflows/actions/execute_and_retry with: diff --git a/.github/workflows/java-metric-limiter-e2e-test.yml b/.github/workflows/java-metric-limiter-e2e-test.yml index 537941a82..dd4bb37e5 100644 --- a/.github/workflows/java-metric-limiter-e2e-test.yml +++ b/.github/workflows/java-metric-limiter-e2e-test.yml @@ -49,8 +49,6 @@ env: jobs: java-metric-limiter: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: - name: Generate testing id and sample app namespace run: | @@ -113,9 +111,24 @@ jobs: role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }} aws-region: ${{ env.E2E_TEST_AWS_REGION }} + # local directory to store the kubernetes config + - name: Create kubeconfig directory + run: mkdir -p ${{ github.workspace }}/.kube + + - name: Set KUBECONFIG environment variable + run: echo 
KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV + - name: Set up kubeconfig run: aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }} --region ${{ env.E2E_TEST_AWS_REGION }} + - name: Download and install eksctl + uses: ./.github/workflows/actions/execute_and_retry + with: + pre-command: 'mkdir ${{ github.workspace }}/eksctl' + command: 'curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" + && tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz' + cleanup: 'rm -f eksctl_Linux_amd64.tar.gz' + - name: Add eksctl to Github Path run: | echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH @@ -139,6 +152,14 @@ jobs: --region ${{ env.E2E_TEST_AWS_REGION }}" sleep_time: 60 + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + - name: Initiate Terraform uses: ./.github/workflows/actions/execute_and_retry with: diff --git a/.github/workflows/python-ec2-asg-e2e-test.yml b/.github/workflows/python-ec2-asg-e2e-test.yml index 6110b0997..dc6411735 100644 --- a/.github/workflows/python-ec2-asg-e2e-test.yml +++ b/.github/workflows/python-ec2-asg-e2e-test.yml @@ -37,8 +37,6 @@ env: jobs: python-ec2-asg: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: - name: Generate testing id run: echo TESTING_ID="${{ github.job }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}" >> $GITHUB_ENV @@ -99,6 +97,14 @@ jobs: echo 
GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ env.E2E_TEST_AWS_REGION }}.s3.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV fi + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + - name: Initiate Terraform uses: ./.github/workflows/actions/execute_and_retry with: diff --git a/.github/workflows/python-ec2-default-e2e-test.yml b/.github/workflows/python-ec2-default-e2e-test.yml index e0f72670a..31cd584fd 100644 --- a/.github/workflows/python-ec2-default-e2e-test.yml +++ b/.github/workflows/python-ec2-default-e2e-test.yml @@ -37,8 +37,6 @@ env: jobs: python-ec2-default: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: - uses: actions/checkout@v4 with: @@ -98,6 +96,14 @@ jobs: echo GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ env.E2E_TEST_AWS_REGION }}.s3.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV fi + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update 
&& sudo apt install terraform' + sleep_time: 60 + - name: Initiate Terraform uses: ./.github/workflows/actions/execute_and_retry with: diff --git a/.github/workflows/python-eks-e2e-test.yml b/.github/workflows/python-eks-e2e-test.yml index f0064533a..5d31e66ff 100644 --- a/.github/workflows/python-eks-e2e-test.yml +++ b/.github/workflows/python-eks-e2e-test.yml @@ -47,8 +47,6 @@ env: jobs: python-eks: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: - name: Generate testing id and python sample app namespace run: | @@ -112,8 +110,23 @@ jobs: role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }} aws-region: ${{ env.E2E_TEST_AWS_REGION }} + # local directory to store the kubernetes config + - name: Create kubeconfig directory + run: mkdir -p ${{ github.workspace }}/.kube + + - name: Set KUBECONFIG environment variable + run: echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV + - name: Set up kubeconfig - run: aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ env.E2E_TEST_AWS_REGION }} + run: aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }} --region ${{ env.E2E_TEST_AWS_REGION }} + + - name: Download and install eksctl + uses: ./.github/workflows/actions/execute_and_retry + with: + pre-command: 'mkdir ${{ github.workspace }}/eksctl' + command: 'curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" + && tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz' + cleanup: 'rm -f eksctl_Linux_amd64.tar.gz' - name: Add eksctl to Github Path run: | @@ -126,7 +139,7 @@ jobs: command: "eksctl create iamserviceaccount \ --name service-account-${{ env.TESTING_ID }} \ --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \ - --cluster ${{ inputs.test-cluster-name }} \ + --cluster ${{ env.CLUSTER_NAME }} \ --role-name 
eks-s3-access-${{ env.TESTING_ID }} \ --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \ --region ${{ env.E2E_TEST_AWS_REGION }} \ @@ -134,10 +147,18 @@ jobs: cleanup: "eksctl delete iamserviceaccount \ --name service-account-${{ env.TESTING_ID }} \ --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \ - --cluster ${{ inputs.test-cluster-name }} \ + --cluster ${{ env.CLUSTER_NAME }} \ --region ${{ env.E2E_TEST_AWS_REGION }}" sleep_time: 60 + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + - name: Get RDS database cluster metadata continue-on-error: true run: | diff --git a/.github/workflows/python-k8s-e2e-test.yml b/.github/workflows/python-k8s-e2e-test.yml index 86d835ae1..317325060 100644 --- a/.github/workflows/python-k8s-e2e-test.yml +++ b/.github/workflows/python-k8s-e2e-test.yml @@ -44,8 +44,6 @@ env: jobs: python-k8s: runs-on: ubuntu-latest - container: - image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest steps: - name: Generate testing id run: echo TESTING_ID="${{ env.E2E_TEST_AWS_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV @@ -95,6 +93,14 @@ jobs: aws s3api put-object --bucket aws-appsignals-sample-app-prod-${{ env.E2E_TEST_AWS_REGION }} --key python-frontend-service-depl-${{ github.event.repository.name }}.yaml --body python-frontend-service-depl.yaml aws s3api put-object --bucket aws-appsignals-sample-app-prod-${{ env.E2E_TEST_AWS_REGION }} --key python-remote-service-depl-${{ github.event.repository.name }}.yaml --body 
python-remote-service-depl.yaml + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + sleep_time: 60 + - name: Initiate Terraform uses: ./.github/workflows/actions/execute_and_retry with: