From c2d398c1c1107b49657147f1910f6b0b9266bfc3 Mon Sep 17 00:00:00 2001
From: Dariusz Karas <78362586+dkaras-splunk@users.noreply.github.com>
Date: Thu, 9 Jan 2025 10:12:08 +0100
Subject: [PATCH] feat: upgrade tests (#344)

TA-> TA Upgrade implementation
ADDON-73792

This pull request introduces a new feature to our CI/CD pipeline,
focusing on automating upgrade testing for Technology Add-ons (TA). The
goal is to streamline the release process by incorporating scalable and
user-friendly solutions capable of executing upgrade scenarios

Key Features:
Automated Upgrade Testing: Introduces jobs for upgrade testing within
the GitHub CI/CD pipeline.
Flexible Version Testing: Allows users to provide multiple TA versions
on which upgrade tests should be executed.

In this repo, ta_upgrade_version parameter is responsible for passing
the info about versions being tested. Also, both GitHub and Splunkbase
releases are supported, depending on the format of the version string
provided:
vX.X.X - for GitHub releases
X.X.X - for Splunkbase releases
additionally, latest can be passed to use the latest version from GitHub

example run -
https://github.com/splunk/splunk-add-on-for-amazon-web-services/actions/runs/12045945177

Part of
https://github.com/splunk/ta-automation-app-of-apps/pull/27
https://github.com/splunk/ta-automation-k8s-manifests/pull/102
https://github.com/splunk/wfe-test-runner-action/pull/35

---------

Co-authored-by: Adam Wownysz
---
 .../workflows/reusable-build-test-release.yml | 294 +++++++++++++++++-
 1 file changed, 289 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/reusable-build-test-release.yml b/.github/workflows/reusable-build-test-release.yml
index b07bcd20..0fb16542 100644
--- a/.github/workflows/reusable-build-test-release.yml
+++ b/.github/workflows/reusable-build-test-release.yml
@@ -46,6 +46,13 @@ on:
         type: string
         default: >-
           ["ubuntu:14.04", "ubuntu:16.04","ubuntu:18.04","ubuntu:22.04", "ubuntu:24.04", "redhat:8.4", "redhat:8.5", "redhat:8.6", "redhat:8.8"]
+      upgrade-tests-ta-versions:
+        required: false
+        description: "JSON list of TA versions used as starting points for upgrade tests: 'X.X.X' for Splunkbase releases, 'vX.X.X' for GitHub releases,
+          or 'latest' (default) for the latest GitHub release. Example: [\"7.6.0\", \"v7.7.0\"]"
+        type: string
+        default: >-
+          ["latest"]
     secrets:
       GH_TOKEN_ADMIN:
         description: Github admin token
@@ -120,6 +127,7 @@ jobs:
       execute-ucc-modinput-labeled: ${{ steps.configure-tests-on-labels.outputs.execute_ucc_modinput_functional_labeled }}
       execute-scripted_inputs-labeled: ${{ steps.configure-tests-on-labels.outputs.execute_scripted_inputs_labeled }}
       execute-requirement-labeled: ${{ steps.configure-tests-on-labels.outputs.execute_requirement_test_labeled }}
+      execute-upgrade-labeled: ${{ steps.configure-tests-on-labels.outputs.execute_upgrade_labeled }} # "<TESTSET entry>_labeled", like the outputs above
       s3_bucket_k8s: ${{ steps.k8s-environment.outputs.s3_bucket }}
       argo_server_domain_k8s: ${{ steps.k8s-environment.outputs.argo_server_domain }}
       argo_token_secret_id_k8s: ${{ steps.k8s-environment.outputs.argo_token_secret_id }}
@@ -148,7 +156,7 @@ jobs:
         run: |
           set +e
           declare -A EXECUTE_LABELED
-          TESTSET=("execute_knowledge" "execute_ui" "execute_modinput_functional" "execute_ucc_modinput_functional" "execute_scripted_inputs" "execute_requirement_test")
+          TESTSET=("execute_knowledge" "execute_ui" "execute_modinput_functional" "execute_ucc_modinput_functional" "execute_scripted_inputs" "execute_requirement_test" "execute_upgrade")
           for test_type in "${TESTSET[@]}"; do
             EXECUTE_LABELED["$test_type"]="false"
           done
@@ -164,7 +172,10 @@ jobs:
                 done
               elif ${{ github.base_ref == 'main' }} || ${{ contains(github.event.pull_request.labels.*.name, 'execute_all_tests') }}; then
                 for test_type in "${TESTSET[@]}"; do
-                  EXECUTE_LABELED["$test_type"]="true"
+                  # Exclude upgrade tests on PRs to main
+                  if [[ "$test_type" != "execute_upgrade" ]]; then
+                    EXECUTE_LABELED["$test_type"]="true"
+                  fi
                 done
               else
                 for test_type in "${TESTSET[@]}"; do
@@ -178,19 +189,28 @@ jobs:
               if ${{ github.ref_name == 'main' }} || ${{ github.ref_name == 'develop' }} ||
                 ${{ startsWith(github.ref_name, 'release/') && inputs.execute-tests-on-push-to-release == 'true' }} ; then
                 for test_type in "${TESTSET[@]}"; do
-                  EXECUTE_LABELED["$test_type"]="true"
+                  # Exclude upgrade tests on push to main
+                  if [[ "$test_type" != "execute_upgrade" ]]; then
+                    EXECUTE_LABELED["$test_type"]="true"
+                  fi
                 done
               fi
               ;;
             "schedule")
               for test_type in "${TESTSET[@]}"; do
-                EXECUTE_LABELED["$test_type"]="true"
+                # Exclude upgrade tests in scheduled runs
+                if [[ "$test_type" != "execute_upgrade" ]]; then
+                  EXECUTE_LABELED["$test_type"]="true"
+                fi
               done
               ;;
             "workflow_dispatch")
               if ${{ inputs.custom-version != '' }} ; then
                 for test_type in "${TESTSET[@]}"; do
-                  EXECUTE_LABELED["$test_type"]="true"
+                  # Exclude upgrade tests in custom releases
+                  if [[ "$test_type" != "execute_upgrade" ]]; then
+                    EXECUTE_LABELED["$test_type"]="true"
+                  fi
                 done
               fi
               ;;
@@ -345,6 +365,7 @@ jobs:
       requirement_test: ${{ steps.testset.outputs.requirement_test }}
       scripted_inputs: ${{ steps.testset.outputs.scripted_inputs }}
       ucc_modinput_functional: ${{ steps.testset.outputs.ucc_modinput_functional }}
+      upgrade: ${{ steps.testset.outputs.upgrade }}
     steps:
       - uses: actions/checkout@v4
       - id: testset
@@ -2538,6 +2559,269 @@ jobs:
         with:
           name: |
             summary-ucc_modinput*
+
+  run-upgrade-tests: # runs only when test-inventory reports upgrade tests AND setup-workflow enabled them
+    if: ${{ !cancelled() && needs.build.result == 'success' && needs.test-inventory.outputs.upgrade == 'true' && needs.setup-workflow.outputs.execute-upgrade-labeled == 'true' }}
+    needs:
+      - build
+      - test-inventory
+      - setup
+      - meta
+      - setup-workflow
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        splunk: ${{ fromJson(needs.meta.outputs.matrix_supportedSplunk) }}
+        vendor-version: ${{ fromJson(needs.meta.outputs.matrix_supportedModinputFunctionalVendors) }}
+        ta-version-from-upgrade: ${{ fromJson(inputs.upgrade-tests-ta-versions) }} # one matrix leg per starting TA version
+    container:
+      image: ghcr.io/splunk/workflow-engine-base:4.1.0
+    env:
+      ARGO_SERVER: ${{ needs.setup.outputs.argo-server }}
+      ARGO_HTTP1: ${{ needs.setup.outputs.argo-http1 }}
+      ARGO_SECURE: ${{ needs.setup.outputs.argo-secure }}
+      ARGO_BASE_HREF: ${{ needs.setup.outputs.argo-href }}
+      ARGO_NAMESPACE: ${{ needs.setup.outputs.argo-namespace }}
+      SPLUNK_VERSION_BASE: ${{ matrix.splunk.version }}${{ secrets.OTHER_TA_REQUIRED_CONFIGS }}
+      TEST_TYPE: "upgrade"
+      TEST_ARGS: ""
+    permissions:
+      actions: read
+      deployments: read
+      contents: read
+      packages: read
+      statuses: read
+      checks: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: configure git # This step configures git to omit "dubious git ownership error" in later test-reporter stage
+        id: configure-git
+        run: |
+          git --version
+          git_path="$(pwd)"
+          echo "$git_path"
+          git config --global --add safe.directory "$git_path"
+      - name: capture start time
+        id: capture-start-time
+        run: |
+          echo "start_time=$(date +%s)" >> "$GITHUB_OUTPUT"
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_DEFAULT_REGION }}
+      - name: Read secrets from AWS Secrets Manager into environment variables
+        id: get-argo-token
+        run: |
+          ARGO_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${{ needs.setup-workflow.outputs.argo_token_secret_id_k8s }}" | jq -r '.SecretString')
+          echo "argo-token=$ARGO_TOKEN" >> "$GITHUB_OUTPUT"
+      - name: create job name
+        id: create-job-name
+        shell: bash
+        run: |
+          RANDOM_STRING=$(head -3 /dev/urandom | tr -cd '[:lower:]' | cut -c -4)
+          JOB_NAME=${{ needs.setup.outputs.job-name }}-${RANDOM_STRING}
+          JOB_NAME=${JOB_NAME//TEST-TYPE/${{ env.TEST_TYPE }}}
+          JOB_NAME=${JOB_NAME//[_.]/-}
+          JOB_NAME=$(echo "$JOB_NAME" | tr '[:upper:]' '[:lower:]')
+          echo "job-name=$JOB_NAME" >> "$GITHUB_OUTPUT"
+      - name: run-tests
+        id: run-tests
+        timeout-minutes: 340
+        continue-on-error: true # keep going so the follow-up steps can watch, retry or cancel the workflow
+        env:
+          ARGO_TOKEN: ${{ steps.get-argo-token.outputs.argo-token }}
+        uses: splunk/wfe-test-runner-action@feat/ADDON-73868-add-inputs-for-upgrade-tests # TODO: add correct branch name
+        with:
+          splunk: ${{ matrix.splunk.version }}${{ secrets.OTHER_TA_REQUIRED_CONFIGS }}
+          test-type: ${{ env.TEST_TYPE }}
+          test-args: ${{ env.TEST_ARGS }}
+          job-name: ${{ steps.create-job-name.outputs.job-name }}
+          labels: ${{ needs.setup.outputs.labels }}
+          workflow-tmpl-name: ${{ needs.setup.outputs.argo-workflow-tmpl-name }}
+          workflow-template-ns: ${{ needs.setup.outputs.argo-namespace }}
+          addon-url: ${{ needs.setup.outputs.addon-upload-path }}
+          addon-name: ${{ needs.setup.outputs.addon-name }}
+          vendor-version: ${{ matrix.vendor-version.image }}
+          sc4s-version: "No"
+          k8s-manifests-branch: ${{ needs.setup.outputs.k8s-manifests-branch }}
+          ta-upgrade-version: ${{ matrix.ta-version-from-upgrade }}
+      - name: Read secrets from AWS Secrets Manager again into environment variables in case credential rotation
+        id: update-argo-token
+        if: ${{ !cancelled() }}
+        run: |
+          ARGO_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${{ needs.setup-workflow.outputs.argo_token_secret_id_k8s }}" | jq -r '.SecretString')
+          echo "argo-token=$ARGO_TOKEN" >> "$GITHUB_OUTPUT"
+      - name: calculate timeout
+        id: calculate-timeout
+        run: |
+          start_time=${{ steps.capture-start-time.outputs.start_time }}
+          current_time=$(date +%s)
+          remaining_time_minutes=$(( 350-((current_time-start_time)/60) ))
+          echo "remaining_time_minutes=$remaining_time_minutes" >> "$GITHUB_OUTPUT"
+      - name: Check if pod was deleted
+        id: is-pod-deleted
+        timeout-minutes: ${{ fromJson(steps.calculate-timeout.outputs.remaining_time_minutes) }}
+        if: ${{ !cancelled() }}
+        shell: bash
+        env:
+          ARGO_TOKEN: ${{ steps.update-argo-token.outputs.argo-token }}
+        run: |
+          set -o xtrace
+          if argo watch ${{ steps.run-tests.outputs.workflow-name }} -n workflows | grep "pod deleted"; then
+            echo "retry-workflow=true" >> "$GITHUB_OUTPUT"
+          fi
+      - name: Cancel workflow
+        env:
+          ARGO_TOKEN: ${{ steps.get-argo-token.outputs.argo-token }}
+        if: ${{ cancelled() || steps.is-pod-deleted.outcome != 'success' }}
+        run: |
+          cancel_response=$(argo submit -v -o json --from wftmpl/${{ needs.setup.outputs.argo-cancel-workflow-tmpl-name }} -l workflows.argoproj.io/workflow-template=${{ needs.setup.outputs.argo-cancel-workflow-tmpl-name }} --argo-base-href '' -p workflow-to-cancel=${{ steps.run-tests.outputs.workflow-name }})
+          cancel_workflow_name=$( echo "$cancel_response" |jq -r '.metadata.name' )
+          cancel_logs=$(argo logs --follow "$cancel_workflow_name" -n workflows)
+          if echo "$cancel_logs" | grep -q "workflow ${{ steps.run-tests.outputs.workflow-name }} stopped"; then
+            echo "Workflow ${{ steps.run-tests.outputs.workflow-name }} stopped"
+          else
+            echo "Workflow ${{ steps.run-tests.outputs.workflow-name }} didn't stop"
+            exit 1
+          fi
+      - name: Retrying workflow
+        id: retry-wf
+        shell: bash
+        env:
+          ARGO_TOKEN: ${{ steps.update-argo-token.outputs.argo-token }}
+        if: ${{ !cancelled() }}
+        run: |
+          set -o xtrace
+          set +e
+          if [[ "${{ steps.is-pod-deleted.outputs.retry-workflow }}" == "true" ]]
+          then
+            WORKFLOW_NAME=$(argo resubmit -v -o json -n workflows "${{ steps.run-tests.outputs.workflow-name }}" | jq -r .metadata.name)
+            echo "workflow-name=$WORKFLOW_NAME" >> "$GITHUB_OUTPUT"
+            argo logs --follow "${WORKFLOW_NAME}" -n workflows || echo "... there was an error fetching logs, the workflow is still in progress. please wait for the workflow to complete ..."
+          else
+            echo "No retry required"
+            argo wait "${{ steps.run-tests.outputs.workflow-name }}" -n workflows
+            argo watch "${{ steps.run-tests.outputs.workflow-name }}" -n workflows | grep "test-addon"
+          fi
+      - name: check if workflow completed
+        env:
+          ARGO_TOKEN: ${{ steps.update-argo-token.outputs.argo-token }}
+        if: ${{ !cancelled() }}
+        shell: bash
+        run: |
+          set +e
+          # shellcheck disable=SC2157
+          if [ -z "${{ steps.retry-wf.outputs.workflow-name }}" ]; then
+            WORKFLOW_NAME=${{ steps.run-tests.outputs.workflow-name }}
+          else
+            WORKFLOW_NAME="${{ steps.retry-wf.outputs.workflow-name }}"
+          fi
+          ARGO_STATUS=$(argo get "${WORKFLOW_NAME}" -n workflows -o json | jq -r '.status.phase')
+          echo "Status of workflow:" "$ARGO_STATUS"
+          while [ "$ARGO_STATUS" == "Running" ] || [ "$ARGO_STATUS" == "Pending" ]
+          do
+            echo "... argo Workflow ${WORKFLOW_NAME} is running, waiting for it to complete."
+            argo wait "${WORKFLOW_NAME}" -n workflows || true
+            ARGO_STATUS=$(argo get "${WORKFLOW_NAME}" -n workflows -o json | jq -r '.status.phase')
+          done
+      - name: pull artifacts from s3 bucket
+        if: ${{ !cancelled() }}
+        run: |
+          echo "pulling artifacts"
+          aws s3 cp s3://${{ needs.setup.outputs.s3-bucket }}/artifacts-${{ steps.create-job-name.outputs.job-name }}/${{ steps.create-job-name.outputs.job-name }}.tgz ${{ needs.setup.outputs.directory-path }}/
+          tar -xf ${{ needs.setup.outputs.directory-path }}/${{ steps.create-job-name.outputs.job-name }}.tgz -C ${{ needs.setup.outputs.directory-path }}
+      - name: pull logs from s3 bucket
+        if: ${{ !cancelled() }}
+        run: |
+          # shellcheck disable=SC2157
+          if [ -z "${{ steps.retry-wf.outputs.workflow-name }}" ]; then
+            WORKFLOW_NAME=${{ steps.run-tests.outputs.workflow-name }}
+          else
+            WORKFLOW_NAME="${{ steps.retry-wf.outputs.workflow-name }}"
+          fi
+          echo "pulling logs"
+          mkdir -p ${{ needs.setup.outputs.directory-path }}/argo-logs
+          aws s3 cp s3://${{ needs.setup.outputs.s3-bucket }}/${WORKFLOW_NAME}/ ${{ needs.setup.outputs.directory-path }}/argo-logs/ --recursive
+      - uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: archive splunk ${{ matrix.splunk.version }}${{ secrets.OTHER_TA_REQUIRED_CONFIGS }} ${{ env.TEST_TYPE }} ${{ matrix.vendor-version.image }} ${{ matrix.ta-version-from-upgrade }} tests artifacts
+          path: |
+            ${{ needs.setup.outputs.directory-path }}/test-results
+      - uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: archive splunk ${{ matrix.splunk.version }}${{ secrets.OTHER_TA_REQUIRED_CONFIGS }} ${{ env.TEST_TYPE }} ${{ matrix.vendor-version.image }} ${{ matrix.ta-version-from-upgrade }} tests logs
+          path: |
+            ${{ needs.setup.outputs.directory-path }}/argo-logs
+      - name: Test Report
+        id: test_report
+        uses: dorny/test-reporter@v1.9.1
+        if: ${{ !cancelled() }}
+        with:
+          name: splunk ${{ matrix.splunk.version }}${{ secrets.OTHER_TA_REQUIRED_CONFIGS }} ${{ env.TEST_TYPE }} ${{ matrix.vendor-version.image }} ${{ matrix.ta-version-from-upgrade }} test report
+          path: "${{ needs.setup.outputs.directory-path }}/test-results/*.xml"
+          reporter: java-junit
+      - name: Parse JUnit XML
+        if: ${{ !cancelled() }}
+        run: |
+          apt-get install -y libxml2-utils
+          junit_xml_path="${{ needs.setup.outputs.directory-path }}/test-results"
+          junit_xml_file=$(find "$junit_xml_path" -name "*.xml" -type f 2>/dev/null | head -n 1)
+          if [ -n "$junit_xml_file" ]; then
+            total_tests=$(xmllint --xpath "count(//testcase)" "$junit_xml_file")
+            failures=$(xmllint --xpath "count(//testcase[failure])" "$junit_xml_file")
+            errors=$(xmllint --xpath "count(//testcase[error])" "$junit_xml_file")
+            skipped=$(xmllint --xpath "count(//testcase[skipped])" "$junit_xml_file")
+            passed=$((total_tests - failures - errors - skipped))
+            echo "splunk ${{ matrix.splunk.version }}${{ secrets.OTHER_TA_REQUIRED_CONFIGS }} ${{ matrix.ta-version-from-upgrade }} ${{ matrix.vendor-version.image }} |$total_tests |$passed |$failures |$errors | $skipped |${{steps.test_report.outputs.url_html}}" > job_summary.txt
+          else
+            echo "no XML File found, exiting"
+            exit 1
+          fi
+      - name: Upload-artifact-for-github-summary
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: summary-${{ env.TEST_TYPE }}-${{ matrix.splunk.version }}-${{ secrets.OTHER_TA_REQUIRED_CONFIGS }}-${{ matrix.vendor-version.image }}-${{ matrix.ta-version-from-upgrade }}-artifact
+          path: job_summary.txt
+      - name: pull diag from s3 bucket
+        if: ${{ failure() && steps.test_report.outputs.conclusion == 'failure' }}
+        run: |
+          echo "pulling diag"
+          aws s3 cp s3://${{ needs.setup.outputs.s3-bucket }}/diag-${{ steps.create-job-name.outputs.job-name }}/diag-${{ steps.create-job-name.outputs.job-name }}.tgz ${{ needs.setup.outputs.directory-path }}/
+      - uses: actions/upload-artifact@v4
+        if: ${{ failure() && steps.test_report.outputs.conclusion == 'failure' }}
+        with:
+          name: archive splunk ${{ matrix.splunk.version }}${{ secrets.OTHER_TA_REQUIRED_CONFIGS }} ${{ env.TEST_TYPE }} ${{ matrix.vendor-version.image }} ${{ matrix.ta-version-from-upgrade }} tests diag
+          path: |
+            ${{ needs.setup.outputs.directory-path }}/diag*
+
+  upgrade-tests-report: # merges the per-leg job_summary.txt artifacts into one step-summary table
+    needs: run-upgrade-tests
+    runs-on: ubuntu-latest
+    if: ${{ !cancelled() && needs.run-upgrade-tests.result != 'skipped' }}
+    steps:
+      - name: Download all summaries
+        uses: actions/download-artifact@v4
+        with:
+          pattern: summary-upgrade*
+      - name: Combine summaries into a table
+        run: |
+          echo "| Job | Total Tests | Passed Tests | Failed Tests | Errored Tests | Skipped Tests | Report Link" >> "$GITHUB_STEP_SUMMARY"
+          echo "| ---------- | ----------- | ------ | ------ | ------ | ------- | ------ |" >> "$GITHUB_STEP_SUMMARY"
+          for file in summary-upgrade*/job_summary.txt; do
+            cat "$file" >> "$GITHUB_STEP_SUMMARY"
+          done
+      - uses: geekyeggo/delete-artifact@v5
+        with:
+          name: |
+            summary-upgrade*
 
   run-scripted-input-tests-full-matrix:
     if: ${{ !cancelled() && needs.build.result == 'success' && needs.test-inventory.outputs.scripted_inputs == 'true' && needs.setup-workflow.outputs.execute-scripted_inputs-labeled == 'true' }}