Skip to content

Commit

Permalink
(test) : Collect and upload logs when test fails for k8s (#11618)
Browse files Browse the repository at this point in the history
fix:



add log upload functionality



changes



test



update permission



update permission



fail test



remove exit



update permission



update



update



add condition



test



test



Remove exit



test



fix pod name



Remove exit



refactor



test



remove exit

Signed-off-by: chahatsagarmain <chahatsagar2003@gmail.com>
  • Loading branch information
chahatsagarmain authored Feb 18, 2025
1 parent b131566 commit 8ca7ec1
Show file tree
Hide file tree
Showing 8 changed files with 227 additions and 15 deletions.
62 changes: 62 additions & 0 deletions .github/resources/scripts/collect-logs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env bash

# Abort on the first unhandled command failure.
set -e

# Namespace whose pods are inspected (required; set with --ns).
NS=""
# File that receives the collected report (override with --output).
OUTPUT_FILE="/tmp/tmp.log/tmp_pod_log.txt"

# Parse "--flag value" pairs. Every recognised flag takes exactly one value,
# so the value's presence is checked explicitly: otherwise the extra `shift`
# would fail under `set -e` and the script would exit without any message.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --ns | --output)
      if [[ "$#" -lt 2 ]]; then
        echo "Missing value for parameter: $1" >&2
        exit 1
      fi
      if [[ "$1" == "--ns" ]]; then
        NS="$2"
      else
        OUTPUT_FILE="$2"
      fi
      shift 2
      ;;
    *)
      echo "Unknown parameter passed: $1" >&2
      exit 1
      ;;
  esac
done

# /tmp/tmp.log is still created unconditionally (as before); in addition the
# directory of the requested output file is created so a custom --output
# location does not make the later `tee` calls fail.
mkdir -p -- /tmp/tmp.log "$(dirname -- "$OUTPUT_FILE")"

# --ns is the only mandatory parameter.
if [[ -z "$NS" ]]; then
  echo "The --ns parameter is required." >&2
  exit 1
fi

# Verify that a Kubernetes namespace exists.
# Arguments: $1 - namespace name
# Outputs:   an error line on stdout when the lookup fails
# Exits the whole script with status 1 when `kubectl get namespace` fails.
check_namespace() {
  local ns_name=$1

  # kubectl's own output is discarded; only its exit status matters here.
  if kubectl get namespace "${ns_name}" >/dev/null 2>&1; then
    return 0
  fi

  echo "Namespace '${ns_name}' does not exist."
  exit 1
}

# Print and store the events and logs of every pod in a namespace.
# Globals:   OUTPUT_FILE (read) - path of the report file; it is overwritten
# Arguments: $1 - namespace to inspect
# Outputs:   the report on stdout, mirrored into OUTPUT_FILE via tee
function display_pod_info {
  local NAMESPACE=$1
  local POD_NAMES
  local POD_NAME

  # Make sure the report's directory exists before anything is written to it.
  mkdir -p -- "$(dirname -- "${OUTPUT_FILE}")"

  # Query the namespace passed as argument (not the global NS) so the listing
  # always matches the namespace used by the describe/logs calls below.
  POD_NAMES=$(kubectl get pods -n "${NAMESPACE}" -o custom-columns=":metadata.name" --no-headers)

  if [[ -z "${POD_NAMES}" ]]; then
    # Overwrite (no -a) so a stale report from an earlier run cannot survive,
    # consistent with the header write in the normal path.
    echo "No pods found in namespace '${NAMESPACE}'." | tee "$OUTPUT_FILE"
    return 0
  fi

  # Start a fresh report; everything after this line is appended.
  echo "Pod Information for Namespace: ${NAMESPACE}" > "$OUTPUT_FILE"

  # Word splitting is intentional: the listing is one pod name per line.
  for POD_NAME in ${POD_NAMES}; do
    {
      echo "===== Pod: ${POD_NAME} in ${NAMESPACE} ====="
      echo "----- EVENTS -----"
      # Keep the "Events" section of the describe output (up to 100 lines after the match).
      kubectl describe pod "${POD_NAME}" -n "${NAMESPACE}" | grep -A 100 Events || echo "No events found for pod ${POD_NAME}."

      echo "----- LOGS -----"
      kubectl logs "${POD_NAME}" -n "${NAMESPACE}" || echo "No logs found for pod ${POD_NAME}."

      echo "==========================="
      echo ""
    } | tee -a "$OUTPUT_FILE"
  done

  echo "Pod information stored in $OUTPUT_FILE"
}

# Entry point: confirm the namespace exists, then collect the pod report.
# The two calls stay separate statements so `set -e` still applies inside them.
check_namespace "${NS}"
display_pod_info "${NS}"
25 changes: 24 additions & 1 deletion .github/workflows/backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,52 @@ jobs:
pip install -e sdk/python
- name: Create KFP cluster
uses: ./.github/actions/kfp-tekton-cluster

- name: "flip coin test"
id: flip-coin
run: |
. .venv/bin/activate
TEST_SCRIPT="test-flip-coin.sh" ./.github/resources/scripts/e2e-test.sh
continue-on-error: true

- name: "static loop test"
id: static-loop
run: |
. .venv/bin/activate
TEST_SCRIPT="test-static-loop.sh" ./.github/resources/scripts/e2e-test.sh
continue-on-error: true

- name: "dynamic loop test"
id: dynamic-loop
run: |
. .venv/bin/activate
TEST_SCRIPT="test-dynamic-loop.sh" ./.github/resources/scripts/e2e-test.sh
continue-on-error: true

- name: "use env"
id: use-env
run: |
. .venv/bin/activate
TEST_SCRIPT="test-env.sh" ./.github/resources/scripts/e2e-test.sh
continue-on-error: true

- name: "use volume"
id: use-volume
run: |
. .venv/bin/activate
TEST_SCRIPT="test-volume.sh" ./.github/resources/scripts/e2e-test.sh
continue-on-error: true

- name: Collect failed logs
if: ${{ (steps.flip-coin.outcome != 'success') || (steps.static-loop.outcome != 'success') || (steps.dynamic-loop.outcome != 'success') || (steps.use-env.outcome != 'success') || (steps.use-volume.outcome != 'success') }}
run: |
./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-tekton-backend-artifacts
path: /tmp/tmp.*/*
path: /tmp/tmp*/*

69 changes: 59 additions & 10 deletions .github/workflows/e2e-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,25 @@ jobs:

- name: Forward API port
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888

- name: Initialization tests v1
id: tests
working-directory: ./backend/test/initialization
run: go test -v ./... -namespace kubeflow -args -runIntegrationTests=true
continue-on-error: true

# Runs only when the "tests" step did not succeed. The script is added by this
# change under .github/resources/scripts/, and the report is written inside a
# /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*) matches it.
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-initialization-tests-v1-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp.*/*
path: /tmp/tmp*/*

initialization-tests-v2:
runs-on: ubuntu-latest
Expand All @@ -71,20 +79,27 @@ jobs:
with:
k8s_version: ${{ matrix.k8s_version }}


- name: Forward API port
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888

- name: Initialization tests v2
id: tests
working-directory: ./backend/test/v2/initialization
run: go test -v ./... -namespace kubeflow -args -runIntegrationTests=true
continue-on-error: true

# Runs only when the "tests" step did not succeed. The script is added by this
# change under .github/resources/scripts/, and the report is written inside a
# /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*) matches it.
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-initialization-tests-v2-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp.*/*
path: /tmp/tmp*/*

api-integration-tests-v1:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -113,15 +128,23 @@ jobs:
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "mysql" 3306 3306

- name: API integration tests v1
id: tests
working-directory: ./backend/test/integration
run: go test -v ./... -namespace kubeflow -args -runIntegrationTests=true
continue-on-error: true

# Runs only when the "tests" step did not succeed. The script is added by this
# change under .github/resources/scripts/, and the report is written inside a
# /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*) matches it.
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-api-integration-tests-v1-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp.*/*
path: /tmp/tmp*/*

api-integration-tests-v2:
runs-on: ubuntu-latest
Expand All @@ -147,15 +170,23 @@ jobs:
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888

- name: API integration tests v2
id: tests
working-directory: ./backend/test/v2/integration
run: go test -v ./... -namespace kubeflow -args -runIntegrationTests=true
continue-on-error: true

# Runs only when the "tests" step did not succeed. The script is added by this
# change under .github/resources/scripts/, and the report is written inside a
# /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*) matches it.
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-api-integration-tests-v2-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp.*/*
path: /tmp/tmp*/*

frontend-integration-test:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -186,17 +217,26 @@ jobs:
- name: Build frontend integration tests image
working-directory: ./test/frontend-integration-test
run: docker build . -t kfp-frontend-integration-test:local
continue-on-error: true

- name: Frontend integration tests
id: tests
run: docker run --net=host kfp-frontend-integration-test:local --remote-run true
continue-on-error: true

# Runs only when the "tests" step did not succeed. The script is added by this
# change under .github/resources/scripts/, and the report is written inside a
# /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*) matches it.
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-frontend-integration-test-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp.*/*

path: /tmp/tmp*/*
basic-sample-tests:
runs-on: ubuntu-latest
strategy:
Expand Down Expand Up @@ -224,14 +264,23 @@ jobs:
run: pip3 install -r ./test/sample-test/requirements.txt

- name: Basic sample tests - sequential
id: sequential-test
run: python3 ./test/sample-test/sample_test_launcher.py sample_test run_test --namespace kubeflow --test-name sequential --results-gcs-dir output

- name: Basic sample tests - exit_handler
id: sample-test
run: python3 ./test/sample-test/sample_test_launcher.py sample_test run_test --namespace kubeflow --test-name exit_handler --expected-result failed --results-gcs-dir output
continue-on-error: true

# Runs when either sample-test step did not succeed. The report is written
# inside a /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*)
# matches it; a file placed directly in /tmp would not be uploaded.
- name: Collect failed logs
  if: ${{ (steps.sequential-test.outcome != 'success') || (steps.sample-test.outcome != 'success')}}
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # Fail the job explicitly once the logs have been collected.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-basic-sample-tests-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp.*/*
name: kfp-e2e-tests-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp*/*
17 changes: 16 additions & 1 deletion .github/workflows/kfp-kubernetes-execution-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
run: sudo apt-get install protobuf-compiler -y

- name: Install setuptools
run: |
run: |
pip3 install setuptools
pip3 freeze
Expand Down Expand Up @@ -73,7 +73,22 @@ jobs:
run: pip install -r ./test/kfp-kubernetes-execution-tests/requirements.txt

- name: Run tests
id: test
run: |
export KFP_ENDPOINT="http://localhost:8888"
export TIMEOUT_SECONDS=2700
pytest ./test/kfp-kubernetes-execution-tests/sdk_execution_tests.py --asyncio-task-timeout $TIMEOUT_SECONDS
continue-on-error: true

# Runs only when the "test" step did not succeed. The report is written
# inside a /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*)
# matches it; a file placed directly in /tmp would not be uploaded.
- name: Collect failed logs
  if: steps.test.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-execution-tests-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp*/*
16 changes: 16 additions & 0 deletions .github/workflows/kfp-samples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,21 @@ jobs:
run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888

- name: Run Samples Tests
id: tests
run: |
./backend/src/v2/test/sample-test.sh
continue-on-error: true

# Runs only when the "tests" step did not succeed. The report is written
# inside a /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*)
# matches it; a file placed directly in /tmp would not be uploaded.
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: kfp-samples-tests-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp*/*

13 changes: 12 additions & 1 deletion .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,27 @@ jobs:
uses: ./.github/actions/kfp-cluster
with:
k8s_version: ${{ matrix.k8s_version }}

- name: Port forward kfp apiserver
run: |
nohup kubectl port-forward --namespace kubeflow svc/ml-pipeline 8888:8888 &
- name: Run Functional Tests
id: tests
run: |
log_dir=$(mktemp -d)
./test/kfp-functional-test/kfp-functional-test.sh > $log_dir/periodic_tests.txt
continue-on-error: true

# Runs only when the "tests" step did not succeed. The report is written
# inside a /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*)
# matches it; a file placed directly in /tmp would not be uploaded.
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: periodic-functional-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp.*/*
path: /tmp/tmp*/*
17 changes: 16 additions & 1 deletion .github/workflows/sdk-execution.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
run: sudo apt-get install protobuf-compiler -y

- name: Install setuptools
run: |
run: |
pip3 install setuptools
pip3 freeze
Expand All @@ -65,7 +65,22 @@ jobs:
run: pip install -r ./test/sdk-execution-tests/requirements.txt

- name: Run tests
id: tests
run: |
export KFP_ENDPOINT="http://localhost:8888"
export TIMEOUT_SECONDS=2700
pytest ./test/sdk-execution-tests/sdk_execution_tests.py --asyncio-task-timeout $TIMEOUT_SECONDS
continue-on-error: true

# Runs only when the "tests" step did not succeed. The report is written
# inside a /tmp/tmp*/ directory so the artifact upload glob (/tmp/tmp*/*)
# matches it; a file placed directly in /tmp would not be uploaded.
- name: Collect failed logs
  if: steps.tests.outcome != 'success'
  run: |
    ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp.log/tmp_pod_log.txt
    # The test step uses continue-on-error, so fail the job explicitly here.
    exit 1
- name: Collect test results
if: always()
uses: actions/upload-artifact@v4
with:
name: periodic-functional-artifacts-k8s-${{ matrix.k8s_version }}
path: /tmp/tmp*/*
Loading

0 comments on commit 8ca7ec1

Please sign in to comment.