From 0d8fe044962bc732fec72a78827b8b412f2a41e8 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 3 Feb 2025 16:08:28 -0800 Subject: [PATCH 1/6] Fix out of space error in running integration test github workflow --- .github/workflows/installation-tests.yml | 66 +++++++++++++++++++----- development/app/Dockerfile | 2 +- development/app/requirements.txt | 4 -- hack/kind_config.yaml | 2 + 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/.github/workflows/installation-tests.yml b/.github/workflows/installation-tests.yml index 719d2fc3..de3a654b 100644 --- a/.github/workflows/installation-tests.yml +++ b/.github/workflows/installation-tests.yml @@ -28,6 +28,28 @@ on: - 'go.sum' jobs: + build-workload-image: + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Build Workload + run: | + cd development/app + docker build -t aibrix/vllm-mock:nightly -f Dockerfile . + + - name: Save image + run: | + docker save aibrix/vllm-mock:nightly > vllm-mock.tar + + - name: Upload image artifact + uses: actions/upload-artifact@v4 + with: + name: vllm-mock-image + path: vllm-mock.tar + retention-days: 1 + build-images: runs-on: ubuntu-latest strategy: @@ -54,9 +76,9 @@ jobs: name: ${{ matrix.image }}-image path: ${{ matrix.image }}.tar retention-days: 1 - + installation-test: - needs: build-images + needs: [build-images, build-workload-image] runs-on: ubuntu-latest steps: - name: Check out code @@ -64,6 +86,25 @@ jobs: - name: Download all image artifacts uses: actions/download-artifact@v4 + + - name: Free disk space + run: |- + # https://github.com/actions/runner-images/issues/2840#issuecomment-2272410832 + # Remove software and language runtimes we're not using + sudo rm -rf \ + /opt/google/chrome \ + /opt/microsoft/msedge \ + /opt/microsoft/powershell \ + /opt/pipx \ + /usr/lib/mono \ + /usr/local/julia* \ + /usr/local/lib/android \ + /usr/local/lib/node_modules \ + /usr/local/share/chromium \ + 
/usr/local/share/powershell \ + /usr/share/dotnet \ + /usr/share/swift + df -h / - name: Install kind run: | @@ -86,17 +127,18 @@ jobs: - name: Load image into Kind run: | for image in controller-manager gateway-plugins runtime metadata-service; do - docker load < ${image}-image/${image}.tar - - # Retag the image - # This is for application integration, since it is not that easy to override all commits in manifest - # It is better to use nightly to represent the latest image - docker tag aibrix/${image}:${{ github.sha }} aibrix/${image}:nightly + docker load < ${image}-image/${image}.tar - kind load docker-image aibrix/${image}:${{ github.sha }} --name installation-test - kind load docker-image aibrix/${image}:nightly --name installation-test + # Retag the image + # This is for application integration, since it is not that easy to override all commits in manifest + # It is better to use nightly to represent the latest image + docker tag aibrix/${image}:${{ github.sha }} aibrix/${image}:nightly done + docker load < vllm-mock-image/vllm-mock.tar + kind load docker-image aibrix/controller-manager:${{ github.sha }} aibrix/gateway-plugins:${{ github.sha }} aibrix/metadata-service:${{ github.sha }} aibrix/runtime:${{ github.sha }} --name installation-test + kind load docker-image aibrix/controller-manager:nightly aibrix/gateway-plugins:nightly aibrix/metadata-service:nightly aibrix/runtime:nightly aibrix/vllm-mock:nightly --name installation-test + - name: Deploy controller with the built image run: | kubectl create -k config/dependency @@ -110,13 +152,11 @@ jobs: - name: Deploy Workload run: | cd development/app - docker build -t aibrix/vllm-mock:nightly -f Dockerfile . 
- kind load docker-image aibrix/vllm-mock:nightly --name installation-test kubectl create -k config/mock - name: Check pod status run: | - sleep 60s + sleep 30s kubectl get pods --all-namespaces kubectl wait pod --all --for=condition=ready --all-namespaces --timeout=300s diff --git a/development/app/Dockerfile b/development/app/Dockerfile index 783bf959..4f2808c5 100644 --- a/development/app/Dockerfile +++ b/development/app/Dockerfile @@ -13,7 +13,7 @@ WORKDIR /app COPY requirements.txt /app/ # Install dependencies -RUN apt update && apt install -y curl jq git git-lfs +RUN apt update && apt install -y curl jq git git-lfs && apt-get clean RUN pip install --no-cache-dir -r requirements.txt diff --git a/development/app/requirements.txt b/development/app/requirements.txt index efb2f1b1..e529cf21 100644 --- a/development/app/requirements.txt +++ b/development/app/requirements.txt @@ -1,15 +1,11 @@ flask Flask-HTTPAuth kubernetes -numpy pandas scikit-learn wandb -kaleido ddsketch plotly_express -matplotlib -seaborn fasteners transformers git+https://github.com/zhangjyr/vidur.git \ No newline at end of file diff --git a/hack/kind_config.yaml b/hack/kind_config.yaml index cebee6d5..2808f799 100644 --- a/hack/kind_config.yaml +++ b/hack/kind_config.yaml @@ -3,3 +3,5 @@ apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane - role: worker + - role: worker + - role: worker From f9de71510e6b6045b94f9a1ab05b0ebda803d043 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 4 Feb 2025 09:43:50 -0800 Subject: [PATCH 2/6] remove build-workload-image job --- .github/workflows/installation-tests.yml | 29 ++++-------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/.github/workflows/installation-tests.yml b/.github/workflows/installation-tests.yml index de3a654b..9eb032dd 100644 --- a/.github/workflows/installation-tests.yml +++ b/.github/workflows/installation-tests.yml @@ -28,28 +28,6 @@ on: - 'go.sum' jobs: - build-workload-image: - runs-on: 
ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v4 - - - name: Build Workload - run: | - cd development/app - docker build -t aibrix/vllm-mock:nightly -f Dockerfile . - - - name: Save image - run: | - docker save aibrix/vllm-mock:nightly > vllm-mock.tar - - - name: Upload image artifact - uses: actions/upload-artifact@v4 - with: - name: vllm-mock-image - path: vllm-mock.tar - retention-days: 1 - build-images: runs-on: ubuntu-latest strategy: @@ -78,7 +56,7 @@ jobs: retention-days: 1 installation-test: - needs: [build-images, build-workload-image] + needs: [build-images] runs-on: ubuntu-latest steps: - name: Check out code @@ -135,9 +113,8 @@ jobs: docker tag aibrix/${image}:${{ github.sha }} aibrix/${image}:nightly done - docker load < vllm-mock-image/vllm-mock.tar kind load docker-image aibrix/controller-manager:${{ github.sha }} aibrix/gateway-plugins:${{ github.sha }} aibrix/metadata-service:${{ github.sha }} aibrix/runtime:${{ github.sha }} --name installation-test - kind load docker-image aibrix/controller-manager:nightly aibrix/gateway-plugins:nightly aibrix/metadata-service:nightly aibrix/runtime:nightly aibrix/vllm-mock:nightly --name installation-test + kind load docker-image aibrix/controller-manager:nightly aibrix/gateway-plugins:nightly aibrix/metadata-service:nightly aibrix/runtime:nightly --name installation-test - name: Deploy controller with the built image run: | @@ -152,6 +129,8 @@ jobs: - name: Deploy Workload run: | cd development/app + docker build -t aibrix/vllm-mock:nightly -f Dockerfile . 
+ kind load docker-image aibrix/vllm-mock:nightly --name installation-test kubectl create -k config/mock - name: Check pod status From 8b02a731c3b93631fa5894cccc0decf633b54dbe Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 4 Feb 2025 10:57:50 -0800 Subject: [PATCH 3/6] add manual workflow trigger --- .github/workflows/lint-and-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint-and-tests.yml b/.github/workflows/lint-and-tests.yml index d0200e72..398d22ab 100644 --- a/.github/workflows/lint-and-tests.yml +++ b/.github/workflows/lint-and-tests.yml @@ -1,6 +1,7 @@ name: Linter and Unit Tests on: + workflow_dispatch: # Allows manual trigger push: branches: [ "main" ] paths: From 9b597f40edb951566216ffe7ffc67d816f9e9563 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Wed, 5 Feb 2025 10:01:04 -0800 Subject: [PATCH 4/6] remove blanks --- .github/workflows/installation-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/installation-tests.yml b/.github/workflows/installation-tests.yml index 9eb032dd..ff3ca328 100644 --- a/.github/workflows/installation-tests.yml +++ b/.github/workflows/installation-tests.yml @@ -54,7 +54,7 @@ jobs: name: ${{ matrix.image }}-image path: ${{ matrix.image }}.tar retention-days: 1 - + installation-test: needs: [build-images] runs-on: ubuntu-latest From 426d78bdd6bc69854e4cdd79f990271ae521c786 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Wed, 5 Feb 2025 10:21:45 -0800 Subject: [PATCH 5/6] parallelize make docker-build-all --- Makefile | 3 ++- test/run-e2e-tests.sh | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index aaee8460..fe63a8eb 100644 --- a/Makefile +++ b/Makefile @@ -137,7 +137,8 @@ define push_image endef .PHONY: docker-build-all -docker-build-all: docker-build-controller-manager docker-build-gateway-plugins docker-build-runtime docker-build-metadata-service ## Build all docker images +docker-build-all: + 
make -j 4 docker-build-controller-manager docker-build-gateway-plugins docker-build-runtime docker-build-metadata-service ## Build all docker images .PHONY: docker-build-controller-manager docker-build-controller-manager: ## Build docker image with the manager. diff --git a/test/run-e2e-tests.sh b/test/run-e2e-tests.sh index df0fd650..cac44392 100755 --- a/test/run-e2e-tests.sh +++ b/test/run-e2e-tests.sh @@ -54,10 +54,7 @@ fi # build images if [ -n "$INSTALL_AIBRIX" ]; then make docker-build-all - kind load docker-image aibrix/controller-manager:nightly - kind load docker-image aibrix/gateway-plugins:nightly - kind load docker-image aibrix/metadata-service:nightly - kind load docker-image aibrix/runtime:nightly + kind load docker-image aibrix/controller-manager:nightly aibrix/gateway-plugins:nightly aibrix/metadata-service:nightly aibrix/runtime:nightly kubectl create -k config/dependency kubectl create -k config/default From 3c6823d1dc36b9d82f057cb2f05a367accd668e4 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 6 Feb 2025 10:22:38 -0800 Subject: [PATCH 6/6] use nproc --- Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index fe63a8eb..0e253756 100644 --- a/Makefile +++ b/Makefile @@ -138,7 +138,7 @@ endef .PHONY: docker-build-all -docker-build-all: - make -j 4 docker-build-controller-manager docker-build-gateway-plugins docker-build-runtime docker-build-metadata-service ## Build all docker images +docker-build-all: ## Build all docker images + $(MAKE) -j $(shell nproc) docker-build-controller-manager docker-build-gateway-plugins docker-build-runtime docker-build-metadata-service .PHONY: docker-build-controller-manager docker-build-controller-manager: ## Build docker image with the manager. @@ -157,7 +157,8 @@ docker-build-metadata-service: ## Build docker image with the metadata-service. 
$(call build_and_tag,metadata-service,Dockerfile.metadata) .PHONY: docker-push-all -docker-push-all: docker-push-controller-manager docker-push-gateway-plugins docker-push-runtime docker-push-metadata-service ## Push all docker images +docker-push-all: ## Push all docker images + $(MAKE) -j $(shell nproc) docker-push-controller-manager docker-push-gateway-plugins docker-push-runtime docker-push-metadata-service .PHONY: docker-push-controller-manager docker-push-controller-manager: ## Push docker image with the manager.