Sync main branch changes to release-0.1 branch #375

Merged: 42 commits from main into release-0.1, Nov 12, 2024
Commits
b541e42
Update manifests version to v0.1.0-rc.3 (#287)
Jeffwan Oct 9, 2024
8b8d120
[Misc] Add sync images step and scripts in release process (#283)
Jeffwan Oct 9, 2024
2f32a01
[batch] E2E works with driver and request proxy (#272)
xinchen384 Oct 10, 2024
8a067d2
Fix address already in use when AIRuntime start in pod (#289)
brosoul Oct 11, 2024
7fd4be4
Read model name from request body (#290)
varungup90 Oct 11, 2024
9bdf772
Fix redis bootstrap flaky connection issue (#293)
varungup90 Oct 14, 2024
f8ff7f6
skip docs CI if no changes in /docs dir (#294)
varungup90 Oct 14, 2024
d2fd044
Improve Rayclusterreplicaset Status (#295)
Yicheng-Lu-llll Oct 14, 2024
73f91c0
Add request trace for profiling (#291)
varungup90 Oct 15, 2024
5aad38c
Update the crd definiton due to runtime upgrade (#298)
Jeffwan Oct 15, 2024
64a0d7f
Push images to Github registry in release pipeline (#301)
Jeffwan Oct 17, 2024
f19f5d8
Build autoscaler abstractions like fetcher, client and scaler (#300)
Jeffwan Oct 17, 2024
d753c88
Support pod autoscaler periodically check (#306)
Jeffwan Oct 20, 2024
a4eb7a9
Add timeout in nc check for redis bootstrap (#309)
varungup90 Oct 22, 2024
77cfee2
Refactor AutoScaler: metricClient, context, reconcile (#308)
kr11 Oct 22, 2024
5d8d843
Cut v0.1.0-rc.4 release (#314)
Jeffwan Oct 22, 2024
75e5cfc
[doc] update runtime readme (#318)
brosoul Oct 25, 2024
4d756aa
Add env for routing strategy override (#323)
varungup90 Oct 26, 2024
2fd50cd
Fix pod autoscaler enqueue issues (#329)
Jeffwan Oct 27, 2024
ea5dc77
Autoscaling benchmark (#337)
kr11 Oct 28, 2024
6fda762
Initial lora benchmark result (#321)
Jeffwan Oct 28, 2024
19c9a10
Adding plotting script (#338)
happyandslow Oct 28, 2024
3ce4659
Update the downloader performance plot (#341)
Jeffwan Oct 29, 2024
43b989f
Reduce pod metrics refresh interval (#343)
varungup90 Oct 29, 2024
a1f3117
Enable ipv6 for envoy proxy (#342)
varungup90 Oct 29, 2024
d5f8e8d
Add benchmark scrips for gateway client side changes (#340)
Jeffwan Oct 31, 2024
73a49be
Update the plots based on feedback (#346)
Jeffwan Oct 31, 2024
33e21d0
[batch] use volcano TOS as batch storage (#344)
xinchen384 Nov 5, 2024
89cafe1
Add check if no pods are present (#345)
varungup90 Nov 5, 2024
65d3e56
Add model exists check (#353)
varungup90 Nov 7, 2024
aa16fa9
[Misc] Disable fastapi docs in runtime default action (#350)
brosoul Nov 7, 2024
106992f
Add check for acceptable routing strategies (#352)
varungup90 Nov 7, 2024
32c3a8a
optimize PA messages: const 'HPA' -> actual pa type (#354)
kr11 Nov 8, 2024
84bb220
[Misc] Runtime server startup with args (#355)
brosoul Nov 8, 2024
65b74ed
[Misc] Add python format script (#357)
brosoul Nov 8, 2024
7a45b60
Optimize benchmark scripts for autoscaler, add more logs (#356)
kr11 Nov 11, 2024
aa5edec
Update the mocked app to cleaner state (#361)
Jeffwan Nov 11, 2024
19a6093
Update manifests & docs about service httproute naming trick (#362)
Jeffwan Nov 11, 2024
8364605
Add reference grant to support httprouting for different namespace (#…
varungup90 Nov 11, 2024
ec8e4f7
Validate routing strategy bug fix (#364)
varungup90 Nov 11, 2024
fa3176e
Bug fix for setting routing strategy via env var (#369)
varungup90 Nov 11, 2024
2e0179c
Improve the routing env value & flag retrieval (#373)
Jeffwan Nov 12, 2024
Files changed
3 changes: 3 additions & 0 deletions .github/workflows/docker-build-images.yml
@@ -6,6 +6,9 @@ on:

jobs:
build:
# This prevents the job from running as other steps cover its functionality.
# We use 'if: false' to keep the file for future reference without deleting it.
if: false
runs-on: ubuntu-latest
steps:
- name: Check out code
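The `if: false` guard above disables the job while keeping the workflow file in the tree for reference. A variant of the same pattern (an assumption for illustration, not part of this PR) gates the job on a repository configuration variable instead, so it can be re-enabled from the repository settings without a code change:

```yaml
jobs:
  build:
    # Runs only when the repository variable ENABLE_DOCKER_BUILD is 'true';
    # with the variable unset or set to anything else, the job is skipped,
    # mirroring the effect of 'if: false'.
    if: ${{ vars.ENABLE_DOCKER_BUILD == 'true' }}
    runs-on: ubuntu-latest
```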
22 changes: 20 additions & 2 deletions .github/workflows/release-build.yaml
@@ -24,16 +24,34 @@ jobs:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

# Build container images
# Log in to Github Registry
- name: Login to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Build container images with docker registry namespace
- name: Build Container Images
run: |
GIT_COMMIT_HASH=${{ github.ref_name }} make docker-build-all

# Push container image to container registry
# Push container image to DockerHub
- name: Push container image to container registry
run: |
GIT_COMMIT_HASH=${{ github.ref_name }} make docker-push-all

# Build container images with Github registry namespace
- name: Build Container Images with Github Container Registry prefix
run: |
GIT_COMMIT_HASH=${{ github.ref_name }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-build-all

# Push container image to Github container registry
- name: Push Container Images to Github Container Registry
run: |
GIT_COMMIT_HASH=${{ github.ref_name }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-push-all

python-wheel-release:
runs-on: ubuntu-latest
strategy:
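The two build/push pairs above differ only in `AIBRIX_CONTAINER_REGISTRY_NAMESPACE`, which redirects the same `make` targets at ghcr.io instead of Docker Hub. A minimal sketch of how such a namespace variable typically composes the final image reference (the `image_ref` helper and the `aibrix` default are illustrative assumptions, not taken from the project's Makefile):

```shell
#!/bin/sh
# Compose a full image reference from an optional registry namespace.
# Falls back to a Docker Hub style namespace when the variable is unset.
# NOTE: image_ref and the 'aibrix' default are illustrative only.
image_ref() {
  ns="${AIBRIX_CONTAINER_REGISTRY_NAMESPACE:-aibrix}"
  echo "${ns}/$1:$2"
}

image_ref controller-manager v0.1.0
# prints: aibrix/controller-manager:v0.1.0

AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix
image_ref controller-manager v0.1.0
# prints: ghcr.io/aibrix/controller-manager:v0.1.0
```

This is why the workflow can reuse `docker-build-all`/`docker-push-all` verbatim for both registries: only the environment variable changes between steps.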
5 changes: 5 additions & 0 deletions .gitignore
@@ -33,3 +33,8 @@ __pycache__
docs/build/
!**/*.template.rst


# benchmark logs, result and figs
benchmarks/autoscaling/logs
benchmarks/autoscaling/output_stats
benchmarks/autoscaling/workload_plot
13 changes: 13 additions & 0 deletions .readthedocs.yaml
@@ -10,6 +10,19 @@ build:
os: ubuntu-22.04
tools:
python: "3.10"
jobs:
post_checkout:
# Cancel building pull requests when there aren't changes in the docs directory or YAML file.
# You can add any other files or directories that you'd like here as well,
# like your docs requirements file, or other files that will change your docs build.
#
# If there are no changes (git diff exits with 0) we force the command to return with 183.
# This is a special exit code on Read the Docs that will cancel the build immediately.
- |
if [ "$READTHEDOCS_VERSION_TYPE" = "external" ] && git diff --quiet origin/main -- docs/ .readthedocs.yaml;
then
exit 183;
fi

# Build documentation in the "docs/" directory with Sphinx
sphinx:
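The `post_checkout` hook above cancels pull request ("external") builds that touch nothing under `docs/` or the YAML file itself: exit code 183 is Read the Docs' special signal for cancelling a build immediately. The decision it encodes can be sketched as a small function (the function name and its echoed outputs are illustrative):

```shell
#!/bin/sh
# Decide whether a Read the Docs build should be cancelled.
#   $1: version type ("external" means a pull request build)
#   $2: exit code of `git diff --quiet origin/main -- docs/ .readthedocs.yaml`
#       (0 means no docs-related changes)
should_cancel_build() {
  if [ "$1" = "external" ] && [ "$2" -eq 0 ]; then
    echo cancel    # the real hook does: exit 183
  else
    echo build
  fi
}

should_cancel_build external 0   # -> cancel (PR with no docs changes)
should_cancel_build external 1   # -> build  (PR touched docs/)
should_cancel_build branch 0     # -> build  (non-PR builds always run)
```

Non-PR builds (tags, branches) always proceed, so published documentation versions are never skipped by this check.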
4 changes: 2 additions & 2 deletions README.md
@@ -34,10 +34,10 @@ kubectl create -k config/default
Install stable distribution
```shell
# Install component dependencies
kubectl create -k "github.com/aibrix/aibrix/config/dependency?ref=v0.1.0-rc.1"
kubectl create -k "github.com/aibrix/aibrix/config/dependency?ref=v0.1.0-rc.4"

# Install aibrix components
kubectl create -k "github.com/aibrix/aibrix/config/default?ref=v0.1.0-rc.1"
kubectl create -k "github.com/aibrix/aibrix/config/default?ref=v0.1.0-rc.4"
```

## Documentation
174 changes: 174 additions & 0 deletions benchmarks/autoscaling/7b.yaml
@@ -0,0 +1,174 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
model.aibrix.ai/name: deepseek-coder-7b-instruct
model.aibrix.ai/port: "8000"
name: aibrix-model-deepseek-coder-7b-instruct
namespace: default
spec:
replicas: 1
selector:
matchLabels:
model.aibrix.ai/name: deepseek-coder-7b-instruct
strategy:
type: Recreate
template:
metadata:
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
prometheus.io/path: "/metrics"
labels:
model.aibrix.ai/name: deepseek-coder-7b-instruct
spec:
containers:
- command:
- python3
- -m
- vllm.entrypoints.openai.api_server
- --host
- "0.0.0.0"
- --port
- "8000"
- --model
- /models/deepseek-coder-6.7b-instruct
- --served-model-name
- deepseek-coder-7b-instruct
- --trust-remote-code
- --max-model-len
- "10240"
- --api-key
- sk-kFJ12nKsFVfVmGpj3QzX65s4RbN2xJqWzPYCjYu7wT3BlbLi
image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.6.2-distributed
imagePullPolicy: Always
livenessProbe:
failureThreshold: 3
httpGet:
path: /health
port: 8000
scheme: HTTP
initialDelaySeconds: 90
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 1
name: vllm-openai
ports:
- containerPort: 8000
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /health
port: 8000
scheme: HTTP
initialDelaySeconds: 90
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 1
resources:
limits:
nvidia.com/gpu: "1"
requests:
nvidia.com/gpu: "1"
# We need to use dataset cache
volumeMounts:
- mountPath: /models
name: model-hostpath
- name: dshm
mountPath: /dev/shm
- name: aibrix-runtime
image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime:v0.1.0-rc.4
command:
- gunicorn
- -b
- :8080
- app:app
- -k
- uvicorn.workers.UvicornWorker
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
initContainers:
- name: init-model
image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime:v0.1.0-rc.4
command:
- python
- -m
- aibrix.downloader
- --model-uri
- tos://aibrix-artifact-testing/models/deepseek-ai/deepseek-coder-6.7b-instruct/
- --local-dir
- /models/
env:
- name: DOWNLOADER_MODEL_NAME
value: deepseek-coder-6.7b-instruct
- name: DOWNLOADER_NUM_THREADS
value: "16"
- name: DOWNLOADER_ALLOW_FILE_SUFFIX
value: json, safetensors
- name: TOS_ACCESS_KEY
valueFrom:
secretKeyRef:
name: tos-credential
key: TOS_ACCESS_KEY
- name: TOS_SECRET_KEY
valueFrom:
secretKeyRef:
name: tos-credential
key: TOS_SECRET_KEY
- name: TOS_ENDPOINT
value: tos-cn-beijing.ivolces.com
- name: TOS_REGION
value: cn-beijing
volumeMounts:
- mountPath: /models
name: model-hostpath
volumes:
- name: model-hostpath
hostPath:
path: /root/models
type: DirectoryOrCreate
- name: dshm
emptyDir:
medium: Memory
sizeLimit: "4Gi"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: machine.cluster.vke.volcengine.com/gpu-name
operator: In
values:
- NVIDIA-A10

---

apiVersion: v1
kind: Service
metadata:
labels:
model.aibrix.ai/name: deepseek-coder-7b-instruct
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
- name: serve
port: 8000
protocol: TCP
targetPort: 8000
- name: http
port: 8080
protocol: TCP
targetPort: 8080
selector:
model.aibrix.ai/name: deepseek-coder-7b-instruct
type: LoadBalancer
18 changes: 18 additions & 0 deletions benchmarks/autoscaling/apa.yaml
@@ -0,0 +1,18 @@
apiVersion: autoscaling.aibrix.ai/v1alpha1
kind: PodAutoscaler
metadata:
name: deepseek-coder-7b-instruct-apa
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
namespace: default
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: aibrix-model-deepseek-coder-7b-instruct
minReplicas: 1
maxReplicas: 10
targetMetric: "vllm:gpu_cache_usage_perc"
targetValue: "50"
scalingStrategy: "APA"
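The PodAutoscaler above targets `vllm:gpu_cache_usage_perc` at 50, bounded between 1 and 10 replicas. As a rough illustration only (not the actual APA algorithm, whose tolerances and fluctuation handling live in the controller), a proportional autoscaler derives the desired replica count from the ratio of observed to target metric, then clamps it to the configured bounds:

```python
import math

def desired_replicas(current: int, observed: float, target: float,
                     min_replicas: int = 1, max_replicas: int = 10) -> int:
    """Proportional scaling sketch: scale replicas by observed/target,
    round up, then clamp to [min_replicas, max_replicas]."""
    raw = math.ceil(current * observed / target)
    return max(min_replicas, min(max_replicas, raw))

# GPU cache usage at 80% against a 50% target with 2 replicas -> scale to 4
print(desired_replicas(2, 80.0, 50.0))  # -> 4
```

With usage below target the same formula scales down (e.g. 4 replicas at 25% usage against the 50% target yields 2), which is why min/max bounds matter for keeping the deployment within capacity limits.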