Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CI] Package AI Runtime #118

Merged
merged 13 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Image URL to use all building/pushing image targets
AIBRIX_REPO ?= aibrix
IMG ?= controller:latest
PLUGINS_IMG ?= aibrix/plugins:v0.1.0
RUNTIME_IMG ?= ${AIBRIX_REPO}/runtime:latest
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.29.0

Expand Down Expand Up @@ -114,6 +116,10 @@ docker-build: ## Build docker image with the manager.
docker-build-plugins: ## Build docker image with the gateway plugins.
	$(CONTAINER_TOOL) build -t ${PLUGINS_IMG} -f gateway.Dockerfile .

.PHONY: docker-build-runtime
docker-build-runtime: ## Build docker image with the AI Runtime.
	$(CONTAINER_TOOL) build -t ${RUNTIME_IMG} -f runtime.Dockerfile .

.PHONY: docker-push
docker-push: ## Push docker image with the manager.
$(CONTAINER_TOOL) push ${IMG}
Expand Down Expand Up @@ -141,6 +147,10 @@ build-installer: manifests generate kustomize ## Generate a consolidated YAML wi
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
$(KUSTOMIZE) build config/default > dist/install.yaml

.PHONY: docker-buildx-runtime
# NOTE: RUNTIME_IMG already embeds the ${AIBRIX_REPO} prefix (see
# RUNTIME_IMG ?= ${AIBRIX_REPO}/runtime:latest), so tag with ${RUNTIME_IMG}
# directly. Using ${AIBRIX_REPO}/${RUNTIME_IMG} here would double the repo
# path and push to "aibrix/aibrix/runtime:latest".
docker-buildx-runtime: ## Build and push multi-platform docker image for the AI Runtime.
	$(CONTAINER_TOOL) buildx build --push --platform=${PLATFORMS} -f runtime.Dockerfile . -t ${RUNTIME_IMG}

##@ Deployment

ifndef ignore-not-found
Expand Down
21 changes: 21 additions & 0 deletions docs/tutorial/runtime/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# AIBrix Runtime Demo

## Model Download
The AIBrix runtime supports downloading models from different sources.

- Download model from HuggingFace
```shell
kubectl apply -f runtime-hf-download.yaml
```

- Download model from AWS S3
```shell
kubectl apply -f runtime-s3-download.yaml
```

- Download model from TOS
```shell
kubectl apply -f runtime-tos-download.yaml
```

## Metrics Merge
146 changes: 146 additions & 0 deletions docs/tutorial/runtime/runtime-hf-download.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Deployment: vLLM OpenAI-compatible server for deepseek-coder-6.7b-instruct,
# with the AIBrix runtime as a sidecar and an init container that downloads
# the model from HuggingFace into the shared /models volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    models.aibricks.ai: deepseek-coder-6.7b-instruct
    models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
  name: aibricks-model-deepseek-coder-6.7b-instruct
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      models.aibricks.ai: deepseek-coder-6.7b-instruct
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels:
        models.aibricks.ai: deepseek-coder-6.7b-instruct
    spec:
      containers:
        # Main inference server: vLLM serving an OpenAI-compatible API on :8000.
        - command:
            - python3
            - -m
            - vllm.entrypoints.openai.api_server
            - --host
            - "0.0.0.0"
            - --port
            - "8000"
            - --model
            - /models/deepseek-ai/deepseek-coder-6.7b-instruct
            - --served-model-name
            - deepseek-ai/deepseek-coder-6.7b-instruct
            - --distributed-executor-backend
            - ray
            - --trust-remote-code
          image: vllm/vllm-openai:v0.5.5
          imagePullPolicy: Always
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTP
            initialDelaySeconds: 90
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 1
          name: vllm-openai
          ports:
            - containerPort: 8000
              protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTP
            initialDelaySeconds: 90
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 1
          resources:
            limits:
              nvidia.com/gpu: "1"
            requests:
              nvidia.com/gpu: "1"
          # We need to use dataset cache
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
            - name: dshm
              mountPath: /dev/shm
        # AIBrix runtime sidecar; exposes its HTTP endpoint on :8080.
        - name: aibrix-runtime
          image: aibrix/runtime:latest
          command:
            - bash
            - entrypoint.sh
          ports:
            - containerPort: 8080
              protocol: TCP
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
      initContainers:
        # Downloads the model from HuggingFace into /models before the
        # server container starts.
        - name: model-init
          image: aibrix/runtime:latest
          command:
            - python
            - -m
            - aibrix.downloader
            - --model-uri
            - deepseek-ai/deepseek-coder-6.7b-instruct
            - --local-dir
            - /models/
          env:
            - name: DOWNLOADER_MODEL_NAME
              value: deepseek-ai/deepseek-coder-6.7b-instruct
            - name: DOWNLOADER_ALLOW_FILE_SUFFIX
              value: json, safetensors
            - name: HF_TOKEN
              value: <input your hf token, if needed>
            - name: HF_ENDPOINT
              value: <input your hf endpoint, if needed>
            - name: HF_REVISION
              value: <input your model revision, if needed>
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
      volumes:
        - emptyDir: {}
          name: model-hostpath
        # In-memory shared volume mounted at /dev/shm for the server container.
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: "10Gi"

---

# Service exposing the vLLM API (:8000) and the runtime endpoint (:8080),
# annotated for Prometheus scraping of port 8080.
apiVersion: v1
kind: Service
metadata:
  labels:
    models.aibricks.ai: deepseek-coder-6.7b-instruct
    prometheus-discovery: "true"
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8080"
  name: aibricks-model-deepseek-coder-6-7b-instruct-svc
  namespace: default
spec:
  ports:
    - name: serve
      port: 8000
      protocol: TCP
      targetPort: 8000
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080
  selector:
    models.aibricks.ai: deepseek-coder-6.7b-instruct
  type: ClusterIP
148 changes: 148 additions & 0 deletions docs/tutorial/runtime/runtime-s3-download.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Deployment: vLLM OpenAI-compatible server for deepseek-coder-6.7b-instruct,
# with the AIBrix runtime as a sidecar and an init container that downloads
# the model from AWS S3 into the shared /models volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    models.aibricks.ai: deepseek-coder-6.7b-instruct
    models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
  name: aibricks-model-deepseek-coder-6.7b-instruct
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      models.aibricks.ai: deepseek-coder-6.7b-instruct
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels:
        models.aibricks.ai: deepseek-coder-6.7b-instruct
    spec:
      containers:
        # Main inference server: vLLM serving an OpenAI-compatible API on :8000.
        - command:
            - python3
            - -m
            - vllm.entrypoints.openai.api_server
            - --host
            - "0.0.0.0"
            - --port
            - "8000"
            - --model
            - /models/deepseek-ai/deepseek-coder-6.7b-instruct
            - --served-model-name
            - deepseek-ai/deepseek-coder-6.7b-instruct
            - --distributed-executor-backend
            - ray
            - --trust-remote-code
          image: vllm/vllm-openai:v0.5.5
          imagePullPolicy: Always
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTP
            initialDelaySeconds: 90
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 1
          name: vllm-openai
          ports:
            - containerPort: 8000
              protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /health
              port: 8000
              scheme: HTTP
            initialDelaySeconds: 90
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 1
          resources:
            limits:
              nvidia.com/gpu: "1"
            requests:
              nvidia.com/gpu: "1"
          # We need to use dataset cache
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
            - name: dshm
              mountPath: /dev/shm
        # AIBrix runtime sidecar; exposes its HTTP endpoint on :8080.
        - name: aibrix-runtime
          image: aibrix/runtime:latest
          command:
            - bash
            - entrypoint.sh
          ports:
            - containerPort: 8080
              protocol: TCP
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
      initContainers:
        # Downloads the model from S3 into /models before the server starts.
        # NOTE(review): AWS credentials are plaintext env values here; prefer
        # referencing a Kubernetes Secret via valueFrom.secretKeyRef.
        - name: model-init
          image: aibrix/runtime:latest
          command:
            - python
            - -m
            - aibrix.downloader
            - --model-uri
            - s3://<input your s3 bucket name>/<input your s3 bucket path>
            - --local-dir
            - /models/
          env:
            - name: DOWNLOADER_MODEL_NAME
              value: deepseek-ai/deepseek-coder-6.7b-instruct
            - name: DOWNLOADER_ALLOW_FILE_SUFFIX
              value: json, safetensors
            - name: AWS_ACCESS_KEY_ID
              value: <input your s3 access key>
            - name: AWS_SECRET_ACCESS_KEY
              value: <input your s3 secret key>
            - name: AWS_ENDPOINT_URL
              value: <input your s3 endpoint>
            - name: AWS_REGION
              value: <input your s3 region>
          volumeMounts:
            - mountPath: /models
              name: model-hostpath
      volumes:
        - emptyDir: {}
          name: model-hostpath
        # In-memory shared volume mounted at /dev/shm for the server container.
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: "10Gi"

---

# Service exposing the vLLM API (:8000) and the runtime endpoint (:8080),
# annotated for Prometheus scraping of port 8080.
apiVersion: v1
kind: Service
metadata:
  labels:
    models.aibricks.ai: deepseek-coder-6.7b-instruct
    prometheus-discovery: "true"
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8080"
  name: aibricks-model-deepseek-coder-6-7b-instruct-svc
  namespace: default
spec:
  ports:
    - name: serve
      port: 8000
      protocol: TCP
      targetPort: 8000
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080
  selector:
    models.aibricks.ai: deepseek-coder-6.7b-instruct
  type: ClusterIP
Loading
Loading