Skip to content

Commit

Permalink
Add health check api interface for runtime (#451)
Browse files Browse the repository at this point in the history
* Add health check api interface for runtime

1. Add /healthz and /ready in runtime app server
2. Update existing test deployments to include the health check and necessary env

* Fix the format issue

* Remove model mount in the mocked app deployment

* Use nightly tag to test current image

* Update ci workflow to load nightly images
  • Loading branch information
Jeffwan authored Dec 2, 2024
1 parent f1659ab commit 808d71f
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/installation-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,14 @@ jobs:
run: |
for image in controller-manager plugins runtime users; do
docker load < ${image}-image/${image}.tar
# Retag the image
# This is for application integration, since it is not easy to override the commit-SHA tag in every manifest
# It is better to use nightly to represent the latest image
docker tag aibrix/${image}:${{ github.sha }} aibrix/${image}:nightly
kind load docker-image aibrix/${image}:${{ github.sha }} --name installation-test
kind load docker-image aibrix/${image}:nightly --name installation-test
done
- name: Deploy controller with the built image
Expand Down
17 changes: 17 additions & 0 deletions benchmarks/autoscaling/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: init-model
image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime:v0.1.1
Expand Down
17 changes: 17 additions & 0 deletions benchmarks/gateway/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: init-model
image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime:v0.1.1
Expand Down
26 changes: 26 additions & 0 deletions docs/development/app/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,32 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: aibrix-runtime
image: aibrix/runtime:nightly
command:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
---
# Debug only: Make sure pod can be visited from controller that deployed in mac.
apiVersion: v1
Expand Down
17 changes: 17 additions & 0 deletions docs/tutorial/runtime/runtime-hf-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: model-init
image: aibrix/runtime:latest
Expand Down
17 changes: 17 additions & 0 deletions docs/tutorial/runtime/runtime-s3-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: model-init
image: aibrix/runtime:latest
Expand Down
17 changes: 17 additions & 0 deletions docs/tutorial/runtime/runtime-tos-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: model-init
image: aibrix/runtime:latest
Expand Down
26 changes: 26 additions & 0 deletions python/aibrix/aibrix/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ def init_app_state(state: State) -> None:
)


def inference_engine_ready() -> bool:
    """Return True when the inference engine configuration is present.

    The main (engine) container runs its own health checks, so this does not
    probe the engine itself; it only verifies that both engine environment
    settings are non-empty, i.e. the runtime knows which engine to talk to.

    Returns:
        bool: True if both INFERENCE_ENGINE and INFERENCE_ENGINE_ENDPOINT
        are set to truthy values, False otherwise (including on any error).
    """
    try:
        # bool(...) of the short-circuit 'and' replaces the redundant
        # 'True if ... else False' ternary with the idiomatic form.
        return bool(envs.INFERENCE_ENGINE and envs.INFERENCE_ENGINE_ENDPOINT)
    except Exception as e:
        # Best-effort: a readiness probe must never raise; report not-ready.
        logger.error(f"Readiness check failed: {e}")
        return False


@router.post("/v1/lora_adapter/load")
async def load_lora_adapter(request: LoadLoraAdapterRequest, raw_request: Request):
response = await inference_engine(raw_request).load_lora_adapter(request)
Expand All @@ -108,6 +120,20 @@ async def unload_lora_adapter(request: UnloadLoraAdapterRequest, raw_request: Re
return Response(status_code=200, content=response)


@router.get("/healthz")
async def liveness_check():
    """Liveness probe endpoint.

    Always reports the process as alive with HTTP 200; no engine state is
    consulted here (readiness is checked separately via /ready).
    """
    payload = {"status": "ok"}
    return JSONResponse(content=payload, status_code=200)


@router.get("/ready")
async def readiness_check():
    """Readiness probe endpoint.

    Returns HTTP 200 when the inference engine configuration is present
    (per inference_engine_ready), otherwise HTTP 503 so Kubernetes keeps
    the pod out of service endpoints.
    """
    # Guard clause: bail out early when the engine is not ready.
    if not inference_engine_ready():
        return JSONResponse(content={"status": "not ready"}, status_code=503)
    return JSONResponse(content={"status": "ready"}, status_code=200)


def build_app(args: argparse.Namespace):
if args.enable_fastapi_docs:
app = FastAPI(debug=False)
Expand Down

0 comments on commit 808d71f

Please sign in to comment.