Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add health check api interface for runtime #451

Merged
merged 5 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/installation-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,14 @@ jobs:
run: |
for image in controller-manager plugins runtime users; do
docker load < ${image}-image/${image}.tar
          # Retag the image for application integration: it is cumbersome to override
          # every commit SHA in the manifests, so the "nightly" tag is used to
          # represent the latest image instead.
docker tag aibrix/${image}:${{ github.sha }} aibrix/${image}:nightly
kind load docker-image aibrix/${image}:${{ github.sha }} --name installation-test
kind load docker-image aibrix/${image}:nightly --name installation-test
done
- name: Deploy controller with the built image
Expand Down
17 changes: 17 additions & 0 deletions benchmarks/autoscaling/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: init-model
image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime:v0.1.1
Expand Down
17 changes: 17 additions & 0 deletions benchmarks/gateway/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: init-model
image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime:v0.1.1
Expand Down
26 changes: 26 additions & 0 deletions docs/development/app/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,32 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: aibrix-runtime
image: aibrix/runtime:nightly
command:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
---
# Debug only: Make sure pod can be visited from controller that deployed in mac.
apiVersion: v1
Expand Down
17 changes: 17 additions & 0 deletions docs/tutorial/runtime/runtime-hf-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: model-init
image: aibrix/runtime:latest
Expand Down
17 changes: 17 additions & 0 deletions docs/tutorial/runtime/runtime-s3-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: model-init
image: aibrix/runtime:latest
Expand Down
17 changes: 17 additions & 0 deletions docs/tutorial/runtime/runtime-tos-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ spec:
- aibrix_runtime
- --port
- "8080"
env:
- name: INFERENCE_ENGINE
value: vllm
- name: INFERENCE_ENGINE_ENDPOINT
value: http://localhost:8000
ports:
- containerPort: 8080
protocol: TCP
volumeMounts:
- mountPath: /models
name: model-hostpath
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 3
periodSeconds: 2
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
initContainers:
- name: model-init
image: aibrix/runtime:latest
Expand Down
26 changes: 26 additions & 0 deletions python/aibrix/aibrix/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ def init_app_state(state: State) -> None:
)


def inference_engine_ready() -> bool:
    """Return True when the runtime is configured to reach an inference engine.

    This only verifies that both the engine name and its endpoint are set in
    the environment; the engine's own health is covered by the main
    container's probes, so no network check is performed here.

    Returns:
        bool: True if ``INFERENCE_ENGINE`` and ``INFERENCE_ENGINE_ENDPOINT``
        are both non-empty, False otherwise (including on any unexpected
        error, so the readiness endpoint never raises).
    """
    try:
        # bool(...) is the idiomatic form of ``True if x else False``.
        return bool(envs.INFERENCE_ENGINE and envs.INFERENCE_ENGINE_ENDPOINT)
    except Exception as e:
        # Fail closed: report "not ready" rather than propagate the error.
        logger.error(f"Readiness check failed: {e}")
        return False


@router.post("/v1/lora_adapter/load")
async def load_lora_adapter(request: LoadLoraAdapterRequest, raw_request: Request):
response = await inference_engine(raw_request).load_lora_adapter(request)
Expand All @@ -108,6 +120,20 @@ async def unload_lora_adapter(request: UnloadLoraAdapterRequest, raw_request: Re
return Response(status_code=200, content=response)


@router.get("/healthz")
async def liveness_check():
    """Liveness probe endpoint: unconditionally reports healthy (HTTP 200)."""
    payload = {"status": "ok"}
    return JSONResponse(content=payload, status_code=200)


@router.get("/ready")
async def readiness_check():
    """Readiness probe endpoint: 200 when the engine is configured, else 503."""
    is_ready = inference_engine_ready()
    status_text = "ready" if is_ready else "not ready"
    http_code = 200 if is_ready else 503
    return JSONResponse(content={"status": status_text}, status_code=http_code)


def build_app(args: argparse.Namespace):
if args.enable_fastapi_docs:
app = FastAPI(debug=False)
Expand Down