Skip to content

Commit

Permalink
feat: RAG service health check (#704)
Browse files Browse the repository at this point in the history
**Reason for Change**:
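Add a `/health` endpoint to the RAG service and serve the app on port 5000, so the liveness and readiness probes (HTTP GET `/health` on port 5000) can reach it.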

**Requirements**

- [ ] added unit tests and e2e tests (if applicable).

**Issue Fixed**:
<!-- If this PR fixes GitHub issue 4321, add "Fixes #4321" to the next
line. -->

**Notes for Reviewers**:

Signed-off-by: Bangqi Zhu <bangqizhu@microsoft.com>
Co-authored-by: Bangqi Zhu <bangqizhu@microsoft.com>
  • Loading branch information
bangqipropel and Bangqi Zhu authored Nov 20, 2024
1 parent 1517106 commit c25e7e9
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 11 deletions.
9 changes: 5 additions & 4 deletions pkg/ragengine/controllers/preset-rag.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,19 @@ import (

const (
ProbePath = "/health"
Port5000 = int32(5000)
Port5000 = 5000
)

var (
containerPorts = []corev1.ContainerPort{{
ContainerPort: Port5000,
ContainerPort: int32(Port5000),
},
}

livenessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Port: intstr.FromInt(5000),
Port: intstr.FromInt(Port5000),
Path: ProbePath,
},
},
Expand All @@ -43,7 +43,7 @@ var (
readinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Port: intstr.FromInt(5000),
Port: intstr.FromInt(Port5000),
Path: ProbePath,
},
},
Expand Down Expand Up @@ -100,6 +100,7 @@ func CreatePresetRAG(ctx context.Context, ragEngineObj *kaitov1alpha1.RAGEngine,
commands := utils.ShellCmd("python3 main.py")
// TODO: provide this image
image := "mcr.microsoft.com/aks/kaito/kaito-rag-service:0.0.1"

imagePullSecretRefs := []corev1.LocalObjectReference{}

depObj := manifests.GenerateRAGDeploymentManifest(ctx, ragEngineObj, revisionNum, image, imagePullSecretRefs, *ragEngineObj.Spec.Compute.Count, commands,
Expand Down
19 changes: 17 additions & 2 deletions pkg/ragengine/services/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from embedding.huggingface_remote import RemoteHuggingFaceEmbedding
from fastapi import FastAPI, HTTPException
from models import (IndexRequest, ListDocumentsResponse,
QueryRequest, QueryResponse, DocumentResponse)
QueryRequest, QueryResponse, DocumentResponse, HealthStatus)
from vector_store.faiss_store import FaissVectorStoreHandler

from services.config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID
Expand All @@ -29,6 +29,21 @@
# Initialize RAG operations
rag_ops = VectorStoreManager(vector_store_handler)

@app.get("/health", response_model=HealthStatus)
async def health_check():
    """Report whether the service's core components have been initialized.

    Returns:
        HealthStatus: ``status="Healthy"`` when both the module-level
        ``embedding_manager`` and ``rag_ops`` objects are not ``None``.

    Raises:
        HTTPException: status 500 with a specific ``detail`` when either
            component is ``None``, or with the stringified error when an
            unexpected exception occurs during the check.
    """
    try:
        if embedding_manager is None:
            raise HTTPException(status_code=500, detail="Embedding manager not initialized")

        if rag_ops is None:
            raise HTTPException(status_code=500, detail="RAG operations not initialized")

        return HealthStatus(status="Healthy")
    except HTTPException:
        # Propagate the deliberate failures above unchanged; re-wrapping them
        # through str(e) would alter the detail message sent to the client.
        raise
    except Exception as e:
        # Anything else (e.g. a component name that was never bound) is
        # reported as a generic 500, keeping the original error as the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.post("/index", response_model=List[DocumentResponse])
async def index_documents(request: IndexRequest): # TODO: Research async/sync what to use (inference is calling)
try:
Expand Down Expand Up @@ -59,4 +74,4 @@ async def list_all_indexed_documents():

if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
uvicorn.run(app, host="0.0.0.0", port=5000)
6 changes: 5 additions & 1 deletion pkg/ragengine/services/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,8 @@ class NodeWithScore(BaseModel):
class QueryResponse(BaseModel):
response: str
source_nodes: List[NodeWithScore]
metadata: Optional[dict] = None
metadata: Optional[dict] = None

# Response body model for the service's health-check endpoint.
class HealthStatus(BaseModel):
    # Short health label; the health-check handler sets this to "Healthy".
    status: str
    # Optional free-form extra information; defaults to None when omitted.
    detail: Optional[str] = None
8 changes: 4 additions & 4 deletions pkg/workspace/inference/preset-inferences.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,20 @@ import (

const (
ProbePath = "/healthz"
Port5000 = int32(5000)
Port5000 = 5000
InferenceFile = "inference_api.py"
)

var (
containerPorts = []corev1.ContainerPort{{
ContainerPort: Port5000,
ContainerPort: int32(Port5000),
},
}

livenessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Port: intstr.FromInt(5000),
Port: intstr.FromInt(Port5000),
Path: ProbePath,
},
},
Expand All @@ -48,7 +48,7 @@ var (
readinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Port: intstr.FromInt(5000),
Port: intstr.FromInt(Port5000),
Path: ProbePath,
},
},
Expand Down

0 comments on commit c25e7e9

Please sign in to comment.