Skip to content

Commit

Permalink
RAG remote service secret support
Browse files Browse the repository at this point in the history
Signed-off-by: Bangqi Zhu <bangqizhu@microsoft.com>
  • Loading branch information
Bangqi Zhu committed Feb 28, 2025
1 parent 0bbd6ef commit a31958f
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 29 deletions.
2 changes: 1 addition & 1 deletion pkg/ragengine/controllers/preset-rag.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ func CreatePresetRAG(ctx context.Context, ragEngineObj *v1alpha1.RAGEngine, revi
}
commands := utils.ShellCmd("python3 main.py")

image := "aimodelsregistrytest.azurecr.io/kaito-rag-service:0.1.2" //TODO: Change to the mcr image when release
image := "aimodelsregistrytest.azurecr.io/kaito-rag-service:0.3.0" //TODO: Change to the mcr image when release

imagePullSecretRefs := []corev1.LocalObjectReference{}

Expand Down
2 changes: 1 addition & 1 deletion pkg/ragengine/controllers/preset-rag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func TestCreatePresetRAG(t *testing.T) {
c.On("Create", mock.IsType(context.TODO()), mock.IsType(&appsv1.Deployment{}), mock.Anything).Return(nil)
},
expectedCmd: "/bin/sh -c python3 main.py",
expectedImage: "aimodelsregistrytest.azurecr.io/kaito-rag-service:0.1.2", //TODO: Change to the mcr image when release
expectedImage: "aimodelsregistrytest.azurecr.io/kaito-rag-service:0.3.0", //TODO: Change to the mcr image when release
},
}

Expand Down
11 changes: 9 additions & 2 deletions pkg/ragengine/manifests/manifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,15 @@ func RAGSetEnv(ragEngineObj *kaitov1alpha1.RAGEngine) []corev1.EnvVar {

if ragEngineObj.Spec.InferenceService.AccessSecret != "" {
accessSecretEnv := corev1.EnvVar{
Name: "INFERENCE_ACCESS_SECRET",
Value: ragEngineObj.Spec.InferenceService.AccessSecret,
Name: "LLM_ACCESS_SECRET",
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: ragEngineObj.Spec.InferenceService.AccessSecret,
},
Key: "LLM_ACCESS_SECRET",
},
},
}
envs = append(envs, accessSecretEnv)
}
Expand Down
83 changes: 58 additions & 25 deletions test/rage2e/rag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,25 @@ var _ = Describe("RAGEngine", func() {
}
})

// End-to-end spec: creates an access secret and a RAGEngine, waits for the
// RAGEngine and its associated resources to become ready, then runs an index
// pod followed by a query pod and checks the query pod's logs.
It("should create RAG with localembedding and huggingface API successfully", func() {
numOfReplica := 1

// Create the "huggingface-token" secret before the RAGEngine itself.
createAndValidateSecret()
ragengineObj := createLocalEmbeddingHFURLRAGEngine()

// nil is passed as the first argument — presumably the workspace slot, since this
// spec creates no workspace; verify against cleanupResources.
defer cleanupResources(nil, ragengineObj)

validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.ConditionTypeResourceStatus), "ragengineObj resource status to be ready")
validateAssociatedService(ragengineObj.ObjectMeta)
validateInferenceandRAGResource(ragengineObj.ObjectMeta, int32(numOfReplica), false)
validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.RAGEngineConditionTypeSucceeded), "ragengine to be ready")

createIndexPod(ragengineObj)
// Substring that createAndValidateQueryPod looks for in the query pod's logs.
// Each "\\n" is a literal backslash followed by 'n', not a newline character.
searchQuerySuccess := "\\n\\nKaito is an operator that is designed to automate the AI/ML model inference or tuning workload in a Kubernetes cluster."
createAndValidateQueryPod(ragengineObj, searchQuerySuccess)

})

It("should create RAG with localembedding and kaito VLLM workspace successfully", func() {
numOfReplica := 1
workspaceObj := createPhi3WorkspaceWithPresetPublicModeAndVLLM(numOfReplica)
Expand Down Expand Up @@ -107,26 +126,11 @@ var _ = Describe("RAGEngine", func() {
validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.RAGEngineConditionTypeSucceeded), "ragengine to be ready")

createIndexPod(ragengineObj)
createAndValidateQueryPod(ragengineObj)
searchQuerySuccess := "\\nKaito is an operator that automates the AI/ML model inference or tuning workload in a Kubernetes cluster.\\n\\n\\n"
createAndValidateQueryPod(ragengineObj, searchQuerySuccess)

})

// End-to-end spec: creates a RAGEngine, waits for the RAGEngine and its
// associated resources to become ready, then runs an index pod. This version
// does not create an access secret and does not run a query pod.
It("should create RAG with localembedding and huggingface API successfully", func() {
numOfReplica := 1

ragengineObj := createLocalEmbeddingHFURLRAGEngine()

// nil is passed as the first argument — presumably the workspace slot, since this
// spec creates no workspace; verify against cleanupResources.
defer cleanupResources(nil, ragengineObj)

validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.ConditionTypeResourceStatus), "ragengineObj resource status to be ready")
validateAssociatedService(ragengineObj.ObjectMeta)
validateInferenceandRAGResource(ragengineObj.ObjectMeta, int32(numOfReplica), false)
validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.RAGEngineConditionTypeSucceeded), "ragengine to be ready")

createIndexPod(ragengineObj)
//TODO: add the createAndValidateQueryPod here in the next PR

})
})

func createPhi3WorkspaceWithPresetPublicModeAndVLLM(numOfReplica int) *kaitov1alpha1.Workspace {
Expand Down Expand Up @@ -241,7 +245,8 @@ func createLocalEmbeddingHFURLRAGEngine() *kaitov1alpha1.RAGEngine {
MatchLabels: map[string]string{"apps": "phi-3"},
},
&kaitov1alpha1.InferenceServiceSpec{
URL: hfURL,
URL: hfURL,
AccessSecret: "huggingface-token",
},
)

Expand Down Expand Up @@ -456,20 +461,20 @@ func createIndexPod(ragengineObj *kaitov1alpha1.RAGEngine) error {
}, utils.PollTimeout, utils.PollInterval).
Should(Succeed(), "Failed to create index pod")
})
time.Sleep(60 * time.Second)
time.Sleep(600 * time.Second)

return nil
}

func createAndValidateQueryPod(ragengineObj *kaitov1alpha1.RAGEngine) error {
func createAndValidateQueryPod(ragengineObj *kaitov1alpha1.RAGEngine, expectedSearchQueries string) error {
By("Creating query pod", func() {
pod := GenerateQueryPodManifest(ragengineObj.Namespace, ragengineObj.Name)
Eventually(func() error {
return utils.TestingCluster.KubeClient.Create(ctx, pod, &client.CreateOptions{})
}, utils.PollTimeout, utils.PollInterval).
Should(Succeed(), "Failed to create query pod")
})
time.Sleep(30 * time.Second)

By("Checking the query logs", func() {
Eventually(func() bool {
coreClient, err := utils.GetK8sClientset()
Expand All @@ -484,9 +489,7 @@ func createAndValidateQueryPod(ragengineObj *kaitov1alpha1.RAGEngine) error {
return false
}

searchQuerySuccess := "\\nKaito is an operator that automates the AI/ML model inference or tuning workload in a Kubernetes cluster.\\n\\n\\n"

return strings.Contains(logs, searchQuerySuccess)
return strings.Contains(logs, expectedSearchQueries)
}, 2*time.Minute, utils.PollInterval).Should(BeTrue(), "Failed to wait for query logs to be ready")
})

Expand Down Expand Up @@ -539,7 +542,8 @@ func GenerateQueryPodManifest(namespace, serviceName string) *v1.Pod { // TODO:
"llm_params": {
"max_tokens": 50,
"temperature": 0
}
},
"rerank_params": {"top_n": 3}
}'`

queryPod := &v1.Pod{
Expand All @@ -562,3 +566,32 @@ func GenerateQueryPodManifest(namespace, serviceName string) *v1.Pod { // TODO:

return queryPod
}

// createAndValidateSecret creates the Opaque secret "huggingface-token" in
// namespaceName, storing the value of the HF_TOKEN environment variable under
// the "LLM_ACCESS_SECRET" key, and then reads the secret back to confirm it
// exists. Failures are reported through Gomega assertions; nothing is returned.
func createAndValidateSecret() {
	hfToken := os.Getenv("HF_TOKEN")
	// Never print the token itself: GinkgoWriter output is emitted on verbose
	// runs and whenever a spec fails, so the credential would leak into test
	// logs. Report only whether a value was supplied.
	GinkgoWriter.Printf("HF_TOKEN is set: %t\n", hfToken != "")

	secret := &v1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "huggingface-token",
			Namespace: namespaceName,
		},
		Data: map[string][]byte{
			"LLM_ACCESS_SECRET": []byte(hfToken),
		},
		Type: v1.SecretTypeOpaque,
	}

	By("Creating secret", func() {
		// Retry the create until it succeeds or the poll timeout elapses.
		Eventually(func() error {
			return utils.TestingCluster.KubeClient.Create(ctx, secret, &client.CreateOptions{})
		}, utils.PollTimeout, utils.PollInterval).
			Should(Succeed(), "Failed to create secret %s", secret.Name)
	})

	// Run as its own step rather than nested inside the creation step, so each
	// step is reported separately.
	By("Validating secret creation", func() {
		err := utils.TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
			Namespace: secret.Namespace,
			Name:      secret.Name,
		}, secret, &client.GetOptions{})
		Expect(err).NotTo(HaveOccurred())
	})
}

0 comments on commit a31958f

Please sign in to comment.