Bump the RAG service image to 0.3.0, expose the inference access secret to the
RAG pod as LLM_ACCESS_SECRET through a Secret key reference, and run the
Hugging Face e2e case with a "huggingface-token" Secret.

Review notes:
* The workflow hunk that echoed HF_TOKEN into the job log was dropped; it only
  added debug output.
* createAndValidateSecret no longer prints the token, fails fast when HF_TOKEN
  is empty, and runs its two By steps sequentially instead of nested.

diff --git a/pkg/ragengine/controllers/preset-rag.go b/pkg/ragengine/controllers/preset-rag.go
index e7e810fff..9971a390b 100644
--- a/pkg/ragengine/controllers/preset-rag.go
+++ b/pkg/ragengine/controllers/preset-rag.go
@@ -99,7 +99,7 @@ func CreatePresetRAG(ctx context.Context, ragEngineObj *v1alpha1.RAGEngine, revi
 	}
 
 	commands := utils.ShellCmd("python3 main.py")
-	image := "aimodelsregistrytest.azurecr.io/kaito-rag-service:0.1.2" //TODO: Change to the mcr image when release
+	image := "aimodelsregistrytest.azurecr.io/kaito-rag-service:0.3.0" //TODO: Change to the mcr image when release
 
 	imagePullSecretRefs := []corev1.LocalObjectReference{}
 
diff --git a/pkg/ragengine/controllers/preset-rag_test.go b/pkg/ragengine/controllers/preset-rag_test.go
index c48c58ba5..9822c3ef7 100644
--- a/pkg/ragengine/controllers/preset-rag_test.go
+++ b/pkg/ragengine/controllers/preset-rag_test.go
@@ -31,7 +31,7 @@ func TestCreatePresetRAG(t *testing.T) {
 				c.On("Create", mock.IsType(context.TODO()), mock.IsType(&appsv1.Deployment{}), mock.Anything).Return(nil)
 			},
 			expectedCmd:   "/bin/sh -c python3 main.py",
-			expectedImage: "aimodelsregistrytest.azurecr.io/kaito-rag-service:0.1.2", //TODO: Change to the mcr image when release
+			expectedImage: "aimodelsregistrytest.azurecr.io/kaito-rag-service:0.3.0", //TODO: Change to the mcr image when release
 		},
 	}
 
diff --git a/pkg/ragengine/manifests/manifests.go b/pkg/ragengine/manifests/manifests.go
index 541093b4a..2dba0703d 100644
--- a/pkg/ragengine/manifests/manifests.go
+++ b/pkg/ragengine/manifests/manifests.go
@@ -158,8 +158,15 @@ func RAGSetEnv(ragEngineObj *kaitov1alpha1.RAGEngine) []corev1.EnvVar {
 
 	if ragEngineObj.Spec.InferenceService.AccessSecret != "" {
 		accessSecretEnv := corev1.EnvVar{
-			Name:  "INFERENCE_ACCESS_SECRET",
-			Value: ragEngineObj.Spec.InferenceService.AccessSecret,
+			Name: "LLM_ACCESS_SECRET",
+			ValueFrom: &corev1.EnvVarSource{
+				SecretKeyRef: &corev1.SecretKeySelector{
+					LocalObjectReference: corev1.LocalObjectReference{
+						Name: ragEngineObj.Spec.InferenceService.AccessSecret,
+					},
+					Key: "LLM_ACCESS_SECRET",
+				},
+			},
 		}
 		envs = append(envs, accessSecretEnv)
 	}
diff --git a/test/rage2e/rag_test.go b/test/rage2e/rag_test.go
index ae48b36e6..dc22cd900 100644
--- a/test/rage2e/rag_test.go
+++ b/test/rage2e/rag_test.go
@@ -72,6 +72,24 @@ var _ = Describe("RAGEngine", func() {
 		}
 	})
 
+	It("should create RAG with localembedding and huggingface API successfully", func() {
+		numOfReplica := 1
+
+		createAndValidateSecret()
+		ragengineObj := createLocalEmbeddingHFURLRAGEngine()
+
+		defer cleanupResources(nil, ragengineObj)
+
+		validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.ConditionTypeResourceStatus), "ragengineObj resource status to be ready")
+		validateAssociatedService(ragengineObj.ObjectMeta)
+		validateInferenceandRAGResource(ragengineObj.ObjectMeta, int32(numOfReplica), false)
+		validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.RAGEngineConditionTypeSucceeded), "ragengine to be ready")
+
+		createIndexPod(ragengineObj)
+		//TODO: add the createAndValidateQueryPod here in the next PR
+
+	})
+
 	It("should create RAG with localembedding and kaito VLLM workspace successfully", func() {
 		numOfReplica := 1
 		workspaceObj := createPhi3WorkspaceWithPresetPublicModeAndVLLM(numOfReplica)
@@ -111,22 +129,6 @@ var _ = Describe("RAGEngine", func() {
 
 	})
 
-	It("should create RAG with localembedding and huggingface API successfully", func() {
-		numOfReplica := 1
-
-		ragengineObj := createLocalEmbeddingHFURLRAGEngine()
-
-		defer cleanupResources(nil, ragengineObj)
-
-		validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.ConditionTypeResourceStatus), "ragengineObj resource status to be ready")
-		validateAssociatedService(ragengineObj.ObjectMeta)
-		validateInferenceandRAGResource(ragengineObj.ObjectMeta, int32(numOfReplica), false)
-		validateRAGEngineCondition(ragengineObj, string(kaitov1alpha1.RAGEngineConditionTypeSucceeded), "ragengine to be ready")
-
-		createIndexPod(ragengineObj)
-		//TODO: add the createAndValidateQueryPod here in the next PR
-
-	})
 })
 
 func createPhi3WorkspaceWithPresetPublicModeAndVLLM(numOfReplica int) *kaitov1alpha1.Workspace {
@@ -241,7 +243,8 @@ func createLocalEmbeddingHFURLRAGEngine() *kaitov1alpha1.RAGEngine {
 			MatchLabels: map[string]string{"apps": "phi-3"},
 		},
 		&kaitov1alpha1.InferenceServiceSpec{
-			URL: hfURL,
+			URL:          hfURL,
+			AccessSecret: "huggingface-token",
 		},
 	)
 
@@ -456,7 +459,7 @@ func createIndexPod(ragengineObj *kaitov1alpha1.RAGEngine) error {
 		}, utils.PollTimeout, utils.PollInterval).
 			Should(Succeed(), "Failed to create index pod")
 	})
-	time.Sleep(60 * time.Second)
+	time.Sleep(600 * time.Second)
 	return nil
 }
 
@@ -562,3 +565,39 @@ func GenerateQueryPodManifest(namespace, serviceName string) *v1.Pod {
 	// TODO:
 	return queryPod
 }
+
+// createAndValidateSecret creates the "huggingface-token" Secret in the test
+// namespace from the HF_TOKEN environment variable, stored under the
+// LLM_ACCESS_SECRET key, and then reads the Secret back to confirm it exists.
+func createAndValidateSecret() {
+	hfToken := os.Getenv("HF_TOKEN")
+	// The token is deliberately never logged; an empty value is rejected here
+	// so a missing CI secret fails with a clear message.
+	Expect(hfToken).NotTo(BeEmpty(), "HF_TOKEN environment variable must be set")
+
+	secret := &v1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "huggingface-token",
+			Namespace: namespaceName,
+		},
+		Data: map[string][]byte{
+			"LLM_ACCESS_SECRET": []byte(hfToken),
+		},
+		Type: v1.SecretTypeOpaque,
+	}
+
+	By("Creating secret", func() {
+		Eventually(func() error {
+			return utils.TestingCluster.KubeClient.Create(ctx, secret, &client.CreateOptions{})
+		}, utils.PollTimeout, utils.PollInterval).
+			Should(Succeed(), "Failed to create secret %s", secret.Name)
+	})
+
+	By("Validating secret creation", func() {
+		err := utils.TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
+			Namespace: secret.Namespace,
+			Name:      secret.Name,
+		}, secret, &client.GetOptions{})
+		Expect(err).NotTo(HaveOccurred())
+	})
+}