chore: e2e test for adapters and validation of adapters (#483)

**Reason for Change**:  **Requirements** - [ ] added unit tests and e2e tests (if applicable). **Issue Fixed**:  **Notes for Reviewers**: --------- Signed-off-by: Bangqi Zhu <bangqizhu@microsoft.com> Co-authored-by: Bangqi Zhu <bangqizhu@microsoft.com>
kaito-project · Jun 29, 2024 · f613679 · f613679
1 parent 5242169
commit f613679
Show file tree

Hide file tree

Showing 11 changed files with 250 additions and 41 deletions.
diff --git a/.github/workflows/e2e-workflow.yml b/.github/workflows/e2e-workflow.yml
@@ -127,6 +127,14 @@ jobs:
         env:
           REGISTRY: ${{ env.REGISTRY }}
           VERSION: ${{ env.VERSION }}
+
+      - name: build adapter image
+        if: ${{ !inputs.isRelease }}
+        shell: bash
+        run: |
+          make docker-build-adapter
+        env:
+          REGISTRY: ${{ env.REGISTRY }}
 
       - name: create cluster
         shell: bash
@@ -200,6 +208,7 @@ jobs:
         env:
           AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
           RUN_LLAMA_13B: ${{ env.RUN_LLAMA_13B }}
+          REGISTRY: ${{ env.REGISTRY }}
           AI_MODELS_REGISTRY: ${{ secrets.E2E_ACR_AMRT_USERNAME }}.azurecr.io
           AI_MODELS_REGISTRY_SECRET: ${{ secrets.E2E_AMRT_SECRET_NAME }}
 

diff --git a/Makefile b/Makefile
@@ -190,6 +190,15 @@ docker-build-kaito: docker-buildx
 		--pull \
 		--tag $(REGISTRY)/$(IMG_NAME):$(IMG_TAG) .
 
+# Build the e2e adapter image from ./docker/adapter/Dockerfile and tag it
+# $(REGISTRY)/e2e-adapter:0.0.1.
+.PHONY: docker-build-adapter
+docker-build-adapter: docker-buildx
+	docker buildx build \
+		--file ./docker/adapter/Dockerfile \
+		--output=$(OUTPUT_TYPE) \
+		--platform="linux/$(ARCH)" \
+		--pull \
+		--tag $(REGISTRY)/e2e-adapter:0.0.1 .
+
 ##@ Deployment
 
 ifndef ignore-not-found

diff --git a/docker/adapter/Dockerfile b/docker/adapter/Dockerfile
@@ -0,0 +1,6 @@
+# Image that packages the adapter test files under /data.
+FROM busybox:latest
+
+# Create the directory that will hold the adapter files.
+RUN mkdir -p /data
+
+# Copy the adapter configuration and the adapter model file into /data.
+# The source paths are relative to the build context, not to this Dockerfile.
+COPY docker/adapter/adapter_config.json /data/
+COPY docker/adapter/adapter_model.safetensors /data/
diff --git a/docker/adapter/README.md b/docker/adapter/README.md
@@ -0,0 +1,23 @@
+# E2E Adapter Test Files
+
+## Overview
+
+These files are part of a set used for conducting end-to-end (E2E) testing of an adapter component. The Dockerfile builds an image incorporating the configuration and model files, which is then used within an Init Container for testing. The adapter was trained on the [dolly-15k-oai-style](https://huggingface.co/datasets/philschmid/dolly-15k-oai-style) dataset
+using the default [qlora-params.yaml](../../charts/kaito/workspace/templates/qlora-params.yaml).
+
+## Files
+
+- **Dockerfile**: Builds the Docker image for the E2E tests.
+
+- **adapter_config.json**: Contains settings for configuring the adapter in the test environment.
+
+- **adapter_model.safetensors**: Provides the adapter's machine learning model in SafeTensors format.
+
+## Usage
+
+Build the Docker image with the following command:
+
+```bash
+
+make docker-build-adapter
+```
diff --git a/docker/adapter/adapter_config.json b/docker/adapter/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": {
+    "base_model_class": "FalconForCausalLM",
+    "parent_library": "transformers.models.falcon.modeling_falcon"
+  },
+  "base_model_name_or_path": "/workspace/tfs/weights",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": null,
+  "use_rslora": false
+}
diff --git a/docker/adapter/adapter_model.safetensors b/docker/adapter/adapter_model.safetensors
diff --git a/presets/inference/text-generation/inference_api.py b/presets/inference/text-generation/inference_api.py
@@ -125,6 +125,10 @@ def __post_init__(self): # validate parameters
         # To avoid any potential future operations that use non-combined adapters
         for adapter in adapter_names:
             model.delete_adapter(adapter)
+
+        active_adapters = model.active_adapters
+        if len(active_adapters) != 1 or active_adapters[0] != "combined_adapter":
+            raise ValueError(f"Adpaters is input but not merged correctlly")
     else:
         print("Warning: Did not find any valid adapters mounted, using base model")
         model = base_model

diff --git a/test/e2e/inference_with_adapters.go b/test/e2e/inference_with_adapters.go
@@ -0,0 +1,103 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+package e2e
+
+import (
+	"time"
+
+	kaitov1alpha1 "github.com/azure/kaito/api/v1alpha1"
+	"github.com/azure/kaito/test/e2e/utils"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// Fixtures shared by the adapter e2e spec.
+var (
+	// DefaultStrength is the strength value assigned to the test adapter.
+	DefaultStrength = "1.0"
+
+	// imageName is used both as the adapter source name and as the name of
+	// the init container the spec expects to find.
+	imageName = "e2e-adapter"
+
+	// fullImageName is the adapter image reference, built from the REGISTRY
+	// environment value and the fixed 0.0.1 tag.
+	fullImageName = utils.GetEnv("REGISTRY") + "/" + imageName + ":0.0.1"
+
+	// validAdapters is the adapter list attached to the test workspace.
+	validAdapters = []kaitov1alpha1.AdapterSpec{
+		{
+			Source: &kaitov1alpha1.DataSource{
+				Name:  imageName,
+				Image: fullImageName,
+			},
+			Strength: &DefaultStrength,
+		},
+	}
+
+	// expectedInitContainers lists the init containers the deployment is
+	// expected to carry for validAdapters.
+	expectedInitContainers = []corev1.Container{
+		{
+			Name:  imageName,
+			Image: fullImageName,
+		},
+	}
+)
+
+func validateAdapters(workspaceObj *kaitov1alpha1.Workspace, expectedInitContainers []corev1.Container) {
+	By("Checking the Adapters", func() {
+		Eventually(func() bool {
+			var err error
+			var initContainers []corev1.Container
+
+			dep := &appsv1.Deployment{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      workspaceObj.Name,
+					Namespace: workspaceObj.Namespace,
+				},
+			}
+			err = TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
+				Namespace: workspaceObj.Namespace,
+				Name:      workspaceObj.Name,
+			}, dep)
+			initContainers = dep.Spec.Template.Spec.InitContainers
+
+			if err != nil {
+				GinkgoWriter.Printf("Error fetching resource: %v\n", err)
+				return false
+			}
+
+			if len(initContainers) != len(expectedInitContainers) {
+				return false
+			}
+			initContainer, expectedInitContainer := initContainers[0], expectedInitContainers[0]
+
+			// GinkgoWriter.Printf("Resource '%s' not ready. Ready replicas: %d\n", workspaceObj.Name, readyReplicas)
+			return initContainer.Image == expectedInitContainer.Image && initContainer.Name == expectedInitContainer.Name
+		}, 20*time.Minute, utils.PollInterval).Should(BeTrue(), "Failed to wait for adapter resource to be ready")
+	})
+}
+
+// Adapter spec: creates a Falcon workspace with the test adapter attached and
+// runs the workspace validations followed by the adapter check.
+var _ = Describe("Workspace Preset", func() {
+	BeforeEach(func() {
+		loadTestEnvVars()
+		loadModelVersions()
+	})
+
+	It("should create a falcon workspace with adapter", func() {
+		nodeCount := 1
+		ws := createCustomWorkspaceWithAdapter(nodeCount)
+		defer cleanupResources(ws)
+
+		// Fixed pause before the first validation.
+		time.Sleep(30 * time.Second)
+		validateMachineCreation(ws, nodeCount)
+		validateResourceStatus(ws)
+
+		// Second fixed pause before checking the service and inference resources.
+		time.Sleep(30 * time.Second)
+		validateAssociatedService(ws)
+		validateInferenceResource(ws, int32(nodeCount), false)
+		validateWorkspaceReadiness(ws)
+
+		// Finally confirm the adapter init container is present.
+		validateAdapters(ws, expectedInitContainers)
+	})
+
+})
diff --git a/test/e2e/preset_test.go b/test/e2e/preset_test.go
@@ -34,17 +34,59 @@ const (
 	PresetMistral7BInstructModel = "mistral-7b-instruct"
 	PresetPhi2Model              = "phi-2"
 	PresetPhi3Mini4kModel        = "phi-3-mini-4k-instruct"
- 	PresetPhi3Mini128kModel      = "phi-3-mini-128k-instruct"
+	PresetPhi3Mini128kModel      = "phi-3-mini-128k-instruct"
 )
 
+func loadTestEnvVars() {
+	var err error
+	runLlama13B, err = strconv.ParseBool(os.Getenv("RUN_LLAMA_13B"))
+	if err != nil {
+		fmt.Print("Error: RUN_LLAMA_13B ENV Variable not set")
+		runLlama13B = false
+	}
+
+	aiModelsRegistry = utils.GetEnv("AI_MODELS_REGISTRY")
+	aiModelsRegistrySecret = utils.GetEnv("AI_MODELS_REGISTRY_SECRET")
+	supportedModelsYamlPath = utils.GetEnv("SUPPORTED_MODELS_YAML_PATH")
+}
+
+// loadModelVersions reads the model configs from supportedModelsYamlPath and
+// stores the extracted versions in modelInfo. Any failure is printed and the
+// process exits with status 1.
+func loadModelVersions() {
+	// exitOn prints the formatted error and terminates the process when err is set.
+	exitOn := func(err error, format string) {
+		if err == nil {
+			return
+		}
+		fmt.Printf(format, err)
+		os.Exit(1)
+	}
+
+	// Load stable model versions
+	configs, err := utils.GetModelConfigInfo(supportedModelsYamlPath)
+	exitOn(err, "Failed to load model configs: %v\n")
+
+	modelInfo, err = utils.ExtractModelVersion(configs)
+	exitOn(err, "Failed to extract stable model versions: %v\n")
+}
+
+// createCustomWorkspaceWithAdapter builds a Falcon 7B public-preset workspace
+// manifest with validAdapters attached, passes it to createAndValidateWorkspace,
+// and returns the workspace object.
+func createCustomWorkspaceWithAdapter(numOfNode int) *kaitov1alpha1.Workspace {
+	workspaceObj := &kaitov1alpha1.Workspace{}
+	By("Creating a workspace with adapter", func() {
+		name := fmt.Sprint("preset-", rand.Intn(1000))
+		selector := &metav1.LabelSelector{
+			MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-falcon"},
+		}
+
+		workspaceObj = utils.GenerateInferenceWorkspaceManifest(
+			name, namespaceName, "", numOfNode, "Standard_NC12s_v3", selector,
+			nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil, validAdapters,
+		)
+
+		createAndValidateWorkspace(workspaceObj)
+	})
+	return workspaceObj
+}
+
 func createFalconWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Workspace {
 	workspaceObj := &kaitov1alpha1.Workspace{}
 	By("Creating a workspace CR with Falcon 7B preset public mode", func() {
 		uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
 		workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3",
 			&metav1.LabelSelector{
 				MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-falcon"},
-			}, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil)
+			}, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil, nil)
 
 		createAndValidateWorkspace(workspaceObj)
 	})
@@ -58,7 +100,7 @@ func createMistralWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Wo
 		workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3",
 			&metav1.LabelSelector{
 				MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-mistral"},
-			}, nil, PresetMistral7BInstructModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil)
+			}, nil, PresetMistral7BInstructModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil, nil)
 
 		createAndValidateWorkspace(workspaceObj)
 	})
@@ -72,7 +114,7 @@ func createPhi2WorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Works
 		workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC6s_v3",
 			&metav1.LabelSelector{
 				MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-phi-2"},
-			}, nil, PresetPhi2Model, kaitov1alpha1.ModelImageAccessModePublic, nil, nil)
+			}, nil, PresetPhi2Model, kaitov1alpha1.ModelImageAccessModePublic, nil, nil, nil)
 
 		createAndValidateWorkspace(workspaceObj)
 	})
@@ -86,7 +128,7 @@ func createLlama7BWorkspaceWithPresetPrivateMode(registry, registrySecret, image
 		workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2AChat, imageVersion),
 			numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{
 				MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-llama-2-7b"},
-			}, nil, PresetLlama2AChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil)
+			}, nil, PresetLlama2AChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil, nil)
 
 		createAndValidateWorkspace(workspaceObj)
 	})
@@ -100,7 +142,7 @@ func createLlama13BWorkspaceWithPresetPrivateMode(registry, registrySecret, imag
 		workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2BChat, imageVersion),
 			numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{
 				MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-llama-2-13b"},
-			}, nil, PresetLlama2BChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil)
+			}, nil, PresetLlama2BChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil, nil)
 
 		createAndValidateWorkspace(workspaceObj)
 	})
@@ -114,7 +156,7 @@ func createCustomWorkspaceWithPresetCustomMode(imageName string, numOfNode int)
 		workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "",
 			numOfNode, "Standard_D4s_v3", &metav1.LabelSelector{
 				MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-custom"},
-			}, nil, "", utils.InferenceModeCustomTemplate, nil, utils.GeneratePodTemplate(uniqueID, namespaceName, imageName, nil))
+			}, nil, "", utils.InferenceModeCustomTemplate, nil, utils.GeneratePodTemplate(uniqueID, namespaceName, imageName, nil), nil)
 
 		createAndValidateWorkspace(workspaceObj)
 	})
@@ -344,35 +386,14 @@ var modelInfo map[string]string
 
 var _ = Describe("Workspace Preset", func() {
 	BeforeEach(func() {
-		var err error
-		runLlama13B, err = strconv.ParseBool(os.Getenv("RUN_LLAMA_13B"))
-		if err != nil {
-			// Handle error or set a default value
-			fmt.Print("Error: RUN_LLAMA_13B ENV Variable not set")
-			runLlama13B = false
-		}
-
-		aiModelsRegistry = utils.GetEnv("AI_MODELS_REGISTRY")
-		aiModelsRegistrySecret = utils.GetEnv("AI_MODELS_REGISTRY_SECRET")
-		supportedModelsYamlPath = utils.GetEnv("SUPPORTED_MODELS_YAML_PATH")
+		loadTestEnvVars()
 
-		// Load stable model versions
-		configs, err := utils.GetModelConfigInfo(supportedModelsYamlPath)
-		if err != nil {
-			fmt.Printf("Failed to load model configs: %v\n", err)
-			os.Exit(1)
-		}
-
-		modelInfo, err = utils.ExtractModelVersion(configs)
-		if err != nil {
-			fmt.Printf("Failed to extract stable model versions: %v\n", err)
-			os.Exit(1)
-		}
+		loadModelVersions()
 	})
 
-	It("should create a falcon workspace with preset public mode successfully", func() {
+	It("should create a mistral workspace with preset public mode successfully", func() {
 		numOfNode := 1
-		workspaceObj := createFalconWorkspaceWithPresetPublicMode(numOfNode)
+		workspaceObj := createMistralWorkspaceWithPresetPublicMode(numOfNode)
 
 		defer cleanupResources(workspaceObj)
 		time.Sleep(30 * time.Second)
@@ -389,9 +410,9 @@ var _ = Describe("Workspace Preset", func() {
 		validateWorkspaceReadiness(workspaceObj)
 	})
 
-	It("should create a mistral workspace with preset public mode successfully", func() {
+	It("should create a Phi-2 workspace with preset public mode successfully", func() {
 		numOfNode := 1
-		workspaceObj := createMistralWorkspaceWithPresetPublicMode(numOfNode)
+		workspaceObj := createPhi2WorkspaceWithPresetPublicMode(numOfNode)
 
 		defer cleanupResources(workspaceObj)
 		time.Sleep(30 * time.Second)
@@ -408,9 +429,9 @@ var _ = Describe("Workspace Preset", func() {
 		validateWorkspaceReadiness(workspaceObj)
 	})
 
-	It("should create a Phi-2 workspace with preset public mode successfully", func() {
+	It("should create a falcon workspace with preset public mode successfully", func() {
 		numOfNode := 1
-		workspaceObj := createPhi2WorkspaceWithPresetPublicMode(numOfNode)
+		workspaceObj := createFalconWorkspaceWithPresetPublicMode(numOfNode)
 
 		defer cleanupResources(workspaceObj)
 		time.Sleep(30 * time.Second)