Skip to content

Commit

Permalink
chore: e2e test for adapters and validation of adapters (#483)
Browse files Browse the repository at this point in the history
**Reason for Change**:
<!-- What does this PR improve or fix in Kaito? Why is it needed? -->

**Requirements**

- [ ] added unit tests and e2e tests (if applicable).

**Issue Fixed**:
<!-- If this PR fixes GitHub issue 4321, add "Fixes #4321" to the next
line. -->

**Notes for Reviewers**:

---------

Signed-off-by: Bangqi Zhu <bangqizhu@microsoft.com>
Co-authored-by: Bangqi Zhu <bangqizhu@microsoft.com>
  • Loading branch information
bangqipropel and Bangqi Zhu authored Jun 29, 2024
1 parent 5242169 commit f613679
Show file tree
Hide file tree
Showing 11 changed files with 250 additions and 41 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/e2e-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ jobs:
env:
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}

- name: build adapter image
if: ${{ !inputs.isRelease }}
shell: bash
run: |
make docker-build-adapter
env:
REGISTRY: ${{ env.REGISTRY }}

- name: create cluster
shell: bash
Expand Down Expand Up @@ -200,6 +208,7 @@ jobs:
env:
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
RUN_LLAMA_13B: ${{ env.RUN_LLAMA_13B }}
REGISTRY: ${{ env.REGISTRY }}
AI_MODELS_REGISTRY: ${{ secrets.E2E_ACR_AMRT_USERNAME }}.azurecr.io
AI_MODELS_REGISTRY_SECRET: ${{ secrets.E2E_AMRT_SECRET_NAME }}

Expand Down
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,15 @@ docker-build-kaito: docker-buildx
--pull \
--tag $(REGISTRY)/$(IMG_NAME):$(IMG_TAG) .

# docker-build-adapter builds the e2e adapter test image from
# ./docker/adapter/Dockerfile, using the repository root (".") as the build
# context, and tags it $(REGISTRY)/e2e-adapter:0.0.1.
# NOTE(review): the image name and the 0.0.1 tag are hard-coded here and are
# also spelled out in test/e2e/inference_with_adapters.go; keep them in sync.
.PHONY: docker-build-adapter
docker-build-adapter: docker-buildx
docker buildx build \
--file ./docker/adapter/Dockerfile \
--output=$(OUTPUT_TYPE) \
--platform="linux/$(ARCH)" \
--pull \
--tag $(REGISTRY)/e2e-adapter:0.0.1 .

##@ Deployment

ifndef ignore-not-found
Expand Down
6 changes: 6 additions & 0 deletions docker/adapter/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Minimal image that only carries the e2e test adapter files under /data.
# NOTE(review): busybox:latest is an unpinned tag, so rebuilds may pick up a
# different base image.
FROM busybox:latest

# Directory that holds the adapter files inside the image.
RUN mkdir -p /data

# Source paths are relative to the build context; the docker-build-adapter
# Makefile target passes the repository root as the context.
COPY docker/adapter/adapter_config.json /data/
COPY docker/adapter/adapter_model.safetensors /data/
23 changes: 23 additions & 0 deletions docker/adapter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# E2E Adapter Test Files

## Overview

These files are part of a set used for conducting end-to-end (E2E) testing of an adapter component. The Dockerfile builds an image incorporating the configuration and model files, which is then used within an Init Container for testing. The adapter was trained on the [dolly-15k-oai-style](https://huggingface.co/datasets/philschmid/dolly-15k-oai-style) dataset
using the default [qlora-params.yaml](../../charts/kaito/workspace/templates/qlora-params.yaml).

## Files

- **Dockerfile**: Builds the Docker image for the E2E tests.

- **adapter_config.json**: Contains settings for configuring the adapter in the test environment.

- **adapter_model.safetensors**: Provides the adapter's machine learning model in SafeTensors format.

## Usage

Build the Docker image with the following command:

```bash
make docker-build-adapter
```

29 changes: 29 additions & 0 deletions docker/adapter/adapter_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"alpha_pattern": {},
"auto_mapping": {
"base_model_class": "FalconForCausalLM",
"parent_library": "transformers.models.falcon.modeling_falcon"
},
"base_model_name_or_path": "/workspace/tfs/weights",
"bias": "none",
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 8,
"lora_dropout": 0.0,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 8,
"rank_pattern": {},
"revision": null,
"target_modules": [
"query_key_value"
],
"task_type": null,
"use_rslora": false
}
Binary file added docker/adapter/adapter_model.safetensors
Binary file not shown.
4 changes: 4 additions & 0 deletions presets/inference/text-generation/inference_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ def __post_init__(self): # validate parameters
# To avoid any potential future operations that use non-combined adapters
for adapter in adapter_names:
model.delete_adapter(adapter)

active_adapters = model.active_adapters
if len(active_adapters) != 1 or active_adapters[0] != "combined_adapter":
raise ValueError(f"Adpaters is input but not merged correctlly")
else:
print("Warning: Did not find any valid adapters mounted, using base model")
model = base_model
Expand Down
103 changes: 103 additions & 0 deletions test/e2e/inference_with_adapters.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

package e2e

import (
"time"

kaitov1alpha1 "github.com/azure/kaito/api/v1alpha1"
"github.com/azure/kaito/test/e2e/utils"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// Fixtures shared by the adapter e2e spec.
var (
	// DefaultStrength is the adapter strength value referenced (by pointer)
	// from the AdapterSpec fixtures below.
	DefaultStrength = "1.0"

	// imageName is both the adapter source name and the name expected on the
	// resulting init container.
	imageName = "e2e-adapter"

	// fullImageName is the adapter image reference, built from the REGISTRY
	// environment variable, imageName and the fixed 0.0.1 tag.
	fullImageName = utils.GetEnv("REGISTRY") + "/" + imageName + ":0.0.1"

	// validAdapters is the adapter list attached to the workspace under test.
	validAdapters = []kaitov1alpha1.AdapterSpec{
		{
			Source: &kaitov1alpha1.DataSource{
				Name:  imageName,
				Image: fullImageName,
			},
			Strength: &DefaultStrength,
		},
	}

	// expectedInitContainers lists the init containers the spec expects to
	// find on the workspace Deployment; only Name and Image are populated.
	expectedInitContainers = []corev1.Container{
		{
			Name:  imageName,
			Image: fullImageName,
		},
	}
)

// validateAdapters waits until the Deployment that shares the workspace's name
// and namespace carries the expected adapter init containers.
//
// The Deployment is fetched every utils.PollInterval for up to 20 minutes. A
// poll succeeds when the Deployment's init containers match
// expectedInitContainers in count and, position by position, in Name and
// Image. An empty expectedInitContainers is satisfied by a Deployment with no
// init containers. The enclosing spec fails if no poll succeeds in time.
func validateAdapters(workspaceObj *kaitov1alpha1.Workspace, expectedInitContainers []corev1.Container) {
	By("Checking the Adapters", func() {
		Eventually(func() bool {
			dep := &appsv1.Deployment{
				ObjectMeta: metav1.ObjectMeta{
					Name:      workspaceObj.Name,
					Namespace: workspaceObj.Namespace,
				},
			}
			err := TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
				Namespace: workspaceObj.Namespace,
				Name:      workspaceObj.Name,
			}, dep)
			if err != nil {
				GinkgoWriter.Printf("Error fetching resource: %v\n", err)
				return false
			}

			// Only inspect the spec once the fetch is known to have succeeded.
			initContainers := dep.Spec.Template.Spec.InitContainers
			if len(initContainers) != len(expectedInitContainers) {
				return false
			}

			// Compare every container rather than only the first one; this
			// also avoids indexing into an empty slice.
			for i := range expectedInitContainers {
				if initContainers[i].Image != expectedInitContainers[i].Image ||
					initContainers[i].Name != expectedInitContainers[i].Name {
					return false
				}
			}
			return true
		}, 20*time.Minute, utils.PollInterval).Should(BeTrue(), "Failed to wait for adapter resource to be ready")
	})
}

// Adapter e2e spec: creates a Falcon workspace with an adapter attached and
// runs the validation helpers against it, finishing with the init-container
// check in validateAdapters.
var _ = Describe("Workspace Preset", func() {
	BeforeEach(func() {
		loadTestEnvVars()

		loadModelVersions()
	})

	It("should create a falcon workspace with adapter", func() {
		// Fixed pause inserted before each of the first two validation phases.
		const settleDelay = 30 * time.Second

		nodeCount := 1
		ws := createCustomWorkspaceWithAdapter(nodeCount)

		// Registered right after creation so cleanup runs however the spec exits.
		defer cleanupResources(ws)

		time.Sleep(settleDelay)
		validateMachineCreation(ws, nodeCount)
		validateResourceStatus(ws)

		time.Sleep(settleDelay)
		validateAssociatedService(ws)
		validateInferenceResource(ws, int32(nodeCount), false)
		validateWorkspaceReadiness(ws)

		validateAdapters(ws, expectedInitContainers)
	})
})
93 changes: 57 additions & 36 deletions test/e2e/preset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,59 @@ const (
PresetMistral7BInstructModel = "mistral-7b-instruct"
PresetPhi2Model = "phi-2"
PresetPhi3Mini4kModel = "phi-3-mini-4k-instruct"
PresetPhi3Mini128kModel = "phi-3-mini-128k-instruct"
PresetPhi3Mini128kModel = "phi-3-mini-128k-instruct"
)

// loadTestEnvVars populates the package-level e2e settings from environment
// variables.
//
// RUN_LLAMA_13B is parsed as a boolean; when it is unset or cannot be parsed,
// the problem is printed and runLlama13B falls back to false. The remaining
// settings are read through utils.GetEnv.
// NOTE(review): utils.GetEnv's behaviour for a missing variable is not visible
// here; confirm against its definition.
func loadTestEnvVars() {
	var err error
	runLlama13B, err = strconv.ParseBool(os.Getenv("RUN_LLAMA_13B"))
	if err != nil {
		// Newline-terminated so the message does not run into later output,
		// and the parse error is included because the variable may be set to
		// a non-boolean value rather than missing.
		fmt.Printf("Error: RUN_LLAMA_13B ENV Variable not set or not a valid boolean: %v\n", err)
		runLlama13B = false
	}

	aiModelsRegistry = utils.GetEnv("AI_MODELS_REGISTRY")
	aiModelsRegistrySecret = utils.GetEnv("AI_MODELS_REGISTRY_SECRET")
	supportedModelsYamlPath = utils.GetEnv("SUPPORTED_MODELS_YAML_PATH")
}

// loadModelVersions reads the model configuration found at
// supportedModelsYamlPath and stores the extracted versions in the
// package-level modelInfo map.
//
// Either step failing prints the error and terminates the process with exit
// code 1.
// NOTE(review): os.Exit ends the whole test binary immediately, so deferred
// cleanup and Ginkgo's own failure reporting do not run.
func loadModelVersions() {
	// exitOnErr prints err using format and exits when err is non-nil.
	exitOnErr := func(format string, err error) {
		if err == nil {
			return
		}
		fmt.Printf(format, err)
		os.Exit(1)
	}

	modelConfigs, err := utils.GetModelConfigInfo(supportedModelsYamlPath)
	exitOnErr("Failed to load model configs: %v\n", err)

	modelInfo, err = utils.ExtractModelVersion(modelConfigs)
	exitOnErr("Failed to extract stable model versions: %v\n", err)
}

// createCustomWorkspaceWithAdapter builds a Falcon 7B public-preset inference
// workspace manifest with validAdapters attached, passes it to
// createAndValidateWorkspace and returns it.
//
// numOfNode is forwarded to the manifest generator as the node count. The
// workspace name is "preset-" followed by a random integer in [0, 1000).
func createCustomWorkspaceWithAdapter(numOfNode int) *kaitov1alpha1.Workspace {
	ws := new(kaitov1alpha1.Workspace)
	By("Creating a workspace with adapter", func() {
		name := fmt.Sprintf("preset-%d", rand.Intn(1000))
		selector := &metav1.LabelSelector{
			MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-falcon"},
		}

		ws = utils.GenerateInferenceWorkspaceManifest(name, namespaceName, "", numOfNode, "Standard_NC12s_v3",
			selector, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil, validAdapters)

		createAndValidateWorkspace(ws)
	})
	return ws
}

func createFalconWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Falcon 7B preset public mode", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3",
&metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-falcon"},
}, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil)
}, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil, nil)

createAndValidateWorkspace(workspaceObj)
})
Expand All @@ -58,7 +100,7 @@ func createMistralWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Wo
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3",
&metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-mistral"},
}, nil, PresetMistral7BInstructModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil)
}, nil, PresetMistral7BInstructModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil, nil)

createAndValidateWorkspace(workspaceObj)
})
Expand All @@ -72,7 +114,7 @@ func createPhi2WorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Works
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC6s_v3",
&metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-phi-2"},
}, nil, PresetPhi2Model, kaitov1alpha1.ModelImageAccessModePublic, nil, nil)
}, nil, PresetPhi2Model, kaitov1alpha1.ModelImageAccessModePublic, nil, nil, nil)

createAndValidateWorkspace(workspaceObj)
})
Expand All @@ -86,7 +128,7 @@ func createLlama7BWorkspaceWithPresetPrivateMode(registry, registrySecret, image
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2AChat, imageVersion),
numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-llama-2-7b"},
}, nil, PresetLlama2AChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil)
}, nil, PresetLlama2AChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil, nil)

createAndValidateWorkspace(workspaceObj)
})
Expand All @@ -100,7 +142,7 @@ func createLlama13BWorkspaceWithPresetPrivateMode(registry, registrySecret, imag
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2BChat, imageVersion),
numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-llama-2-13b"},
}, nil, PresetLlama2BChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil)
}, nil, PresetLlama2BChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil, nil)

createAndValidateWorkspace(workspaceObj)
})
Expand All @@ -114,7 +156,7 @@ func createCustomWorkspaceWithPresetCustomMode(imageName string, numOfNode int)
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "",
numOfNode, "Standard_D4s_v3", &metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-custom"},
}, nil, "", utils.InferenceModeCustomTemplate, nil, utils.GeneratePodTemplate(uniqueID, namespaceName, imageName, nil))
}, nil, "", utils.InferenceModeCustomTemplate, nil, utils.GeneratePodTemplate(uniqueID, namespaceName, imageName, nil), nil)

createAndValidateWorkspace(workspaceObj)
})
Expand Down Expand Up @@ -344,35 +386,14 @@ var modelInfo map[string]string

var _ = Describe("Workspace Preset", func() {
BeforeEach(func() {
var err error
runLlama13B, err = strconv.ParseBool(os.Getenv("RUN_LLAMA_13B"))
if err != nil {
// Handle error or set a default value
fmt.Print("Error: RUN_LLAMA_13B ENV Variable not set")
runLlama13B = false
}

aiModelsRegistry = utils.GetEnv("AI_MODELS_REGISTRY")
aiModelsRegistrySecret = utils.GetEnv("AI_MODELS_REGISTRY_SECRET")
supportedModelsYamlPath = utils.GetEnv("SUPPORTED_MODELS_YAML_PATH")
loadTestEnvVars()

// Load stable model versions
configs, err := utils.GetModelConfigInfo(supportedModelsYamlPath)
if err != nil {
fmt.Printf("Failed to load model configs: %v\n", err)
os.Exit(1)
}

modelInfo, err = utils.ExtractModelVersion(configs)
if err != nil {
fmt.Printf("Failed to extract stable model versions: %v\n", err)
os.Exit(1)
}
loadModelVersions()
})

It("should create a falcon workspace with preset public mode successfully", func() {
It("should create a mistral workspace with preset public mode successfully", func() {
numOfNode := 1
workspaceObj := createFalconWorkspaceWithPresetPublicMode(numOfNode)
workspaceObj := createMistralWorkspaceWithPresetPublicMode(numOfNode)

defer cleanupResources(workspaceObj)
time.Sleep(30 * time.Second)
Expand All @@ -389,9 +410,9 @@ var _ = Describe("Workspace Preset", func() {
validateWorkspaceReadiness(workspaceObj)
})

It("should create a mistral workspace with preset public mode successfully", func() {
It("should create a Phi-2 workspace with preset public mode successfully", func() {
numOfNode := 1
workspaceObj := createMistralWorkspaceWithPresetPublicMode(numOfNode)
workspaceObj := createPhi2WorkspaceWithPresetPublicMode(numOfNode)

defer cleanupResources(workspaceObj)
time.Sleep(30 * time.Second)
Expand All @@ -408,9 +429,9 @@ var _ = Describe("Workspace Preset", func() {
validateWorkspaceReadiness(workspaceObj)
})

It("should create a Phi-2 workspace with preset public mode successfully", func() {
It("should create a falcon workspace with preset public mode successfully", func() {
numOfNode := 1
workspaceObj := createPhi2WorkspaceWithPresetPublicMode(numOfNode)
workspaceObj := createFalconWorkspaceWithPresetPublicMode(numOfNode)

defer cleanupResources(workspaceObj)
time.Sleep(30 * time.Second)
Expand Down
Loading

0 comments on commit f613679

Please sign in to comment.