Skip to content

Commit

Permalink
test: add e2e test for updating adapters (#566)
Browse files Browse the repository at this point in the history
**Reason for Change**:
Add e2e tests for updating adapters and validating the update
Add controller revision validation to e2e test

**Requirements**

- [ ] added unit tests and e2e tests (if applicable).

**Issue Fixed**:
<!-- If this PR fixes GitHub issue 4321, add "Fixes #4321" to the next
line. -->

**Notes for Reviewers**:

Signed-off-by: Bangqi Zhu <bangqizhu@microsoft.com>
Co-authored-by: Bangqi Zhu <bangqizhu@microsoft.com>
  • Loading branch information
bangqipropel and Bangqi Zhu authored Aug 28, 2024
1 parent ab3851a commit 61d9a1f
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 29 deletions.
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,19 @@ docker-build-kaito: docker-buildx
.PHONY: docker-build-adapter
docker-build-adapter: docker-buildx
docker buildx build \
--file ./docker/adapter/Dockerfile \
--build-arg ADAPTER_PATH=docker/adapters/adapter1 \
--file ./docker/adapters/Dockerfile \
--output=$(OUTPUT_TYPE) \
--platform="linux/$(ARCH)" \
--pull \
--tag $(REGISTRY)/e2e-adapter:0.0.1 .
docker buildx build \
--build-arg ADAPTER_PATH=docker/adapters/adapter2 \
--file ./docker/adapters/Dockerfile \
--output=$(OUTPUT_TYPE) \
--platform="linux/$(ARCH)" \
--pull \
--tag $(REGISTRY)/e2e-adapter2:0.0.1 .

.PHONY: docker-build-dataset
docker-build-dataset: docker-buildx
Expand Down
6 changes: 0 additions & 6 deletions docker/adapter/Dockerfile

This file was deleted.

8 changes: 8 additions & 0 deletions docker/adapters/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
FROM busybox:latest

ARG ADAPTER_PATH=docker/adapters/adapter1

RUN mkdir -p /data

COPY ${ADAPTER_PATH}/adapter_config.json /data/
COPY ${ADAPTER_PATH}/adapter_model.safetensors /data/
4 changes: 2 additions & 2 deletions docker/adapter/README.md → docker/adapters/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

## Overview

These files are part of a set used for conducting end-to-end (E2E) testing of an adapter component. The Dockerfile builds an image incorporating the configuration and model files, which is then used within an Init Container for testing. The adapter is training from [dolly-15k-oai-style](https://huggingface.co/datasets/philschmid/dolly-15k-oai-style) dataset
and was trained using default [qlora-params.yaml](../../charts/kaito/workspace/templates/qlora-params.yaml)
These files are part of a set used for conducting end-to-end (E2E) testing of an adapter component. The Dockerfile builds an image incorporating the configuration and model files, which is then used within an Init Container for testing. The adapter1 was trained on the [dolly-15k-oai-style](https://huggingface.co/datasets/philschmid/dolly-15k-oai-style) dataset
using the default [qlora-params.yaml](../../charts/kaito/workspace/templates/qlora-params.yaml). The adapter2 comes from [falcon-7b-instruct-ft-adapters](https://huggingface.co/gmazur591/falcon-7b-instruct-ft-adapters) and is used for testing.

## Files

Expand Down
File renamed without changes.
File renamed without changes.
22 changes: 22 additions & 0 deletions docker/adapters/adapter2/adapter_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": "/workspace/tfs/weights",
"bias": "none",
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layers_pattern": null,
"layers_to_transform": null,
"lora_alpha": 16,
"lora_dropout": 0.01,
"modules_to_save": null,
"peft_type": "LORA",
"r": 4,
"rank_pattern": {},
"revision": null,
"target_modules": [
"query_key_value"
],
"task_type": "CAUSAL_LM"
}
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ When adapters are specified in the `inference` spec, the Kaito controller adds a
<img src="../img/kaito-inference-adapter.png" width=40% title="Kaito inference adapter" alt="Kaito inference adapter">
</div>

If an image is specified as the adapter source, the corresponding initcontainer uses that image as its container image. These initcontainers ensure all adapter data is available locally before the inference service starts. The main container uses a supported model image, launching the [inference_api.py](https://github.com/Azure/kaito/presets/inference/text-generation/inference_api.py) script.
If an image is specified as the adapter source, the corresponding initcontainer uses that image as its container image. These initcontainers ensure all adapter data is available locally before the inference service starts. The main container uses a supported model image, launching the [inference_api.py](../../presets/inference/text-generation/inference_api.py) script.

All containers share local volumes by mounting the same `EmptyDir` volumes, avoiding file copies between containers.

Expand Down
3 changes: 2 additions & 1 deletion pkg/controllers/workspace_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const (
nodePluginInstallTimeout = 60 * time.Second
WorkspaceHashAnnotation = "workspace.kaito.io/hash"
WorkspaceNameLabel = "workspace.kaito.io/name"
revisionHashSuffix = 5
)

type WorkspaceReconciler struct {
Expand Down Expand Up @@ -243,7 +244,7 @@ func (c *WorkspaceReconciler) syncControllerRevision(ctx context.Context, wObj *
}
newRevision := &appsv1.ControllerRevision{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s", wObj.Name, currentHash[:5]),
Name: fmt.Sprintf("%s-%s", wObj.Name, currentHash[:revisionHashSuffix]),
Namespace: wObj.Namespace,
Annotations: map[string]string{
WorkspaceHashAnnotation: currentHash,
Expand Down
60 changes: 48 additions & 12 deletions test/e2e/inference_with_adapters_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,42 @@ import (

var DefaultStrength = "1.0"

var imageName = "e2e-adapter"
var fullImageName = utils.GetEnv("E2E_ACR_REGISTRY") + "/" + imageName + ":0.0.1"
var imageName1 = "e2e-adapter"
var fullImageName1 = utils.GetEnv("E2E_ACR_REGISTRY") + "/" + imageName1 + ":0.0.1"
var imageName2 = "e2e-adapter2"
var fullImageName2 = utils.GetEnv("E2E_ACR_REGISTRY") + "/" + imageName2 + ":0.0.1"

var validAdapters = []kaitov1alpha1.AdapterSpec{
var validAdapters1 = []kaitov1alpha1.AdapterSpec{
{
Source: &kaitov1alpha1.DataSource{
Name: imageName,
Image: fullImageName,
Name: imageName1,
Image: fullImageName1,
},
Strength: &DefaultStrength,
},
}

var expectedInitContainers = []corev1.Container{
var validAdapters2 = []kaitov1alpha1.AdapterSpec{
{
Name: imageName,
Image: fullImageName,
Source: &kaitov1alpha1.DataSource{
Name: imageName2,
Image: fullImageName2,
},
Strength: &DefaultStrength,
},
}

var expectedInitContainers1 = []corev1.Container{
{
Name: imageName1,
Image: fullImageName1,
},
}

var expectedInitContainers2 = []corev1.Container{
{
Name: imageName2,
Image: fullImageName2,
},
}

Expand Down Expand Up @@ -120,9 +139,9 @@ var _ = Describe("Workspace Preset", func() {
}
})

It("should create a falcon workspace with adapter", func() {
It("should create a falcon workspace with adapter, and update the workspace with another adapter", func() {
numOfNode := 1
workspaceObj := createCustomWorkspaceWithAdapter(numOfNode)
workspaceObj := createCustomWorkspaceWithAdapter(numOfNode, validAdapters1)

defer cleanupResources(workspaceObj)
time.Sleep(30 * time.Second)
Expand All @@ -142,8 +161,25 @@ var _ = Describe("Workspace Preset", func() {

validateWorkspaceReadiness(workspaceObj)

validateInitContainers(workspaceObj, expectedInitContainers)
validateAdapterAdded(workspaceObj, workspaceObj.Name, imageName)
validateRevision(workspaceObj, "1")

validateInitContainers(workspaceObj, expectedInitContainers1)
validateAdapterAdded(workspaceObj, workspaceObj.Name, imageName1)

workspaceObj = updateCustomWorkspaceWithAdapter(workspaceObj, validAdapters2)
validateResourceStatus(workspaceObj)

time.Sleep(30 * time.Second)

validateAssociatedService(workspaceObj)

validateInferenceResource(workspaceObj, int32(numOfNode), false)

validateWorkspaceReadiness(workspaceObj)

validateRevision(workspaceObj, "2")
validateInitContainers(workspaceObj, expectedInitContainers2)
validateAdapterAdded(workspaceObj, workspaceObj.Name, imageName2)
})

})
72 changes: 66 additions & 6 deletions test/e2e/preset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ const (
PresetMistral7BInstructModel = "mistral-7b-instruct"
PresetPhi2Model = "phi-2"
PresetPhi3Mini128kModel = "phi-3-mini-128k-instruct"
WorkspaceHashAnnotation = "workspace.kaito.io/hash"
// WorkspaceRevisionAnnotation represents the revision number of the workload managed by the workspace
WorkspaceRevisionAnnotation = "workspace.kaito.io/revision"
)

var (
Expand Down Expand Up @@ -72,7 +75,7 @@ func loadModelVersions() {
}
}

func createCustomWorkspaceWithAdapter(numOfNode int) *kaitov1alpha1.Workspace {
func createCustomWorkspaceWithAdapter(numOfNode int, validAdapters []kaitov1alpha1.AdapterSpec) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace with adapter", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
Expand All @@ -86,6 +89,28 @@ func createCustomWorkspaceWithAdapter(numOfNode int) *kaitov1alpha1.Workspace {
return workspaceObj
}

func updateCustomWorkspaceWithAdapter(workspaceObj *kaitov1alpha1.Workspace, validAdapters []kaitov1alpha1.AdapterSpec) *kaitov1alpha1.Workspace {
By("Updating a workspace with adapter", func() {
workspaceObj.Inference.Adapters = validAdapters

By("Updating workspace", func() {
Eventually(func() error {
return utils.TestingCluster.KubeClient.Update(ctx, workspaceObj)
}, utils.PollTimeout, utils.PollInterval).
Should(Succeed(), "Failed to update workspace %s", workspaceObj.Name)

By("Validating workspace update", func() {
err := utils.TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
Namespace: workspaceObj.Namespace,
Name: workspaceObj.Name,
}, workspaceObj, &client.GetOptions{})
Expect(err).NotTo(HaveOccurred())
})
})
})
return workspaceObj
}

func createFalconWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Falcon 7B preset public mode", func() {
Expand Down Expand Up @@ -271,7 +296,7 @@ func copySecretToNamespace(secretName, targetNamespace string) error {
return nil
}

// Logic to validate resource status
// validateResourceStatus validates resource status
func validateResourceStatus(workspaceObj *kaitov1alpha1.Workspace) {
By("Checking the resource status", func() {
Eventually(func() bool {
Expand Down Expand Up @@ -321,7 +346,7 @@ func validateAssociatedService(workspaceObj *kaitov1alpha1.Workspace) {
})
}

// Logic to validate inference deployment
// validateInferenceResource validates inference deployment
func validateInferenceResource(workspaceObj *kaitov1alpha1.Workspace, expectedReplicas int32, isStatefulSet bool) {
By("Checking the inference resource", func() {
Eventually(func() bool {
Expand Down Expand Up @@ -364,13 +389,48 @@ func validateInferenceResource(workspaceObj *kaitov1alpha1.Workspace, expectedRe
return true
}

// GinkgoWriter.Printf("Resource '%s' not ready. Ready replicas: %d\n", workspaceObj.Name, readyReplicas)
return false
}, 20*time.Minute, utils.PollInterval).Should(BeTrue(), "Failed to wait for inference resource to be ready")
})
}

// Logic to validate tuning deployment
// validateRevision validates the annotations of the workspace and deployment, as well as the corresponding controller revision
func validateRevision(workspaceObj *kaitov1alpha1.Workspace, revisionStr string) {
By("Checking the revisions of the resources", func() {
Eventually(func() bool {
dep := &appsv1.Deployment{}
err := utils.TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
Namespace: workspaceObj.Namespace,
Name: workspaceObj.Name,
}, dep)
if err != nil {
GinkgoWriter.Printf("Error fetching resource: %v\n", err)
return false
}
workspaceObjHash := workspaceObj.Annotations[WorkspaceHashAnnotation]
revision := &appsv1.ControllerRevision{}
err = utils.TestingCluster.KubeClient.Get(ctx, client.ObjectKey{
Namespace: workspaceObj.Namespace,
Name: fmt.Sprintf("%s-%s", workspaceObj.Name, workspaceObjHash[:5]),
}, revision)

if err != nil {
GinkgoWriter.Printf("Error fetching resource: %v\n", err)
return false
}

revisionNum, _ := strconv.ParseInt(revisionStr, 10, 64)

isWorkspaceAnnotationCorrect := workspaceObj.Annotations[WorkspaceRevisionAnnotation] == revisionStr
isDeploymentAnnotationCorrect := dep.Annotations[WorkspaceRevisionAnnotation] == revisionStr
isRevisionCorrect := revision.Revision == revisionNum

return isWorkspaceAnnotationCorrect && isDeploymentAnnotationCorrect && isRevisionCorrect
}, 20*time.Minute, utils.PollInterval).Should(BeTrue(), "Failed to wait for correct revisions to be ready")
})
}

// validateTuningResource validates tuning deployment
func validateTuningResource(workspaceObj *kaitov1alpha1.Workspace) {
By("Checking the tuning resource", func() {
Eventually(func() bool {
Expand Down Expand Up @@ -438,7 +498,7 @@ func validateACRTuningResultsUploaded(workspaceObj *kaitov1alpha1.Workspace, job
}
}

// Logic to validate workspace readiness
// validateWorkspaceReadiness validates workspace readiness
func validateWorkspaceReadiness(workspaceObj *kaitov1alpha1.Workspace) {
By("Checking the workspace status is ready", func() {
Eventually(func() bool {
Expand Down

0 comments on commit 61d9a1f

Please sign in to comment.