Skip to content

Commit

Permalink
test: add adapter e2e test for vllm (#795)
Browse files Browse the repository at this point in the history
Validate that the adapter is loaded by querying the vLLM model list API.

https://docs.vllm.ai/en/latest/usage/lora.html#lora-model-lineage-in-model-card

---------

Signed-off-by: jerryzhuang <zhuangqhc@gmail.com>
  • Loading branch information
zhuangqh authored Dec 27, 2024
1 parent d9dc364 commit 8cee2c0
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 12 deletions.
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ require (
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/websocket v1.5.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/imdario/mergo v0.3.16 // indirect
Expand All @@ -68,9 +69,11 @@ require (
github.com/klauspost/compress v1.17.9 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect
github.com/moby/spdystream v0.4.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.3 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/aws/aws-sdk-go v1.51.16 h1:vnWKK8KjbftEkuPX8bRj3WHsLy1uhotn0eXptpvrxJI=
Expand Down Expand Up @@ -278,6 +280,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY=
github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY=
github.com/grpc-ecosystem/grpc-gateway v1.14.6/go.mod h1:zdiPV4Yse/1gnckTHtghG4GkDEdKCRJduHpTxT3/jcw=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
Expand Down Expand Up @@ -334,6 +338,8 @@ github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE=
github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8=
github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
Expand All @@ -345,6 +351,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8=
Expand Down
44 changes: 43 additions & 1 deletion test/e2e/inference_with_adapters_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package e2e

import (
"context"
"fmt"
"strings"
"time"
Expand Down Expand Up @@ -131,7 +132,7 @@ func validateImagePullSecrets(workspaceObj *kaitov1alpha1.Workspace, expectedIma
func validateAdapterAdded(workspaceObj *kaitov1alpha1.Workspace, deploymentName string, adapterName string) {
By("Checking the Adapters", func() {
Eventually(func() bool {
coreClient, err := utils.GetK8sConfig()
coreClient, err := utils.GetK8sClientset()
if err != nil {
GinkgoWriter.Printf("Failed to create core client: %v\n", err)
return false
Expand All @@ -158,6 +159,47 @@ func validateAdapterAdded(workspaceObj *kaitov1alpha1.Workspace, deploymentName
})
}

// validateAdapterLoadedInVLLM waits until adapterName appears in the output of
// the model list endpoint (127.0.0.1:5000/v1/models), queried from inside the
// inference container.
//
// On every poll it resolves a pod via utils.GetPodNameForDeployment in the
// workspace's namespace and runs a bash command in the container named
// deploymentName. The command installs curl with apt-get, fetches the model
// list and greps it for adapterName. A poll succeeds only when utils.ExecSync
// returns no error. Each exec is bounded to one minute and the overall wait to
// five minutes, after which the assertion fails.
func validateAdapterLoadedInVLLM(workspaceObj *kaitov1alpha1.Workspace, deploymentName string, adapterName string) {
	checkCmd := "apt-get update && apt-get install curl -y; curl -s 127.0.0.1:5000/v1/models | grep " + adapterName
	execOption := corev1.PodExecOptions{
		Container: deploymentName,
		Command:   []string{"bash", "-c", checkCmd},
		Stdout:    true,
		Stderr:    true,
	}

	// adapterLoaded is a single poll attempt; every failure is logged and
	// reported as "not yet" so that Eventually keeps retrying.
	adapterLoaded := func() bool {
		clientset, err := utils.GetK8sClientset()
		if err != nil {
			GinkgoWriter.Printf("Failed to create core client: %v\n", err)
			return false
		}

		ns := workspaceObj.Namespace
		pod, err := utils.GetPodNameForDeployment(clientset, ns, deploymentName)
		if err != nil {
			GinkgoWriter.Printf("Failed to get pod name for deployment %s: %v\n", deploymentName, err)
			return false
		}

		restConfig, err := utils.GetK8sConfig()
		if err != nil {
			GinkgoWriter.Printf("Failed to get k8s config: %v\n", err)
			return false
		}

		// Bound each exec so a hung command cannot eat the whole wait budget.
		execCtx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
		defer cancel()

		if _, err := utils.ExecSync(execCtx, restConfig, clientset, ns, pod, execOption); err != nil {
			GinkgoWriter.Printf("validate command fails: %v\n", err)
			return false
		}
		return true
	}

	By("Checking the loaded Adapters", func() {
		Eventually(adapterLoaded, 5*time.Minute, utils.PollInterval).
			Should(BeTrue(), "Failed to wait for adapter to be loaded")
	})
}

var _ = Describe("Workspace Preset", func() {
BeforeEach(func() {
loadTestEnvVars()
Expand Down
16 changes: 8 additions & 8 deletions test/e2e/preset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func loadModelVersions() {
func createCustomWorkspaceWithAdapter(numOfNode int, validAdapters []kaitov1alpha1.AdapterSpec) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace with adapter", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
uniqueID := fmt.Sprint("preset-falcon-", rand.Intn(1000))
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3",
&metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "custom-preset-e2e-test-falcon"},
Expand Down Expand Up @@ -116,7 +116,7 @@ func updateCustomWorkspaceWithAdapter(workspaceObj *kaitov1alpha1.Workspace, val
func createFalconWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Falcon 7B preset public mode", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
uniqueID := fmt.Sprint("preset-falcon-", rand.Intn(1000))
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3",
&metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-falcon"},
Expand All @@ -130,7 +130,7 @@ func createFalconWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Wor
func createMistralWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Mistral 7B preset public mode", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
uniqueID := fmt.Sprint("preset-mistral-", rand.Intn(1000))
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3",
&metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-mistral"},
Expand All @@ -144,7 +144,7 @@ func createMistralWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Wo
func createPhi2WorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Phi 2 preset public mode", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
uniqueID := fmt.Sprint("preset-phi2-", rand.Intn(1000))
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC6s_v3",
&metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-phi-2"},
Expand All @@ -158,7 +158,7 @@ func createPhi2WorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Works
func createLlama7BWorkspaceWithPresetPrivateMode(registry, registrySecret, imageVersion string, numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Llama 7B Chat preset private mode", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
uniqueID := fmt.Sprint("preset-llama-", rand.Intn(1000))
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2AChat, imageVersion),
numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-llama-2-7b"},
Expand All @@ -172,7 +172,7 @@ func createLlama7BWorkspaceWithPresetPrivateMode(registry, registrySecret, image
func createLlama13BWorkspaceWithPresetPrivateMode(registry, registrySecret, imageVersion string, numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Llama 13B Chat preset private mode", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
uniqueID := fmt.Sprint("preset-llama-", rand.Intn(1000))
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2BChat, imageVersion),
numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-llama-2-13b"},
Expand Down Expand Up @@ -200,7 +200,7 @@ func createCustomWorkspaceWithPresetCustomMode(imageName string, numOfNode int)
func createPhi3WorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Phi-3-mini-128k-instruct preset public mode", func() {
uniqueID := fmt.Sprint("preset-", rand.Intn(1000))
uniqueID := fmt.Sprint("preset-phi3-", rand.Intn(1000))
workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "",
numOfNode, "Standard_NC6s_v3", &metav1.LabelSelector{
MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-phi-3-mini-128k-instruct"},
Expand Down Expand Up @@ -555,7 +555,7 @@ func validateTuningJobInputOutput(workspaceObj *kaitov1alpha1.Workspace, inputIm
}

func validateACRTuningResultsUploaded(workspaceObj *kaitov1alpha1.Workspace, jobName string) {
coreClient, err := utils.GetK8sConfig()
coreClient, err := utils.GetK8sClientset()
if err != nil {
log.Fatalf("Failed to create core client: %v", err)
}
Expand Down
38 changes: 38 additions & 0 deletions test/e2e/preset_vllm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,30 @@ var _ = Describe("Workspace Preset on vllm runtime", func() {

validateWorkspaceReadiness(workspaceObj)
})

// Spec: create a single-node Falcon workspace with adapters through
// createFalconWorkspaceWithAdapterAndVLLM, run the workspace validations, then
// confirm the adapter shows up in the model list served inside the pod.
It("should create a falcon workspace with adapter successfully", func() {
numOfNode := 1
workspaceObj := createFalconWorkspaceWithAdapterAndVLLM(numOfNode, validAdapters2)

// Deferred so cleanup runs even when a later validation fails the spec.
defer cleanupResources(workspaceObj)
// Fixed pause before validating — presumably gives the controller time to
// start provisioning; TODO confirm.
time.Sleep(30 * time.Second)

validateCreateNode(workspaceObj, numOfNode)
validateResourceStatus(workspaceObj)

// Second fixed pause before the service/config checks — presumably for
// reconciliation to settle; TODO confirm.
time.Sleep(30 * time.Second)

validateAssociatedService(workspaceObj)
validateInferenceConfig(workspaceObj)

validateInferenceResource(workspaceObj, int32(numOfNode), false)

validateWorkspaceReadiness(workspaceObj)

validateInitContainers(workspaceObj, expectedInitContainers2)

// The workspace name is passed as the deployment name (which the validator
// also uses as the container name); imageName2 is passed as the adapter name
// to grep for.
validateAdapterLoadedInVLLM(workspaceObj, workspaceObj.Name, imageName2)
})
})

func createFalconWorkspaceWithPresetPublicModeAndVLLM(numOfNode int) *kaitov1alpha1.Workspace {
Expand All @@ -143,6 +167,20 @@ func createFalconWorkspaceWithPresetPublicModeAndVLLM(numOfNode int) *kaitov1alp
return workspaceObj
}

// createFalconWorkspaceWithAdapterAndVLLM generates a workspace manifest with
// utils.GenerateInferenceWorkspaceManifestWithVLLM for the Falcon 7B preset in
// public image access mode, attaches validAdapters, and hands it to
// createAndValidateWorkspace.
//
// The workspace name is "preset-falcon-" followed by a random number in
// [0, 1000). It requests numOfNode nodes of instance type Standard_NC6s_v3.
// The generated workspace object is returned.
func createFalconWorkspaceWithAdapterAndVLLM(numOfNode int, validAdapters []kaitov1alpha1.AdapterSpec) *kaitov1alpha1.Workspace {
	workspaceObj := &kaitov1alpha1.Workspace{}
	By("Creating a workspace CR with Falcon 7B preset public mode and vLLM", func() {
		name := fmt.Sprintf("preset-falcon-%d", rand.Intn(1000))
		selector := &metav1.LabelSelector{
			MatchLabels: map[string]string{
				"kaito-workspace": "public-preset-e2e-test-falcon-adapter-vllm",
			},
		}

		workspaceObj = utils.GenerateInferenceWorkspaceManifestWithVLLM(
			name, namespaceName, "", numOfNode, "Standard_NC6s_v3",
			selector, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic,
			nil, nil, validAdapters,
		)

		createAndValidateWorkspace(workspaceObj)
	})
	return workspaceObj
}

func createMistralWorkspaceWithPresetPublicModeAndVLLM(numOfNode int) *kaitov1alpha1.Workspace {
workspaceObj := &kaitov1alpha1.Workspace{}
By("Creating a workspace CR with Mistral 7B preset public mode and vLLM", func() {
Expand Down
46 changes: 43 additions & 3 deletions test/e2e/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package utils

import (
"bytes"
"context"
"fmt"
"io"
Expand All @@ -17,8 +18,10 @@ import (
"strings"
"time"

pkgscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/tools/remotecommand"

"github.com/kaito-project/kaito/api/v1alpha1"
kaitov1alpha1 "github.com/kaito-project/kaito/api/v1alpha1"
Expand Down Expand Up @@ -46,7 +49,7 @@ var (
// GetEnv returns the value of the environment variable named envVar.
// When the variable is unset or empty it prints a notice to standard output
// and returns the empty string.
func GetEnv(envVar string) string {
env := os.Getenv(envVar)
if env == "" {
fmt.Printf("%s is not set or is empty", envVar)
fmt.Printf("%s is not set or is empty\n", envVar)
return ""
}
return env
Expand Down Expand Up @@ -105,7 +108,7 @@ func GetPodNameForDeployment(coreClient *kubernetes.Clientset, namespace, deploy
return podList.Items[0].Name, nil
}

func GetK8sConfig() (*kubernetes.Clientset, error) {
func GetK8sConfig() (*rest.Config, error) {
var config *rest.Config
var err error

Expand All @@ -123,6 +126,14 @@ func GetK8sConfig() (*kubernetes.Clientset, error) {
}
}

return config, err
}

func GetK8sClientset() (*kubernetes.Clientset, error) {
config, err := GetK8sConfig()
if err != nil {
log.Fatalf("Failed to get k8s config: %v", err)
}
coreClient, err := kubernetes.NewForConfig(config)
if err != nil {
log.Fatalf("Failed to create core client: %v", err)
Expand Down Expand Up @@ -152,8 +163,37 @@ func GetPodLogs(coreClient *kubernetes.Clientset, namespace, podName, containerN
return buf.String(), nil
}

// ExecSync runs a command inside a pod through the pod "exec" subresource and
// blocks until the command finishes or ctx is done.
//
// config and coreClient must target the same cluster. options selects the
// container, the command, and which streams to attach; stdout and stderr are
// captured in memory.
//
// It returns the captured stdout on success. An error is returned when the
// SPDY executor cannot be created or when streaming fails; the streaming
// error carries the captured stderr for diagnosis. The executor is expected
// to report a non-zero exit status as a streaming error (see the client-go
// remotecommand documentation).
//
// Output on stderr alone is not treated as a failure: many tools (package
// managers, for instance) write warnings or progress there while still
// exiting successfully, so the executor's error is the only signal used.
func ExecSync(ctx context.Context, config *rest.Config, coreClient *kubernetes.Clientset, namespace, podName string, options v1.PodExecOptions) (string, error) {
	req := coreClient.CoreV1().RESTClient().Post().
		Resource("pods").
		Name(podName).
		Namespace(namespace).
		SubResource("exec").
		VersionedParams(&options, pkgscheme.ParameterCodec)

	exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
	if err != nil {
		return "", fmt.Errorf("failed to initialize SPDY executor: %w", err)
	}

	var stdout, stderr bytes.Buffer
	err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{
		Stdout: &stdout,
		Stderr: &stderr,
	})
	if err != nil {
		return "", fmt.Errorf("failed to execute command: %w, stderr: %q", err, stderr.String())
	}

	return stdout.String(), nil
}

func PrintPodLogsOnFailure(namespace, labelSelector string) {
coreClient, err := GetK8sConfig()
coreClient, err := GetK8sClientset()
if err != nil {
log.Printf("Failed to create core client: %v", err)
}
Expand Down

0 comments on commit 8cee2c0

Please sign in to comment.