From f5343555bd01237f3a304c66a580358fcd239bdf Mon Sep 17 00:00:00 2001 From: Ishaan Sehgal Date: Fri, 28 Jun 2024 13:41:05 -0400 Subject: [PATCH] feat: Add util funcs, update func names, logs, and configs, and ensure the service only for inference workspaces (#485) **Reason for Change**: Rename functions and update configs and services. --- pkg/controllers/workspace_controller.go | 17 +++-- pkg/inference/preset-inferences.go | 3 +- pkg/tuning/preset-tuning.go | 7 +- pkg/tuning/preset-tuning_test.go | 2 +- .../text-generation/requirements.txt | 1 - .../tuning/text-generation/requirements.txt | 1 - test/e2e/preset_test.go | 14 ++-- test/e2e/utils/utils.go | 69 +++++++++++++++++-- test/e2e/webhook_test.go | 8 +-- 9 files changed, 92 insertions(+), 30 deletions(-) diff --git a/pkg/controllers/workspace_controller.go b/pkg/controllers/workspace_controller.go index 05d830b59..cb9cdf034 100644 --- a/pkg/controllers/workspace_controller.go +++ b/pkg/controllers/workspace_controller.go @@ -109,21 +109,20 @@ func (c *WorkspaceReconciler) addOrUpdateWorkspace(ctx context.Context, wObj *ka return reconcile.Result{}, err } - if err := c.ensureService(ctx, wObj); err != nil { - if updateErr := c.updateStatusConditionIfNotMatch(ctx, wObj, kaitov1alpha1.WorkspaceConditionTypeReady, metav1.ConditionFalse, - "workspaceFailed", err.Error()); updateErr != nil { - klog.ErrorS(updateErr, "failed to update workspace status", "workspace", klog.KObj(wObj)) - return reconcile.Result{}, updateErr - } - return reconcile.Result{}, err - } - if wObj.Tuning != nil { if err = c.applyTuning(ctx, wObj); err != nil { return reconcile.Result{}, err } } if wObj.Inference != nil { + if err := c.ensureService(ctx, wObj); err != nil { + if updateErr := c.updateStatusConditionIfNotMatch(ctx, wObj, kaitov1alpha1.WorkspaceConditionTypeReady, metav1.ConditionFalse, + "workspaceFailed", err.Error()); updateErr != nil { + klog.ErrorS(updateErr, "failed to update workspace status", "workspace", klog.KObj(wObj)) + return 
reconcile.Result{}, updateErr + } + return reconcile.Result{}, err + } if err = c.applyInference(ctx, wObj); err != nil { if updateErr := c.updateStatusConditionIfNotMatch(ctx, wObj, kaitov1alpha1.WorkspaceConditionTypeReady, metav1.ConditionFalse, "workspaceFailed", err.Error()); updateErr != nil { diff --git a/pkg/inference/preset-inferences.go b/pkg/inference/preset-inferences.go index 8ffa22e79..21437139d 100644 --- a/pkg/inference/preset-inferences.go +++ b/pkg/inference/preset-inferences.go @@ -96,7 +96,8 @@ func updateTorchParamsForDistributedInference(ctx context.Context, kubeClient cl func GetInferenceImageInfo(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace, presetObj *model.PresetParam) (string, []corev1.LocalObjectReference) { imagePullSecretRefs := []corev1.LocalObjectReference{} - if presetObj.ImageAccessMode == string(kaitov1alpha1.ModelImageAccessModePrivate) { + // Check if the workspace preset's access mode is private + if string(workspaceObj.Inference.Preset.AccessMode) == string(kaitov1alpha1.ModelImageAccessModePrivate) { imageName := workspaceObj.Inference.Preset.PresetOptions.Image for _, secretName := range workspaceObj.Inference.Preset.PresetOptions.ImagePullSecrets { imagePullSecretRefs = append(imagePullSecretRefs, corev1.LocalObjectReference{Name: secretName}) diff --git a/pkg/tuning/preset-tuning.go b/pkg/tuning/preset-tuning.go index afb6d3d1d..a92ea7126 100644 --- a/pkg/tuning/preset-tuning.go +++ b/pkg/tuning/preset-tuning.go @@ -63,7 +63,8 @@ func getInstanceGPUCount(sku string) int { func GetTuningImageInfo(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace, presetObj *model.PresetParam) (string, []corev1.LocalObjectReference) { imagePullSecretRefs := []corev1.LocalObjectReference{} - if presetObj.ImageAccessMode == string(kaitov1alpha1.ModelImageAccessModePrivate) { + // Check if the workspace preset's access mode is private + if string(workspaceObj.Tuning.Preset.AccessMode) == 
string(kaitov1alpha1.ModelImageAccessModePrivate) { imageName := workspaceObj.Tuning.Preset.PresetOptions.Image for _, secretName := range workspaceObj.Tuning.Preset.PresetOptions.ImagePullSecrets { imagePullSecretRefs = append(imagePullSecretRefs, corev1.LocalObjectReference{Name: secretName}) @@ -401,7 +402,11 @@ func handleURLDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Worksp Command: []string{"sh", "-c", ` for url in $DATA_URLS; do filename=$(basename "$url" | sed 's/[?=&]/_/g') + echo "Downloading $url to $DATA_VOLUME_PATH/$filename" curl -sSL $url -o $DATA_VOLUME_PATH/$filename + if [ $? -ne 0 ]; then + echo "Failed to download $url" + fi done `}, VolumeMounts: []corev1.VolumeMount{ diff --git a/pkg/tuning/preset-tuning_test.go b/pkg/tuning/preset-tuning_test.go index 4e26cfcca..ee2dc2477 100644 --- a/pkg/tuning/preset-tuning_test.go +++ b/pkg/tuning/preset-tuning_test.go @@ -382,7 +382,7 @@ func TestHandleURLDataSource(t *testing.T) { }, expectedInitContainerName: "data-downloader", expectedImage: "curlimages/curl", - expectedCommands: "filename=$(basename \"$url\" | sed 's/[?=&]/_/g')\ncurl -sSL $url -o $DATA_VOLUME_PATH/$filename", + expectedCommands: "curl -sSL $url -o $DATA_VOLUME_PATH/$filename", expectedVolumeName: "data-volume", expectedVolumeMountPath: utils.DefaultDataVolumePath, }, diff --git a/presets/inference/text-generation/requirements.txt b/presets/inference/text-generation/requirements.txt index 8fe6b041c..c53b49d42 100644 --- a/presets/inference/text-generation/requirements.txt +++ b/presets/inference/text-generation/requirements.txt @@ -14,7 +14,6 @@ numpy==1.22.4 bitsandbytes==0.42.0 # Less critical, can be latest -deepspeed gputil psutil # For UTs diff --git a/presets/tuning/text-generation/requirements.txt b/presets/tuning/text-generation/requirements.txt index 614b3690b..22c2405d9 100644 --- a/presets/tuning/text-generation/requirements.txt +++ b/presets/tuning/text-generation/requirements.txt @@ -12,7 +12,6 @@ 
peft==0.11.1 bitsandbytes==0.42.0 # Less critical, can be latest -deepspeed loralib einops xformers diff --git a/test/e2e/preset_test.go b/test/e2e/preset_test.go index 0cc9c938d..f5fa94775 100644 --- a/test/e2e/preset_test.go +++ b/test/e2e/preset_test.go @@ -33,13 +33,15 @@ const ( PresetMistral7BModel = "mistral-7b" PresetMistral7BInstructModel = "mistral-7b-instruct" PresetPhi2Model = "phi-2" + PresetPhi3Mini4kModel = "phi-3-mini-4k-instruct" + PresetPhi3Mini128kModel = "phi-3-mini-128k-instruct" ) func createFalconWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Workspace { workspaceObj := &kaitov1alpha1.Workspace{} By("Creating a workspace CR with Falcon 7B preset public mode", func() { uniqueID := fmt.Sprint("preset-", rand.Intn(1000)) - workspaceObj = utils.GenerateWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3", + workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-falcon"}, }, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil) @@ -53,7 +55,7 @@ func createMistralWorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Wo workspaceObj := &kaitov1alpha1.Workspace{} By("Creating a workspace CR with Mistral 7B preset public mode", func() { uniqueID := fmt.Sprint("preset-", rand.Intn(1000)) - workspaceObj = utils.GenerateWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3", + workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-mistral"}, }, nil, PresetMistral7BInstructModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil) @@ -67,7 +69,7 @@ func createPhi2WorkspaceWithPresetPublicMode(numOfNode int) *kaitov1alpha1.Works workspaceObj := 
&kaitov1alpha1.Workspace{} By("Creating a workspace CR with Phi 2 preset public mode", func() { uniqueID := fmt.Sprint("preset-", rand.Intn(1000)) - workspaceObj = utils.GenerateWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC6s_v3", + workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_NC6s_v3", &metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "public-preset-e2e-test-phi-2"}, }, nil, PresetPhi2Model, kaitov1alpha1.ModelImageAccessModePublic, nil, nil) @@ -81,7 +83,7 @@ func createLlama7BWorkspaceWithPresetPrivateMode(registry, registrySecret, image workspaceObj := &kaitov1alpha1.Workspace{} By("Creating a workspace CR with Llama 7B Chat preset private mode", func() { uniqueID := fmt.Sprint("preset-", rand.Intn(1000)) - workspaceObj = utils.GenerateWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2AChat, imageVersion), + workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2AChat, imageVersion), numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-llama-2-7b"}, }, nil, PresetLlama2AChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil) @@ -95,7 +97,7 @@ func createLlama13BWorkspaceWithPresetPrivateMode(registry, registrySecret, imag workspaceObj := &kaitov1alpha1.Workspace{} By("Creating a workspace CR with Llama 13B Chat preset private mode", func() { uniqueID := fmt.Sprint("preset-", rand.Intn(1000)) - workspaceObj = utils.GenerateWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2BChat, imageVersion), + workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, fmt.Sprintf("%s/%s:%s", registry, PresetLlama2BChat, imageVersion), numOfNode, "Standard_NC12s_v3", &metav1.LabelSelector{ MatchLabels: 
map[string]string{"kaito-workspace": "private-preset-e2e-test-llama-2-13b"}, }, nil, PresetLlama2BChat, kaitov1alpha1.ModelImageAccessModePrivate, []string{registrySecret}, nil) @@ -109,7 +111,7 @@ func createCustomWorkspaceWithPresetCustomMode(imageName string, numOfNode int) workspaceObj := &kaitov1alpha1.Workspace{} By("Creating a workspace CR with custom workspace mode", func() { uniqueID := fmt.Sprint("preset-", rand.Intn(1000)) - workspaceObj = utils.GenerateWorkspaceManifest(uniqueID, namespaceName, "", + workspaceObj = utils.GenerateInferenceWorkspaceManifest(uniqueID, namespaceName, "", numOfNode, "Standard_D4s_v3", &metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "private-preset-e2e-test-custom"}, }, nil, "", utils.InferenceModeCustomTemplate, nil, utils.GeneratePodTemplate(uniqueID, namespaceName, imageName, nil)) diff --git a/test/e2e/utils/utils.go b/test/e2e/utils/utils.go index 38388374f..83d55de85 100644 --- a/test/e2e/utils/utils.go +++ b/test/e2e/utils/utils.go @@ -4,16 +4,22 @@ package utils import ( + "context" "fmt" + "io" "io/ioutil" + "math/rand" "os" + "strings" "time" kaitov1alpha1 "github.com/azure/kaito/api/v1alpha1" "github.com/samber/lo" "gopkg.in/yaml.v2" corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" ) const ( @@ -36,6 +42,13 @@ func GetEnv(envVar string) string { return env } +// GenerateRandomString generates a random number between 0 and 1000 and returns it as a string. 
+func GenerateRandomString() string { + rand.Seed(time.Now().UnixNano()) // Seed the random number generator + randomNumber := rand.Intn(1001) // Generate a random number between 0 and 1000 + return fmt.Sprintf("%d", randomNumber) +} + func GetModelConfigInfo(configFilePath string) (map[string]interface{}, error) { var data map[string]interface{} @@ -52,6 +65,50 @@ func GetModelConfigInfo(configFilePath string) (map[string]interface{}, error) { return data, nil } +func GetPodNameForJob(coreClient *kubernetes.Clientset, namespace, jobName string) (string, error) { + podList, err := coreClient.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{ + LabelSelector: fmt.Sprintf("job-name=%s", jobName), + }) + if err != nil { + return "", err + } + + if len(podList.Items) == 0 { + return "", fmt.Errorf("no pods found for job %s", jobName) + } + + return podList.Items[0].Name, nil +} + +func GetPodLogs(coreClient *kubernetes.Clientset, namespace, podName, containerName string) (string, error) { + req := coreClient.CoreV1().Pods(namespace).GetLogs(podName, &v1.PodLogOptions{Container: containerName}) + logs, err := req.Stream(context.Background()) + if err != nil { + return "", err + } + defer logs.Close() + + buf := new(strings.Builder) + _, err = io.Copy(buf, logs) + if err != nil { + return "", err + } + + return buf.String(), nil +} + +func CopySecret(original *corev1.Secret, targetNamespace string) *corev1.Secret { + newSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: original.Name, + Namespace: targetNamespace, + }, + Data: original.Data, + Type: original.Type, + } + return newSecret +} + func ExtractModelVersion(configs map[string]interface{}) (map[string]string, error) { modelsInfo := make(map[string]string) models, ok := configs["models"].([]interface{}) @@ -81,9 +138,9 @@ func ExtractModelVersion(configs map[string]interface{}) (map[string]string, err return modelsInfo, nil } -func GenerateWorkspaceManifest(name, namespace, imageName 
string, resourceCount int, instanceType string, +func GenerateInferenceWorkspaceManifest(name, namespace, imageName string, resourceCount int, instanceType string, labelSelector *metav1.LabelSelector, preferredNodes []string, presetName kaitov1alpha1.ModelName, - inferenceMode kaitov1alpha1.ModelImageAccessMode, imagePullSecret []string, + accessMode kaitov1alpha1.ModelImageAccessMode, imagePullSecret []string, podTemplate *corev1.PodTemplateSpec) *kaitov1alpha1.Workspace { workspace := &kaitov1alpha1.Workspace{ @@ -100,12 +157,12 @@ func GenerateWorkspaceManifest(name, namespace, imageName string, resourceCount } var workspaceInference kaitov1alpha1.InferenceSpec - if inferenceMode == kaitov1alpha1.ModelImageAccessModePublic || - inferenceMode == kaitov1alpha1.ModelImageAccessModePrivate { + if accessMode == kaitov1alpha1.ModelImageAccessModePublic || + accessMode == kaitov1alpha1.ModelImageAccessModePrivate { workspaceInference.Preset = &kaitov1alpha1.PresetSpec{ PresetMeta: kaitov1alpha1.PresetMeta{ Name: presetName, - AccessMode: inferenceMode, + AccessMode: accessMode, }, PresetOptions: kaitov1alpha1.PresetOptions{ Image: imageName, @@ -113,7 +170,7 @@ func GenerateWorkspaceManifest(name, namespace, imageName string, resourceCount }, } } - if inferenceMode == InferenceModeCustomTemplate { + if accessMode == InferenceModeCustomTemplate { workspaceInference.Template = podTemplate } diff --git a/test/e2e/webhook_test.go b/test/e2e/webhook_test.go index 800865844..f2cfe4dc6 100644 --- a/test/e2e/webhook_test.go +++ b/test/e2e/webhook_test.go @@ -18,7 +18,7 @@ import ( var _ = Describe("Workspace Validation Webhook", func() { It("should validate the workspace resource spec at creation ", func() { - workspaceObj := utils.GenerateWorkspaceManifest(fmt.Sprint("webhook-", rand.Intn(1000)), namespaceName, "", 1, "Standard_Bad", + workspaceObj := utils.GenerateInferenceWorkspaceManifest(fmt.Sprint("webhook-", rand.Intn(1000)), namespaceName, "", 1, "Standard_Bad", 
&metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "webhook-e2e-test"}, }, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil) @@ -33,7 +33,7 @@ var _ = Describe("Workspace Validation Webhook", func() { }) It("should validate the workspace inference spec at creation ", func() { - workspaceObj := utils.GenerateWorkspaceManifest(fmt.Sprint("webhook-", rand.Intn(1000)), namespaceName, "", 1, "Standard_NC6", + workspaceObj := utils.GenerateInferenceWorkspaceManifest(fmt.Sprint("webhook-", rand.Intn(1000)), namespaceName, "", 1, "Standard_NC6", &metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "webhook-e2e-test"}, }, nil, "invalid-name", kaitov1alpha1.ModelImageAccessModePublic, nil, nil) @@ -51,7 +51,7 @@ var _ = Describe("Workspace Validation Webhook", func() { //TODO custom template It("should validate the workspace resource spec at update ", func() { - workspaceObj := utils.GenerateWorkspaceManifest(fmt.Sprint("webhook-", rand.Intn(1000)), namespaceName, "", 1, "Standard_NC12s_v3", + workspaceObj := utils.GenerateInferenceWorkspaceManifest(fmt.Sprint("webhook-", rand.Intn(1000)), namespaceName, "", 1, "Standard_NC12s_v3", &metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "webhook-e2e-test"}, }, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil) @@ -94,7 +94,7 @@ var _ = Describe("Workspace Validation Webhook", func() { }) It("should validate the workspace inference spec at update ", func() { - workspaceObj := utils.GenerateWorkspaceManifest(fmt.Sprint("webhook-", rand.Intn(1000)), namespaceName, "", 1, "Standard_NC12s_v3", + workspaceObj := utils.GenerateInferenceWorkspaceManifest(fmt.Sprint("webhook-", rand.Intn(1000)), namespaceName, "", 1, "Standard_NC12s_v3", &metav1.LabelSelector{ MatchLabels: map[string]string{"kaito-workspace": "webhook-e2e-test"}, }, nil, PresetFalcon7BModel, kaitov1alpha1.ModelImageAccessModePublic, nil, nil)