Skip to content

Commit

Permalink
feat: Setup Preset Tuning Util Functions and miscellaneous validation…
Browse files Browse the repository at this point in the history
…/logging - Part 7 (#358)

**Reason for Change**:
This PR introduces some of the util functions that are to be used by the
function

func CreatePresetTuning(ctx context.Context, workspaceObj
*kaitov1alpha1.Workspace,
tuningObj *model.PresetParam, kubeClient client.Client) (client.Object,
error)
	

This function will be in charge of handling launching the training job
using the dataset source and uploading results to dataset destination.
It is also in charge of ensuring configmap tuning parameters are in the
right namespace so they are ready to be used by the fine_tuning_api.py
file.
  • Loading branch information
ishaansehgal99 authored Apr 25, 2024
1 parent 6be8a0d commit 0b63598
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 5 deletions.
15 changes: 15 additions & 0 deletions api/v1alpha1/workspace_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"context"
"fmt"
"reflect"
"regexp"
"sort"
"strings"

Expand Down Expand Up @@ -162,7 +163,12 @@ func (r *DataSource) validateCreate() (errs *apis.FieldError) {
if r.Volume != nil {
sourcesSpecified++
}
// Regex checks for a / and a colon followed by a tag
if r.Image != "" {
re := regexp.MustCompile(`^(.+/[^:/]+):([^:/]+)$`)
if !re.MatchString(r.Image) {
errs = errs.Also(apis.ErrInvalidValue("Invalid image format, require full input image URL", "Image"))
}
sourcesSpecified++
}

Expand Down Expand Up @@ -214,6 +220,15 @@ func (r *DataDestination) validateCreate() (errs *apis.FieldError) {
destinationsSpecified++
}
if r.Image != "" {
// Regex checks for a / and a colon followed by a tag
re := regexp.MustCompile(`^(.+/[^:/]+):([^:/]+)$`)
if !re.MatchString(r.Image) {
errs = errs.Also(apis.ErrInvalidValue("Invalid image format, require full output image URL", "Image"))
}
// Cloud Provider requires credentials to push image
if r.ImagePushSecret == "" {
errs = errs.Also(apis.ErrMissingField("Must specify imagePushSecret with destination image"))
}
destinationsSpecified++
}

Expand Down
9 changes: 5 additions & 4 deletions api/v1alpha1/workspace_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,7 @@ func TestDataSourceValidateCreate(t *testing.T) {
{
name: "Image specified only",
dataSource: &DataSource{
Image: "data-image:latest",
Image: "aimodels.azurecr.io/data-image:latest",
},
wantErr: false,
},
Expand All @@ -913,7 +913,7 @@ func TestDataSourceValidateCreate(t *testing.T) {
dataSource: &DataSource{
URLs: []string{"http://example.com/data"},
Volume: &v1.VolumeSource{},
Image: "data-image:latest",
Image: "aimodels.azurecr.io/data-image:latest",
},
wantErr: true,
errField: "Exactly one of URLs, Volume, or Image must be specified",
Expand Down Expand Up @@ -1049,15 +1049,16 @@ func TestDataDestinationValidateCreate(t *testing.T) {
{
name: "Image specified only",
dataDestination: &DataDestination{
Image: "data-image:latest",
Image: "aimodels.azurecr.io/data-image:latest",
ImagePushSecret: "imagePushSecret",
},
wantErr: false,
},
{
name: "Both fields specified",
dataDestination: &DataDestination{
Volume: &v1.VolumeSource{},
Image: "data-image:latest",
Image: "aimodels.azurecr.io/data-image:latest",
ImagePushSecret: "imagePushSecret",
},
wantErr: false,
Expand Down
2 changes: 1 addition & 1 deletion charts/kaito/workspace/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ rules:
verbs: ["get","list","watch","create", "update", "patch" ]
- apiGroups: [ "" ]
resources: [ "configmaps" ]
verbs: [ "get","list","watch" ]
verbs: [ "get","list","watch","create", "delete" ]
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get","list","watch","update", "patch"]
Expand Down
8 changes: 8 additions & 0 deletions pkg/resources/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
Expand All @@ -22,6 +23,8 @@ func CreateResource(ctx context.Context, resource client.Object, kubeClient clie
klog.InfoS("CreateStatefulSet", "statefulset", klog.KObj(r))
case *corev1.Service:
klog.InfoS("CreateService", "service", klog.KObj(r))
case *corev1.ConfigMap:
klog.InfoS("CreateConfigMap", "configmap", klog.KObj(r))
}

// Create the resource.
Expand Down Expand Up @@ -76,6 +79,11 @@ func CheckResourceStatus(obj client.Object, kubeClient client.Client, timeoutDur
klog.InfoS("statefulset status is ready", "statefulset", k8sResource.Name)
return nil
}
case *batchv1.Job:
klog.InfoS("checking job status", "name", k8sResource.Name, "namespace", k8sResource.Namespace, "succeeded", k8sResource.Status.Succeeded, "active", k8sResource.Status.Active, "failed", k8sResource.Status.Failed)
if k8sResource.Status.Failed == 0 {
return nil
}
default:
return fmt.Errorf("unsupported resource type")
}
Expand Down
71 changes: 71 additions & 0 deletions pkg/tuning/preset-tuning.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,82 @@ package tuning

import (
"context"
"fmt"
"os"

kaitov1alpha1 "github.com/azure/kaito/api/v1alpha1"
"github.com/azure/kaito/pkg/model"
"github.com/azure/kaito/pkg/resources"
"github.com/azure/kaito/pkg/utils"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// GetInstanceGPUCount reports the GPU count recorded for sku in
// kaitov1alpha1.SupportedGPUConfigs. A SKU that is absent from that table
// yields 1.
func GetInstanceGPUCount(sku string) int {
	if cfg, ok := kaitov1alpha1.SupportedGPUConfigs[sku]; ok {
		return cfg.GPUCount
	}
	// Unknown SKU: fall back to a single GPU.
	return 1
}

// GetTuningImageInfo builds the tuning image reference in the form
// "<registry>/kaito-tuning-<preset name>:<tag>". The registry comes from the
// PRESET_REGISTRY_NAME environment variable, the preset name from
// wObj.Tuning.Preset.Name and the tag from presetObj.Tag.
//
// ctx is accepted but not used by the current implementation.
func GetTuningImageInfo(ctx context.Context, wObj *kaitov1alpha1.Workspace, presetObj *model.PresetParam) string {
	registry := os.Getenv("PRESET_REGISTRY_NAME")
	imageName := "kaito-tuning-" + string(wObj.Tuning.Preset.Name)
	return fmt.Sprintf("%s/%s:%s", registry, imageName, presetObj.Tag)
}

// GetDataSrcImageInfo returns the tuning input image together with one
// LocalObjectReference per entry of wObj.Tuning.Input.ImagePullSecrets, in the
// same order. The returned slice is non-nil even when no secrets are listed.
//
// ctx is accepted but not used by the current implementation.
func GetDataSrcImageInfo(ctx context.Context, wObj *kaitov1alpha1.Workspace) (string, []corev1.LocalObjectReference) {
	input := wObj.Tuning.Input
	pullSecretRefs := make([]corev1.LocalObjectReference, 0, len(input.ImagePullSecrets))
	for _, name := range input.ImagePullSecrets {
		pullSecretRefs = append(pullSecretRefs, corev1.LocalObjectReference{Name: name})
	}
	return input.Image, pullSecretRefs
}

// GetDataDestImageInfo returns the tuning output image together with a
// single-element slice referencing wObj.Tuning.Output.ImagePushSecret. The
// slice always has exactly one entry, even if the secret name is empty.
//
// ctx is accepted but not used by the current implementation.
func GetDataDestImageInfo(ctx context.Context, wObj *kaitov1alpha1.Workspace) (string, []corev1.LocalObjectReference) {
	output := wObj.Tuning.Output
	pushSecret := corev1.LocalObjectReference{Name: output.ImagePushSecret}
	return output.Image, []corev1.LocalObjectReference{pushSecret}
}

// EnsureTuningConfigMap makes sure the ConfigMap named by
// workspaceObj.Tuning.ConfigTemplate is present in the workspace's namespace.
//
// If a ConfigMap with that name already exists in the workspace namespace,
// nothing is changed. Otherwise the ConfigMap of the same name is read from
// the release namespace (as reported by utils.GetReleaseNamespace), retargeted
// at the workspace namespace and created there.
//
// An error is returned when the release namespace cannot be determined, when
// the lookup in the workspace namespace fails for any reason other than "not
// found", when the template ConfigMap cannot be read, or when the create call
// fails. Wrapped errors use %w so callers can still inspect the cause.
//
// tuningObj is accepted but not used by the current implementation.
func EnsureTuningConfigMap(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace,
	tuningObj *model.PresetParam, kubeClient client.Client) error {
	// Copy Configmap from helm chart configmap into workspace
	releaseNamespace, err := utils.GetReleaseNamespace()
	if err != nil {
		return fmt.Errorf("failed to get release namespace: %w", err)
	}

	configName := workspaceObj.Tuning.ConfigTemplate
	targetNamespace := workspaceObj.Namespace

	existingCM := &corev1.ConfigMap{}
	err = resources.GetResource(ctx, configName, targetNamespace, kubeClient, existingCM)
	if err == nil {
		// klog.Info does not interpret format verbs, so log structured
		// key/value pairs instead of a printf-style message.
		klog.InfoS("ConfigMap already exists in target namespace, no action taken",
			"configmap", configName, "namespace", targetNamespace)
		return nil
	}
	if !errors.IsNotFound(err) {
		return err
	}

	// Not found in the workspace namespace: fetch the template to copy.
	templateCM := &corev1.ConfigMap{}
	err = resources.GetResource(ctx, configName, releaseNamespace, kubeClient, templateCM)
	if err != nil {
		return fmt.Errorf("failed to get ConfigMap from template namespace: %w", err)
	}

	templateCM.Namespace = targetNamespace
	templateCM.ResourceVersion = "" // Clear metadata not needed for creation
	templateCM.UID = ""             // Clear UID

	// TODO: Any Custom Preset override logic for the configmap can go here
	err = resources.CreateResource(ctx, templateCM, kubeClient)
	if err != nil {
		return fmt.Errorf("failed to create ConfigMap in target namespace, %s: %w", targetNamespace, err)
	}

	return nil
}

func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace,
tuningObj *model.PresetParam, kubeClient client.Client) (client.Object, error) {
// TODO
Expand Down

0 comments on commit 0b63598

Please sign in to comment.