Skip to content

Commit

Permalink
feat: Update CRD to add Volume and ConfigTemplate (#356)
Browse files Browse the repository at this point in the history
**Reason for Change**:
This change removes hostPath field in DataSource and DataDestination
struct. Instead, a standard K8s VolumeSource API is used to specify the
volumes used by the tuning Job. This will provide the highest
flexibility for users to load/store the data. HostPath field is hard to
use in practice.

This change also renames the Config to ConfigTemplate because the final
configmap used by the tuning job will be generated by the controller
based on the template. The new name is clearer.

**Notes for Reviewers**:
We will revisit how to do validation check for the Volume field because
it is a fairly complicated API.
  • Loading branch information
Fei-Guo authored Apr 18, 2024
1 parent e799d62 commit bf4acba
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 125 deletions.
20 changes: 13 additions & 7 deletions api/v1alpha1/workspace_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,11 @@ type DataSource struct {
// URLs specifies the links to the public data sources. E.g., files in a public github repository.
// +optional
URLs []string `json:"urls,omitempty"`
// The directory in the host that contains the data.
// The mounted volume that contains the data.
// +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:Schemaless
// +optional
HostPath string `json:"hostPath,omitempty"`
Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
// The name of the image that contains the source data. The assumption is that the source data locates in the
// `data` directory in the image.
// +optional
Expand All @@ -119,9 +121,11 @@ type DataSource struct {
}

type DataDestination struct {
// The directory in the host that contains the output data.
// The mounted volume that is used to save the output data.
// +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:Schemaless
// +optional
HostPath string `json:"hostPath,omitempty"`
Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
// Name of the image where the output data is pushed to.
// +optional
Image string `json:"image,omitempty"`
Expand All @@ -145,10 +149,12 @@ type TuningSpec struct {
// Method specifies the Parameter-Efficient Fine-Tuning(PEFT) method, such as lora, qlora, used for the tuning.
// +optional
Method TuningMethod `json:"method,omitempty"`
// Config specifies the name of the configmap in the same namespace that contains the arguments used by the tuning method.
// If not specified, a default configmap is used based on the specified method.
// ConfigTemplate specifies the name of the configmap that contains the basic tuning arguments.
// A separate configmap will be generated based on the ConfigTemplate and the preset model name, and used by
// the tuning Job. If specified, the congfigmap needs to be in the same namespace of the workspace custom resource.
// If not specified, a default ConfigTemplate is used based on the specified tuning method.
// +optional
Config string `json:"config,omitempty"`
ConfigTemplate string `json:"configTemplate,omitempty"`
// Input describes the input used by the tuning method.
Input *DataSource `json:"input"`
// Output specified where to store the tuning output.
Expand Down
18 changes: 7 additions & 11 deletions api/v1alpha1/workspace_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,16 +137,16 @@ func (r *DataSource) validateCreate() (errs *apis.FieldError) {
if len(r.URLs) > 0 {
sourcesSpecified++
}
if r.HostPath != "" {
if r.Volume != nil {
sourcesSpecified++
}
if r.Image != "" {
sourcesSpecified++
}

// Ensure exactly one of URLs, HostPath, or Image is specified
// Ensure exactly one of URLs, Volume, or Image is specified
if sourcesSpecified != 1 {
errs = errs.Also(apis.ErrGeneric("Exactly one of URLs, HostPath, or Image must be specified", "URLs", "HostPath", "Image"))
errs = errs.Also(apis.ErrGeneric("Exactly one of URLs, Volume, or Image must be specified", "URLs", "Volume", "Image"))
}

return errs
Expand All @@ -167,9 +167,7 @@ func (r *DataSource) validateUpdate(old *DataSource, isTuning bool) (errs *apis.
if !reflect.DeepEqual(oldURLs, newURLs) {
errs = errs.Also(apis.ErrInvalidValue("URLs field cannot be changed once set", "URLs"))
}
if old.HostPath != r.HostPath {
errs = errs.Also(apis.ErrInvalidValue("HostPath field cannot be changed once set", "HostPath"))
}
// TODO: check if the Volume is changed
if old.Image != r.Image {
errs = errs.Also(apis.ErrInvalidValue("Image field cannot be changed once set", "Image"))
}
Expand All @@ -190,7 +188,7 @@ func (r *DataSource) validateUpdate(old *DataSource, isTuning bool) (errs *apis.

func (r *DataDestination) validateCreate() (errs *apis.FieldError) {
destinationsSpecified := 0
if r.HostPath != "" {
if r.Volume != nil {
destinationsSpecified++
}
if r.Image != "" {
Expand All @@ -199,15 +197,13 @@ func (r *DataDestination) validateCreate() (errs *apis.FieldError) {

// If no destination is specified, return an error
if destinationsSpecified == 0 {
errs = errs.Also(apis.ErrMissingField("At least one of HostPath or Image must be specified"))
errs = errs.Also(apis.ErrMissingField("At least one of Volume or Image must be specified"))
}
return errs
}

func (r *DataDestination) validateUpdate(old *DataDestination) (errs *apis.FieldError) {
if old.HostPath != r.HostPath {
errs = errs.Also(apis.ErrInvalidValue("HostPath field cannot be changed once set", "HostPath"))
}
// TODO: Check if the Volume is changed.
if old.Image != r.Image {
errs = errs.Also(apis.ErrInvalidValue("Image field cannot be changed once set", "Image"))
}
Expand Down
102 changes: 28 additions & 74 deletions api/v1alpha1/workspace_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,8 +649,8 @@ func TestTuningSpecValidateCreate(t *testing.T) {
{
name: "All fields valid",
tuningSpec: &TuningSpec{
Input: &DataSource{Name: "valid-input", HostPath: "valid-input"},
Output: &DataDestination{HostPath: "valid-output"},
Input: &DataSource{Name: "valid-input", Volume: &v1.VolumeSource{}},
Output: &DataDestination{Volume: &v1.VolumeSource{}},
Preset: &PresetSpec{PresetMeta: PresetMeta{Name: ModelName("test-validation")}},
Method: TuningMethodLora,
},
Expand All @@ -660,7 +660,7 @@ func TestTuningSpecValidateCreate(t *testing.T) {
{
name: "Missing Input",
tuningSpec: &TuningSpec{
Output: &DataDestination{HostPath: "valid-output"},
Output: &DataDestination{Volume: &v1.VolumeSource{}},
Preset: &PresetSpec{PresetMeta: PresetMeta{Name: ModelName("test-validation")}},
Method: TuningMethodLora,
},
Expand All @@ -681,7 +681,7 @@ func TestTuningSpecValidateCreate(t *testing.T) {
name: "Missing Preset",
tuningSpec: &TuningSpec{
Input: &DataSource{Name: "valid-input"},
Output: &DataDestination{HostPath: "valid-output"},
Output: &DataDestination{Volume: &v1.VolumeSource{}},
Method: TuningMethodLora,
},
wantErr: true,
Expand All @@ -691,7 +691,7 @@ func TestTuningSpecValidateCreate(t *testing.T) {
name: "Invalid Preset",
tuningSpec: &TuningSpec{
Input: &DataSource{Name: "valid-input"},
Output: &DataDestination{HostPath: "valid-output"},
Output: &DataDestination{Volume: &v1.VolumeSource{}},
Preset: &PresetSpec{PresetMeta: PresetMeta{Name: ModelName("invalid-preset")}},
Method: TuningMethodLora,
},
Expand All @@ -702,7 +702,7 @@ func TestTuningSpecValidateCreate(t *testing.T) {
name: "Invalid Method",
tuningSpec: &TuningSpec{
Input: &DataSource{Name: "valid-input"},
Output: &DataDestination{HostPath: "valid-output"},
Output: &DataDestination{Volume: &v1.VolumeSource{}},
Preset: &PresetSpec{PresetMeta: PresetMeta{Name: ModelName("test-validation")}},
Method: "invalid-method",
},
Expand Down Expand Up @@ -744,42 +744,18 @@ func TestTuningSpecValidateUpdate(t *testing.T) {
name: "No changes",
oldTuning: &TuningSpec{
Input: &DataSource{Name: "input1"},
Output: &DataDestination{HostPath: "path1"},
Output: &DataDestination{Volume: &v1.VolumeSource{}},
Preset: &PresetSpec{PresetMeta: PresetMeta{Name: ModelName("test-validation")}},
Method: TuningMethodLora,
},
newTuning: &TuningSpec{
Input: &DataSource{Name: "input1"},
Output: &DataDestination{HostPath: "path1"},
Output: &DataDestination{Volume: &v1.VolumeSource{}},
Preset: &PresetSpec{PresetMeta: PresetMeta{Name: ModelName("test-validation")}},
Method: TuningMethodLora,
},
expectErrs: false,
},
{
name: "Input changed",
oldTuning: &TuningSpec{
Input: &DataSource{Name: "input", HostPath: "inputpath"},
Output: &DataDestination{HostPath: "outputpath"},
},
newTuning: &TuningSpec{
Input: &DataSource{Name: "input", HostPath: "randompath"},
Output: &DataDestination{HostPath: "outputpath"},
},
expectErrs: true,
errFields: []string{"HostPath"},
},
{
name: "Output changed",
oldTuning: &TuningSpec{
Output: &DataDestination{HostPath: "path1"},
},
newTuning: &TuningSpec{
Output: &DataDestination{HostPath: "path2"},
},
expectErrs: true,
errFields: []string{"Output"},
},
{
name: "Preset changed",
oldTuning: &TuningSpec{
Expand Down Expand Up @@ -839,9 +815,9 @@ func TestDataSourceValidateCreate(t *testing.T) {
wantErr: false,
},
{
name: "HostPath specified only",
name: "Volume specified only",
dataSource: &DataSource{
HostPath: "/data/path",
Volume: &v1.VolumeSource{},
},
wantErr: false,
},
Expand All @@ -856,26 +832,26 @@ func TestDataSourceValidateCreate(t *testing.T) {
name: "None specified",
dataSource: &DataSource{},
wantErr: true,
errField: "Exactly one of URLs, HostPath, or Image must be specified",
errField: "Exactly one of URLs, Volume, or Image must be specified",
},
{
name: "URLs and HostPath specified",
name: "URLs and Volume specified",
dataSource: &DataSource{
URLs: []string{"http://example.com/data"},
HostPath: "/data/path",
URLs: []string{"http://example.com/data"},
Volume: &v1.VolumeSource{},
},
wantErr: true,
errField: "Exactly one of URLs, HostPath, or Image must be specified",
errField: "Exactly one of URLs, Volume, or Image must be specified",
},
{
name: "All fields specified",
dataSource: &DataSource{
URLs: []string{"http://example.com/data"},
HostPath: "/data/path",
Image: "data-image:latest",
URLs: []string{"http://example.com/data"},
Volume: &v1.VolumeSource{},
Image: "data-image:latest",
},
wantErr: true,
errField: "Exactly one of URLs, HostPath, or Image must be specified",
errField: "Exactly one of URLs, Volume, or Image must be specified",
},
}

Expand Down Expand Up @@ -907,13 +883,13 @@ func TestDataSourceValidateUpdate(t *testing.T) {
name: "No changes",
oldSource: &DataSource{
URLs: []string{"http://example.com/data1", "http://example.com/data2"},
HostPath: "/data/path",
Volume: &v1.VolumeSource{},
Image: "data-image:latest",
ImagePullSecrets: []string{"secret1", "secret2"},
},
newSource: &DataSource{
URLs: []string{"http://example.com/data2", "http://example.com/data1"}, // Note the different order, should not matter
HostPath: "/data/path",
Volume: &v1.VolumeSource{},
Image: "data-image:latest",
ImagePullSecrets: []string{"secret2", "secret1"}, // Note the different order, should not matter
},
Expand Down Expand Up @@ -941,17 +917,6 @@ func TestDataSourceValidateUpdate(t *testing.T) {
wantErr: true,
errFields: []string{"URLs"},
},
{
name: "HostPath changed",
oldSource: &DataSource{
HostPath: "/old/path",
},
newSource: &DataSource{
HostPath: "/new/path",
},
wantErr: true,
errFields: []string{"HostPath"},
},
{
name: "Image changed",
oldSource: &DataSource{
Expand Down Expand Up @@ -1007,12 +972,12 @@ func TestDataDestinationValidateCreate(t *testing.T) {
name: "No fields specified",
dataDestination: &DataDestination{},
wantErr: true,
errField: "At least one of HostPath or Image must be specified",
errField: "At least one of Volume or Image must be specified",
},
{
name: "HostPath specified only",
name: "Volume specified only",
dataDestination: &DataDestination{
HostPath: "/data/path",
Volume: &v1.VolumeSource{},
},
wantErr: false,
},
Expand All @@ -1026,8 +991,8 @@ func TestDataDestinationValidateCreate(t *testing.T) {
{
name: "Both fields specified",
dataDestination: &DataDestination{
HostPath: "/data/path",
Image: "data-image:latest",
Volume: &v1.VolumeSource{},
Image: "data-image:latest",
},
wantErr: false,
},
Expand Down Expand Up @@ -1060,28 +1025,17 @@ func TestDataDestinationValidateUpdate(t *testing.T) {
{
name: "No changes",
oldDest: &DataDestination{
HostPath: "/data/old",
Volume: &v1.VolumeSource{},
Image: "old-image:latest",
ImagePushSecret: "old-secret",
},
newDest: &DataDestination{
HostPath: "/data/old",
Volume: &v1.VolumeSource{},
Image: "old-image:latest",
ImagePushSecret: "old-secret",
},
wantErr: false,
},
{
name: "HostPath changed",
oldDest: &DataDestination{
HostPath: "/data/old",
},
newDest: &DataDestination{
HostPath: "/data/new",
},
wantErr: true,
errFields: []string{"HostPath"},
},
{
name: "Image changed",
oldDest: &DataDestination{
Expand Down
12 changes: 11 additions & 1 deletion api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bf4acba

Please sign in to comment.