Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PVC management #347

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions api/lmes/v1alpha1/lmevaljob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,19 @@ func (c *LMEvalContainer) GetResources() *corev1.ResourceRequirements {
return c.Resources
}

// PersistentVolumeClaimManaged holds the settings for a PVC that the operator
// creates itself, as opposed to a pre-existing PVC referenced by name.
type PersistentVolumeClaimManaged struct {
// NOTE(review): presumably a Kubernetes resource quantity such as "5Gi"; the
// code that parses or validates this value is not visible here — confirm
// against the managed-PVC handling in the controller.

// Size is the size of the managed PVC, given as a string.
Size string `json:"size,omitempty"`
}

// Outputs describes where the results of an evaluation job are stored.
// Either an existing PVC (pvcName) or an operator managed PVC (pvcManaged)
// can be given. When both are set, the controller uses the managed PVC and
// ignores the existing PVC name.
type Outputs struct {
// Use an existing PVC to store the outputs
// +optional
PersistentVolumeClaimName *string `json:"pvcName,omitempty"`
// Create an operator managed PVC
// +optional
PersistentVolumeClaimManaged *PersistentVolumeClaimManaged `json:"pvcManaged,omitempty"`
}

type LMEvalPodSpec struct {
// Extra container data for the lm-eval container
// +optional
Expand Down Expand Up @@ -241,6 +254,24 @@ type LMEvalJobSpec struct {
// Suspend keeps the job but without pods. This is intended to be used by the Kueue integration
// +optional
Suspend bool `json:"suspend,omitempty"`
// Outputs specifies storage for evaluation results
// +optional
Outputs *Outputs `json:"outputs,omitempty"`
}

// HasCustomOutput reports whether the LMEvalJobSpec carries an outputs section.
//
// It returns true as soon as Outputs is non-nil, even when neither an existing
// nor a managed PVC is configured inside it; callers that need a usable claim
// must additionally check HasManagedPVC / HasExistingPVC on the Outputs value.
// The method is safe to call on a nil receiver, in which case it returns false.
func (s *LMEvalJobSpec) HasCustomOutput() bool {
	// Guard the receiver so a nil spec reads as "no custom output" instead of
	// panicking on the field access.
	return s != nil && s.Outputs != nil
}

// HasManagedPVC reports whether the outputs request an operator managed PVC,
// i.e. whether PersistentVolumeClaimManaged is set.
//
// Outputs is an optional pointer field on the job spec, so the method is safe
// to call on a nil receiver and returns false in that case.
func (o *Outputs) HasManagedPVC() bool {
	// A nil Outputs means no output storage was configured at all.
	return o != nil && o.PersistentVolumeClaimManaged != nil
}

// HasExistingPVC reports whether the outputs reference an existing PVC by
// name, i.e. whether PersistentVolumeClaimName is set.
//
// Only the presence of the pointer is checked; an empty name still counts as
// set. Outputs is an optional pointer field on the job spec, so the method is
// safe to call on a nil receiver and returns false in that case.
func (o *Outputs) HasExistingPVC() bool {
	// A nil Outputs means no output storage was configured at all.
	return o != nil && o.PersistentVolumeClaimName != nil
}

// LMEvalJobStatus defines the observed state of LMEvalJob
Expand Down
45 changes: 45 additions & 0 deletions api/lmes/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,19 @@ spec:
numFewShot:
description: Sets the number of few-shot examples to place in context
type: integer
outputs:
description: Outputs specifies storage for evaluation results
properties:
pvcManaged:
description: Create an operator managed PVC
properties:
size:
type: string
type: object
pvcName:
description: Use an existing PVC to store the outputs
type: string
type: object
pod:
description: Specify extra information for the lm-eval job's pod
properties:
Expand Down
1 change: 1 addition & 0 deletions controllers/lmes/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
const (
DriverPath = "/bin/driver"
DestDriverPath = "/opt/app-root/src/bin/driver"
OutputPath = "/opt/app-root/src/output"
PodImageKey = "lmes-pod-image"
DriverImageKey = "lmes-driver-image"
PodCheckingIntervalKey = "lmes-pod-checking-interval"
Expand Down
52 changes: 52 additions & 0 deletions controllers/lmes/lmevaljob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ func (q *syncedMap4Reconciler) remove(key string) {
// +kubebuilder:rbac:groups="",resources=pods/exec,verbs=get;list;watch;create;delete
// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;watch;list
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;watch;list
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=list;get;watch
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=list;get;watch;create;update;patch;delete

func (r *LMEvalJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := log.FromContext(ctx)
Expand All @@ -172,6 +174,26 @@ func (r *LMEvalJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return r.handleSuspend(ctx, log, job)
}

// If outputs have been explicitly set
if job.Spec.HasCustomOutput() {
// If managed PVC is set
if job.Spec.Outputs.HasManagedPVC() {
if job.Spec.Outputs.HasExistingPVC() {
log.Info("LMEvalJob has both managed and existing PVCs defined. Existing PVC configuration will be ignored.")
}
err := r.handleManagedPVC(ctx, log, job)
if err != nil {
return ctrl.Result{}, err
}
} else if job.Spec.Outputs.HasExistingPVC() {
err := r.handleExistingPVC(ctx, log, job)
if err != nil {
return ctrl.Result{}, err
}
}
}
log.Info("Continuing after PVC")

// Handle the job based on its state
switch job.Status.State {
case lmesv1alpha1.NewJobState:
Expand Down Expand Up @@ -631,6 +653,15 @@ func createPod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo
},
}

if job.Spec.HasCustomOutput() {
outputPVCMount := corev1.VolumeMount{
Name: "outputs",
MountPath: OutputPath,
}
volumeMounts = append(volumeMounts, outputPVCMount)

}

var volumes = []corev1.Volume{
{
Name: "shared", VolumeSource: corev1.VolumeSource{
Expand All @@ -639,6 +670,27 @@ func createPod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo
},
}

if job.Spec.HasCustomOutput() {

var claimName string
if job.Spec.Outputs.HasManagedPVC() {
claimName = generateManagedPVCName(job)
} else if job.Spec.Outputs.HasExistingPVC() {
claimName = *job.Spec.Outputs.PersistentVolumeClaimName
}

outputPVC := corev1.Volume{
Name: "outputs",
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: claimName,
ReadOnly: false,
},
},
}
volumes = append(volumes, outputPVC)
}

volumes = append(volumes, job.Spec.Pod.GetVolumes()...)
volumeMounts = append(volumeMounts, job.Spec.Pod.GetContainer().GetVolumMounts()...)
labels := getPodLabels(job.Labels, log)
Expand Down
Loading
Loading