diff --git a/apis/components/v1/kueue_types.go b/apis/components/v1/kueue_types.go index f8a94829b6c..1d07d1b99f0 100644 --- a/apis/components/v1/kueue_types.go +++ b/apis/components/v1/kueue_types.go @@ -21,26 +21,21 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! -// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. - -// KueueSpec defines the desired state of Kueue -type KueueSpec struct { - // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster - // Important: Run "make" to regenerate code after modifying this file - - // Foo is an example field of Kueue. Edit kueue_types.go to remove/update - Foo string `json:"foo,omitempty"` -} +const ( + KueueComponentName = "kueue" + // KueueInstanceName must stay in sync with the name required by the XValidation rule below. + KueueInstanceName = "default-kueue" + KueueKind = "Kueue" +) -// KueueStatus defines the observed state of Kueue -type KueueStatus struct { - components.Status `json:",inline"` -} +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
// +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster +// +kubebuilder:validation:XValidation:rule="self.metadata.name == 'default-kueue'",message="Kueue name must be default-kueue" +// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status`,description="Ready" +// +kubebuilder:printcolumn:name="Reason",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].reason`,description="Reason" // Kueue is the Schema for the kueues API type Kueue struct { @@ -51,16 +46,21 @@ type Kueue struct { Status KueueStatus `json:"status,omitempty"` } -func (c *Kueue) GetDevFlags() *components.DevFlags { - return nil +// KueueSpec defines the desired state of Kueue +type KueueSpec struct { + KueueCommonSpec `json:",inline"` } -func (c *Kueue) GetStatus() *components.Status { - return &c.Status.Status +type KueueCommonSpec struct { + components.DevFlagsSpec `json:",inline"` } -// +kubebuilder:object:root=true +// KueueStatus defines the observed state of Kueue +type KueueStatus struct { + components.Status `json:",inline"` +} +// +kubebuilder:object:root=true // KueueList contains a list of Kueue type KueueList struct { metav1.TypeMeta `json:",inline"` @@ -71,3 +71,17 @@ type KueueList struct { func init() { SchemeBuilder.Register(&Kueue{}, &KueueList{}) } + +func (c *Kueue) GetDevFlags() *components.DevFlags { + return c.Spec.DevFlags +} +func (c *Kueue) GetStatus() *components.Status { + return &c.Status.Status +} + +// DSCKueue contains all the configuration exposed in DSC instance for Kueue component +type DSCKueue struct { + components.ManagementSpec `json:",inline"` + // configuration fields common across components + KueueCommonSpec `json:",inline"` +} diff --git a/apis/components/v1/zz_generated.deepcopy.go b/apis/components/v1/zz_generated.deepcopy.go index a58adc68d27..11e0f992750 100644 --- a/apis/components/v1/zz_generated.deepcopy.go +++ 
b/apis/components/v1/zz_generated.deepcopy.go @@ -131,6 +131,23 @@ func (in *DSCDashboard) DeepCopy() *DSCDashboard { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DSCKueue) DeepCopyInto(out *DSCKueue) { + *out = *in + out.ManagementSpec = in.ManagementSpec + in.KueueCommonSpec.DeepCopyInto(&out.KueueCommonSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DSCKueue. +func (in *DSCKueue) DeepCopy() *DSCKueue { + if in == nil { + return nil + } + out := new(DSCKueue) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DSCModelRegistry) DeepCopyInto(out *DSCModelRegistry) { *out = *in @@ -489,7 +506,7 @@ func (in *Kueue) DeepCopyInto(out *Kueue) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) in.Status.DeepCopyInto(&out.Status) } @@ -511,6 +528,22 @@ func (in *Kueue) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KueueCommonSpec) DeepCopyInto(out *KueueCommonSpec) { + *out = *in + in.DevFlagsSpec.DeepCopyInto(&out.DevFlagsSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KueueCommonSpec. +func (in *KueueCommonSpec) DeepCopy() *KueueCommonSpec { + if in == nil { + return nil + } + out := new(KueueCommonSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *KueueList) DeepCopyInto(out *KueueList) { *out = *in @@ -546,6 +579,7 @@ func (in *KueueList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *KueueSpec) DeepCopyInto(out *KueueSpec) { *out = *in + in.KueueCommonSpec.DeepCopyInto(&out.KueueCommonSpec) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KueueSpec. diff --git a/apis/datasciencecluster/v1/datasciencecluster_types.go b/apis/datasciencecluster/v1/datasciencecluster_types.go index bcd0afcdbcb..e07464c1bef 100644 --- a/apis/datasciencecluster/v1/datasciencecluster_types.go +++ b/apis/datasciencecluster/v1/datasciencecluster_types.go @@ -29,7 +29,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/codeflare" "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" - "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -64,7 +63,7 @@ type Components struct { Kserve kserve.Kserve `json:"kserve,omitempty"` // Kueue component configuration. - Kueue kueue.Kueue `json:"kueue,omitempty"` + Kueue componentsv1.DSCKueue `json:"kueue,omitempty"` // CodeFlare component configuration. // If CodeFlare Operator has been installed in the cluster, it should be uninstalled first before enabled component. 
diff --git a/bundle/manifests/components.opendatahub.io_kueues.yaml b/bundle/manifests/components.opendatahub.io_kueues.yaml index a9916e93ac6..c1a88ed90c8 100644 --- a/bundle/manifests/components.opendatahub.io_kueues.yaml +++ b/bundle/manifests/components.opendatahub.io_kueues.yaml @@ -14,7 +14,16 @@ spec: singular: kueue scope: Cluster versions: - - name: v1 + - additionalPrinterColumns: + - description: Ready + jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - description: Reason + jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Reason + type: string + name: v1 schema: openAPIV3Schema: description: Kueue is the Schema for the kueues API @@ -39,10 +48,32 @@ spec: spec: description: KueueSpec defines the desired state of Kueue properties: - foo: - description: Foo is an example field of Kueue. Edit kueue_types.go - to remove/update - type: string + devFlags: + description: Add developer fields + properties: + manifests: + description: List of custom manifests for the given component + items: + properties: + contextDir: + default: manifests + description: contextDir is the relative path to the folder + containing manifests in a repository, default value "manifests" + type: string + sourcePath: + default: "" + description: 'sourcePath is the subpath within contextDir + where kustomize builds start. Examples include any sub-folder + or path: `base`, `overlays/dev`, `default`, `odh` etc.' + type: string + uri: + default: "" + description: uri is the URI point to a git repo with tag/branch. + e.g. 
https://github.com/org/repo/tarball/ + type: string + type: object + type: array + type: object type: object status: description: KueueStatus defines the observed state of Kueue @@ -110,6 +141,9 @@ spec: type: string type: object type: object + x-kubernetes-validations: + - message: Kueue name must be default-kueue + rule: self.metadata.name == 'default-kueue' served: true storage: true subresources: diff --git a/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml b/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml index d55386df0e6..1989bdc167f 100644 --- a/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml +++ b/bundle/manifests/opendatahub-operator.clusterserviceversion.yaml @@ -103,7 +103,7 @@ metadata: categories: AI/Machine Learning, Big Data certified: "False" containerImage: quay.io/opendatahub/opendatahub-operator:v2.19.0 - createdAt: "2024-10-30T14:18:10Z" + createdAt: "2024-11-01T10:08:53Z" olm.skipRange: '>=1.0.0 <2.19.0' operators.operatorframework.io/builder: operator-sdk-v1.31.0 operators.operatorframework.io/internal-objects: '["featuretrackers.features.opendatahub.io", @@ -837,7 +837,6 @@ spec: - prometheuses - prometheuses/finalizers - prometheuses/status - - prometheusrules - thanosrulers - thanosrulers/finalizers - thanosrulers/status @@ -859,6 +858,17 @@ spec: - patch - update - watch + - apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - create + - delete + - deletecollection + - get + - list + - patch - apiGroups: - monitoring.coreos.com resources: diff --git a/components/kueue/kueue.go b/components/kueue/kueue.go deleted file mode 100644 index ec609317092..00000000000 --- a/components/kueue/kueue.go +++ /dev/null @@ -1,110 +0,0 @@ -// +groupName=datasciencecluster.opendatahub.io -package kueue - -import ( - "context" - "fmt" - "path/filepath" - - "github.com/go-logr/logr" - operatorv1 "github.com/openshift/api/operator/v1" - metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - logf "sigs.k8s.io/controller-runtime/pkg/log" - - dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" - "github.com/opendatahub-io/opendatahub-operator/v2/components" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" -) - -var ( - ComponentName = "kueue" - Path = deploy.DefaultManifestPath + "/" + ComponentName + "/rhoai" // same path for both odh and rhoai -) - -// Verifies that Kueue implements ComponentInterface. -var _ components.ComponentInterface = (*Kueue)(nil) - -// Kueue struct holds the configuration for the Kueue component. -// +kubebuilder:object:generate=true -type Kueue struct { - components.Component `json:""` -} - -func (k *Kueue) Init(ctx context.Context, _ cluster.Platform) error { - log := logf.FromContext(ctx).WithName(ComponentName) - - var imageParamMap = map[string]string{ - "odh-kueue-controller-image": "RELATED_IMAGE_ODH_KUEUE_CONTROLLER_IMAGE", // new kueue image - } - - if err := deploy.ApplyParams(Path, imageParamMap); err != nil { - log.Error(err, "failed to update image", "path", Path) - } - - return nil -} - -func (k *Kueue) OverrideManifests(ctx context.Context, _ cluster.Platform) error { - // If devflags are set, update default manifests path - if len(k.DevFlags.Manifests) != 0 { - manifestConfig := k.DevFlags.Manifests[0] - if err := deploy.DownloadManifests(ctx, ComponentName, manifestConfig); err != nil { - return err - } - // If overlay is defined, update paths - defaultKustomizePath := "rhoai" - if manifestConfig.SourcePath != "" { - defaultKustomizePath = manifestConfig.SourcePath - } - Path = filepath.Join(deploy.DefaultManifestPath, ComponentName, defaultKustomizePath) - } - - return nil -} - -func (k *Kueue) GetComponentName() string { - return ComponentName -} - -func (k *Kueue) ReconcileComponent(ctx context.Context, cli 
client.Client, l logr.Logger, - owner metav1.Object, dscispec *dsciv1.DSCInitializationSpec, platform cluster.Platform, _ bool) error { - enabled := k.GetManagementState() == operatorv1.Managed - monitoringEnabled := dscispec.Monitoring.ManagementState == operatorv1.Managed - if enabled { - if k.DevFlags != nil { - // Download manifests and update paths - if err := k.OverrideManifests(ctx, platform); err != nil { - return err - } - } - } - // Deploy Kueue Operator - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, Path, dscispec.ApplicationsNamespace, ComponentName, enabled); err != nil { - return fmt.Errorf("failed to apply manifetss %s: %w", Path, err) - } - l.Info("apply manifests done") - - if enabled { - if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentName, dscispec.ApplicationsNamespace, 20, 2); err != nil { - return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err) - } - } - - // CloudService Monitoring handling - if platform == cluster.ManagedRhods { - if err := k.UpdatePrometheusConfig(cli, l, enabled && monitoringEnabled, ComponentName); err != nil { - return err - } - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, - filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"), - dscispec.Monitoring.Namespace, - "prometheus", true); err != nil { - return err - } - l.Info("updating SRE monitoring done") - } - - return nil -} diff --git a/components/kueue/zz_generated.deepcopy.go b/components/kueue/zz_generated.deepcopy.go deleted file mode 100644 index 9ab2279f9bf..00000000000 --- a/components/kueue/zz_generated.deepcopy.go +++ /dev/null @@ -1,39 +0,0 @@ -//go:build !ignore_autogenerated - -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Code generated by controller-gen. DO NOT EDIT. - -package kueue - -import () - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Kueue) DeepCopyInto(out *Kueue) { - *out = *in - in.Component.DeepCopyInto(&out.Component) -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Kueue. -func (in *Kueue) DeepCopy() *Kueue { - if in == nil { - return nil - } - out := new(Kueue) - in.DeepCopyInto(out) - return out -} diff --git a/config/crd/bases/components.opendatahub.io_kueues.yaml b/config/crd/bases/components.opendatahub.io_kueues.yaml index 6822de59481..52f1b91a5a9 100644 --- a/config/crd/bases/components.opendatahub.io_kueues.yaml +++ b/config/crd/bases/components.opendatahub.io_kueues.yaml @@ -14,7 +14,16 @@ spec: singular: kueue scope: Cluster versions: - - name: v1 + - additionalPrinterColumns: + - description: Ready + jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - description: Reason + jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Reason + type: string + name: v1 schema: openAPIV3Schema: description: Kueue is the Schema for the kueues API @@ -39,10 +48,32 @@ spec: spec: description: KueueSpec defines the desired state of Kueue properties: - foo: - description: Foo is an example field of Kueue. 
Edit kueue_types.go - to remove/update - type: string + devFlags: + description: Add developer fields + properties: + manifests: + description: List of custom manifests for the given component + items: + properties: + contextDir: + default: manifests + description: contextDir is the relative path to the folder + containing manifests in a repository, default value "manifests" + type: string + sourcePath: + default: "" + description: 'sourcePath is the subpath within contextDir + where kustomize builds start. Examples include any sub-folder + or path: `base`, `overlays/dev`, `default`, `odh` etc.' + type: string + uri: + default: "" + description: uri is the URI point to a git repo with tag/branch. + e.g. https://github.com/org/repo/tarball/ + type: string + type: object + type: array + type: object type: object status: description: KueueStatus defines the observed state of Kueue @@ -110,6 +141,9 @@ spec: type: string type: object type: object + x-kubernetes-validations: + - message: Kueue name must be default-kueue + rule: self.metadata.name == 'default-kueue' served: true storage: true subresources: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 5eb6b30d620..40a8cc1e810 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -589,7 +589,6 @@ rules: - prometheuses - prometheuses/finalizers - prometheuses/status - - prometheusrules - thanosrulers - thanosrulers/finalizers - thanosrulers/status @@ -611,6 +610,17 @@ rules: - patch - update - watch +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - create + - delete + - deletecollection + - get + - list + - patch - apiGroups: - monitoring.coreos.com resources: diff --git a/controllers/components/kueue/kueue.go b/controllers/components/kueue/kueue.go new file mode 100644 index 00000000000..bf0da0a024a --- /dev/null +++ b/controllers/components/kueue/kueue.go @@ -0,0 +1,60 @@ +package kueue + +import ( + "fmt" + + operatorv1 
"github.com/openshift/api/operator/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations" +) + +const ( + ComponentName = componentsv1.KueueComponentName +) + +var ( + DefaultPath = odhdeploy.DefaultManifestPath + "/" + ComponentName + "/rhoai" // same path for both odh and rhoai +) + +// GetComponentCR builds the Kueue component CR from the Kueue settings of the given DataScienceCluster. +func GetComponentCR(dsc *dscv1.DataScienceCluster) *componentsv1.Kueue { + kueueAnnotations := make(map[string]string) + switch dsc.Spec.Components.Kueue.ManagementState { + case operatorv1.Managed, operatorv1.Removed: + kueueAnnotations[annotations.ManagementStateAnnotation] = string(dsc.Spec.Components.Kueue.ManagementState) + default: // any other state (e.g. Force, Unmanaged) is not supported yet and is annotated as "Unknown" + kueueAnnotations[annotations.ManagementStateAnnotation] = "Unknown" + } + + return &componentsv1.Kueue{ + TypeMeta: metav1.TypeMeta{ + Kind: componentsv1.KueueKind, + APIVersion: componentsv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: componentsv1.KueueInstanceName, + Annotations: kueueAnnotations, + }, + Spec: componentsv1.KueueSpec{ + KueueCommonSpec: dsc.Spec.Components.Kueue.KueueCommonSpec, + }, + } +} + +// Init applies the Kueue image parameter map to the manifests under DefaultPath.
+func Init(platform cluster.Platform) error { + imageParamMap := map[string]string{ + "odh-kueue-controller-image": "RELATED_IMAGE_ODH_KUEUE_CONTROLLER_IMAGE", + } + + if err := odhdeploy.ApplyParams(DefaultPath, imageParamMap); err != nil { + return fmt.Errorf("failed to update images on path %s: %w", DefaultPath, err) + } + + return nil +} diff --git a/controllers/components/kueue/kueue_controller.go b/controllers/components/kueue/kueue_controller.go index 5fb5996f6d1..ceb6e6f7222 100644 --- a/controllers/components/kueue/kueue_controller.go +++ b/controllers/components/kueue/kueue_controller.go @@ -19,40 +19,67 @@ package kueue import ( "context" - "k8s.io/apimachinery/pkg/runtime" + promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/builder" componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/updatestatus" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" ) -// KueueReconciler reconciles a Kueue object.
-type KueueReconciler struct { - client.Client - Scheme *runtime.Scheme -} +func NewComponentReconciler(ctx context.Context, mgr ctrl.Manager) error { + _, err := reconciler.ComponentReconcilerFor( + mgr, + componentsv1.KueueInstanceName, + &componentsv1.Kueue{}, + ). + // resource kinds registered with the component reconciler via Owns(); Deployments use a dedicated predicate + Owns(&corev1.ConfigMap{}). + Owns(&corev1.Secret{}). + Owns(&rbacv1.ClusterRoleBinding{}). + Owns(&rbacv1.ClusterRole{}). + Owns(&rbacv1.Role{}). + Owns(&rbacv1.RoleBinding{}). + Owns(&corev1.ServiceAccount{}). + Owns(&corev1.Service{}). + Owns(&networkingv1.NetworkPolicy{}). + Owns(&promv1.PodMonitor{}). + Owns(&promv1.PrometheusRule{}). + Owns(&admissionregistrationv1.MutatingWebhookConfiguration{}). + Owns(&admissionregistrationv1.ValidatingWebhookConfiguration{}). + Owns(&appsv1.Deployment{}, builder.WithPredicates(resources.NewDeploymentPredicate())). + Watches(&extv1.CustomResourceDefinition{}). // call ForLabel() + new predicates + // Kueue-specific actions, in registration order: initialize, devFlags, kustomize render, deploy, status update + WithAction(initialize). + WithAction(devFlags). + WithAction(kustomize.NewAction( + kustomize.WithCache(kustomize.DefaultCachingKeyFn), + kustomize.WithLabel(labels.ODH.Component(ComponentName), "true"), + kustomize.WithLabel(labels.K8SCommon.PartOf, ComponentName), + )). + WithAction(deploy.NewAction( + deploy.WithFieldOwner(componentsv1.KueueInstanceName), + deploy.WithLabel(labels.ComponentPartOf, componentsv1.KueueInstanceName), + )). + WithAction(updatestatus.NewAction( + updatestatus.WithSelectorLabel(labels.ComponentPartOf, componentsv1.KueueInstanceName), + )). + Build(ctx) -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the Kueue object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user.
-// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.2/pkg/reconcile -func (r *KueueReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - - // TODO(user): your logic here - - return ctrl.Result{}, nil -} + if err != nil { + return err // returned as-is: the caller in main reports it with the controller name + } -// SetupWithManager sets up the controller with the Manager. -func (r *KueueReconciler) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&componentsv1.Kueue{}). - Complete(r) + return nil } diff --git a/controllers/components/kueue/kueue_controller_actions.go b/controllers/components/kueue/kueue_controller_actions.go new file mode 100644 index 00000000000..1c9abf58f95 --- /dev/null +++ b/controllers/components/kueue/kueue_controller_actions.go @@ -0,0 +1,64 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +package kueue + +import ( + "context" + "fmt" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" +) + +func initialize(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { + rr.Manifests = append(rr.Manifests, odhtypes.ManifestInfo{ + Path: DefaultPath, + ContextDir: "", + SourcePath: "", + }) + if err := odhdeploy.ApplyParams(DefaultPath, nil, map[string]string{"namespace": rr.DSCI.Spec.ApplicationsNamespace}); err != nil { + return fmt.Errorf("failed to update params.env from %s : %w", DefaultPath, err) + } + return nil +} + +func devFlags(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { + kueue, ok := rr.Instance.(*componentsv1.Kueue) + if !ok { + return fmt.Errorf("resource instance %v is not a componentsv1.Kueue", rr.Instance) + } + + if kueue.Spec.DevFlags == nil { + return nil + } + // Only the first devFlags manifest entry is used: it is downloaded for this component and, + // when it sets a sourcePath, rr.Manifests[0] is repointed at the downloaded component directory. + if len(kueue.Spec.DevFlags.Manifests) != 0 { + manifestConfig := kueue.Spec.DevFlags.Manifests[0] + if err := odhdeploy.DownloadManifests(ctx, ComponentName, manifestConfig); err != nil { + return err + } + if manifestConfig.SourcePath != "" { + rr.Manifests[0].Path = odhdeploy.DefaultManifestPath + rr.Manifests[0].ContextDir = ComponentName + rr.Manifests[0].SourcePath = manifestConfig.SourcePath + } + } + // TODO: Implement devflags logmode logic + return nil +} diff --git a/controllers/datasciencecluster/datasciencecluster_controller.go b/controllers/datasciencecluster/datasciencecluster_controller.go index 1ee3e941dc3..7b139862e55 100644 --- a/controllers/datasciencecluster/datasciencecluster_controller.go +++ b/controllers/datasciencecluster/datasciencecluster_controller.go @@ -54,6 +54,7 @@ import ( dsciv1
"github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" dashboardctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/dashboard" + kueuectrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/kueue" modelregistryctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/modelregistry" rayctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/ray" trustyaictrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/trustyai" @@ -245,7 +246,6 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R var componentErrors *multierror.Error // Deploy Dashboard - if instance, err = r.ReconcileComponent(ctx, instance, componentsv1.DashboardComponentName, func() (error, bool) { // Get the Dashboard instance dashboard := dashboardctrl.GetComponentCR(instance) @@ -279,6 +279,14 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R componentErrors = multierror.Append(componentErrors, err) } + // Deploy Kueue + if instance, err = r.ReconcileComponent(ctx, instance, componentsv1.KueueComponentName, func() (error, bool) { + kueue := kueuectrl.GetComponentCR(instance) + return r.apply(ctx, instance, kueue), instance.Spec.Components.Kueue.ManagementState == operatorv1.Managed + }); err != nil { + componentErrors = multierror.Append(componentErrors, err) + } + // Process errors for components if componentErrors != nil { log.Info("DataScienceCluster Deployment Incomplete.") @@ -546,6 +554,7 @@ func (r *DataScienceClusterReconciler) SetupWithManager(ctx context.Context, mgr Owns(&componentsv1.Ray{}). Owns(&componentsv1.ModelRegistry{}). Owns(&componentsv1.TrustyAI{}). + Owns(&componentsv1.Kueue{}). 
Owns( &corev1.ServiceAccount{}, builder.WithPredicates(saPredicates), diff --git a/controllers/datasciencecluster/kubebuilder_rbac.go b/controllers/datasciencecluster/kubebuilder_rbac.go index b89e9c460e9..8ff61c5fbf9 100644 --- a/controllers/datasciencecluster/kubebuilder_rbac.go +++ b/controllers/datasciencecluster/kubebuilder_rbac.go @@ -150,10 +150,12 @@ package datasciencecluster // +kubebuilder:rbac:groups=modelregistry.opendatahub.io,resources=modelregistries/status,verbs=get;update;patch // +kubebuilder:rbac:groups=modelregistry.opendatahub.io,resources=modelregistries/finalizers,verbs=update;get -// TODO: Kueue +// Kueue // +kubebuilder:rbac:groups=components.opendatahub.io,resources=kueues,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=components.opendatahub.io,resources=kueues/status,verbs=get;update;patch // +kubebuilder:rbac:groups=components.opendatahub.io,resources=kueues/finalizers,verbs=update +// +kubebuilder:rbac:groups="monitoring.coreos.com",resources=prometheusrules,verbs=get;create;patch;delete;deletecollection;list;update;watch +// +kubebuilder:rbac:groups="monitoring.coreos.com",resources=podmonitors,verbs=get;create;delete;update;watch;list;patch // TODO: CFO //+kubebuilder:rbac:groups=components.opendatahub.io,resources=codeflares,verbs=get;list;watch;create;update;patch;delete diff --git a/docs/api-overview.md b/docs/api-overview.md index d9e22a21511..60d81e90ee7 100644 --- a/docs/api-overview.md +++ b/docs/api-overview.md @@ -120,6 +120,23 @@ DSCDashboard contains all the configuration exposed in DSC instance for Dashboar +_Appears in:_ +- [Components](#components) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `managementState` _[ManagementState](#managementstate)_ | Set to one of the following values:

- "Managed" : the operator is actively managing the component and trying to keep it active.
It will only upgrade the component if it is safe to do so

- "Removed" : the operator is actively managing the component and will not install it,
or if it is installed, the operator will try to remove it | | Enum: [Managed Removed]
| +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | + + +#### DSCKueue + + + +DSCKueue contains all the configuration exposed in DSC instance for Kueue component + + + _Appears in:_ - [Components](#components) @@ -466,6 +483,23 @@ _Appears in:_ | `status` _[KueueStatus](#kueuestatus)_ | | | | +#### KueueCommonSpec + + + + + + + +_Appears in:_ +- [DSCKueue](#dsckueue) +- [KueueSpec](#kueuespec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | + + #### KueueList @@ -499,7 +533,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `foo` _string_ | Foo is an example field of Kueue. Edit kueue_types.go to remove/update | | | +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | #### KueueStatus @@ -1072,7 +1106,6 @@ _Appears in:_ - [CodeFlare](#codeflare) - [DataSciencePipelines](#datasciencepipelines) - [Kserve](#kserve) -- [Kueue](#kueue) - [ModelMeshServing](#modelmeshserving) - [TrainingOperator](#trainingoperator) - [Workbenches](#workbenches) @@ -1115,11 +1148,14 @@ DevFlagsSpec struct defines the component's dev flags configuration. _Appears in:_ - [Component](#component) - [DSCDashboard](#dscdashboard) +- [DSCKueue](#dsckueue) - [DSCModelRegistry](#dscmodelregistry) - [DSCRay](#dscray) - [DSCTrustyAI](#dsctrustyai) - [DashboardCommonSpec](#dashboardcommonspec) - [DashboardSpec](#dashboardspec) +- [KueueCommonSpec](#kueuecommonspec) +- [KueueSpec](#kueuespec) - [ModelRegistryCommonSpec](#modelregistrycommonspec) - [ModelRegistrySpec](#modelregistryspec) - [RayCommonSpec](#raycommonspec) @@ -1143,6 +1179,7 @@ ManagementSpec struct defines the component's management configuration. 
_Appears in:_ - [Component](#component) - [DSCDashboard](#dscdashboard) +- [DSCKueue](#dsckueue) - [DSCModelRegistry](#dscmodelregistry) - [DSCRay](#dscray) - [DSCTrustyAI](#dsctrustyai) @@ -1250,28 +1287,6 @@ _Appears in:_ -## datasciencecluster.opendatahub.io/kueue - - - - -#### Kueue - - - -Kueue struct holds the configuration for the Kueue component. - - - -_Appears in:_ -- [Components](#components) - -| Field | Description | Default | Validation | -| --- | --- | --- | --- | -| `Component` _[Component](#component)_ | | | | - - - ## datasciencecluster.opendatahub.io/modelmeshserving Package modelmeshserving provides utility functions to config MoModelMesh, a general-purpose model serving management/routing layer @@ -1398,7 +1413,7 @@ _Appears in:_ | `modelmeshserving` _[ModelMeshServing](#modelmeshserving)_ | ModelMeshServing component configuration.
Does not support enabled Kserve at the same time | | | | `datasciencepipelines` _[DataSciencePipelines](#datasciencepipelines)_ | DataServicePipeline component configuration.
Require OpenShift Pipelines Operator to be installed before enable component | | | | `kserve` _[Kserve](#kserve)_ | Kserve component configuration.
Require OpenShift Serverless and OpenShift Service Mesh Operators to be installed before enable component
Does not support enabled ModelMeshServing at the same time | | | -| `kueue` _[Kueue](#kueue)_ | Kueue component configuration. | | | +| `kueue` _[DSCKueue](#dsckueue)_ | Kueue component configuration. | | | | `codeflare` _[CodeFlare](#codeflare)_ | CodeFlare component configuration.
If CodeFlare Operator has been installed in the cluster, it should be uninstalled first before enabled component. | | | | `ray` _[DSCRay](#dscray)_ | Ray component configuration. | | | | `trustyai` _[DSCTrustyAI](#dsctrustyai)_ | TrustyAI component configuration. | | | diff --git a/main.go b/main.go index f9c68ed8f0b..d6372e57fa4 100644 --- a/main.go +++ b/main.go @@ -64,6 +64,7 @@ import ( featurev1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/features/v1" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/certconfigmapgenerator" dashboardctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/dashboard" + kueuectrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/kueue" modelregistryctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/modelregistry" rayctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/ray" trustyaictrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/trustyai" @@ -131,6 +132,10 @@ func initComponents(_ context.Context, p cluster.Platform) error { if err := trustyaictrl.Init(p); err != nil { return err } + + if err := kueuectrl.Init(p); err != nil { + multiErr = multierror.Append(multiErr, err) + } return multiErr.ErrorOrNil() } @@ -439,6 +444,10 @@ func CreateComponentReconcilers(ctx context.Context, mgr manager.Manager) error setupLog.Error(err, "unable to create controller", "controller", "TrustyAIReconciler") return err } + if err := kueuectrl.NewComponentReconciler(ctx, mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "KueueReconciler") + return err + } return nil } diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index d29ee7d4cf8..ec5490f89af 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -35,7 +35,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/codeflare" 
"github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" - "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -87,8 +86,8 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error { Ray: componentsv1.DSCRay{ ManagementSpec: components.ManagementSpec{ManagementState: operatorv1.Managed}, }, - Kueue: kueue.Kueue{ - Component: componentsold.Component{ManagementState: operatorv1.Managed}, + Kueue: componentsv1.DSCKueue{ + ManagementSpec: components.ManagementSpec{ManagementState: operatorv1.Managed}, }, TrustyAI: componentsv1.DSCTrustyAI{ ManagementSpec: components.ManagementSpec{ManagementState: operatorv1.Managed}, diff --git a/tests/e2e/controller_test.go b/tests/e2e/controller_test.go index 9fc2268e503..6e86741fd69 100644 --- a/tests/e2e/controller_test.go +++ b/tests/e2e/controller_test.go @@ -43,6 +43,7 @@ var ( "ray": rayTestSuite, "modelregistry": modelRegistryTestSuite, "trustyai": trustyAITestSuite, + "kueue": kueueTestSuite, } ) diff --git a/tests/e2e/dashboard_test.go b/tests/e2e/dashboard_test.go index eb67286c308..cde11a36859 100644 --- a/tests/e2e/dashboard_test.go +++ b/tests/e2e/dashboard_test.go @@ -121,7 +121,7 @@ func (tc *DashboardTestCtx) testOwnerReferences() error { } // Test Dashboard CR ownerref - if tc.testDashboardInstance.OwnerReferences[0].Kind != "DataScienceCluster" { + if tc.testDashboardInstance.OwnerReferences[0].Kind != dscKind { return fmt.Errorf("expected ownerreference DataScienceCluster not found. 
Got ownereferrence: %v", tc.testDashboardInstance.OwnerReferences[0].Kind) } diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index 7aba5ab79d9..354bd4efbcc 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -30,7 +30,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/codeflare" "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" - "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -156,9 +155,9 @@ func setupDSCInstance(name string) *dscv1.DataScienceCluster { ManagementState: operatorv1.Managed, }, }, - Kueue: kueue.Kueue{ - Component: componentsold.Component{ - ManagementState: operatorv1.Removed, + Kueue: componentsv1.DSCKueue{ + ManagementSpec: components.ManagementSpec{ + ManagementState: operatorv1.Managed, }, }, TrustyAI: componentsv1.DSCTrustyAI{ diff --git a/tests/e2e/kueue_test.go b/tests/e2e/kueue_test.go new file mode 100644 index 00000000000..dcd050226ba --- /dev/null +++ b/tests/e2e/kueue_test.go @@ -0,0 +1,280 @@ +package e2e_test + +import ( + "context" + "errors" + "fmt" + "reflect" + "testing" + "time" + + operatorv1 "github.com/openshift/api/operator/v1" + "github.com/stretchr/testify/require" + autoscalingv1 "k8s.io/api/autoscaling/v1" + k8serr "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" 
+) + +type KueueTestCtx struct { + testCtx *testContext + testKueueInstance componentsv1.Kueue +} + +func kueueTestSuite(t *testing.T) { + t.Helper() + + kueueCtx := KueueTestCtx{} + var err error + kueueCtx.testCtx, err = NewTestContext() + require.NoError(t, err) + + testCtx := kueueCtx.testCtx + + t.Run(testCtx.testDsc.Name, func(t *testing.T) { + // creation + t.Run("Creation of Kueue CR", func(t *testing.T) { + err = kueueCtx.testKueueCreation() + require.NoError(t, err, "error creating Kueue CR") + }) + + t.Run("Validate Kueue instance", func(t *testing.T) { + err = kueueCtx.validateKueue() + require.NoError(t, err, "error validating Kueue instance") + }) + + t.Run("Validate Ownerrefrences exist", func(t *testing.T) { + err = kueueCtx.testOwnerReferences() + require.NoError(t, err, "error getting all Kueue's Ownerrefrences") + }) + + t.Run("Validate Kueue Ready", func(t *testing.T) { + err = kueueCtx.validateKueueReady() + require.NoError(t, err, "Kueue instance is not Ready") + }) + + // reconcile + t.Run("Validate Controller reconcile", func(t *testing.T) { + err = kueueCtx.testUpdateOnKueueResources() + require.NoError(t, err, "error testing updates for Kueue's managed resources") + }) + + t.Run("Validate Disabling Kueue Component", func(t *testing.T) { + err = kueueCtx.testUpdateKueueComponentDisabled() + require.NoError(t, err, "error testing kueue component enabled field") + }) + }) +} + +func (tc *KueueTestCtx) testKueueCreation() error { + if tc.testCtx.testDsc.Spec.Components.Kueue.ManagementState != operatorv1.Managed { + return nil + } + + err := tc.testCtx.wait(func(ctx context.Context) (bool, error) { + existingKueueList := &componentsv1.KueueList{} + + if err := tc.testCtx.customClient.List(ctx, existingKueueList); err != nil { + return false, err + } + + switch { + case len(existingKueueList.Items) == 1: + tc.testKueueInstance = existingKueueList.Items[0] + return true, nil + case len(existingKueueList.Items) > 1: + return false, fmt.Errorf( + 
"unexpected Kueue CR instances. Expected 1 , Found %v instance", len(existingKueueList.Items)) + default: + return false, nil + } + }) + + if err != nil { + return fmt.Errorf("unable to find Kueue CR instance: %w", err) + } + + return nil +} + +func (tc *KueueTestCtx) validateKueue() error { + // Kueue spec should match the spec of Kueue component in DSC + if !reflect.DeepEqual(tc.testCtx.testDsc.Spec.Components.Kueue.KueueCommonSpec, tc.testKueueInstance.Spec.KueueCommonSpec) { + err := fmt.Errorf("expected .spec for Kueue %v, got %v", + tc.testCtx.testDsc.Spec.Components.Kueue.KueueCommonSpec, tc.testKueueInstance.Spec.KueueCommonSpec) + return err + } + return nil +} + +func (tc *KueueTestCtx) testOwnerReferences() error { + if len(tc.testKueueInstance.OwnerReferences) != 1 { + return errors.New("expect CR has ownerreferences set") + } + + // Test Kueue CR ownerref + if tc.testKueueInstance.OwnerReferences[0].Kind != dscKind { + return fmt.Errorf("expected ownerreference DataScienceCluster not found. Got ownereferrence: %v", + tc.testKueueInstance.OwnerReferences[0].Kind) + } + + // Test Kueue resources + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ODH.Component(componentsv1.KueueComponentName), + }) + if err != nil { + return fmt.Errorf("error listing component deployments %w", err) + } + // test any one deployment for ownerreference + if len(appDeployments.Items) != 0 && appDeployments.Items[0].OwnerReferences[0].Kind != componentsv1.KueueKind { + return fmt.Errorf("expected ownerreference not found. Got ownereferrence: %v", + appDeployments.Items[0].OwnerReferences) + } + + return nil +} + +// Verify Kueue instance is in Ready phase when kueue deployments are up and running. 
+func (tc *KueueTestCtx) validateKueueReady() error { + err := wait.PollUntilContextTimeout(tc.testCtx.ctx, generalRetryInterval, componentReadyTimeout, true, func(ctx context.Context) (bool, error) { + key := types.NamespacedName{Name: tc.testKueueInstance.Name} + kueue := &componentsv1.Kueue{} + + err := tc.testCtx.customClient.Get(ctx, key, kueue) + if err != nil { + return false, err + } + return kueue.Status.Phase == readyStatus, nil + }) + + if err != nil { + return fmt.Errorf("error waiting Ready state for Kueue %v: %w", tc.testKueueInstance.Name, err) + } + + return nil +} + +func (tc *KueueTestCtx) testUpdateOnKueueResources() error { + // Test Updating Kueue Replicas + + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ComponentPartOf + "=" + tc.testKueueInstance.Name, + }) + if err != nil { + return err + } + + if len(appDeployments.Items) != 1 { + return fmt.Errorf("error getting deployment for component %s", tc.testKueueInstance.Name) + } + + const expectedReplica int32 = 2 // from 1 to 2 + + testDeployment := appDeployments.Items[0] + patchedReplica := &autoscalingv1.Scale{ + ObjectMeta: metav1.ObjectMeta{ + Name: testDeployment.Name, + Namespace: testDeployment.Namespace, + }, + Spec: autoscalingv1.ScaleSpec{ + Replicas: expectedReplica, + }, + Status: autoscalingv1.ScaleStatus{}, + } + updatedDep, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).UpdateScale(tc.testCtx.ctx, + testDeployment.Name, patchedReplica, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("error patching component resources : %w", err) + } + if updatedDep.Spec.Replicas != patchedReplica.Spec.Replicas { + return fmt.Errorf("failed to patch replicas : expect to be %v but got %v", patchedReplica.Spec.Replicas, updatedDep.Spec.Replicas) + } + + // Sleep for 20 seconds to allow the operator to reconcile + // we expect it 
should not revert back to original value because of AllowList + time.Sleep(2 * generalRetryInterval) + reconciledDep, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, testDeployment.Name, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("error getting component resource after reconcile: %w", err) + } + if *reconciledDep.Spec.Replicas != expectedReplica { + return fmt.Errorf("failed to revert back replicas : expect to be %v but got %v", expectedReplica, *reconciledDep.Spec.Replicas) + } + + return nil +} + +func (tc *KueueTestCtx) testUpdateKueueComponentDisabled() error { + // Test Updating Kueue to be disabled + var kueueDeploymentName string + + if tc.testCtx.testDsc.Spec.Components.Kueue.ManagementState == operatorv1.Managed { + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ODH.Component(componentsv1.KueueComponentName), + }) + if err != nil { + return fmt.Errorf("error getting enabled component %v", componentsv1.KueueComponentName) + } + if len(appDeployments.Items) > 0 { + kueueDeploymentName = appDeployments.Items[0].Name + if appDeployments.Items[0].Status.ReadyReplicas == 0 { + return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", kueueDeploymentName) + } + } + } else { + return errors.New("kueue spec should be in 'enabled: true' state in order to perform test") + } + + // Disable component Kueue + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + // refresh DSC instance in case it was updated during the reconcile + err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc) + if err != nil { + return fmt.Errorf("error getting resource %w", err) + } + // Disable the Component + tc.testCtx.testDsc.Spec.Components.Kueue.ManagementState = operatorv1.Removed + + // Try 
to update + err = tc.testCtx.customClient.Update(tc.testCtx.ctx, tc.testCtx.testDsc) + // Return err itself here (not wrapped inside another error) + // so that RetryOnConflict can identify it correctly. + if err != nil { + return fmt.Errorf("error updating component from 'enabled: true' to 'enabled: false': %w", err) + } + + return nil + }) + if err != nil { + return fmt.Errorf("error after retry %w", err) + } + + if err = tc.testCtx.wait(func(ctx context.Context) (bool, error) { + // Verify kueue CR is deleted + kueue := &componentsv1.Kueue{} + err = tc.testCtx.customClient.Get(ctx, client.ObjectKey{Name: tc.testKueueInstance.Name}, kueue) + return k8serr.IsNotFound(err), nil + }); err != nil { + return fmt.Errorf("component kueue is disabled, should not get the Kueue CR %v", tc.testKueueInstance.Name) + } + + // Sleep for 20 seconds to allow the operator to reconcile + time.Sleep(2 * generalRetryInterval) + _, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, kueueDeploymentName, metav1.GetOptions{}) + if err != nil { + if k8serr.IsNotFound(err) { + return nil // correct result: should not find deployment after we disable it already + } + return fmt.Errorf("error getting component resource after reconcile: %w", err) + } + return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more", + componentsv1.KueueKind, + kueueDeploymentName, + tc.testCtx.applicationsNamespace) +} diff --git a/tests/e2e/odh_manager_test.go b/tests/e2e/odh_manager_test.go index 8fd1baa2981..f408955de11 100644 --- a/tests/e2e/odh_manager_test.go +++ b/tests/e2e/odh_manager_test.go @@ -62,4 +62,10 @@ func (tc *testContext) validateOwnedCRDs(t *testing.T) { require.NoErrorf(t, tc.validateCRD("trustyais.components.opendatahub.io"), "error in validating CRD : trustyais.components.opendatahub.io") }) + + t.Run("Validate Kueue CRD", func(t *testing.T) { + t.Parallel() + require.NoErrorf(t, 
tc.validateCRD("kueues.components.opendatahub.io"), + "error in validating CRD : kueues.components.opendatahub.io") + }) } diff --git a/tests/e2e/ray_test.go b/tests/e2e/ray_test.go index d867be6e0a6..1183c3ac35f 100644 --- a/tests/e2e/ray_test.go +++ b/tests/e2e/ray_test.go @@ -119,7 +119,7 @@ func (tc *RayTestCtx) testOwnerReferences() error { } // Test Ray CR ownerref - if tc.testRayInstance.OwnerReferences[0].Kind != "DataScienceCluster" { + if tc.testRayInstance.OwnerReferences[0].Kind != dscKind { return fmt.Errorf("expected ownerreference DataScienceCluster not found. Got ownereferrence: %v", tc.testRayInstance.OwnerReferences[0].Kind) }