diff --git a/controllers/datasciencecluster/datasciencecluster_controller.go b/controllers/datasciencecluster/datasciencecluster_controller.go
index a185a735e6c..dee03c1c41d 100644
--- a/controllers/datasciencecluster/datasciencecluster_controller.go
+++ b/controllers/datasciencecluster/datasciencecluster_controller.go
@@ -85,7 +85,7 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R
 	r.Log.Info("Reconciling DataScienceCluster resources", "Request.Name", req.Name)
 
 	// Get information on version
-	currentOperatorReleaseVersion, err := cluster.GetRelease(ctx, r.Client)
+	currentOperatorReleaseVersion, err := cluster.GetReleaseFromCSV(ctx, r.Client)
 	if err != nil {
 		r.Log.Error(err, "failed to get operator release version")
 		return ctrl.Result{}, err
diff --git a/controllers/dscinitialization/dscinitialization_controller.go b/controllers/dscinitialization/dscinitialization_controller.go
index 962c4d8d9e2..99ce0d624d8 100644
--- a/controllers/dscinitialization/dscinitialization_controller.go
+++ b/controllers/dscinitialization/dscinitialization_controller.go
@@ -78,7 +78,7 @@ type DSCInitializationReconciler struct {
 func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { //nolint:funlen,gocyclo,maintidx
 	r.Log.Info("Reconciling DSCInitialization.", "DSCInitialization Request.Name", req.Name)
 
-	currentOperatorReleaseVersion, err := cluster.GetRelease(ctx, r.Client)
+	currentOperatorReleaseVersion, err := cluster.GetReleaseFromCSV(ctx, r.Client)
 	if err != nil {
 		r.Log.Error(err, "failed to get operator release version")
 		return ctrl.Result{}, err
diff --git a/main.go b/main.go
index bcdd87914a6..490b8e66ebe 100644
--- a/main.go
+++ b/main.go
@@ -256,6 +256,13 @@ func main() { //nolint:funlen,maintidx
 		os.Exit(1)
 	}
 
+	// get old release version before we create default DSCI CR
+	oldReleaseVersion, releaseErr := upgrade.GetReleaseFromCR(ctx, setupClient)
+	if releaseErr != nil {
+		// best effort: keep starting up with the zero release (0.0.0), but do not hide the failure
+		setupLog.Error(releaseErr, "unable to get old release version, falling back to 0.0.0")
+	}
+
 	// Check if user opted for disabling DSC configuration
 	disableDSCConfig, existDSCConfig := os.LookupEnv("DISABLE_DSC_CONFIG")
 	if existDSCConfig && disableDSCConfig != "false" {
@@ -292,7 +299,7 @@ func main() { //nolint:funlen,maintidx
 	}
 	// Cleanup resources from previous v2 releases
 	var cleanExistingResourceFunc manager.RunnableFunc = func(ctx context.Context) error {
-		if err = upgrade.CleanupExistingResource(ctx, setupClient, platform, dscApplicationsNamespace, dscMonitoringNamespace); err != nil {
+		if err = upgrade.CleanupExistingResource(ctx, setupClient, platform, dscApplicationsNamespace, dscMonitoringNamespace, oldReleaseVersion); err != nil {
 			setupLog.Error(err, "unable to perform cleanup")
 		}
 		return err
diff --git a/pkg/cluster/cluster_config.go b/pkg/cluster/cluster_config.go
index a1c462aeeee..6e2707dbfa6 100644
--- a/pkg/cluster/cluster_config.go
+++ b/pkg/cluster/cluster_config.go
@@ -133,7 +133,7 @@ type Release struct {
 	Version version.OperatorVersion `json:"version,omitempty"`
 }
 
-func GetRelease(ctx context.Context, cli client.Client) (Release, error) {
+func GetReleaseFromCSV(ctx context.Context, cli client.Client) (Release, error) {
 	initRelease := Release{
 		// dummy version set to name "", version 0.0.0
 		Version: version.OperatorVersion{
diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go
index 3df33502bfa..93f94b9c071 100644
--- a/pkg/upgrade/upgrade.go
+++ b/pkg/upgrade/upgrade.go
@@ -21,6 +21,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
@@ -203,7 +204,12 @@ func getDashboardWatsonResources(ns string) []ResourceSpec {
 }
 
 // TODO: remove function once we have a generic solution across all components.
-func CleanupExistingResource(ctx context.Context, cli client.Client, platform cluster.Platform, dscApplicationsNamespace, dscMonitoringNamespace string) error {
+func CleanupExistingResource(ctx context.Context,
+	cli client.Client,
+	platform cluster.Platform,
+	dscApplicationsNamespace, dscMonitoringNamespace string,
+	oldReleaseVersion cluster.Release,
+) error {
 	var multiErr *multierror.Error
 	// Special Handling of cleanup of deprecated model monitoring stack
 	if platform == cluster.ManagedRhods {
@@ -261,7 +267,7 @@ func CleanupExistingResource(ctx context.Context, cli client.Client, platform cl
 
 	// only apply on RHOAI since ODH has a different way to create this CR by dashboard
 	if platform == cluster.SelfManagedRhods || platform == cluster.ManagedRhods {
-		if err := unsetOwnerReference(ctx, cli, "odh-dashboard-config", dscApplicationsNamespace); err != nil {
+		if err := upgradeODCCR(ctx, cli, "odh-dashboard-config", dscApplicationsNamespace, oldReleaseVersion); err != nil {
 			return err
 		}
 	}
@@ -401,7 +407,10 @@ func removOdhApplicationsCR(ctx context.Context, cli client.Client, gvk schema.G
 	return nil
 }
 
-func unsetOwnerReference(ctx context.Context, cli client.Client, instanceName string, applicationNS string) error {
+// upgradeODCCR handles different cases:
+// 1. unset ownerreference for CR odh-dashboard-config
+// 2. flip TrustyAI BiasMetrics to false (.spec.dashboardConfig.disableBiasMetrics) if the previous release is lower than 2.14.0.
+func upgradeODCCR(ctx context.Context, cli client.Client, instanceName string, applicationNS string, release cluster.Release) error {
 	crd := &apiextv1.CustomResourceDefinition{}
 	if err := cli.Get(ctx, client.ObjectKey{Name: "odhdashboardconfigs.opendatahub.io"}, crd); err != nil {
 		return client.IgnoreNotFound(err)
@@ -414,6 +423,15 @@ func unsetOwnerReference(ctx context.Context, cli client.Client, instanceName st
 	}, odhObject); err != nil {
 		return client.IgnoreNotFound(err)
 	}
+
+	if err := unsetOwnerReference(ctx, cli, instanceName, odhObject); err != nil {
+		return err
+	}
+	return updateODCBiasMetrics(ctx, cli, instanceName, release, odhObject)
+}
+
+// unsetOwnerReference clears the owner references of odhObject when any are set.
+func unsetOwnerReference(ctx context.Context, cli client.Client, instanceName string, odhObject *unstructured.Unstructured) error {
 	if odhObject.GetOwnerReferences() != nil {
 		// set to nil as updates
 		odhObject.SetOwnerReferences(nil)
@@ -424,6 +442,28 @@ func unsetOwnerReference(ctx context.Context, cli client.Client, instanceName st
 	return nil
 }
 
+// updateODCBiasMetrics forces .spec.dashboardConfig.disableBiasMetrics to false on odhObject
+// when oldRelease (the version being upgraded from) is lower than 2.14.0.
+// The zero version 0.0.0 (2.10- release or dummy CI build) counts as lower than 2.14.0.
+// It returns a wrapped error naming instanceName when the patch fails.
+func updateODCBiasMetrics(ctx context.Context, cli client.Client, instanceName string, oldRelease cluster.Release, odhObject *unstructured.Unstructured) error {
+	// Compare the major number as well as the minor one: checking only Minor < 14 would also
+	// match a later major release such as 3.0.x and flip the field on every such upgrade.
+	// NOTE(review): assumes OperatorVersion exposes Major next to Minor (semver) - confirm.
+	fromVersion := oldRelease.Version
+	if fromVersion.Major > 2 || (fromVersion.Major == 2 && fromVersion.Minor >= 14) {
+		ctrl.Log.Info("Upgrade does not force BiasMetrics to false due to from release >= 2.14.0")
+		return nil
+	}
+	ctrl.Log.Info("Upgrade force BiasMetrics to false due to from release < 2.14.0")
+	// merge patch sets the field even if it did not exist in the CR before
+	disableBiasMetricsValue := []byte(`{"spec": {"dashboardConfig": {"disableBiasMetrics": false}}}`)
+	if err := cli.Patch(ctx, odhObject, client.RawPatch(types.MergePatchType, disableBiasMetricsValue)); err != nil {
+		return fmt.Errorf("error enable BiasMetrics in CR %s : %w", instanceName, err)
+	}
+	return nil
+}
+
 func RemoveLabel(ctx context.Context, cli client.Client, objectName string, labelKey string) error {
 	foundNamespace := &corev1.Namespace{}
 	if err := cli.Get(ctx, client.ObjectKey{Name: objectName}, foundNamespace); err != nil {
@@ -479,3 +519,28 @@ func deleteDeprecatedNamespace(ctx context.Context, cli client.Client, namespace
 
 	return nil
 }
+
+// GetReleaseFromCR returns the release recorded in the status of the DSCI CR when exactly one
+// DSCI CR exists; otherwise it falls back to the DSC CR when exactly one DSC CR exists.
+// It returns the zero Release and a nil error when neither is the case (e.g. a clean installation
+// or both CRs already deleted), and the zero Release with an error when listing either kind fails.
+func GetReleaseFromCR(ctx context.Context, cli client.Client) (cluster.Release, error) {
+	dsciInstance := &dsciv1.DSCInitializationList{}
+	if err := cli.List(ctx, dsciInstance); err != nil {
+		return cluster.Release{}, fmt.Errorf("listing DSCInitialization CRs: %w", err)
+	}
+	if len(dsciInstance.Items) == 1 { // exactly one DSCI CR found
+		// can return a valid Release or 0.0.0
+		return dsciInstance.Items[0].Status.Release, nil
+	}
+	// no single DSCI CR found, try with DSC CR
+	dscInstances := &dscv1.DataScienceClusterList{}
+	if err := cli.List(ctx, dscInstances); err != nil {
+		return cluster.Release{}, fmt.Errorf("listing DataScienceCluster CRs: %w", err)
+	}
+	if len(dscInstances.Items) == 1 { // exactly one DSC CR found
+		return dscInstances.Items[0].Status.Release, nil
+	}
+	// could be a clean installation or both CRs are deleted already
+	return cluster.Release{}, nil
+}