Skip to content

Commit

Permalink
chore: address comments for monitoring component (opendatahub-io#1520)
Browse files Browse the repository at this point in the history
* update : monitoring CR 

- more if to switch...case
- add .status.condition.MonitoringReady type
- change Monitoring CR .status.condition Reason and Message and Type name
- remove unused predicate var from DSCI
- change check on promethus deployment ready
- update: change to use Apply than Create
- update: add or remove prom rules
- add field manager for monitoring CR to DSCI
- add isComponentReady()
- update predicate for monitoring on DSC change on both .spec.components and .status.condition

Signed-off-by: Wen Zhou <wenzhou@redhat.com>

---------

Signed-off-by: Wen Zhou <wenzhou@redhat.com>
  • Loading branch information
zdtsw authored Jan 21, 2025
1 parent 6c7d74b commit 05c7947
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 188 deletions.
37 changes: 7 additions & 30 deletions controllers/dscinitialization/dscinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ package dscinitialization
import (
"context"
"path/filepath"
"reflect"

operatorv1 "github.com/openshift/api/operator/v1"
routev1 "github.com/openshift/api/route/v1"
Expand Down Expand Up @@ -50,6 +49,7 @@ import (
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/status"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
odhClient "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/logger"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/trustedcabundle"
Expand Down Expand Up @@ -362,17 +362,17 @@ func (r *DSCInitializationReconciler) SetupWithManager(ctx context.Context, mgr
handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, a client.Object) []reconcile.Request {
return r.watchDSCResource(ctx)
}),
builder.WithPredicates(DSCDeletionPredicate),
builder.WithPredicates(resources.DSCDeletionPredicate), // TODO: is it needed?
).
Watches(
&corev1.Secret{},
handler.EnqueueRequestsFromMapFunc(r.watchMonitoringSecretResource),
builder.WithPredicates(SecretContentChangedPredicate),
builder.WithPredicates(resources.SecretContentChangedPredicate),
).
Watches(
&corev1.ConfigMap{},
handler.EnqueueRequestsFromMapFunc(r.watchMonitoringConfigMapResource),
builder.WithPredicates(CMContentChangedPredicate),
builder.WithPredicates(resources.CMContentChangedPredicate),
).
Watches(
&serviceApi.Auth{},
Expand All @@ -381,30 +381,6 @@ func (r *DSCInitializationReconciler) SetupWithManager(ctx context.Context, mgr
Complete(r)
}

var SecretContentChangedPredicate = predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
oldSecret, _ := e.ObjectOld.(*corev1.Secret)
newSecret, _ := e.ObjectNew.(*corev1.Secret)

return !reflect.DeepEqual(oldSecret.Data, newSecret.Data)
},
}

var CMContentChangedPredicate = predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
oldCM, _ := e.ObjectOld.(*corev1.ConfigMap)
newCM, _ := e.ObjectNew.(*corev1.ConfigMap)

return !reflect.DeepEqual(oldCM.Data, newCM.Data)
},
}

var DSCDeletionPredicate = predicate.Funcs{
DeleteFunc: func(e event.DeleteEvent) bool {
return true
},
}

var dsciPredicateStateChangeTrustedCA = predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
oldDSCI, _ := e.ObjectOld.(*dsciv1.DSCInitialization)
Expand Down Expand Up @@ -501,8 +477,9 @@ func (r *DSCInitializationReconciler) configureMonitoring(ctx context.Context, d
)

if dsci.Spec.Monitoring.ManagementState == operatorv1.Managed {
err := r.Create(ctx, defaultMonitoring)
if err != nil && !k8serr.IsAlreadyExists(err) {
// for generic case if we need to support configable monitoring namespace
// set filed manager to DSCI
if err := r.Apply(ctx, defaultMonitoring, client.FieldOwner("dscinitialization.opendatahub.io"), client.ForceOwnership); err != nil && !k8serr.IsAlreadyExists(err) {
return err
}
} else {
Expand Down
25 changes: 23 additions & 2 deletions controllers/services/monitoring/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,34 @@ package monitoring

import (
"context"
"fmt"
"os"
"path/filepath"
"strings"

conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
"gopkg.in/yaml.v2"
k8serr "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/opendatahub-io/opendatahub-operator/v2/apis/common"
serviceApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/services/v1alpha1"
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/status"
odhcli "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client"
odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
)

var (
ComponentName = serviceApi.MonitoringServiceName
prometheusConfigPath = filepath.Join(odhdeploy.DefaultManifestPath, ComponentName, "prometheus", "apps", "prometheus-configs.yaml")
ReadyConditionType = conditionsv1.ConditionType(status.ReadySuffix)
)

// UpdatePrometheusConfig update prometheus-configs.yaml to include/exclude <component>.rules
// updatePrometheusConfig update prometheus-configs.yaml to include/exclude <component>.rules
// parameter enable when set to true to add new rules, when set to false to remove existing rules.
func UpdatePrometheusConfig(ctx context.Context, enable bool, component string) error {
func updatePrometheusConfig(ctx context.Context, enable bool, component string) error {
l := logf.FromContext(ctx)

// create a struct to mock poremtheus.yml
Expand Down Expand Up @@ -123,3 +132,15 @@ func UpdatePrometheusConfig(ctx context.Context, enable bool, component string)

return err
}

func isComponentReady(ctx context.Context, cli *odhcli.Client, obj common.PlatformObject) (bool, error) {
err := cli.Client.Get(ctx, client.ObjectKeyFromObject(obj), obj)
switch {
case k8serr.IsNotFound(err):
return false, nil
case err != nil:
return false, fmt.Errorf("failed to get component instance: %w", err)
default:
return meta.IsStatusConditionTrue(obj.GetStatus().Conditions, status.ConditionTypeReady), nil
}
}
2 changes: 1 addition & 1 deletion controllers/services/monitoring/monitoring_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func NewServiceReconciler(ctx context.Context, mgr ctrl.Manager) error {
// or services.platform.opendatahub.io/part-of set to the current owner
//
Watches(&dscv1.DataScienceCluster{}, reconciler.WithEventHandler(handlers.ToNamed(serviceApi.MonitoringInstanceName)),
reconciler.WithPredicates(resources.DSCSpecUpdatePredicate)).
reconciler.WithPredicates(resources.DSCComponentUpdatePredicate)).
// actions
WithAction(initialize).
WithAction(updatePrometheusConfigMap).
Expand Down
67 changes: 38 additions & 29 deletions controllers/services/monitoring/monitoring_controller_actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ import (
"errors"
"fmt"

operatorv1 "github.com/openshift/api/operator/v1"
appsv1 "k8s.io/api/apps/v1"
k8serr "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"

Expand Down Expand Up @@ -58,6 +59,9 @@ func initialize(ctx context.Context, rr *odhtypes.ReconciliationRequest) error {
return nil
}

// if DSC has component as Removed, we remove component's Prom Rules.
// only when DSC has component as Managed and component CR is in "Ready" state, we add rules to Prom Rules.
// all other cases, we do not change Prom rules for component.
func updatePrometheusConfigMap(ctx context.Context, rr *odhtypes.ReconciliationRequest) error {
// Map component names to their rule prefixes
dscList := &dscv1.DataScienceClusterList{}
Expand All @@ -73,44 +77,44 @@ func updatePrometheusConfigMap(ctx context.Context, rr *odhtypes.ReconciliationR

dsc := &dscList.Items[0]

err := cr.ForEach(func(ch cr.ComponentHandler) error {
var enabled bool
return cr.ForEach(func(ch cr.ComponentHandler) error {
ci := ch.NewCRObject(dsc)
// read the component instance to get tha actual status
err := rr.Client.Get(ctx, client.ObjectKeyFromObject(ci), ci)
switch {
case err != nil:
enabled = false
if !k8serr.IsNotFound(err) {
return fmt.Errorf("error getting component state: component=%s, enabled=%t, error=%w", ch.GetName(), enabled, err)
ms := ch.GetManagementState(dsc) // check for modelcontroller with dependency is done in its GetManagementState()
switch ms {
case operatorv1.Removed: // remove
return updatePrometheusConfig(ctx, false, componentRules[ch.GetName()])
case operatorv1.Managed:
ready, err := isComponentReady(ctx, rr.Client, ci)
if err != nil {
return fmt.Errorf("failed to get component status %w", err)
}
default:
enabled = meta.IsStatusConditionTrue(ci.GetStatus().Conditions, status.ConditionTypeReady)
}

// Check for shared components
if ch.GetName() == componentApi.KserveComponentName || ch.GetName() == componentApi.ModelMeshServingComponentName {
if err := UpdatePrometheusConfig(ctx, enabled, componentRules[componentApi.ModelControllerComponentName]); err != nil {
return err
if !ready { // not ready, skip change on prom rules
return nil
}
// add
return updatePrometheusConfig(ctx, true, componentRules[ch.GetName()])
default:
return fmt.Errorf("unsuported management state %s", ms)
}

if err := UpdatePrometheusConfig(ctx, enabled, componentRules[ch.GetName()]); err != nil {
return err
}
return nil
})
if err != nil {
return err
}
return nil
}

func updateStatus(ctx context.Context, rr *odhtypes.ReconciliationRequest) error {
m, ok := rr.Instance.(*serviceApi.Monitoring)
if !ok {
return errors.New("instance is not of type *services.Monitoring")
}

// TODO: deprecate phase
m.Status.Phase = "NotReady"
// condition
nc := metav1.Condition{
Type: string(ReadyConditionType),
Status: metav1.ConditionFalse,
Reason: status.PhaseNotReady,
Message: "Prometheus deployment is not ready",
}

promDeployment := &appsv1.DeploymentList{}
err := rr.Client.List(
ctx,
Expand All @@ -128,10 +132,15 @@ func updateStatus(ctx context.Context, rr *odhtypes.ReconciliationRequest) error
}
}

m.Status.Phase = "NotReady"
if len(promDeployment.Items) == 1 && ready == 1 {
if len(promDeployment.Items) == ready {
// TODO: deprecate phase
m.Status.Phase = "Ready"
// condition
nc.Status = metav1.ConditionTrue
nc.Reason = status.ReconcileCompleted
nc.Message = status.ReconcileCompletedMessage
}
meta.SetStatusCondition(&m.Status.Conditions, nc)
m.Status.ObservedGeneration = m.GetObjectMeta().GetGeneration()

return nil
Expand Down
27 changes: 24 additions & 3 deletions pkg/controller/predicates/resources/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ var DSCDeletionPredicate = predicate.Funcs{
},
}

var DSCSpecUpdatePredicate = predicate.Funcs{
var DSCComponentUpdatePredicate = predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
oldDSC, ok := e.ObjectOld.(*dscv1.DataScienceCluster)
if !ok {
Expand All @@ -92,7 +92,28 @@ var DSCSpecUpdatePredicate = predicate.Funcs{
if !ok {
return false
}
// Compare components state
return !reflect.DeepEqual(oldDSC.Spec.Components, newDSC.Spec.Components)
// if .spec.components is changed, return true.
if !reflect.DeepEqual(oldDSC.Spec.Components, newDSC.Spec.Components) {
return true
}

// if new condition from component is added or removed, return true
oldConditions := oldDSC.Status.Conditions
newConditions := newDSC.Status.Conditions
if len(oldConditions) != len(newConditions) {
return true
}

// compare type one by one with their status if not equal return true
for _, nc := range newConditions {
for _, oc := range oldConditions {
if nc.Type == oc.Type {
if !reflect.DeepEqual(nc.Status, oc.Status) {
return true
}
}
}
}
return false
},
}
Loading

0 comments on commit 05c7947

Please sign in to comment.