From fd3aae9110a4857e71e81f8ab9774e13186d389f Mon Sep 17 00:00:00 2001 From: Humair Khan Date: Sat, 17 Feb 2024 13:52:37 -0500 Subject: [PATCH] feat: have pre-req retry upon check fail Signed-off-by: Humair Khan --- config/base/kustomization.yaml | 7 +++++++ config/base/params.env | 1 + config/manager/manager.yaml | 2 ++ controllers/config/defaults.go | 12 +++++++++++- controllers/dspipeline_controller.go | 12 +++++++----- 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/config/base/kustomization.yaml b/config/base/kustomization.yaml index 80749b52a..dd21e4b02 100644 --- a/config/base/kustomization.yaml +++ b/config/base/kustomization.yaml @@ -113,5 +113,12 @@ vars: apiVersion: v1 fieldref: fieldpath: data.MAX_CONCURRENT_RECONCILES + - name: DSPO_REQUEUE_TIME + objref: + kind: ConfigMap + name: dspo-parameters + apiVersion: v1 + fieldref: + fieldpath: data.DSPO_REQUEUE_TIME configurations: - params.yaml diff --git a/config/base/params.env b/config/base/params.env index 11fd0f860..550c65601 100644 --- a/config/base/params.env +++ b/config/base/params.env @@ -12,3 +12,4 @@ IMAGES_MARIADB=registry.redhat.io/rhel8/mariadb-103@sha256:b3a6f3fecc2629b61a894 IMAGES_OAUTHPROXY=registry.redhat.io/openshift4/ose-oauth-proxy@sha256:ab112105ac37352a2a4916a39d6736f5db6ab4c29bad4467de8d613e80e9bb33 ZAP_LOG_LEVEL=info MAX_CONCURRENT_RECONCILES=10 +DSPO_REQUEUE_TIME=20s diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 27b136259..bc68f8425 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -62,6 +62,8 @@ spec: value: $(ZAP_LOG_LEVEL) - name: MAX_CONCURRENT_RECONCILES value: $(MAX_CONCURRENT_RECONCILES) + - name: REQUEUE_TIME + value: $(DSPO_REQUEUE_TIME) securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/controllers/config/defaults.go b/controllers/config/defaults.go index b13ef3c78..a9dbea7f8 100644 --- a/controllers/config/defaults.go +++ b/controllers/config/defaults.go @@ -51,7 +51,8 @@ 
const ( ObjectStorageAccessKey = "accesskey" ObjectStorageSecretKey = "secretkey" - MlmdGrpcPort = "8080" + MlmdGrpcPort = "8080" + RequeueTimeConfigName = "DSPO.RequeueTime" ) // DSPO Config File Paths @@ -113,6 +114,8 @@ const DefaultObjStoreConnectionTimeout = time.Second * 15 const DefaultMaxConcurrentReconciles = 10 +const DefaultRequeueTime = time.Second * 20 + func GetConfigRequiredFields() []string { return requiredFields } @@ -149,3 +152,10 @@ func GetStringConfigWithDefault(configName, value string) string { } return viper.GetString(configName) } + +func GetDurationConfigWithDefault(configName string, value time.Duration) time.Duration { + if !viper.IsSet(configName) { + return value + } + return viper.GetDuration(configName) +} diff --git a/controllers/dspipeline_controller.go b/controllers/dspipeline_controller.go index d92275ab7..fd69dbee6 100644 --- a/controllers/dspipeline_controller.go +++ b/controllers/dspipeline_controller.go @@ -19,9 +19,6 @@ package controllers import ( "context" "fmt" - "sigs.k8s.io/controller-runtime/pkg/controller" - "time" - "github.com/go-logr/logr" mf "github.com/manifestival/manifestival" dspav1alpha1 "github.com/opendatahub-io/data-science-pipelines-operator/api/v1alpha1" @@ -38,6 +35,7 @@ import ( "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -172,7 +170,6 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
log.Error(err, "Encountered error when fetching DSPA") return ctrl.Result{}, err } - // FixMe: Hack for stubbing gvk during tests as these are not populated by test suite // https://github.com/opendatahub-io/data-science-pipelines-operator/pull/7#discussion_r1102887037 // In production we expect these to be populated @@ -207,10 +204,11 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. return ctrl.Result{}, nil } + requeueTime := config.GetDurationConfigWithDefault(config.RequeueTimeConfigName, config.DefaultRequeueTime) err = params.ExtractParams(ctx, dspa, r.Client, r.Log) if err != nil { log.Info(fmt.Sprintf("Encountered error when parsing CR: [%s]", err)) - return ctrl.Result{Requeue: true, RequeueAfter: 2 * time.Minute}, nil + return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil } err = r.ReconcileDatabase(ctx, dspa, params) @@ -291,6 +289,10 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. util.GetConditionByType(config.CrReady, conditions): CrReadyMetric, } r.PublishMetrics(dspa, metricsMap) + if !dspaPrereqsReady { + log.Info(fmt.Sprintf("Health check for Database or Object Store failed, retrying in %d seconds.", int(requeueTime.Seconds()))) + return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil + } return ctrl.Result{}, nil }