Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: have pre-req retry upon check fail #574

Merged
merged 1 commit into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions config/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,12 @@ vars:
apiVersion: v1
fieldref:
fieldpath: data.MAX_CONCURRENT_RECONCILES
- name: DSPO_REQUEUE_TIME
objref:
kind: ConfigMap
name: dspo-parameters
apiVersion: v1
fieldref:
fieldpath: data.DSPO_REQUEUE_TIME
configurations:
- params.yaml
1 change: 1 addition & 0 deletions config/base/params.env
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ IMAGES_MARIADB=registry.redhat.io/rhel8/mariadb-103@sha256:b3a6f3fecc2629b61a894
IMAGES_OAUTHPROXY=registry.redhat.io/openshift4/ose-oauth-proxy@sha256:ab112105ac37352a2a4916a39d6736f5db6ab4c29bad4467de8d613e80e9bb33
ZAP_LOG_LEVEL=info
MAX_CONCURRENT_RECONCILES=10
DSPO_REQUEUE_TIME=20s
2 changes: 2 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ spec:
value: $(ZAP_LOG_LEVEL)
- name: MAX_CONCURRENT_RECONCILES
value: $(MAX_CONCURRENT_RECONCILES)
- name: REQUEUE_TIME
value: $(REQUEUE_TIME)
securityContext:
allowPrivilegeEscalation: false
capabilities:
Expand Down
12 changes: 11 additions & 1 deletion controllers/config/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ const (
ObjectStorageAccessKey = "accesskey"
ObjectStorageSecretKey = "secretkey"

MlmdGrpcPort = "8080"
MlmdGrpcPort = "8080"
RequeueTimeConfigName = "DSPO.RequeueTime"
)

// DSPO Config File Paths
Expand Down Expand Up @@ -113,6 +114,8 @@ const DefaultObjStoreConnectionTimeout = time.Second * 15

const DefaultMaxConcurrentReconciles = 10

const DefaultRequeueTime = time.Second * 20

func GetConfigRequiredFields() []string {
return requiredFields
}
Expand Down Expand Up @@ -149,3 +152,10 @@ func GetStringConfigWithDefault(configName, value string) string {
}
return viper.GetString(configName)
}

func GetDurationConfigWithDefault(configName string, value time.Duration) time.Duration {
if !viper.IsSet(configName) {
return value
}
return viper.GetDuration(configName)
}
12 changes: 7 additions & 5 deletions controllers/dspipeline_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ package controllers
import (
"context"
"fmt"
"sigs.k8s.io/controller-runtime/pkg/controller"
"time"

"github.com/go-logr/logr"
mf "github.com/manifestival/manifestival"
dspav1alpha1 "github.com/opendatahub-io/data-science-pipelines-operator/api/v1alpha1"
Expand All @@ -38,6 +35,7 @@ import (
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
Expand Down Expand Up @@ -172,7 +170,6 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
log.Error(err, "Encountered error when fetching DSPA")
return ctrl.Result{}, err
}

// FixMe: Hack for stubbing gvk during tests as these are not populated by test suite
// https://github.com/opendatahub-io/data-science-pipelines-operator/pull/7#discussion_r1102887037
// In production we expect these to be populated
Expand Down Expand Up @@ -207,10 +204,11 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
return ctrl.Result{}, nil
}

requeueTime := config.GetDurationConfigWithDefault(config.RequeueTimeConfigName, config.DefaultRequeueTime)
err = params.ExtractParams(ctx, dspa, r.Client, r.Log)
if err != nil {
log.Info(fmt.Sprintf("Encountered error when parsing CR: [%s]", err))
return ctrl.Result{Requeue: true, RequeueAfter: 2 * time.Minute}, nil
return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil
}

err = r.ReconcileDatabase(ctx, dspa, params)
Expand Down Expand Up @@ -291,6 +289,10 @@ func (r *DSPAReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
util.GetConditionByType(config.CrReady, conditions): CrReadyMetric,
}
r.PublishMetrics(dspa, metricsMap)
if !dspaPrereqsReady {
log.Info(fmt.Sprintf("Health check for Database or Object Store failed, retrying in %d seconds.", int(requeueTime.Seconds())))
return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil
}
return ctrl.Result{}, nil
}

Expand Down
Loading