Skip to content

Commit

Permalink
Update Component rules in Prometheus
Browse files Browse the repository at this point in the history
  • Loading branch information
VaishnaviHire committed Jan 17, 2025
1 parent be4c788 commit bcdd7c8
Show file tree
Hide file tree
Showing 31 changed files with 1,274 additions and 79 deletions.
5 changes: 5 additions & 0 deletions Dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ RUN if [ "${USE_LOCAL}" != "true" ]; then \
./get_all_manifests.sh ${OVERWRITE_MANIFESTS}; \
fi

# Copy monitoring config
COPY config/monitoring/ /opt/manifests/monitoring
# Copy osd-configs
COPY config/osd-configs/ /opt/manifests/osd-configs

################################################################################
FROM registry.access.redhat.com/ubi8/go-toolset:$GOLANG_VERSION as builder
ARG CGO_ENABLED=1
Expand Down
46 changes: 46 additions & 0 deletions config/osd-configs/dedicated-admins-mgmt-role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# ClusterRole that grants full management access (create/read/update/delete)
# to the ConfigMaps, Secrets, ImageStreams, Builds and BuildConfigs.
# It is referenced by the dedicated-admins-mgmt-rolebinding RoleBinding.
#
# Only real Kubernetes API request verbs are listed. "edit" is the name of a
# default ClusterRole, not a verb, so listing it under `verbs` grants nothing.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: dedicated-admins-mgmt-role
rules:
  # Core API group ('' denotes the core group).
  - apiGroups:
      - ''
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
    resources:
      - configmaps
      - secrets
  # OpenShift image streams.
  - apiGroups:
      - image.openshift.io
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
    resources:
      - imagestreams
  # OpenShift builds and build configurations.
  - apiGroups:
      - build.openshift.io
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
    resources:
      - builds
      - buildconfigs
11 changes: 11 additions & 0 deletions config/osd-configs/dedicated-admins-mgmt-rolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Binds the dedicated-admins group to the dedicated-admins-mgmt-role ClusterRole.
# No namespace is set in this manifest; presumably it is supplied when the
# manifest is applied -- confirm against the code that deploys osd-configs.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: dedicated-admins-mgmt-rolebinding
# Who receives the permissions.
subjects:
  - kind: Group
    name: dedicated-admins
# Which role is granted.
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: dedicated-admins-mgmt-role
5 changes: 5 additions & 0 deletions config/osd-configs/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Kustomization for config/osd-configs: bundles the dedicated-admins
# ClusterRole together with the RoleBinding that references it.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - dedicated-admins-mgmt-role.yaml
  - dedicated-admins-mgmt-rolebinding.yaml
116 changes: 116 additions & 0 deletions config/partners/anaconda/base/anaconda-ce-validator-cron.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# CronJob that periodically validates the Anaconda CE key and records the result.
#
# What the container script does:
#   1. Reads the key from /etc/secret-volume/Anaconda_ce_key (the volume is backed
#      by the anaconda-ce-access secret).
#   2. Sends a curl request to repo.anaconda.cloud with the key embedded in the URL
#      and extracts the HTTP response code from curl's output.
#   3. On 200: applies the s2i-minimal-notebook-anaconda ImageStream, makes sure the
#      anaconda-ce-validation-result ConfigMap exists, then sets validation_result
#      to "true" and last_valid_time to the current time.
#   4. On 403: makes sure the ConfigMap exists and sets validation_result to "false".
#   5. On any other code: prints the code and the raw curl output, changes nothing.
#   6. Always exits 0, whatever the outcome.
#
# NOTE(review): failure() logs "disabling image" but never calls delete_imagestream,
#   which is defined and unused; IMAGESTREAM_NAME, BUILDCONFIG_NAME and
#   ANACONDA_VERSION are assigned but never read inside this script.
# NOTE(review): the script relies on the `function` keyword, `&>` and `==` inside
#   `[ ]`, which are bash extensions; presumably /bin/sh in this image is bash -- confirm.
# NOTE(review): `curl -IHEAD` looks like it is parsed as `-I` plus `-H EAD`;
#   `-I` alone already issues a HEAD request -- confirm the intent.
apiVersion: batch/v1
kind: CronJob
metadata:
name: anaconda-ce-periodic-validator
namespace: redhat-ods-applications
labels:
opendatahub.io/modified: "false"
spec:
# Standard cron syntax: once a day at 00:00.
schedule: "0 0 * * *"
concurrencyPolicy: "Replace"
startingDeadlineSeconds: 200
# The CronJob is created suspended; presumably another component un-suspends it
# once the Anaconda secret is available -- TODO confirm.
suspend: true
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
metadata:
labels:
parent: "anaconda-ce-periodic-validator"
spec:
# NOTE(review): `serviceAccount` is the deprecated alias of `serviceAccountName`
# in the Kubernetes PodSpec.
serviceAccount: "rhods-dashboard"
imagePullSecrets:
- name: addon-managed-odh-pullsecret
containers:
- name: anaconda-ce-validator
image: registry.redhat.io/openshift4/ose-cli@sha256:75bf9b911b6481dcf29f7942240d1555adaa607eec7fc61bedb7f624f87c36d4
# The whole validation script is passed to `/bin/sh -c` as one folded scalar.
command:
- /bin/sh
- -c
- >
#!/bin/sh
IMAGESTREAM_NAME='s2i-minimal-notebook-anaconda'
CONFIGMAP_NAME='anaconda-ce-validation-result'
BUILDCONFIG_NAME='s2i-minimal-notebook-anaconda'
ANACONDA_VERSION='v0.2.2-anaconda'
function generate_imagestream() {
echo '{"apiVersion":"image.openshift.io/v1","kind":"ImageStream","metadata":{"annotations":{"opendatahub.io/notebook-image-order":"10","opendatahub.io/notebook-image-desc":"Notebook with Anaconda CE tools instead of pip.","opendatahub.io/notebook-image-name":"Anaconda Commercial Edition","opendatahub.io/notebook-image-url":"https://github.com/red-hat-data-services/notebooks"},"labels":{"component.opendatahub.io/name":"jupyterhub","opendatahub.io/modified":"false","opendatahub.io/notebook-image":"true"},"name":"s2i-minimal-notebook-anaconda"},"spec":{"lookupPolicy":{"local":true},"tags":[{"name":"2023.1","annotations":{"opendatahub.io/default-image":"true","opendatahub.io/notebook-python-dependencies":"[{\"name\":\"JupyterLab\",\"version\": \"3.5\"}, {\"name\": \"Notebook\",\"version\": \"6.5\"}]","opendatahub.io/notebook-software":"[{\"name\":\"Python\",\"version\":\"v3.8\"}]","opendatahub.io/workbench-image-recommended":"true","openshift.io/imported-from":"quay.io/modh/odh-anaconda-notebook"},"from":{"kind":"DockerImage","name":"quay.io/modh/odh-anaconda-notebook@sha256:380c07bf79f5ec7d22441cde276c50b5eb2a459485cde05087837639a566ae3d"},"generation":2,"importPolicy":{"importMode":"Legacy"},"referencePolicy":{"type":"Local"}}]}}'
}
function create_imagestream() {
generate_imagestream | oc apply -f-
}
function delete_imagestream() {
generate_imagestream | oc delete -f-
}
function get_variable() {
cat "/etc/secret-volume/${1}"
}
function verify_configmap_exists() {
if ! oc get configmap "${CONFIGMAP_NAME}" &>/dev/null; then
echo "Result ConfigMap doesn't exist, creating"
oc create configmap "${CONFIGMAP_NAME}" --from-literal validation_result="false"
fi
}
function write_configmap_value() {
oc patch configmap "${CONFIGMAP_NAME}" -p '"data": { "validation_result": "'${1}'" }'
}
function write_last_valid_time() {
oc patch configmap "${CONFIGMAP_NAME}" -p '"data": { "last_valid_time": "'$(date -Is)'" }'
}
function success() {
echo "Validation succeeded, enabling image"
create_imagestream
verify_configmap_exists
write_configmap_value true
write_last_valid_time
}
function failure() {
echo "Validation failed, disabling image"
verify_configmap_exists
write_configmap_value false
}
CURL_RESULT=$(curl -w 'RESP_CODE:%{response_code}' -IHEAD "https://repo.anaconda.cloud/repo/t/$(get_variable Anaconda_ce_key)/main/noarch/repodata.json" 2>/dev/null)
CURL_CODE=$(echo "${CURL_RESULT}" | grep -o 'RESP_CODE:[1-5][0-9][0-9]'| cut -d':' -f2)
echo "Validation result: ${CURL_CODE}"
if [ "${CURL_CODE}" == 200 ]; then
success
elif [ "${CURL_CODE}" == 403 ]; then
failure
else
echo "Return code ${CURL_CODE} from validation check, possibly upstream error. Exiting."
echo "Result from curl:"
echo "${CURL_RESULT}"
fi
exit 0
volumeMounts:
- name: secret-volume
mountPath: /etc/secret-volume
readOnly: true
resources:
limits:
cpu: 100m
memory: 256Mi
requests:
cpu: 100m
memory: 256Mi
volumes:
- name: secret-volume
secret:
secretName: anaconda-ce-access
restartPolicy: Never
8 changes: 8 additions & 0 deletions config/partners/anaconda/base/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Kustomization for the Anaconda CE partner component: pulls in the validator
# CronJob and applies the labels below to it.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# NOTE(review): commonLabels is deprecated in newer kustomize releases in favour
# of `labels`; confirm which kustomize version consumes this file before migrating.
commonLabels:
  opendatahub.io/component: "true"
  component.opendatahub.io/name: anaconda-ce

resources:
  - anaconda-ce-validator-cron.yaml
45 changes: 45 additions & 0 deletions controllers/dscinitialization/dscinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
networkingv1 "k8s.io/api/networking/v1"
rbacv1 "k8s.io/api/rbac/v1"
k8serr "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
Expand Down Expand Up @@ -252,6 +253,9 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re
}
if instance.Spec.Monitoring.ManagementState == operatorv1.Managed {
log.Info("Monitoring enabled in initialization stage", "cluster", "Managed Service Mode")
if err := r.configureMonitoring(ctx, instance); err != nil {
return ctrl.Result{}, err
}
err := r.configureManagedMonitoring(ctx, instance, "init")
if err != nil {
return reconcile.Result{}, err
Expand Down Expand Up @@ -346,6 +350,8 @@ func (r *DSCInitializationReconciler) SetupWithManager(ctx context.Context, mgr
Owns(
&routev1.Route{},
builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, predicate.LabelChangedPredicate{}))).
Owns(&corev1.PersistentVolumeClaim{},
builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, predicate.LabelChangedPredicate{}))).
Watches(
&dscv1.DataScienceCluster{},
handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, a client.Object) []reconcile.Request {
Expand Down Expand Up @@ -463,3 +469,42 @@ func (r *DSCInitializationReconciler) watchAuthResource(ctx context.Context, a c

return nil
}

// configureMonitoring reconciles the singleton Monitoring CR for dsci.
//
// When dsci.Spec.Monitoring.ManagementState is Managed the CR is created and an
// AlreadyExists error is tolerated. For any other state the CR is deleted and a
// NotFound error is tolerated. Every other API error is returned unchanged.
func (r *DSCInitializationReconciler) configureMonitoring(ctx context.Context, dsci *dsciv1.DSCInitialization) error {
	// Record the DSCInitialization as owner of the Monitoring CR.
	// NOTE(review): Kind is taken from dsci.Kind; confirm TypeMeta is populated on
	// the instance handed in here, otherwise the owner reference carries an empty Kind.
	ownerRef := metav1.OwnerReference{
		APIVersion: dsciv1.GroupVersion.String(),
		Kind:       dsci.Kind,
		Name:       dsci.Name,
		UID:        dsci.UID,
	}

	monitoringCR := &serviceApi.Monitoring{
		TypeMeta: metav1.TypeMeta{
			Kind:       serviceApi.MonitoringKind,
			APIVersion: serviceApi.GroupVersion.String(),
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:            serviceApi.MonitoringInstanceName,
			OwnerReferences: []metav1.OwnerReference{ownerRef},
		},
		Spec: serviceApi.MonitoringSpec{
			MonitoringCommonSpec: serviceApi.MonitoringCommonSpec{
				Namespace: dsci.Spec.Monitoring.Namespace,
			},
		},
	}

	// Not managed: make sure the CR is gone; a missing CR is already the desired state.
	if dsci.Spec.Monitoring.ManagementState != operatorv1.Managed {
		if delErr := r.Delete(ctx, client.Object(monitoringCR)); delErr != nil && !k8serr.IsNotFound(delErr) {
			return delErr
		}
		return nil
	}

	// Managed: make sure the CR exists; an existing CR is left untouched.
	if createErr := r.Create(ctx, client.Object(monitoringCR)); createErr != nil && !k8serr.IsAlreadyExists(createErr) {
		return createErr
	}
	return nil
}
2 changes: 2 additions & 0 deletions controllers/dscinitialization/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ func (r *DSCInitializationReconciler) configureManagedMonitoring(ctx context.Con
"(.*)-(.*)odh-model-controller(.*).rules": "",
"(.*)-(.*)ray(.*).rules": "",
"(.*)-(.*)trustyai(.*).rules": "",
"(.*)-(.*)kueue(.*).rules": "",
"(.*)-(.*)trainingoperator(.*).rules": "",
})
if err != nil {
log.Error(err, "error to remove previous enabled component rules")
Expand Down
Loading

0 comments on commit bcdd7c8

Please sign in to comment.