Skip to content

Commit

Permalink
Add status checks for TopologyUpdater
Browse files Browse the repository at this point in the history
Signed-off-by: Carlos Eduardo Arango Gutierrez <carangog@redhat.com>
  • Loading branch information
ArangoGutierrez committed Feb 11, 2022
1 parent 03d4f49 commit fc74f2a
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 30 deletions.
38 changes: 35 additions & 3 deletions controllers/nodefeaturediscovery_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import (
// nfd is an NFD object that will be used to initialize the NFD operator
var nfd NFD

const finalizer = "foreground-deletion"

// NodeFeatureDiscoveryReconciler reconciles a NodeFeatureDiscovery object
type NodeFeatureDiscoveryReconciler struct {

Expand Down Expand Up @@ -153,7 +155,7 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl
// deletion timestamp pointer is not nil. A non-nil value indicates
// someone or something has triggered the deletion.
if instance.DeletionTimestamp != nil {
return r.finalizeNFDOperator(ctx, instance, finalizer)
return r.finalizeNFDOperand(ctx, instance, finalizer)
}

// If the finalizer doesn't exist, add it.
Expand Down Expand Up @@ -190,14 +192,14 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl
}

// Check the status of the NFD Operator cluster role
if rstatus, err := r.getClusterRoleConditions(ctx, instance); err != nil {
if rstatus, err := r.getMasterClusterRoleConditions(ctx, instance); err != nil {
return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, err.Error())
} else if rstatus.isDegraded {
return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, "nfd ClusterRole has been degraded")
}

// Check the status of the NFD Operator cluster role binding
if rstatus, err := r.getClusterRoleBindingConditions(ctx, instance); err != nil {
if rstatus, err := r.getMasterClusterRoleBindingConditions(ctx, instance); err != nil {
return r.updateDegradedCondition(instance, conditionFailedGettingNFDClusterRoleBinding, err.Error())
} else if rstatus.isDegraded {
return r.updateDegradedCondition(instance, conditionNFDClusterRoleBindingDegraded, "nfd ClusterRoleBinding has been degraded")
Expand Down Expand Up @@ -242,6 +244,36 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl
return r.updateDegradedCondition(instance, err.Error(), "nfd-master Daemonset has been degraded")
}

// Check if nfd-topology-updater is needed, if not, skip
if instance.Spec.TopologyUpdater {
	// Check the status of the NFD Operator TopologyUpdater Worker DaemonSet.
	// The isProgressing / isDegraded branches are only reached when err is
	// nil, so they must not dereference err; they pass an explicit reason
	// string instead of err.Error(), which would panic on a nil error.
	if rstatus, err := r.getTopologyUpdaterWorkerDaemonSetConditions(ctx, instance); err != nil {
		return r.updateDegradedCondition(instance, conditionNFDTopologyUpdaterDaemonSetDegraded, err.Error())
	} else if rstatus.isProgressing {
		// NOTE(review): no "progressing" reason constant is visible in this
		// change; promote this literal to a shared constant if one exists.
		return r.updateProgressingCondition(instance, "NFDTopologyUpdaterDaemonSetProgressing", "nfd-topology-updater Daemonset is progressing")
	} else if rstatus.isDegraded {
		return r.updateDegradedCondition(instance, conditionNFDTopologyUpdaterDaemonSetDegraded, "nfd-topology-updater Daemonset has been degraded")
	}
	// Check the status of the NFD Operator TopologyUpdater cluster role
	if rstatus, err := r.getTopologyUpdaterClusterRoleConditions(ctx, instance); err != nil {
		return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, err.Error())
	} else if rstatus.isDegraded {
		return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, "nfd-topology-updater ClusterRole has been degraded")
	}
	// Check the status of the NFD Operator TopologyUpdater cluster role binding
	if rstatus, err := r.getTopologyUpdaterClusterRoleBindingConditions(ctx, instance); err != nil {
		return r.updateDegradedCondition(instance, conditionFailedGettingNFDClusterRoleBinding, err.Error())
	} else if rstatus.isDegraded {
		return r.updateDegradedCondition(instance, conditionNFDClusterRoleBindingDegraded, "nfd-topology-updater ClusterRoleBinding has been degraded")
	}
	// Check the status of the NFD Operator TopologyUpdater ServiceAccount
	if rstatus, err := r.getTopologyUpdaterServiceAccountConditions(ctx, instance); err != nil {
		return r.updateDegradedCondition(instance, conditionFailedGettingNFDTopologyUpdaterServiceAccount, err.Error())
	} else if rstatus.isDegraded {
		return r.updateDegradedCondition(instance, conditionNFDTopologyUpdaterServiceAccountDegraded, "nfd-topology-updater service account has been degraded")
	}
}

// Get available conditions
conditions := r.getAvailableConditions()

Expand Down
6 changes: 3 additions & 3 deletions controllers/nodefeaturediscovery_finalizers.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright 2020-2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,8 +32,8 @@ var (
Timeout = time.Second * 30
)

// finalizeNFDOperator finalizes an NFD Operator instance
func (r *NodeFeatureDiscoveryReconciler) finalizeNFDOperator(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, finalizer string) (ctrl.Result, error) {
// finalizeNFDOperand finalizes an NFD Operand instance
func (r *NodeFeatureDiscoveryReconciler) finalizeNFDOperand(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, finalizer string) (ctrl.Result, error) {
// Attempt to delete all components. If it fails, return
// a warning letting users know the deletion failed, and
// then call the reconciler once more to see if the error
Expand Down
8 changes: 8 additions & 0 deletions controllers/nodefeaturediscovery_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime/serializer/json"
"k8s.io/klog/v2"
"k8s.io/kubectl/pkg/scheme"
Expand Down Expand Up @@ -175,6 +176,13 @@ func (r *NodeFeatureDiscoveryReconciler) getDaemonSet(ctx context.Context, names
return ds, err
}

// getConfigMap looks up the ConfigMap with the given name in the given
// namespace through the reconciler's client. The (possibly unpopulated)
// ConfigMap object is returned together with any error from the lookup.
func (r *NodeFeatureDiscoveryReconciler) getConfigMap(ctx context.Context, namespace string, name string) (*corev1.ConfigMap, error) {
	key := client.ObjectKey{Namespace: namespace, Name: name}
	configMap := new(corev1.ConfigMap)
	if err := r.Get(ctx, key, configMap); err != nil {
		return configMap, err
	}
	return configMap, nil
}

// getService gets one of the NFD Operator's Services
func (r *NodeFeatureDiscoveryReconciler) getService(ctx context.Context, namespace string, name string) (*corev1.Service, error) {
svc := &corev1.Service{}
Expand Down
91 changes: 67 additions & 24 deletions controllers/nodefeaturediscovery_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,26 +37,29 @@ const (

const (
// Resource is missing
conditionFailedGettingNFDWorkerConfig = "FailedGettingNFDWorkerConfig"
conditionFailedGettingNFDWorkerServiceAccount = "FailedGettingNFDWorkerServiceAccount"
conditionFailedGettingNFDMasterServiceAccount = "FailedGettingNFDMasterServiceAccount"
conditionFailedGettingNFDService = "FailedGettingNFDService"
conditionFailedGettingNFDWorkerDaemonSet = "FailedGettingNFDWorkerDaemonSet"
conditionFailedGettingNFDMasterDaemonSet = "FailedGettingNFDMasterDaemonSet"
conditionFailedGettingNFDRoleBinding = "FailedGettingNFDRoleBinding"
conditionFailedGettingNFDClusterRoleBinding = "FailedGettingNFDClusterRole"
conditionFailedGettingNFDWorkerConfig = "FailedGettingNFDWorkerConfig"
conditionFailedGettingNFDWorkerServiceAccount = "FailedGettingNFDWorkerServiceAccount"
conditionFailedGettingNFDTopologyUpdaterServiceAccount = "FailedGettingNFDTopologyUpdaterServiceAccount"
conditionFailedGettingNFDMasterServiceAccount = "FailedGettingNFDMasterServiceAccount"
conditionFailedGettingNFDService = "FailedGettingNFDService"
conditionFailedGettingNFDWorkerDaemonSet = "FailedGettingNFDWorkerDaemonSet"
conditionFailedGettingNFDMasterDaemonSet = "FailedGettingNFDMasterDaemonSet"
conditionFailedGettingNFDRoleBinding = "FailedGettingNFDRoleBinding"
conditionFailedGettingNFDClusterRoleBinding = "FailedGettingNFDClusterRole"

// Resource degraded
conditionNFDWorkerConfigDegraded = "NFDWorkerConfigResourceDegraded"
conditionNFDWorkerServiceAccountDegraded = "NFDWorkerServiceAccountDegraded"
conditionNFDMasterServiceAccountDegraded = "NFDMasterServiceAccountDegraded"
conditionNFDServiceDegraded = "NFDServiceDegraded"
conditionNFDWorkerDaemonSetDegraded = "NFDWorkerDaemonSetDegraded"
conditionNFDMasterDaemonSetDegraded = "NFDMasterDaemonSetDegraded"
conditionNFDRoleDegraded = "NFDRoleDegraded"
conditionNFDRoleBindingDegraded = "NFDRoleBindingDegraded"
conditionNFDClusterRoleDegraded = "NFDClusterRoleDegraded"
conditionNFDClusterRoleBindingDegraded = "NFDClusterRoleBindingDegraded"
conditionNFDWorkerConfigDegraded = "NFDWorkerConfigResourceDegraded"
conditionNFDWorkerServiceAccountDegraded = "NFDWorkerServiceAccountDegraded"
conditionNFDTopologyUpdaterServiceAccountDegraded = "NFDTopologyUpdaterServiceAccountDegraded"
conditionNFDMasterServiceAccountDegraded = "NFDMasterServiceAccountDegraded"
conditionNFDServiceDegraded = "NFDServiceDegraded"
conditionNFDWorkerDaemonSetDegraded = "NFDWorkerDaemonSetDegraded"
conditionNFDTopologyUpdaterDaemonSetDegraded = "NFDTopologyUpdaterDaemonSetDegraded"
conditionNFDMasterDaemonSetDegraded = "NFDMasterDaemonSetDegraded"
conditionNFDRoleDegraded = "NFDRoleDegraded"
conditionNFDRoleBindingDegraded = "NFDRoleBindingDegraded"
conditionNFDClusterRoleDegraded = "NFDClusterRoleDegraded"
conditionNFDClusterRoleBindingDegraded = "NFDClusterRoleBindingDegraded"

// Unknown errors. (Catch all)
errorNFDWorkerDaemonSetUnknown = "NFDWorkerDaemonSetCorrupted"
Expand Down Expand Up @@ -271,6 +274,12 @@ func (r *NodeFeatureDiscoveryReconciler) getWorkerDaemonSetConditions(ctx contex
return r.getDaemonSetConditions(ctx, instance, nfdWorkerApp)
}

// getTopologyUpdaterWorkerDaemonSetConditions is a wrapper around
// "getDaemonSetConditions" for the DaemonSet identified by
// nfdTopologyUpdaterApp.
func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterWorkerDaemonSetConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
	dsStatus, err := r.getDaemonSetConditions(ctx, instance, nfdTopologyUpdaterApp)
	return dsStatus, err
}

// getMasterDaemonSetConditions is a wrapper around "getDaemonSetConditions" for
// master DaemonSets
func (r *NodeFeatureDiscoveryReconciler) getMasterDaemonSetConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
Expand Down Expand Up @@ -433,14 +442,26 @@ func (r *NodeFeatureDiscoveryReconciler) getRoleBindingConditions(ctx context.Co
return status, nil
}

// getMasterClusterRoleConditions is a wrapper around
// "getClusterRoleConditions" for the ClusterRole identified by nfdMasterApp.
func (r *NodeFeatureDiscoveryReconciler) getMasterClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
	roleStatus, err := r.getClusterRoleConditions(ctx, instance, nfdMasterApp)
	return roleStatus, err
}

// getTopologyUpdaterClusterRoleConditions is a wrapper around
// "getClusterRoleConditions" for the ClusterRole identified by
// nfdTopologyUpdaterApp.
func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
	roleStatus, err := r.getClusterRoleConditions(ctx, instance, nfdTopologyUpdaterApp)
	return roleStatus, err
}

// getClusterRoleConditions gets the current status of a ClusterRole. If an error
// occurs, this function returns the corresponding error message
func (r *NodeFeatureDiscoveryReconciler) getClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
func (r *NodeFeatureDiscoveryReconciler) getClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, nfdAppName string) (Status, error) {
// Initialize status to 'Degraded'
status := initializeDegradedStatus()

// Get the existing ClusterRole from the reconciler
_, err := r.getClusterRole(ctx, instance.ObjectMeta.Namespace, nfdMasterApp)
_, err := r.getClusterRole(ctx, instance.ObjectMeta.Namespace, nfdAppName)

// If 'clusterRole' is nil, then it hasn't been (re)created yet
if err != nil {
Expand All @@ -454,14 +475,26 @@ func (r *NodeFeatureDiscoveryReconciler) getClusterRoleConditions(ctx context.Co
return status, nil
}

// getMasterClusterRoleBindingConditions is a wrapper around
// "getClusterRoleBindingConditions" for the ClusterRoleBinding identified by
// nfdMasterApp.
//
// It must delegate to getClusterRoleBindingConditions (not
// getServiceAccountConditions): the caller uses the result to report the
// ClusterRoleBinding's status, so checking the ServiceAccount here would
// hide a missing or degraded ClusterRoleBinding.
func (r *NodeFeatureDiscoveryReconciler) getMasterClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
	return r.getClusterRoleBindingConditions(ctx, instance, nfdMasterApp)
}

// getTopologyUpdaterClusterRoleBindingConditions is a wrapper around
// "getClusterRoleBindingConditions" for the ClusterRoleBinding identified by
// nfdTopologyUpdaterApp.
func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
	bindingStatus, err := r.getClusterRoleBindingConditions(ctx, instance, nfdTopologyUpdaterApp)
	return bindingStatus, err
}

// getClusterRoleBindingConditions gets the current status of a ClusterRoleBinding.
// If an error occurs, this function returns the corresponding error message
func (r *NodeFeatureDiscoveryReconciler) getClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
func (r *NodeFeatureDiscoveryReconciler) getClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, nfdAppName string) (Status, error) {
// Initialize status to 'Degraded'
status := initializeDegradedStatus()

// Get the existing ClusterRoleBinding from the reconciler
_, err := r.getClusterRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdMasterApp)
_, err := r.getClusterRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdAppName)

// If the error is not nil, then the ClusterRoleBinding hasn't been (re)created
// yet
Expand All @@ -482,6 +515,12 @@ func (r *NodeFeatureDiscoveryReconciler) getWorkerServiceAccountConditions(ctx c
return r.getServiceAccountConditions(ctx, instance, nfdWorkerApp)
}

// getTopologyUpdaterServiceAccountConditions is a wrapper around
// "getServiceAccountConditions" for the ServiceAccount identified by
// nfdTopologyUpdaterApp.
func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterServiceAccountConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
	saStatus, err := r.getServiceAccountConditions(ctx, instance, nfdTopologyUpdaterApp)
	return saStatus, err
}

// getMasterServiceAccountConditions is a wrapper around "getServiceAccountConditions" for
// master service account.
func (r *NodeFeatureDiscoveryReconciler) getMasterServiceAccountConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
Expand All @@ -499,10 +538,14 @@ func (r *NodeFeatureDiscoveryReconciler) getServiceAccountConditions(ctx context

// If the error is not nil, then the ServiceAccount hasn't been (re)created yet
if err != nil {
if nfdAppName == nfdWorkerApp {
switch nfdAppName {
case nfdWorkerApp:
return status, errors.New(conditionNFDWorkerServiceAccountDegraded)
case nfdMasterApp:
return status, errors.New(conditionNFDMasterServiceAccountDegraded)
case nfdTopologyUpdaterApp:
return status, errors.New(conditionNFDTopologyUpdaterServiceAccountDegraded)
}
return status, errors.New(conditionNFDMasterServiceAccountDegraded)
}

// Set the resource to available
Expand Down

0 comments on commit fc74f2a

Please sign in to comment.