diff --git a/controllers/nodefeaturediscovery_controller.go b/controllers/nodefeaturediscovery_controller.go index c8d04909..ace5e719 100644 --- a/controllers/nodefeaturediscovery_controller.go +++ b/controllers/nodefeaturediscovery_controller.go @@ -38,6 +38,8 @@ import ( // nfd is an NFD object that will be used to initialize the NFD operator var nfd NFD +const finalizer = "foreground-deletion" + // NodeFeatureDiscoveryReconciler reconciles a NodeFeatureDiscovery object type NodeFeatureDiscoveryReconciler struct { @@ -149,6 +151,18 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl return ctrl.Result{Requeue: true}, err } + // If the resources are to be deleted, first check to see if the + // deletion timestamp pointer is not nil. A non-nil value indicates + // someone or something has triggered the deletion. + if instance.DeletionTimestamp != nil { + return r.finalizeNFDOperand(ctx, instance, finalizer) + } + + // If the finalizer doesn't exist, add it. 
+ if !r.hasFinalizer(instance, finalizer) { + return r.addFinalizer(ctx, instance, finalizer) + } + klog.Info("Ready to apply components") nfd.init(r, instance) result, err := applyComponents() @@ -178,14 +192,14 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl } // Check the status of the NFD Operator cluster role - if rstatus, err := r.getClusterRoleConditions(ctx, instance); err != nil { + if rstatus, err := r.getMasterClusterRoleConditions(ctx, instance); err != nil { return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, err.Error()) } else if rstatus.isDegraded { return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, "nfd ClusterRole has been degraded") } // Check the status of the NFD Operator cluster role binding - if rstatus, err := r.getClusterRoleBindingConditions(ctx, instance); err != nil { + if rstatus, err := r.getMasterClusterRoleBindingConditions(ctx, instance); err != nil { return r.updateDegradedCondition(instance, conditionFailedGettingNFDClusterRoleBinding, err.Error()) } else if rstatus.isDegraded { return r.updateDegradedCondition(instance, conditionNFDClusterRoleBindingDegraded, "nfd ClusterRoleBinding has been degraded") @@ -230,6 +244,36 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl return r.updateDegradedCondition(instance, err.Error(), "nfd-master Daemonset has been degraded") } + // Check if nfd-topology-updater is needed, if not, skip + if instance.Spec.TopologyUpdater { + // Check the status of the nfd-topology-updater DaemonSet; err is nil in the else-if branches, so they pass fixed reasons + if rstatus, err := r.getTopologyUpdaterDaemonSetConditions(ctx, instance); err != nil { + return r.updateDegradedCondition(instance, conditionNFDTopologyUpdaterDaemonSetDegraded, err.Error()) + } else if rstatus.isProgressing { + return r.updateProgressingCondition(instance, "NFDTopologyUpdaterDaemonSetProgressing", "nfd-topology-updater Daemonset is progressing") + } else if rstatus.isDegraded { + return
r.updateDegradedCondition(instance, conditionNFDTopologyUpdaterDaemonSetDegraded, "nfd-topology-updater Daemonset has been degraded") + } + // Check the status of the NFD Operator TopologyUpdater cluster role + if rstatus, err := r.getTopologyUpdaterClusterRoleConditions(ctx, instance); err != nil { + return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, err.Error()) + } else if rstatus.isDegraded { + return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, "nfd-topology-updater ClusterRole has been degraded") + } + // Check the status of the NFD Operator TopologyUpdater cluster role binding + if rstatus, err := r.getTopologyUpdaterClusterRoleBindingConditions(ctx, instance); err != nil { + return r.updateDegradedCondition(instance, conditionFailedGettingNFDClusterRoleBinding, err.Error()) + } else if rstatus.isDegraded { + return r.updateDegradedCondition(instance, conditionNFDClusterRoleBindingDegraded, "nfd-topology-updater ClusterRoleBinding has been degraded") + } + // Check the status of the NFD Operator TopologyUpdater ServiceAccount + if rstatus, err := r.getTopologyUpdaterServiceAccountConditions(ctx, instance); err != nil { + return r.updateDegradedCondition(instance, conditionFailedGettingNFDTopologyUpdaterServiceAccount, err.Error()) + } else if rstatus.isDegraded { + return r.updateDegradedCondition(instance, conditionNFDTopologyUpdaterServiceAccountDegraded, "nfd-topology-updater service account has been degraded") + } + } + // Get available conditions conditions := r.getAvailableConditions() diff --git a/controllers/nodefeaturediscovery_controls.go b/controllers/nodefeaturediscovery_controls.go index 83bedb15..2eaf6cb5 100644 --- a/controllers/nodefeaturediscovery_controls.go +++ b/controllers/nodefeaturediscovery_controls.go @@ -159,7 +159,6 @@ func ClusterRole(n NFD) (ResourceStatus, error) { // ClusterRoleBinding checks if a ClusterRoleBinding exists and creates one if it doesn't func ClusterRoleBinding(n NFD) (ResourceStatus, error) { -
// state represents the resource's 'control' function index state := n.idx @@ -210,7 +209,6 @@ func ClusterRoleBinding(n NFD) (ResourceStatus, error) { // Role checks if a Role exists and creates a Role if it doesn't func Role(n NFD) (ResourceStatus, error) { - // state represents the resource's 'control' function index state := n.idx @@ -262,7 +260,6 @@ func Role(n NFD) (ResourceStatus, error) { // RoleBinding checks if a RoleBinding exists and creates a RoleBinding if it doesn't func RoleBinding(n NFD) (ResourceStatus, error) { - // state represents the resource's 'control' function index state := n.idx @@ -315,7 +312,6 @@ func RoleBinding(n NFD) (ResourceStatus, error) { // ConfigMap checks if a ConfigMap exists and creates one if it doesn't func ConfigMap(n NFD) (ResourceStatus, error) { - // state represents the resource's 'control' function index state := n.idx @@ -372,7 +368,6 @@ func ConfigMap(n NFD) (ResourceStatus, error) { // DaemonSet checks the readiness of a DaemonSet and creates one if it doesn't exist func DaemonSet(n NFD) (ResourceStatus, error) { - // state represents the resource's 'control' function index state := n.idx @@ -479,7 +474,6 @@ func DaemonSet(n NFD) (ResourceStatus, error) { // Service checks if a Service exists and creates one if it doesn't exist func Service(n NFD) (ResourceStatus, error) { - // state represents the resource's 'control' function index state := n.idx diff --git a/controllers/nodefeaturediscovery_finalizers.go b/controllers/nodefeaturediscovery_finalizers.go new file mode 100644 index 00000000..d5306923 --- /dev/null +++ b/controllers/nodefeaturediscovery_finalizers.go @@ -0,0 +1,374 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package controllers + +import ( + "context" + "time" + + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + + nfdv1 "github.com/kubernetes-sigs/node-feature-discovery-operator/api/v1" +) + +var ( + RetryInterval = time.Second * 5 + Timeout = time.Second * 30 +) + +// finalizeNFDOperand finalizes an NFD Operand instance +func (r *NodeFeatureDiscoveryReconciler) finalizeNFDOperand(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, finalizer string) (ctrl.Result, error) { + klog.Info("Attempting to delete NFD operator components") + if err := r.deleteComponents(ctx, instance); err != nil { + klog.Error(err, "Failed to delete one or more components") + return ctrl.Result{}, err + } + + // Check if all components are deleted. If they're not, + // then call the reconciler but wait 10 seconds before + // checking again. + klog.Info("Deletion appears to have succeeded, but running a secondary check to ensure resources are cleaned up") + if r.doComponentsExist(ctx, instance) { + klog.Info("Some components still exist. Requeueing deletion request.") + return ctrl.Result{RequeueAfter: 10 * time.Second}, nil + } + + // If all components are deleted, then remove the finalizer + klog.Info("Secondary check passed. 
Removing finalizer if it exists.") + if r.hasFinalizer(instance, finalizer) { + r.removeFinalizer(instance, finalizer) + if err := r.Update(ctx, instance); err != nil { + if k8serrors.IsNotFound(err) { + return ctrl.Result{Requeue: false}, nil + } + klog.Info("Finalizer was found, but removing it was unsuccessful. Requeueing deletion request.") + return ctrl.Result{}, err + } + + klog.Info("Finalizer was found and successfully removed.") + return ctrl.Result{Requeue: false}, nil + } + + klog.Info("Finalizer does not exist, but resource deletion succesful.") + return ctrl.Result{Requeue: false}, nil +} + +// addFinalizer appends the finalizer, sets the Progressing conditions and persists the instance with r.Update. +func (r *NodeFeatureDiscoveryReconciler) addFinalizer(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, finalizer string) (ctrl.Result, error) { + instance.Finalizers = append(instance.Finalizers, finalizer) + instance.Status.Conditions = r.getProgressingConditions("DeploymentStarting", "Deployment is starting") + if err := r.Update(ctx, instance); err != nil { + return ctrl.Result{}, err + } + + // we exit reconcile loop because we will have additional update reconcile + return ctrl.Result{Requeue: false}, nil +} + +// hasFinalizer reports whether instance.Finalizers contains the given finalizer. +func (r *NodeFeatureDiscoveryReconciler) hasFinalizer(instance *nfdv1.NodeFeatureDiscovery, finalizer string) bool { + for _, f := range instance.Finalizers { + if f == finalizer { + return true + } + } + return false +} + +// removeFinalizer drops every occurrence of finalizer from instance.Finalizers in memory; it does not call r.Update. +func (r *NodeFeatureDiscoveryReconciler) removeFinalizer(instance *nfdv1.NodeFeatureDiscovery, finalizer string) { + var finalizers []string + + for _, f := range instance.Finalizers { + if f != finalizer { + finalizers = append(finalizers, f) + } + } + + instance.Finalizers = finalizers +} + +// deleteComponents deletes all of the NFD operand components.
+func (r *NodeFeatureDiscoveryReconciler) deleteComponents(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) error { + // Update CRD status to notify instance is undergoing deletion + _, _ = r.updateProgressingCondition(instance, "finalizers", "Foreground-Deletion") + + // If NFD-Topology-Updater was requested + if instance.Spec.TopologyUpdater { + // Attempt to delete Topology DaemonSet + err := wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteDaemonSet(ctx, instance.ObjectMeta.Namespace, nfdTopologyUpdaterApp) + if err != nil { + return false, interpretError(err, "Topology DaemonSet") + } + klog.Info("nfd-topology-updater DaemonSet resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + // Attempt to delete the ClusterRole + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteClusterRole(ctx, instance.ObjectMeta.Namespace, nfdTopologyUpdaterApp) + if err != nil { + return false, interpretError(err, "nfd-Topology-Updater ClusterRole") + } + klog.Info("nfd-Topology-Updater ClusterRole resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + // Attempt to delete the ClusterRoleBinding + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteClusterRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdTopologyUpdaterApp) + if err != nil { + return false, interpretError(err, "ClusterRoleBinding") + } + klog.Info("ClusterRoleBinding ", nfdTopologyUpdaterApp, " resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + // Attempt to delete the Worker ServiceAccount + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteServiceAccount(ctx, instance.ObjectMeta.Namespace, nfdTopologyUpdaterApp) + if err != nil { + return false, interpretError(err, "worker ServiceAccount") + } + klog.Info("nfd-Topology-Updater ServiceAccount resource has 
been deleted.") + return true, nil + }) + if err != nil { + return err + } + } + + // Attempt to delete worker DaemonSet + err := wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteDaemonSet(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp) + if err != nil { + return false, interpretError(err, "worker DaemonSet") + } + klog.Info("Worker DaemonSet resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + + // Attempt to delete master DaemonSet + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteDaemonSet(ctx, instance.ObjectMeta.Namespace, nfdMasterApp) + if err != nil { + return false, interpretError(err, "master DaemonSet") + } + klog.Info("Master DaemonSet resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + + // Attempt to delete the Service + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteService(ctx, instance.ObjectMeta.Namespace, nfdMasterApp) + if err != nil { + return false, interpretError(err, "nfd-master Service") + } + klog.Info("nfd-master Service resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + + // Attempt to delete the Role + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteRole(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp) + if err != nil { + return false, interpretError(err, "nfd-worker Role") + } + klog.Info("nfd-worker Role resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + + // Attempt to delete the ClusterRole + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteClusterRole(ctx, instance.ObjectMeta.Namespace, nfdMasterApp) + if err != nil { + return false, interpretError(err, "nfd-master ClusterRole") + } + klog.Info("nfd-master ClusterRole resource has been deleted.") + return true, nil + }) + if err != nil { + 
return err + } + + // Attempt to delete the RoleBinding + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp) + if err != nil { + return false, interpretError(err, "nfd-worker RoleBinding") + } + klog.Info("nfd-worker RoleBinding resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + + // Attempt to delete the ClusterRoleBinding + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteClusterRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdMasterApp) + if err != nil { + return false, interpretError(err, "ClusterRoleBinding") + } + klog.Info("ClusterRoleBinding ", nfdMasterApp, " resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + + // Attempt to delete the Worker ServiceAccount + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteServiceAccount(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp) + if err != nil { + return false, interpretError(err, "worker ServiceAccount") + } + klog.Info("Worker ServiceAccount resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + + // Attempt to delete the Master ServiceAccount + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteServiceAccount(ctx, instance.ObjectMeta.Namespace, nfdMasterApp) + if err != nil { + return false, interpretError(err, "master ServiceAccount") + } + klog.Info("Master ServiceAccount resource has been deleted.") + return true, nil + }) + if err != nil { + return err + } + + // Attempt to delete the Worker config map + err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) { + err = r.deleteConfigMap(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp) + if err != nil { + return false, interpretError(err, "nfd-worker config map") + } + klog.Info("nfd-worker config map resource has been 
deleted.") + return true, nil + }) + if err != nil { + return err + } + + return nil +} + +// doComponentsExist checks to see if any of the operand components exist. +func (r *NodeFeatureDiscoveryReconciler) doComponentsExist(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) bool { + // Attempt to find the worker DaemonSet + if _, err := r.getDaemonSet(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp); !k8serrors.IsNotFound(err) { + return true + } + + // Attempt to find the master DaemonSet + if _, err := r.getDaemonSet(ctx, instance.ObjectMeta.Namespace, nfdMasterApp); !k8serrors.IsNotFound(err) { + return true + } + + // Attempt to get the Service + if _, err := r.getService(ctx, instance.ObjectMeta.Namespace, nfdMasterApp); !k8serrors.IsNotFound(err) { + return true + } + + // Attempt to get the Role + if _, err := r.getRole(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp); !k8serrors.IsNotFound(err) { + return true + } + + // Attempt to get the ClusterRole + if _, err := r.getClusterRole(ctx, instance.ObjectMeta.Namespace, nfdMasterApp); !k8serrors.IsNotFound(err) { + return true + } + + // Attempt to get the RoleBinding + if _, err := r.getRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp); !k8serrors.IsNotFound(err) { + return true + } + + // Attempt to get the ClusterRoleBinding + if _, err := r.getClusterRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdMasterApp); !k8serrors.IsNotFound(err) { + return true + } + + // Attempt to get the Worker ServiceAccount + if _, err := r.getServiceAccount(ctx, instance.ObjectMeta.Namespace, nfdWorkerApp); !k8serrors.IsNotFound(err) { + return true + } + + // Attempt to get the Master ServiceAccount + if _, err := r.getServiceAccount(ctx, instance.ObjectMeta.Namespace, nfdMasterApp); !k8serrors.IsNotFound(err) { + return true + } + + if instance.Spec.TopologyUpdater { + // Attempt to find the topology-updater DaemonSet + if _, err := r.getDaemonSet(ctx, instance.ObjectMeta.Namespace, 
nfdTopologyUpdaterApp); !k8serrors.IsNotFound(err) { + return true + } + // Attempt to get the Worker ServiceAccount + if _, err := r.getServiceAccount(ctx, instance.ObjectMeta.Namespace, nfdTopologyUpdaterApp); !k8serrors.IsNotFound(err) { + return true + } + // Attempt to get the ClusterRole + if _, err := r.getClusterRole(ctx, instance.ObjectMeta.Namespace, nfdTopologyUpdaterApp); !k8serrors.IsNotFound(err) { + return true + } + // Attempt to get the ClusterRoleBinding + if _, err := r.getClusterRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdTopologyUpdaterApp); !k8serrors.IsNotFound(err) { + return true + } + } + + return false +} + +// interpretError determines if a resource has already been +// (successfully) deleted +func interpretError(err error, resourceName string) error { + if k8serrors.IsNotFound(err) { + klog.Info("Resource ", resourceName, " has been deleted.") + return nil + } + return err +} diff --git a/controllers/nodefeaturediscovery_resources.go b/controllers/nodefeaturediscovery_resources.go index a1c5672b..b28be354 100644 --- a/controllers/nodefeaturediscovery_resources.go +++ b/controllers/nodefeaturediscovery_resources.go @@ -27,6 +27,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime/serializer/json" "k8s.io/klog/v2" "k8s.io/kubectl/pkg/scheme" @@ -53,7 +54,6 @@ type Resources struct { // filePathWalkDir finds all non-directory files under the given path recursively, // i.e. 
including its subdirectories func filePathWalkDir(root string) ([]string, error) { - var files []string err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { if !info.IsDir() { @@ -66,7 +66,6 @@ func filePathWalkDir(root string) ([]string, error) { // getAssetsFrom recursively reads all manifest files under a given path func getAssetsFrom(path string) []assetsFromFile { - // All assets (manifests) as raw data manifests := []assetsFromFile{} assets := path @@ -91,7 +90,6 @@ func getAssetsFrom(path string) []assetsFromFile { } func addResourcesControls(path string) (Resources, controlFunc) { - // Information about the manifest res := Resources{} @@ -161,51 +159,186 @@ func panicIfError(err error) { } } -// getServiceAccount gets one of the NFD Operator's ServiceAccounts +// getServiceAccount gets one of the NFD Operand's ServiceAccounts func (r *NodeFeatureDiscoveryReconciler) getServiceAccount(ctx context.Context, namespace string, name string) (*corev1.ServiceAccount, error) { sa := &corev1.ServiceAccount{} err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, sa) return sa, err } -// getDaemonSet gets one of the NFD Operator's DaemonSets +// getDaemonSet gets one of the NFD Operand's DaemonSets func (r *NodeFeatureDiscoveryReconciler) getDaemonSet(ctx context.Context, namespace string, name string) (*appsv1.DaemonSet, error) { ds := &appsv1.DaemonSet{} err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, ds) return ds, err } -// getService gets one of the NFD Operator's Services +// getConfigMap gets one of the NFD Operand's ConfigMap +func (r *NodeFeatureDiscoveryReconciler) getConfigMap(ctx context.Context, namespace string, name string) (*corev1.ConfigMap, error) { + cm := &corev1.ConfigMap{} + err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, cm) + return cm, err +} + +// getService gets one of the NFD Operand's Services func (r *NodeFeatureDiscoveryReconciler) getService(ctx 
context.Context, namespace string, name string) (*corev1.Service, error) { svc := &corev1.Service{} err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, svc) return svc, err } -// getRole gets one of the NFD Operator's Roles +// getRole gets one of the NFD Operand's Roles func (r *NodeFeatureDiscoveryReconciler) getRole(ctx context.Context, namespace string, name string) (*rbacv1.Role, error) { role := &rbacv1.Role{} err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, role) return role, err } -// getRoleBinding gets one of the NFD Operator's RoleBindings +// getRoleBinding gets one of the NFD Operand's RoleBindings func (r *NodeFeatureDiscoveryReconciler) getRoleBinding(ctx context.Context, namespace string, name string) (*rbacv1.RoleBinding, error) { rb := &rbacv1.RoleBinding{} err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, rb) return rb, err } -// getClusterRole gets one of the NFD Operator's ClusterRoles +// getClusterRole gets one of the NFD Operand's ClusterRoles func (r *NodeFeatureDiscoveryReconciler) getClusterRole(ctx context.Context, namespace string, name string) (*rbacv1.ClusterRole, error) { cr := &rbacv1.ClusterRole{} err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, cr) return cr, err } -// getClusterRoleBinding gets one of the NFD Operator's ClusterRoleBindings +// getClusterRoleBinding gets one of the NFD Operand's ClusterRoleBindings func (r *NodeFeatureDiscoveryReconciler) getClusterRoleBinding(ctx context.Context, namespace string, name string) (*rbacv1.ClusterRoleBinding, error) { crb := &rbacv1.ClusterRoleBinding{} err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, crb) return crb, err } + +// deleteServiceAccount deletes one of the NFD Operand's ServiceAccounts +func (r *NodeFeatureDiscoveryReconciler) deleteServiceAccount(ctx context.Context, namespace string, name string) error { + sa, err := r.getServiceAccount(ctx, namespace, name) + + // Do 
not return an error if the object has already been deleted + if k8serrors.IsNotFound(err) { + return nil + } + + if err != nil { + return err + } + + return r.Delete(ctx, sa) +} + +// deleteConfigMap deletes the NFD Operand ConfigMap +func (r *NodeFeatureDiscoveryReconciler) deleteConfigMap(ctx context.Context, namespace string, name string) error { + cm, err := r.getConfigMap(ctx, namespace, name) + + // Do not return an error if the object has already been deleted + if k8serrors.IsNotFound(err) { + return nil + } + + if err != nil { + return err + } + + return r.Delete(ctx, cm) +} + +// deleteDaemonSet deletes one of the NFD Operand's DaemonSets +func (r *NodeFeatureDiscoveryReconciler) deleteDaemonSet(ctx context.Context, namespace string, name string) error { + ds, err := r.getDaemonSet(ctx, namespace, name) + + // Do not return an error if the object has already been deleted + if k8serrors.IsNotFound(err) { + return nil + } + + if err != nil { + return err + } + + return r.Delete(ctx, ds) +} + +// deleteService deletes the NFD Operand's Service +func (r *NodeFeatureDiscoveryReconciler) deleteService(ctx context.Context, namespace string, name string) error { + svc, err := r.getService(ctx, namespace, name) + + // Do not return an error if the object has already been deleted + if k8serrors.IsNotFound(err) { + return nil + } + + if err != nil { + return err + } + + return r.Delete(ctx, svc) +} + +// deleteRole deletes one of the NFD Operand's Roles +func (r *NodeFeatureDiscoveryReconciler) deleteRole(ctx context.Context, namespace string, name string) error { + role, err := r.getRole(ctx, namespace, name) + + // Do not return an error if the object has already been deleted + if k8serrors.IsNotFound(err) { + return nil + } + + if err != nil { + return err + } + + return r.Delete(ctx, role) +} + +// deleteRoleBinding deletes one of the NFD Operand's RoleBindings +func (r *NodeFeatureDiscoveryReconciler) deleteRoleBinding(ctx
context.Context, namespace string, name string) error { + rb, err := r.getRoleBinding(ctx, namespace, name) + + // Do not return an error if the object has already been deleted + if k8serrors.IsNotFound(err) { + return nil + } + + if err != nil { + return err + } + + return r.Delete(ctx, rb) +} + +// deleteClusterRole deletes one of the NFD Operand's ClusterRoles +func (r *NodeFeatureDiscoveryReconciler) deleteClusterRole(ctx context.Context, namespace string, name string) error { + cr, err := r.getClusterRole(ctx, namespace, name) + + // Do not return an error if the object has already been deleted + if k8serrors.IsNotFound(err) { + return nil + } + + if err != nil { + return err + } + + return r.Delete(ctx, cr) +} + +// deleteClusterRoleBinding deletes one of the NFD Operand's ClusterRoleBindings +func (r *NodeFeatureDiscoveryReconciler) deleteClusterRoleBinding(ctx context.Context, namespace string, name string) error { + crb, err := r.getClusterRoleBinding(ctx, namespace, name) + + // Do not return an error if the object has already been deleted + if k8serrors.IsNotFound(err) { + return nil + } + + if err != nil { + return err + } + + return r.Delete(ctx, crb) +} diff --git a/controllers/nodefeaturediscovery_state.go b/controllers/nodefeaturediscovery_state.go index 8bc32705..995532ed 100644 --- a/controllers/nodefeaturediscovery_state.go +++ b/controllers/nodefeaturediscovery_state.go @@ -24,7 +24,6 @@ import ( // NFD holds the needed information to watch from the Controller. type NFD struct { - // resources contains information about NFD's resources. resources []Resources @@ -71,7 +70,6 @@ func (n *NFD) init( // one set of resource control functions n order to determine if the related // resources are ready.
func (n *NFD) step() error { - for _, fs := range n.controls[n.idx] { stat, err := fs(*n) if err != nil { diff --git a/controllers/nodefeaturediscovery_status.go b/controllers/nodefeaturediscovery_status.go index 8e761bba..b4655200 100644 --- a/controllers/nodefeaturediscovery_status.go +++ b/controllers/nodefeaturediscovery_status.go @@ -37,26 +37,29 @@ const ( const ( // Resource is missing - conditionFailedGettingNFDWorkerConfig = "FailedGettingNFDWorkerConfig" - conditionFailedGettingNFDWorkerServiceAccount = "FailedGettingNFDWorkerServiceAccount" - conditionFailedGettingNFDMasterServiceAccount = "FailedGettingNFDMasterServiceAccount" - conditionFailedGettingNFDService = "FailedGettingNFDService" - conditionFailedGettingNFDWorkerDaemonSet = "FailedGettingNFDWorkerDaemonSet" - conditionFailedGettingNFDMasterDaemonSet = "FailedGettingNFDMasterDaemonSet" - conditionFailedGettingNFDRoleBinding = "FailedGettingNFDRoleBinding" - conditionFailedGettingNFDClusterRoleBinding = "FailedGettingNFDClusterRole" + conditionFailedGettingNFDWorkerConfig = "FailedGettingNFDWorkerConfig" + conditionFailedGettingNFDWorkerServiceAccount = "FailedGettingNFDWorkerServiceAccount" + conditionFailedGettingNFDTopologyUpdaterServiceAccount = "FailedGettingNFDTopologyUpdaterServiceAccount" + conditionFailedGettingNFDMasterServiceAccount = "FailedGettingNFDMasterServiceAccount" + conditionFailedGettingNFDService = "FailedGettingNFDService" + conditionFailedGettingNFDWorkerDaemonSet = "FailedGettingNFDWorkerDaemonSet" + conditionFailedGettingNFDMasterDaemonSet = "FailedGettingNFDMasterDaemonSet" + conditionFailedGettingNFDRoleBinding = "FailedGettingNFDRoleBinding" + conditionFailedGettingNFDClusterRoleBinding = "FailedGettingNFDClusterRole" // Resource degraded - conditionNFDWorkerConfigDegraded = "NFDWorkerConfigResourceDegraded" - conditionNFDWorkerServiceAccountDegraded = "NFDWorkerServiceAccountDegraded" - conditionNFDMasterServiceAccountDegraded = "NFDMasterServiceAccountDegraded" -
conditionNFDServiceDegraded = "NFDServiceDegraded" - conditionNFDWorkerDaemonSetDegraded = "NFDWorkerDaemonSetDegraded" - conditionNFDMasterDaemonSetDegraded = "NFDMasterDaemonSetDegraded" - conditionNFDRoleDegraded = "NFDRoleDegraded" - conditionNFDRoleBindingDegraded = "NFDRoleBindingDegraded" - conditionNFDClusterRoleDegraded = "NFDClusterRoleDegraded" - conditionNFDClusterRoleBindingDegraded = "NFDClusterRoleBindingDegraded" + conditionNFDWorkerConfigDegraded = "NFDWorkerConfigResourceDegraded" + conditionNFDWorkerServiceAccountDegraded = "NFDWorkerServiceAccountDegraded" + conditionNFDTopologyUpdaterServiceAccountDegraded = "NFDTopologyUpdaterServiceAccountDegraded" + conditionNFDMasterServiceAccountDegraded = "NFDMasterServiceAccountDegraded" + conditionNFDServiceDegraded = "NFDServiceDegraded" + conditionNFDWorkerDaemonSetDegraded = "NFDWorkerDaemonSetDegraded" + conditionNFDTopologyUpdaterDaemonSetDegraded = "NFDTopologyUpdaterDaemonSetDegraded" + conditionNFDMasterDaemonSetDegraded = "NFDMasterDaemonSetDegraded" + conditionNFDRoleDegraded = "NFDRoleDegraded" + conditionNFDRoleBindingDegraded = "NFDRoleBindingDegraded" + conditionNFDClusterRoleDegraded = "NFDClusterRoleDegraded" + conditionNFDClusterRoleBindingDegraded = "NFDClusterRoleBindingDegraded" // Unknown errors. 
(Catch all) errorNFDWorkerDaemonSetUnknown = "NFDWorkerDaemonSetCorrupted" @@ -271,6 +274,12 @@ func (r *NodeFeatureDiscoveryReconciler) getWorkerDaemonSetConditions(ctx contex return r.getDaemonSetConditions(ctx, instance, nfdWorkerApp) } +// getTopologyUpdaterDaemonSetConditions is a wrapper around "getDaemonSetConditions" for +// topology-updater DaemonSets +func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterDaemonSetConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getDaemonSetConditions(ctx, instance, nfdTopologyUpdaterApp) +} + // getMasterDaemonSetConditions is a wrapper around "getDaemonSetConditions" for // master DaemonSets func (r *NodeFeatureDiscoveryReconciler) getMasterDaemonSetConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { @@ -433,14 +442,26 @@ func (r *NodeFeatureDiscoveryReconciler) getRoleBindingConditions(ctx context.Co return status, nil } +// getMasterClusterRoleConditions is a wrapper around "getClusterRoleConditions" for +// the master ClusterRole. +func (r *NodeFeatureDiscoveryReconciler) getMasterClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getClusterRoleConditions(ctx, instance, nfdMasterApp) +} + +// getTopologyUpdaterClusterRoleConditions is a wrapper around "getClusterRoleConditions" for +// the topology-updater ClusterRole. +func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getClusterRoleConditions(ctx, instance, nfdTopologyUpdaterApp) +} + // geClusterRoleConditions gets the current status of a ClusterRole.
If an error // occurs, this function returns the corresponding error message -func (r *NodeFeatureDiscoveryReconciler) getClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { +func (r *NodeFeatureDiscoveryReconciler) getClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, nfdAppName string) (Status, error) { // Initialize status to 'Degraded' status := initializeDegradedStatus() // Get the existing ClusterRole from the reconciler - _, err := r.getClusterRole(ctx, instance.ObjectMeta.Namespace, nfdMasterApp) + _, err := r.getClusterRole(ctx, instance.ObjectMeta.Namespace, nfdAppName) // If 'clusterRole' is nil, then it hasn't been (re)created yet if err != nil { @@ -454,14 +475,26 @@ func (r *NodeFeatureDiscoveryReconciler) getClusterRoleConditions(ctx context.Co return status, nil } +// getMasterClusterRoleBindingConditions is a wrapper around "getClusterRoleBindingConditions" for +// the master ClusterRoleBinding. +func (r *NodeFeatureDiscoveryReconciler) getMasterClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getClusterRoleBindingConditions(ctx, instance, nfdMasterApp) +} + +// getTopologyUpdaterClusterRoleBindingConditions is a wrapper around "getClusterRoleBindingConditions" for +// the topology-updater ClusterRoleBinding. +func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getClusterRoleBindingConditions(ctx, instance, nfdTopologyUpdaterApp) +} + // getClusterRoleBindingConditions gets the current status of a ClusterRoleBinding.
// If an error occurs, this function returns the corresponding error message -func (r *NodeFeatureDiscoveryReconciler) getClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { +func (r *NodeFeatureDiscoveryReconciler) getClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, nfdAppName string) (Status, error) { // Initialize status to 'Degraded' status := initializeDegradedStatus() // Get the existing ClusterRoleBinding from the reconciler - _, err := r.getClusterRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdMasterApp) + _, err := r.getClusterRoleBinding(ctx, instance.ObjectMeta.Namespace, nfdAppName) // If the error is not nil, then the ClusterRoleBinding hasn't been (re)created // yet @@ -482,6 +515,12 @@ func (r *NodeFeatureDiscoveryReconciler) getWorkerServiceAccountConditions(ctx c return r.getServiceAccountConditions(ctx, instance, nfdWorkerApp) } +// getTopologyUpdaterServiceAccountConditions is a wrapper around "getServiceAccountConditions" for +// the topology-updater service account. +func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterServiceAccountConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getServiceAccountConditions(ctx, instance, nfdTopologyUpdaterApp) +} + // getMasterServiceAccountConditions is a wrapper around "getServiceAccountConditions" for // master service account. 
func (r *NodeFeatureDiscoveryReconciler) getMasterServiceAccountConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { @@ -499,10 +538,14 @@ func (r *NodeFeatureDiscoveryReconciler) getServiceAccountConditions(ctx context // If the error is not nil, then the ServiceAccount hasn't been (re)created yet if err != nil { - if nfdAppName == nfdWorkerApp { + switch nfdAppName { + case nfdWorkerApp: return status, errors.New(conditionNFDWorkerServiceAccountDegraded) + case nfdMasterApp: + return status, errors.New(conditionNFDMasterServiceAccountDegraded) + case nfdTopologyUpdaterApp: + return status, errors.New(conditionNFDTopologyUpdaterServiceAccountDegraded) } - return status, errors.New(conditionNFDMasterServiceAccountDegraded) } // Set the resource to available