From e9210f7ce12580a1be63f59a800f4ad8c6ed1483 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Wed, 13 Apr 2022 09:32:03 -0400 Subject: [PATCH 1/3] Run nfd-master as Deployment Signed-off-by: Carlos Eduardo Arango Gutierrez --- .../assets/master/0400_master_daemonset.yaml | 37 ----- .../assets/master/0400_master_deployment.yaml | 86 +++++++++++ build/assets/master/0500_service.yaml | 5 +- .../nodefeaturediscovery_controller.go | 11 +- controllers/nodefeaturediscovery_controls.go | 134 +++++++++++++----- .../nodefeaturediscovery_finalizers.go | 12 +- controllers/nodefeaturediscovery_resources.go | 28 ++++ controllers/nodefeaturediscovery_status.go | 44 ++++-- 8 files changed, 253 insertions(+), 104 deletions(-) delete mode 100644 build/assets/master/0400_master_daemonset.yaml create mode 100644 build/assets/master/0400_master_deployment.yaml diff --git a/build/assets/master/0400_master_daemonset.yaml b/build/assets/master/0400_master_daemonset.yaml deleted file mode 100644 index 8e885851..00000000 --- a/build/assets/master/0400_master_daemonset.yaml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - labels: - app: nfd-master - name: nfd-master -spec: - selector: - matchLabels: - app: nfd-master - template: - metadata: - labels: - app: nfd-master - spec: - serviceAccount: nfd-master - nodeSelector: - node-role.kubernetes.io/master: "" - tolerations: - - key: "node-role.kubernetes.io/master" - operator: "Equal" - value: "" - effect: "NoSchedule" - containers: - - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - image: $(NODE_FEATURE_DISCOVERY_IMAGE) - name: nfd-master - command: ["nfd-master"] - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] diff --git a/build/assets/master/0400_master_deployment.yaml b/build/assets/master/0400_master_deployment.yaml new file mode 100644 index 00000000..836e220b --- /dev/null +++ b/build/assets/master/0400_master_deployment.yaml @@ -0,0 +1,86 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: nfd-master + name: nfd-master +spec: + replicas: 1 + selector: + matchLabels: + app: nfd-master + template: + metadata: + labels: + app: nfd-master + spec: + serviceAccount: nfd-master + serviceAccountName: nfd-master + dnsPolicy: ClusterFirstWithHostNet + restartPolicy: Always + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/master + operator: Equal + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Equal + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: node-role.kubernetes.io/master + operator: In + values: + - "" + weight: 1 + - preference: + matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: In + values: + - "" + weight: 1 + containers: + - name: nfd-master + env: + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + image: $(NODE_FEATURE_DISCOVERY_IMAGE) + imagePullPolicy: Always + command: + - "nfd-master" + args: [] + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + volumeMounts: [] + livenessProbe: + exec: + command: + - /usr/bin/grpc_health_probe + - -addr=:12000 + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + readinessProbe: + exec: + command: + - 
/usr/bin/grpc_health_probe
+          - -addr=:12000
+        failureThreshold: 10
+        initialDelaySeconds: 5
+        periodSeconds: 10
+        successThreshold: 1
+        timeoutSeconds: 1
+      volumes: []
\ No newline at end of file
diff --git a/build/assets/master/0500_service.yaml b/build/assets/master/0500_service.yaml
index cffbb4bc..8598f2a9 100644
--- a/build/assets/master/0500_service.yaml
+++ b/build/assets/master/0500_service.yaml
@@ -3,12 +3,9 @@ kind: Service
 metadata:
   name: nfd-master
 spec:
-  type: ClusterIP
   selector:
     app: nfd-master
   ports:
   - protocol: TCP
     port: 12000
-    targetPort: 12000
-    name: nfd
-
+  type: ClusterIP
diff --git a/controllers/nodefeaturediscovery_controller.go b/controllers/nodefeaturediscovery_controller.go
index ace5e719..d2111072 100644
--- a/controllers/nodefeaturediscovery_controller.go
+++ b/controllers/nodefeaturediscovery_controller.go
@@ -103,6 +103,7 @@ func validateUpdateEvent(e *event.UpdateEvent) bool {
 // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=pods/log,verbs=get
 // +kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=namespaces,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
@@ -235,13 +236,13 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl
 		return r.updateDegradedCondition(instance, err.Error(), "nfd-worker Daemonset has been degraded")
 	}
 
-	// Check the status of the NFD Operator Master DaemonSet
-	if rstatus, err := r.getMasterDaemonSetConditions(ctx, instance); err != nil {
-		return r.updateDegradedCondition(instance, conditionFailedGettingNFDMasterDaemonSet, err.Error())
+	// Check the status of the NFD Operator Master Deployment
+	if rstatus, err := r.getMasterDeploymentConditions(ctx, instance); err != nil {
+		return r.updateDegradedCondition(instance, conditionFailedGettingNFDMasterDeployment, err.Error())
 	} else if rstatus.isProgressing {
-		return r.updateProgressingCondition(instance, err.Error(), "nfd-master Daemonset is progressing")
+		return r.updateProgressingCondition(instance, err.Error(), "nfd-master Deployment is progressing")
 	} else if rstatus.isDegraded {
-		return r.updateDegradedCondition(instance, err.Error(), "nfd-master Daemonset has been degraded")
+		return r.updateDegradedCondition(instance, err.Error(), "nfd-master Deployment has been degraded")
 	}
 
 	// Check if nfd-topology-updater is needed, if not, skip
diff --git a/controllers/nodefeaturediscovery_controls.go b/controllers/nodefeaturediscovery_controls.go
index 2eaf6cb5..c480acd7 100644
--- a/controllers/nodefeaturediscovery_controls.go
+++ b/controllers/nodefeaturediscovery_controls.go
@@ -129,7 +129,7 @@ func ClusterRole(n NFD) (ResourceStatus, error) {
 	// found states if the ClusterRole was found
 	found := &rbacv1.ClusterRole{}
 
-	klog.Info("Looking for ClusterRole %q in Namespace %", obj.Name, obj.Namespace)
+	klog.Info("Looking for ClusterRole %q in Namespace %q", obj.Name, obj.Namespace)
 
 	// Look for the ClusterRole to see if it exists, and if so, check
 	// if it's Ready/NotReady.
If the ClusterRole does not exist, then @@ -388,55 +388,111 @@ func DaemonSet(n NFD) (ResourceStatus, error) { obj.Spec.Template.Spec.Containers[0].ImagePullPolicy = n.ins.Spec.Operand.ImagePolicy(n.ins.Spec.Operand.ImagePullPolicy) } - // Update nfd-master service port - if obj.ObjectMeta.Name == "nfd-master" { - var args []string - port := defaultServicePort + // Set namespace based on the NFD namespace. (And again, + // it is assumed that the Namespace has already been + // determined before this function was called.) + obj.SetNamespace(n.ins.GetNamespace()) - // If the operand service port has already been defined, - // then set "port" to the defined port. Otherwise, it is - // ok to just use the defaultServicePort value - if n.ins.Spec.Operand.ServicePort != 0 { - port = n.ins.Spec.Operand.ServicePort - } + // found states if the DaemonSet was found + found := &appsv1.DaemonSet{} - // Now that the port has been determined, append it to - // the list of args - args = append(args, fmt.Sprintf("--port=%d", port)) + klog.Info("Looking for Daemonset %q in Namespace %q", obj.Name, obj.Namespace) - // Check if running as instance. If not, then it is - // expected that n.ins.Spec.Instance will return "" - // https://kubernetes-sigs.github.io/node-feature-discovery/v0.8/advanced/master-commandline-reference.html#-instance - if n.ins.Spec.Instance != "" { - args = append(args, fmt.Sprintf("--instance=%s", n.ins.Spec.Instance)) - } + // SetControllerReference sets the owner as a Controller OwnerReference + // and is used for garbage collection of the controlled object. It is + // also used to reconcile the owner object on changes to the controlled + // object. If we cannot set the owner, then return NotReady + if err := controllerutil.SetControllerReference(n.ins, &obj, n.rec.Scheme); err != nil { + return NotReady, err + } - if len(n.ins.Spec.ExtraLabelNs) != 0 { - args = append(args, fmt.Sprintf("--extra-label-ns=%s", strings.Join(n.ins.Spec.ExtraLabelNs, ","))) + // Look for the DaemonSet to see if it exists, and if so, check if it's + // Ready/NotReady. 
If the DaemonSet does not exist, then attempt to
+	// create it
+	err := n.rec.Client.Get(context.TODO(), types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
+	if err != nil && errors.IsNotFound(err) {
+		klog.Info("Daemonset %q in Namespace %q not found, creating", obj.Name, obj.Namespace)
+		err = n.rec.Client.Create(context.TODO(), &obj)
+		if err != nil {
+			klog.Info("Couldn't create Daemonset %q in Namespace %q", obj.Name, obj.Namespace)
+			return NotReady, err
 		}
+		return Ready, nil
+	} else if err != nil {
+		return NotReady, err
+	}
 
-		if len(n.ins.Spec.ResourceLabels) != 0 {
-			args = append(args, fmt.Sprintf("--resource-labels=%s", strings.Join(n.ins.Spec.ResourceLabels, ",")))
-		}
+	// If we found the DaemonSet, let's attempt to update it
+	klog.Info("Daemonset %q in Namespace %q found, updating", obj.Name, obj.Namespace)
+	err = n.rec.Client.Update(context.TODO(), &obj)
+	if err != nil {
+		return NotReady, err
+	}
 
-		if strings.TrimSpace(n.ins.Spec.LabelWhiteList) != "" {
-			args = append(args, fmt.Sprintf("--label-whitelist=%s", n.ins.Spec.LabelWhiteList))
-		}
+	return Ready, nil
+}
+
+// Deployment checks the readiness of a Deployment and creates one if it doesn't exist
+func Deployment(n NFD) (ResourceStatus, error) {
+	// state represents the resource's 'control' function index
+	state := n.idx
+
+	// It is assumed that the index has already been verified to be a
+	// Deployment object, so let's get the resource's Deployment object
+	obj := n.resources[state].Deployment
+
+	// Update the NFD operand image
+	obj.Spec.Template.Spec.Containers[0].Image = n.ins.Spec.Operand.ImagePath()
+
+	// Update the image pull policy
+	if n.ins.Spec.Operand.ImagePullPolicy != "" {
+		obj.Spec.Template.Spec.Containers[0].ImagePullPolicy = n.ins.Spec.Operand.ImagePolicy(n.ins.Spec.Operand.ImagePullPolicy)
+	}
+
+	var args []string
+	port := defaultServicePort
+
+	// If the operand service port has already been defined,
+	// then set "port" to the defined port. Otherwise, it is
+	// ok to just use the defaultServicePort value
+	if n.ins.Spec.Operand.ServicePort != 0 {
+		port = n.ins.Spec.Operand.ServicePort
+	}
+
+	// Now that the port has been determined, append it to
+	// the list of args
+	args = append(args, fmt.Sprintf("--port=%d", port))
+
+	// Check if running as instance. If not, then it is
+	// expected that n.ins.Spec.Instance will return ""
+	// https://kubernetes-sigs.github.io/node-feature-discovery/v0.8/advanced/master-commandline-reference.html#-instance
+	if n.ins.Spec.Instance != "" {
+		args = append(args, fmt.Sprintf("--instance=%s", n.ins.Spec.Instance))
+	}
+
+	if len(n.ins.Spec.ExtraLabelNs) != 0 {
+		args = append(args, fmt.Sprintf("--extra-label-ns=%s", strings.Join(n.ins.Spec.ExtraLabelNs, ",")))
+	}
+
+	if len(n.ins.Spec.ResourceLabels) != 0 {
+		args = append(args, fmt.Sprintf("--resource-labels=%s", strings.Join(n.ins.Spec.ResourceLabels, ",")))
+	}
 
-		// Set the args based on the port that was determined
-		// and the instance that was determined
-		obj.Spec.Template.Spec.Containers[0].Args = args
+	if strings.TrimSpace(n.ins.Spec.LabelWhiteList) != "" {
+		args = append(args, fmt.Sprintf("--label-whitelist=%s", n.ins.Spec.LabelWhiteList))
 	}
 
+	obj.Spec.Template.Spec.Containers[0].Args = args
+
 	// Set namespace based on the NFD namespace. (And again,
 	// it is assumed that the Namespace has already been
 	// determined before this function was called.)
obj.SetNamespace(n.ins.GetNamespace())
 
-	// found states if the DaemonSet was found
-	found := &appsv1.DaemonSet{}
+	// found states if the Deployment was found
+	found := &appsv1.Deployment{}
 
-	klog.Info("Looking for Daemonset %q in Namespace %q", obj.Name, obj.Namespace)
+	klog.Info("Looking for Deployment %q in Namespace %q", obj.Name, obj.Namespace)
 
 	// SetControllerReference sets the owner as a Controller OwnerReference
 	// and is used for garbage collection of the controlled object. It is
@@ -446,15 +502,15 @@ func DaemonSet(n NFD) (ResourceStatus, error) {
 		return NotReady, err
 	}
 
-	// Look for the DaemonSet to see if it exists, and if so, check if it's
-	// Ready/NotReady. If the DaemonSet does not exist, then attempt to
+	// Look for the Deployment to see if it exists, and if so, check if it's
+	// Ready/NotReady. If the Deployment does not exist, then attempt to
 	// create it
 	err := n.rec.Client.Get(context.TODO(), types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
 	if err != nil && errors.IsNotFound(err) {
-		klog.Info("Daemonset %q in Namespace %q not found, creating", obj.Name, obj.Namespace)
+		klog.Info("Deployment %q in Namespace %q not found, creating", obj.Name, obj.Namespace)
 		err = n.rec.Client.Create(context.TODO(), &obj)
 		if err != nil {
-			klog.Info("Couldn't create Daemonset %q in Namespace %q", obj.Name, obj.Namespace)
+			klog.Info("Couldn't create Deployment %q in Namespace %q", obj.Name, obj.Namespace)
 			return NotReady, err
 		}
 		return Ready, nil
@@ -462,8 +518,8 @@
 		return NotReady, err
 	}
 
-	// If we found the DaemonSet, let's attempt to update it
-	klog.Info("Daemonset %q in Namespace %q found, updating", obj.Name, obj.Namespace)
+	// If we found the Deployment, let's attempt to update it
+	klog.Info("Deployment %q in Namespace %q found, updating", obj.Name, obj.Namespace)
 	err = n.rec.Client.Update(context.TODO(), &obj)
 	if err != nil {
 		return NotReady, err
diff --git a/controllers/nodefeaturediscovery_finalizers.go b/controllers/nodefeaturediscovery_finalizers.go
index d5306923..59b6f0ad 100644
--- a/controllers/nodefeaturediscovery_finalizers.go
+++ b/controllers/nodefeaturediscovery_finalizers.go
@@ -174,13 +174,13 @@ func (r *NodeFeatureDiscoveryReconciler) deleteComponents(ctx context.Context, i
 		return err
 	}
 
-	// Attempt to delete master DaemonSet
+	// Attempt to delete master Deployment
 	err = wait.Poll(RetryInterval, Timeout, func() (done bool, err error) {
-		err = r.deleteDaemonSet(ctx, instance.ObjectMeta.Namespace, nfdMasterApp)
+		err = r.deleteDeployment(ctx, instance.ObjectMeta.Namespace, nfdMasterApp)
 		if err != nil {
-			return false, interpretError(err, "master DaemonSet")
+			return false, interpretError(err, "master Deployment")
 		}
-		klog.Info("Master DaemonSet resource has been deleted.")
+		klog.Info("Master Deployment resource has been deleted.")
 		return true, nil
 	})
 	if err != nil {
@@ -301,8 +301,8 @@ func (r *NodeFeatureDiscoveryReconciler) doComponentsExist(ctx context.Context,
 		return true
 	}
 
-	// Attempt to find the master DaemonSet
-	if _, err := r.getDaemonSet(ctx, instance.ObjectMeta.Namespace, nfdMasterApp); !k8serrors.IsNotFound(err) {
+	// Attempt to find the master Deployment
+	if _, err := r.getDeployment(ctx, instance.ObjectMeta.Namespace, nfdMasterApp); !k8serrors.IsNotFound(err) {
 		return true
 	}
 
diff --git a/controllers/nodefeaturediscovery_resources.go b/controllers/nodefeaturediscovery_resources.go
index b28be354..2b3e4cb4 100644
--- a/controllers/nodefeaturediscovery_resources.go
+++ b/controllers/nodefeaturediscovery_resources.go
@@ -47,6 +47,7 @@ type
Resources struct {
 	ClusterRoleBinding rbacv1.ClusterRoleBinding
 	ConfigMap          corev1.ConfigMap
 	DaemonSet          appsv1.DaemonSet
+	Deployment         appsv1.Deployment
 	Pod                corev1.Pod
 	Service            corev1.Service
 }
@@ -139,6 +140,10 @@ func addResourcesControls(path string) (Resources, controlFunc) {
 			_, _, err := s.Decode(m, nil, &res.DaemonSet)
 			panicIfError(err)
 			ctrl = append(ctrl, DaemonSet)
+		case "Deployment":
+			_, _, err := s.Decode(m, nil, &res.Deployment)
+			panicIfError(err)
+			ctrl = append(ctrl, Deployment)
 		case "Service":
 			_, _, err := s.Decode(m, nil, &res.Service)
 			panicIfError(err)
@@ -173,6 +178,13 @@ func (r *NodeFeatureDiscoveryReconciler) getDaemonSet(ctx context.Context, names
 	return ds, err
 }
 
+// getDeployment gets one of the NFD Operand's Deployments
+func (r *NodeFeatureDiscoveryReconciler) getDeployment(ctx context.Context, namespace string, name string) (*appsv1.Deployment, error) {
+	d := &appsv1.Deployment{}
+	err := r.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, d)
+	return d, err
+}
+
 // getConfigMap gets one of the NFD Operand's ConfigMap
 func (r *NodeFeatureDiscoveryReconciler) getConfigMap(ctx context.Context, namespace string, name string) (*corev1.ConfigMap, error) {
 	cm := &corev1.ConfigMap{}
@@ -263,6 +275,22 @@ func (r *NodeFeatureDiscoveryReconciler) deleteDaemonSet(ctx context.Context, na
 	return r.Delete(context.TODO(), ds)
 }
 
+// deleteDeployment deletes the Operand Deployment
+func (r *NodeFeatureDiscoveryReconciler) deleteDeployment(ctx context.Context, namespace string, name string) error {
+	d, err := r.getDeployment(ctx, namespace, name)
+
+	// Do not return an error if the object has already been deleted
+	if k8serrors.IsNotFound(err) {
+		return nil
+	}
+
+	if err != nil {
+		return err
+	}
+
+	return r.Delete(context.TODO(), d)
+}
+
 // deleteService deletes the NFD Operand's Service
 func (r *NodeFeatureDiscoveryReconciler) deleteService(ctx context.Context, namespace string, name string) error {
 	svc, err := r.getService(ctx, namespace, name)
diff --git a/controllers/nodefeaturediscovery_status.go b/controllers/nodefeaturediscovery_status.go
index b4655200..c08254f6 100644
--- a/controllers/nodefeaturediscovery_status.go
+++ b/controllers/nodefeaturediscovery_status.go
@@ -43,7 +43,7 @@ const (
 	conditionFailedGettingNFDMasterServiceAccount = "FailedGettingNFDMasterServiceAccount"
 	conditionFailedGettingNFDService              = "FailedGettingNFDService"
 	conditionFailedGettingNFDWorkerDaemonSet      = "FailedGettingNFDWorkerDaemonSet"
-	conditionFailedGettingNFDMasterDaemonSet      = "FailedGettingNFDMasterDaemonSet"
+	conditionFailedGettingNFDMasterDeployment     = "FailedGettingNFDMasterDeployment"
 	conditionFailedGettingNFDRoleBinding          = "FailedGettingNFDRoleBinding"
 	conditionFailedGettingNFDClusterRoleBinding   = "FailedGettingNFDClusterRole"
 
@@ -55,7 +55,7 @@ const (
 	conditionNFDServiceDegraded                  = "NFDServiceDegraded"
 	conditionNFDWorkerDaemonSetDegraded          = "NFDWorkerDaemonSetDegraded"
 	conditionNFDTopologyUpdaterDaemonSetDegraded = "NFDTopologyUpdaterDaemonSetDegraded"
-	conditionNFDMasterDaemonSetDegraded          = "NFDMasterDaemonSetDegraded"
+	conditionNFDMasterDeploymentDegraded         = "NFDMasterDeploymentDegraded"
 	conditionNFDRoleDegraded                     = "NFDRoleDegraded"
 	conditionNFDRoleBindingDegraded              = "NFDRoleBindingDegraded"
 	conditionNFDClusterRoleDegraded              = "NFDClusterRoleDegraded"
@@ -63,16 +63,13 @@
 
 	// Unknown errors.
(Catch all)
 	errorNFDWorkerDaemonSetUnknown = "NFDWorkerDaemonSetCorrupted"
-	errorNFDMasterDaemonSetUnknown = "NFDMasterDaemonSetCorrupted"
 
 	// More nodes are listed as "ready" than selected
 	errorTooManyNFDWorkerDaemonSetReadyNodes = "NFDWorkerDaemonSetHasMoreNodesThanScheduled"
-	errorTooManyNFDMasterDaemonSetReadyNodes = "NFDMasterDaemonSetHasMoreNodesThanScheduled"
 
 	// DaemonSet warnings (for "Progressing" conditions)
 	warningNumberOfReadyNodesIsLessThanScheduled = "warningNumberOfReadyNodesIsLessThanScheduled"
 	warningNFDWorkerDaemonSetProgressing         = "warningNFDWorkerDaemonSetProgressing"
-	warningNFDMasterDaemonSetProgressing         = "warningNFDMasterDaemonSetProgressing"
 
 	// ConditionAvailable indicates that the resources maintained by the operator,
 	// is functional and available in the cluster.
@@ -280,10 +277,35 @@ func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterDaemonSetConditions(c
 	return r.getDaemonSetConditions(ctx, instance, nfdTopologyUpdaterApp)
 }
 
-// getMasterDaemonSetConditions is a wrapper around "getDaemonSetConditions" for
-// master DaemonSets
-func (r *NodeFeatureDiscoveryReconciler) getMasterDaemonSetConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
-	return r.getDaemonSetConditions(ctx, instance, nfdMasterApp)
+// getMasterDeploymentConditions is a wrapper around "getDeploymentConditions" for
+// the master Deployment
+func (r *NodeFeatureDiscoveryReconciler) getMasterDeploymentConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {
+	return r.getDeploymentConditions(ctx, instance, nfdMasterApp)
+}
+
+// getDeploymentConditions gets the current status of a Deployment. If an error
+// occurs, this function returns the corresponding error message
+func (r *NodeFeatureDiscoveryReconciler) getDeploymentConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery, nfdAppName string) (Status, error) {
+	// Initialize the resource's status to 'Degraded'
+	status := initializeDegradedStatus()
+
+	d, err := r.getDeployment(ctx, instance.ObjectMeta.Namespace, nfdAppName)
+	if err != nil {
+		return status, err
+	}
+
+	dStatus := d.Status.DeepCopy()
+
+	// TODO: make the number of replicas configurable from the CRD
+	if dStatus.AvailableReplicas == 0 {
+		return status, errors.New(conditionNFDMasterDeploymentDegraded)
+	}
+
+	// If at least one replica is available, then update the status to be "isAvailable"
+	status.isAvailable = true
+	status.isDegraded = false
+
+	return status, nil
 }
 
 // getDaemonSetConditions gets the current status of a DaemonSet.
If an error @@ -312,7 +334,6 @@ func (r *NodeFeatureDiscoveryReconciler) getDaemonSetConditions(ctx context.Cont if nfdAppName == nfdWorkerApp { return status, errors.New(errorNFDWorkerDaemonSetUnknown) } - return status, errors.New(errorNFDMasterDaemonSetUnknown) } if numberUnavailable > 0 { status.isProgressing = true @@ -320,7 +341,6 @@ func (r *NodeFeatureDiscoveryReconciler) getDaemonSetConditions(ctx context.Cont if nfdAppName == nfdWorkerApp { return status, errors.New(warningNFDWorkerDaemonSetProgressing) } - return status, errors.New(warningNFDMasterDaemonSetProgressing) } // If there are none scheduled, then we have a problem because we should @@ -329,7 +349,6 @@ func (r *NodeFeatureDiscoveryReconciler) getDaemonSetConditions(ctx context.Cont if nfdAppName == nfdWorkerApp { return status, errors.New(conditionNFDWorkerDaemonSetDegraded) } - return status, errors.New(conditionNFDMasterDaemonSetDegraded) } // Just check in case the number of "ready" nodes is greater than the @@ -339,7 +358,6 @@ func (r *NodeFeatureDiscoveryReconciler) getDaemonSetConditions(ctx context.Cont if nfdAppName == nfdWorkerApp { return status, errors.New(errorTooManyNFDWorkerDaemonSetReadyNodes) } - return status, errors.New(errorTooManyNFDMasterDaemonSetReadyNodes) } // If we have less than the number of scheduled pods, then the DaemonSet From 27222e48eb72d25c2638c79af4b12aa635bf6434 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Tue, 19 Apr 2022 11:47:24 -0400 Subject: [PATCH 2/3] Update kubebuilder rbac Signed-off-by: Carlos Eduardo Arango Gutierrez --- config/rbac/kustomization.yaml | 42 ----- config/rbac/role.yaml | 147 ++++-------------- .../nodefeaturediscovery_controller.go | 25 ++- 3 files changed, 35 insertions(+), 179 deletions(-) diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index 1805a2e1..3c88b378 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -13,45 +13,3 @@ resources: - auth_proxy_role.yaml - auth_proxy_role_binding.yaml - auth_proxy_client_clusterrole.yaml - -# needed for nfd-worker -# this patch is needed given that -# +kubebuilder does not allow resourceNames -patchesJSON6902: -- target: - kind: ClusterRole - name: manager-role - patch: |- - - op: add - path: /rules/0 - value: - apiGroups: - - policy - resources: - - podsecuritypolicies - verbs: - - use - resourceNames: - - nfd-worker - - op: add - path: /rules/1 - value: - apiGroups: - - nfd.k8s-sigs.io - resources: - - nodefeaturerules - verbs: - - get - - list - - watch - - op: add - path: /rules/2 - value: - apiGroups: - - topology.node.k8s.io - resources: - - noderesourcetopologies - verbs: - - create - - get - - update \ No newline at end of file diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 208a376a..47573547 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -5,6 +5,14 @@ metadata: creationTimestamp: null name: manager-role rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - update + - watch - apiGroups: - apps resources: @@ -30,56 +38,44 @@ rules: - update - watch - apiGroups: - - coordination.k8s.io + - cert-manager.io resources: - - leases + - certificates verbs: - - create - - delete - get - list - - update - watch - apiGroups: - - "" + - cert-manager.io resources: - - configmaps + - issuers verbs: - - create - - delete - get - list - - patch - - update - watch - apiGroups: - - "" + - coordination.k8s.io resources: - - endpoints + - leases verbs: - create - delete - get - list - - 
patch - update - watch - apiGroups: - "" resources: - - events + - configmaps verbs: - create + - delete + - get - list - patch - update - watch -- apiGroups: - - "" - resources: - - imagestreams/layers - verbs: - - get - apiGroups: - "" resources: @@ -97,8 +93,6 @@ rules: resources: - nodes verbs: - - create - - delete - get - list - patch @@ -112,25 +106,6 @@ rules: - get - patch - update -- apiGroups: - - "" - resources: - - persistentvolumeclaims - verbs: - - get - - list - - update - - watch -- apiGroups: - - "" - resources: - - persistentvolumes - verbs: - - create - - delete - - get - - list - - watch - apiGroups: - "" resources: @@ -143,24 +118,6 @@ rules: - patch - update - watch -- apiGroups: - - "" - resources: - - pods/log - verbs: - - get -- apiGroups: - - "" - resources: - - secrets - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - apiGroups: - "" resources: @@ -186,55 +143,21 @@ rules: - update - watch - apiGroups: - - monitoring.coreos.com + - nfd.k8s-sigs.io resources: - - prometheusrules + - nodefeaturerules verbs: - - create - - delete - get - list - - patch - - update - watch - apiGroups: - - monitoring.coreos.com + - policy + resourceNames: + - nfd-worker resources: - - servicemonitors + - podsecuritypolicies verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - nfd.kubernetes.io - resources: - - nodefeaturediscoveries - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - nfd.kubernetes.io - resources: - - nodefeaturediscoveries/finalizers - verbs: - - update -- apiGroups: - - nfd.kubernetes.io - resources: - - nodefeaturediscoveries/status - verbs: - - get - - patch - - update + - use - apiGroups: - rbac.authorization.k8s.io resources: @@ -284,28 +207,10 @@ rules: - update - watch - apiGroups: - - storage.k8s.io + - topology.node.k8s.io resources: - - csidrivers + - noderesourcetopologies verbs: - create - - delete - get - - list - - patch - update - - watch -- apiGroups: - - storage.k8s.io - resources: - - csinodes - verbs: - - get - - list - - watch -- apiGroups: - - storage.k8s.io - resources: - - storageclasses - verbs: - - watch \ No newline at end of file diff --git a/controllers/nodefeaturediscovery_controller.go b/controllers/nodefeaturediscovery_controller.go index d2111072..01244612 100644 --- a/controllers/nodefeaturediscovery_controller.go +++ b/controllers/nodefeaturediscovery_controller.go @@ -97,35 +97,28 @@ func validateUpdateEvent(e *event.UpdateEvent) bool { return true } -// +kubebuilder:rbac:groups=nfd.kubernetes.io,resources=nodefeaturediscoveries,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=nfd.kubernetes.io,resources=nodefeaturediscoveries/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=nfd.kubernetes.io,resources=nodefeaturediscoveries/finalizers,verbs=update +// +kubebuilder:rbac:groups=core,resources=nodes,verbs=update +// +kubebuilder:rbac:groups=core,resources=nodes/status,verbs=get;patch;update // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=pods/log,verbs=get // +kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=namespaces,verbs=get;list;watch;create;update;patch;delete -// 
+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=imagestreams/layers,verbs=get // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=events,verbs=list;watch;create;update;patch -// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;update; -// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;create;delete // +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update;delete -// +kubebuilder:rbac:groups=storage.k8s.io,resources=csinodes,verbs=get;list;watch -// +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=watch -// +kubebuilder:rbac:groups=storage.k8s.io,resources=csidrivers,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=endpoints,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update +// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;patch +// +kubebuilder:rbac:groups=policy,resources=podsecuritypolicies,verbs=use,resourceNames=nfd-worker +// +kubebuilder:rbac:groups=cert-manager.io,resources=issuers,verbs=get;list;watch +// +kubebuilder:rbac:groups=cert-manager.io,resources=certificates,verbs=get;list;watch +// +kubebuilder:rbac:groups=topology.node.k8s.io,resources=noderesourcetopologies,verbs=create;update;get +// +kubebuilder:rbac:groups=nfd.k8s-sigs.io,resources=nodefeaturerules,verbs=get;list;watch // Reconcile is part of the main kubernetes reconciliation loop which aims // to move the current state of the cluster closer to the desired state. From 9868aeb9f695050dab482028ac60f870820edbc3 Mon Sep 17 00:00:00 2001 From: Markus Lehtonen Date: Wed, 13 Apr 2022 09:51:43 +0300 Subject: [PATCH 3/3] scripts/test-infra: bump golangci-lint to v1.45.2 Supports golang v1.18. 
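
A quick local reproduction of the verify step with the pinned version (the
install.sh invocation below mirrors scripts/test-infra/verify.sh; running the
linter over the whole module with default flags is an assumption about the
repo's lint scope):

    # install the pinned linter into GOPATH/bin, as verify.sh does
    curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b "$(go env GOPATH)/bin" v1.45.2
    # confirm the expected version, then lint
    "$(go env GOPATH)/bin/golangci-lint" version
    "$(go env GOPATH)/bin/golangci-lint" run ./...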
--- scripts/test-infra/verify.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test-infra/verify.sh b/scripts/test-infra/verify.sh index b0f14035..1f29d8e9 100755 --- a/scripts/test-infra/verify.sh +++ b/scripts/test-infra/verify.sh @@ -2,7 +2,7 @@ # Install deps gobinpath="$(go env GOPATH)/bin" -curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b "$gobinpath" v1.42.1 +curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b "$gobinpath" v1.45.2 export PATH=$PATH:$gobinpath # Run verify steps
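
As a quick smoke test for patch 1, one can check that nfd-master now rolls out
as a Deployment and answers the same gRPC health probe wired into its liveness
and readiness checks (the namespace below is an assumption; substitute the one
the operator was deployed into):

    # nfd-master should now exist as a Deployment rather than a DaemonSet
    kubectl -n node-feature-discovery-operator get deployment nfd-master
    kubectl -n node-feature-discovery-operator rollout status deployment/nfd-master --timeout=120s
    # run the probe from the manifest inside the running container
    kubectl -n node-feature-discovery-operator exec deploy/nfd-master -- /usr/bin/grpc_health_probe -addr=:12000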