Skip to content

Commit

Permalink
Add Prometheus metrics to fleet-controller
Browse files Browse the repository at this point in the history
Expose Prometheus metrics of the fleet-controller for the following
controllers:

- GitRepo
- Bundle
- BundleDeployment
- Cluster
- ClusterGroup
  • Loading branch information
p-se committed Feb 28, 2024
1 parent 3ad25e1 commit 8d832b3
Show file tree
Hide file tree
Showing 18 changed files with 928 additions and 9 deletions.
3 changes: 3 additions & 0 deletions charts/fleet/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ spec:
{{- if not .Values.gitops.enabled }}
- --disable-gitops
{{- end }}
{{- if not .Values.metrics.enabled }}
- --disable-metrics
{{- end }}
{{- if .Values.debug }}
- --debug
- --debug-level
Expand Down
17 changes: 17 additions & 0 deletions charts/fleet/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{{- if .Values.metrics.enabled }}
# Service exposing the fleet-controller metrics endpoint inside the cluster.
# It is rendered only when `metrics.enabled` is set in the chart values.
apiVersion: v1
kind: Service
metadata:
name: monitoring-fleet-controller
labels:
app: fleet-controller
spec:
type: ClusterIP
ports:
# NOTE(review): 8080 presumably matches the metrics bind address the
# controller listens on; that address is not visible here -- confirm.
- port: 8080
targetPort: 8080
protocol: TCP
name: metrics
# Routes traffic to pods labelled `app: fleet-controller`.
selector:
app: fleet-controller
{{- end }}
3 changes: 3 additions & 0 deletions charts/fleet/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ priorityClassName: ""
gitops:
enabled: true

metrics:
enabled: true

debug: false
debugLevel: 0
propagateDebugSettingsToAgents: true
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ require (
github.com/onsi/gomega v1.30.0
github.com/otiai10/copy v1.14.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.18.0
github.com/rancher/fleet/pkg/apis v0.0.0-00010101000000-000000000000
github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29
github.com/rancher/wrangler/v2 v2.1.2
Expand Down Expand Up @@ -185,7 +186,6 @@ require (
github.com/pjbgf/sha1cd v0.3.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/prometheus/client_golang v1.18.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.45.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
Expand Down
26 changes: 23 additions & 3 deletions internal/cmd/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/rancher/fleet/internal/cmd/controller/reconciler"
"github.com/rancher/fleet/internal/cmd/controller/target"
"github.com/rancher/fleet/internal/manifest"
"github.com/rancher/fleet/internal/metrics"
"github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1"

"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -29,12 +30,31 @@ func init() {
//+kubebuilder:scaffold:scheme
}

func start(ctx context.Context, systemNamespace string, config *rest.Config, leaderOpts LeaderElectionOptions, bindAddresses BindAddresses, disableGitops bool) error {
setupLog.Info("listening for changes on local cluster", "disableGitops", disableGitops)
func start(
ctx context.Context,
systemNamespace string,
config *rest.Config,
leaderOpts LeaderElectionOptions,
bindAddresses BindAddresses,
disableGitops bool,
disableMetrics bool,
) error {
setupLog.Info("listening for changes on local cluster",
"disableGitops", disableGitops,
"disableMetrics", disableMetrics,
)

var metricServerOptions metricsserver.Options
if disableMetrics {
metricServerOptions = metricsserver.Options{BindAddress: "0"}
} else {
metricServerOptions = metricsserver.Options{BindAddress: bindAddresses.Metrics}
metrics.RegisterMetrics() // enable fleet related metrics
}

mgr, err := ctrl.NewManager(config, ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{BindAddress: bindAddresses.Metrics},
Metrics: metricServerOptions,
HealthProbeBindAddress: bindAddresses.HealthProbe,

LeaderElection: true,
Expand Down
5 changes: 5 additions & 0 deletions internal/cmd/controller/reconciler/bundle_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/rancher/fleet/internal/cmd/controller/summary"
"github.com/rancher/fleet/internal/cmd/controller/target"
"github.com/rancher/fleet/internal/manifest"
"github.com/rancher/fleet/internal/metrics"
fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1"

apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -96,18 +97,21 @@ func (r *BundleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr

if err := resetStatus(&bundle.Status, matchedTargets); err != nil {
updateDisplay(&bundle.Status)
metrics.CollectBundleMetrics(bundle)
return ctrl.Result{}, err
}

// this will add the defaults for a new bundledeployment
if err := target.UpdatePartitions(&bundle.Status, matchedTargets); err != nil {
updateDisplay(&bundle.Status)
metrics.CollectBundleMetrics(bundle)
return ctrl.Result{}, err
}

if bundle.Status.ObservedGeneration != bundle.Generation {
if err := setResourceKey(context.Background(), &bundle.Status, bundle, manifest, r.isNamespaced); err != nil {
updateDisplay(&bundle.Status)
metrics.CollectBundleMetrics(bundle)
return ctrl.Result{}, err
}
}
Expand Down Expand Up @@ -144,6 +148,7 @@ func (r *BundleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
}

updateDisplay(&bundle.Status)
metrics.CollectBundleMetrics(bundle)
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
t := &fleet.Bundle{}
err := r.Get(ctx, req.NamespacedName, t)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"reflect"

"github.com/rancher/fleet/internal/cmd/controller/summary"
"github.com/rancher/fleet/internal/metrics"
fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1"
"github.com/rancher/wrangler/v2/pkg/genericcondition"

Expand Down Expand Up @@ -68,7 +69,13 @@ func (r *BundleDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
return err
}
t.Status = bd.Status
return r.Status().Update(ctx, t)
err = r.Status().Update(ctx, t)
if err != nil {
return err
}

metrics.CollectBundleDeploymentMetrics(t)
return nil
})
if err != nil {
logger.V(1).Error(err, "Reconcile failed final update to bundle deployment status", "status", bd.Status)
Expand Down
3 changes: 3 additions & 0 deletions internal/cmd/controller/reconciler/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"github.com/rancher/fleet/internal/cmd/controller/summary"
"github.com/rancher/fleet/internal/metrics"
fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1"
"github.com/rancher/fleet/pkg/durations"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -167,6 +168,8 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
logger.V(1).Error(err, "Reconcile failed final update to cluster status", "status", cluster.Status)
}

metrics.CollectClusterMetrics(cluster)

if allReady && cluster.Status.ResourceCounts.Ready != cluster.Status.ResourceCounts.DesiredReady {
logrus.Debugf("Cluster %s/%s is not ready because not all gitrepos are ready: %d/%d, enqueue cluster again",
cluster.Namespace, cluster.Name, cluster.Status.ResourceCounts.Ready, cluster.Status.ResourceCounts.DesiredReady)
Expand Down
3 changes: 3 additions & 0 deletions internal/cmd/controller/reconciler/clustergroup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"reflect"
"strings"

"github.com/rancher/fleet/internal/metrics"
fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1"

"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -71,6 +72,8 @@ func (r *ClusterGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request
})
if err != nil {
logger.V(1).Error(err, "Reconcile failed final update to cluster group status", "status", group.Status)
} else {
metrics.CollectClusterGroupMetrics(group)
}

return ctrl.Result{}, err
Expand Down
3 changes: 3 additions & 0 deletions internal/cmd/controller/reconciler/gitrepo_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

grutil "github.com/rancher/fleet/internal/cmd/controller/gitrepo"
"github.com/rancher/fleet/internal/cmd/controller/imagescan"
"github.com/rancher/fleet/internal/metrics"
fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1"
"github.com/reugn/go-quartz/quartz"

Expand Down Expand Up @@ -68,6 +69,8 @@ func (r *GitRepoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, nil
}

metrics.CollectGitRepoMetrics(gitrepo)

logger = logger.WithValues("commit", gitrepo.Status.Commit)
logger.V(1).Info("Reconciling GitRepo", "lastAccepted", acceptedLastUpdate(gitrepo.Status.Conditions))

Expand Down
16 changes: 12 additions & 4 deletions internal/cmd/controller/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ import (

type FleetManager struct {
command.DebugConfig
Kubeconfig string `usage:"Kubeconfig file"`
Namespace string `usage:"namespace to watch" default:"cattle-fleet-system" env:"NAMESPACE"`
DisableGitops bool `usage:"disable gitops components" name:"disable-gitops"`
Kubeconfig string `usage:"Kubeconfig file"`
Namespace string `usage:"namespace to watch" default:"cattle-fleet-system" env:"NAMESPACE"`
DisableGitops bool `usage:"disable gitops components" name:"disable-gitops"`
DisableMetrics bool `usage:"disable metrics" name:"disable-metrics"`
}

type LeaderElectionOptions struct {
Expand Down Expand Up @@ -128,7 +129,14 @@ func (f *FleetManager) Run(cmd *cobra.Command, args []string) error {
go func() {
log.Println(http.ListenAndServe("localhost:6060", nil)) // nolint:gosec // Debugging only
}()
if err := start(ctx, f.Namespace, kubeconfig, leaderOpts, bindAddresses, f.DisableGitops); err != nil {
if err := start(
ctx, f.Namespace,
kubeconfig,
leaderOpts,
bindAddresses,
f.DisableGitops,
f.DisableMetrics,
); err != nil {
return err
}

Expand Down
2 changes: 2 additions & 0 deletions internal/cmd/controller/summary/summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ func IncrementResourceCounts(left *fleet.GitRepoResourceCounts, right fleet.GitR
left.NotReady += right.NotReady
}

// GetSummaryState returns the summary state of a bundle. The returned value is
// empty if the bundle is ready.
func GetSummaryState(summary fleet.BundleSummary) fleet.BundleState {
var state fleet.BundleState
for _, nonReady := range summary.NonReadyResources {
Expand Down
Loading

0 comments on commit 8d832b3

Please sign in to comment.