diff --git a/.gitignore b/.gitignore index 33d19e417e..9edb4e9c17 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ e2e/testenv/infra/infra ^fleet$ FleetCI-RootCA .envrc +env.multi-cluster +env.single-cluster diff --git a/charts/fleet/templates/deployment.yaml b/charts/fleet/templates/deployment.yaml index b31a1d6ddd..803e7bc90f 100644 --- a/charts/fleet/templates/deployment.yaml +++ b/charts/fleet/templates/deployment.yaml @@ -58,11 +58,19 @@ spec: image: '{{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }}' name: fleet-controller imagePullPolicy: "{{ .Values.image.imagePullPolicy }}" + {{- if .Values.metrics.enabled }} + ports: + - containerPort: 8080 + name: metrics + {{- end }} command: - fleetcontroller {{- if not .Values.gitops.enabled }} - --disable-gitops {{- end }} + {{- if not .Values.metrics.enabled }} + - --disable-metrics + {{- end }} {{- if .Values.debug }} - --debug - --debug-level diff --git a/charts/fleet/templates/service.yaml b/charts/fleet/templates/service.yaml new file mode 100644 index 0000000000..cc17e9b285 --- /dev/null +++ b/charts/fleet/templates/service.yaml @@ -0,0 +1,17 @@ +{{- if .Values.metrics.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: monitoring-fleet-controller + labels: + app: fleet-controller +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: metrics + selector: + app: fleet-controller +{{- end }} diff --git a/charts/fleet/values.yaml b/charts/fleet/values.yaml index e360407915..5582fb9fe4 100644 --- a/charts/fleet/values.yaml +++ b/charts/fleet/values.yaml @@ -66,6 +66,9 @@ priorityClassName: "" gitops: enabled: true +metrics: + enabled: true + debug: false debugLevel: 0 propagateDebugSettingsToAgents: true diff --git a/e2e/assets/clustergroup-template.yaml b/e2e/assets/clustergroup-template.yaml new file mode 100644 index 0000000000..7c8f319478 --- /dev/null +++ b/e2e/assets/clustergroup-template.yaml @@ -0,0 +1,11 
@@ +apiVersion: fleet.cattle.io/v1alpha1 +kind: ClusterGroup +metadata: + name: {{ .Name }} + namespace: {{ .Namespace }} +spec: + selector: + matchLabels: + {{- range $key, $value := .MatchLabels}} + {{$key}}: {{$value}} + {{- end}} diff --git a/e2e/assets/metrics/fleetcontroller_service.yaml b/e2e/assets/metrics/fleetcontroller_service.yaml new file mode 100644 index 0000000000..577129f8c8 --- /dev/null +++ b/e2e/assets/metrics/fleetcontroller_service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ .Name }} +spec: + selector: + app: fleet-controller + ports: + - protocol: TCP + port: {{ .Port }} + targetPort: metrics + type: LoadBalancer diff --git a/e2e/metrics/bundle_test.go b/e2e/metrics/bundle_test.go new file mode 100644 index 0000000000..e30e08057d --- /dev/null +++ b/e2e/metrics/bundle_test.go @@ -0,0 +1,156 @@ +package metrics_test + +import ( + "fmt" + "math/rand" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/rancher/fleet/e2e/metrics" + "github.com/rancher/fleet/e2e/testenv" + "github.com/rancher/fleet/e2e/testenv/kubectl" +) + +var _ = Describe("Bundle Metrics", Label("bundle"), func() { + const ( + objName = "metrics" + branch = "master" + ) + + var ( + // kw is the kubectl command for namespace the workload is deployed to + kw kubectl.Command + namespace string + ) + + BeforeEach(func() { + k = env.Kubectl.Namespace(env.Namespace) + namespace = testenv.NewNamespaceName( + objName, + rand.New(rand.NewSource(time.Now().UnixNano())), + ) + kw = k.Namespace(namespace) + + out, err := k.Create("ns", namespace) + Expect(err).ToNot(HaveOccurred(), out) + + err = testenv.CreateGitRepo( + kw, + namespace, + objName, + branch, + "simple-manifest", + ) + Expect(err).ToNot(HaveOccurred()) + + DeferCleanup(func() { + out, err = k.Delete("ns", namespace) + Expect(err).ToNot(HaveOccurred(), out) + }) + }) + + When("testing Bundle metrics", func() { + bundleMetricNames := []string{ + 
"fleet_bundle_desired_ready", + "fleet_bundle_err_applied", + "fleet_bundle_modified", + "fleet_bundle_not_ready", + "fleet_bundle_out_of_sync", + "fleet_bundle_pending", + "fleet_bundle_ready", + "fleet_bundle_wait_applied", + } + + It("should have exactly one metric for the bundle", func() { + et := metrics.NewExporterTest(metricsURL) + Eventually(func() error { + for _, metricName := range bundleMetricNames { + metric, err := et.FindOneMetric( + metricName, + map[string]string{ + "name": objName + "-simple-manifest", + "namespace": namespace, + }, + ) + if err != nil { + return err + } + Expect(metric.Gauge.GetValue()).To(Equal(float64(0))) + } + return nil + }).ShouldNot(HaveOccurred()) + }) + + Context("when the GitRepo (and therefore Bundle) is changed", Label("bundle-altered"), func() { + It("it should not duplicate metrics if Bundle is updated", Label("bundle-update"), func() { + et := metrics.NewExporterTest(metricsURL) + out, err := kw.Patch( + "gitrepo", objName, + "--type=json", + "-p", `[{"op": "replace", "path": "/spec/paths", "value": ["simple-chart"]}]`, + ) + Expect(err).ToNot(HaveOccurred(), out) + Expect(out).To(ContainSubstring("gitrepo.fleet.cattle.io/metrics patched")) + + // Wait for it to be changed and fetched. + Eventually(func() (string, error) { + return kw.Get("gitrepo", objName, "-o", "jsonpath={.status.commit}") + }).ShouldNot(BeEmpty()) + + var metric *metrics.Metric + // Expect still no metrics to be duplicated. 
+ Eventually(func() error { + for _, metricName := range bundleMetricNames { + metric, err = et.FindOneMetric( + metricName, + map[string]string{ + "name": objName + "-simple-chart", + "namespace": namespace, + }, + ) + if err != nil { + return err + } + if metric.LabelValue("paths") == "simple-manifest" { + return fmt.Errorf("path for metric %s unchanged", metricName) + } + } + return nil + }).ShouldNot(HaveOccurred()) + }) + + It("should not keep metrics if Bundle is deleted", Label("bundle-delete"), func() { + et := metrics.NewExporterTest(metricsURL) + + objName := objName + "-simple-manifest" + + var ( + out string + err error + ) + Eventually(func() error { + out, err = kw.Delete("bundle", objName) + return err + }).ShouldNot(HaveOccurred(), out) + + Eventually(func() error { + for _, metricName := range bundleMetricNames { + _, err := et.FindOneMetric( + metricName, + map[string]string{ + "name": objName, + "namespace": namespace, + }, + ) + if err == nil { + return fmt.Errorf("metric %s found but not expected", metricName) + } + } + return nil + }).ShouldNot(HaveOccurred()) + }) + }) + }) +}) diff --git a/e2e/metrics/bundledeployment_test.go b/e2e/metrics/bundledeployment_test.go new file mode 100644 index 0000000000..875088c3ee --- /dev/null +++ b/e2e/metrics/bundledeployment_test.go @@ -0,0 +1,168 @@ +package metrics_test + +import ( + "fmt" + "math/rand" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/rancher/fleet/e2e/metrics" + "github.com/rancher/fleet/e2e/testenv" + "github.com/rancher/fleet/e2e/testenv/kubectl" +) + +var _ = Describe("BundleDeployment Metrics", Label("bundledeployment"), func() { + const ( + branch = "master" + namespace = "fleet-local" // required for this test to create BundleDeployments + ) + + var ( + // kw is the kubectl command for namespace the workload is deployed to + kw kubectl.Command + // objName is going to be "randomized" instead of using a dedicated and + // random namespace, like it is the case for the other tests. + objName string + ) + + BeforeEach(func() { + k = env.Kubectl.Namespace(env.Namespace) + kw = k.Namespace(namespace) + objName = testenv.AddRandomSuffix( + "metrics", + rand.New(rand.NewSource(time.Now().UnixNano())), + ) + + err := testenv.CreateGitRepo( + kw, + namespace, + objName, + branch, + "simple-manifest", + ) + Expect(err).ToNot(HaveOccurred()) + + DeferCleanup(func() { + out, err := k.Delete("gitrepo", objName) + Expect(err).ToNot(HaveOccurred(), out) + }) + }) + + When("testing BundleDeployment metrics", func() { + bundleDeploymentMetricNames := []string{ + "fleet_bundledeployment_state", + } + bundleDeploymentMetricStates := []string{ + "ErrApplied", + "Modified", + "NotReady", + "OutOfSync", + "Pending", + "Ready", + "WaitApplied", + } + + It("should have exactly one metric for the BundleDeployment", func() { + et := metrics.NewExporterTest(metricsURL) + Eventually(func() error { + for _, metricName := range bundleDeploymentMetricNames { + for _, state := range bundleDeploymentMetricStates { + _, err := et.FindOneMetric( + metricName, + map[string]string{ + "name": objName + "-simple-manifest", + "cluster_namespace": namespace, + "state": state, + }, + ) + if err != nil { + return err + } + } + } + return nil + }).ShouldNot(HaveOccurred()) + }) + + Context( + "when the GitRepo (and therefore Bundle) is changed", + Label("bundle-altered"), + func() { + It( + 
"it should not duplicate metrics if Bundle is updated", + Label("bundle-update"), + func() { + et := metrics.NewExporterTest(metricsURL) + out, err := kw.Patch( + "gitrepo", objName, + "--type=json", + "-p", + `[{"op": "replace", "path": "/spec/paths", "value": ["simple-chart"]}]`, + ) + Expect(err).ToNot(HaveOccurred(), out) + Expect(out).To(ContainSubstring( + fmt.Sprintf("gitrepo.fleet.cattle.io/%s patched", objName))) + + // Wait for it to be changed and fetched. + Eventually(func() (string, error) { + return kw.Get("gitrepo", objName, "-o", "jsonpath={.status.commit}") + }).ShouldNot(BeEmpty()) + + // Expect still no metrics to be duplicated. + Eventually(func() error { + for _, metricName := range bundleDeploymentMetricNames { + for _, metricState := range bundleDeploymentMetricStates { + _, err = et.FindOneMetric( + metricName, + map[string]string{ + "name": objName + "-simple-chart", + "cluster_namespace": namespace, + "state": metricState, + }, + ) + if err != nil { + return err + } + } + } + return nil + }).ShouldNot(HaveOccurred()) + }) + + It("should not keep metrics if Bundle is deleted", Label("bundle-delete"), func() { + et := metrics.NewExporterTest(metricsURL) + + objName := objName + "-simple-manifest" + + Eventually(func() (string, error) { + return kw.Get("-A", "bundledeployment") + }).Should(ContainSubstring(objName)) + + var ( + out string + err error + ) + out, err = kw.Delete("bundle", objName) + Expect(err).ToNot(HaveOccurred(), out) + + Eventually(func() error { + for _, metricName := range bundleDeploymentMetricNames { + _, err := et.FindOneMetric( + metricName, + map[string]string{ + "name": objName, + "cluster_namespace": namespace, + }, + ) + if err == nil { + return fmt.Errorf("metric %s found but not expected", metricName) + } + } + return nil + }).ShouldNot(HaveOccurred()) + }) + }) +}) diff --git a/e2e/metrics/cluster_test.go b/e2e/metrics/cluster_test.go new file mode 100644 index 0000000000..9b486ec5cd --- /dev/null +++
b/e2e/metrics/cluster_test.go @@ -0,0 +1,114 @@ +package metrics_test + +import ( + "encoding/json" + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/rancher/fleet/e2e/metrics" +) + +type cluster struct { + Name string `json:"name"` + Namespace string `json:"namespace"` +} + +type clusters []cluster + +func (cs *clusters) UnmarshalJSON(data []byte) error { + assertMap := func(i interface{}) map[string]interface{} { + return i.(map[string]interface{}) + } + + var tmp interface{} + err := json.Unmarshal(data, &tmp) + if err != nil { + return err + } + + m := tmp.(map[string]interface{}) + items := m["items"].([]interface{}) + + *cs = clusters{} + + for _, item := range items { + metadata := assertMap(assertMap(item)["metadata"]) + + c := cluster{} + c.Namespace = metadata["namespace"].(string) + c.Name = metadata["name"].(string) + + *cs = append(*cs, c) + } + + return nil +} + +var _ = Describe("Cluster Metrics", Label("cluster"), func() { + expectedMetricsExist := map[string]bool{ + "fleet_cluster_desired_ready_git_repos": true, + "fleet_cluster_ready_git_repos": true, + "fleet_cluster_resources_count_desiredready": true, + "fleet_cluster_resources_count_missing": true, + "fleet_cluster_resources_count_modified": true, + "fleet_cluster_resources_count_notready": true, + "fleet_cluster_resources_count_orphaned": true, + "fleet_cluster_resources_count_ready": true, + "fleet_cluster_resources_count_unknown": true, + "fleet_cluster_resources_count_waitapplied": true, + // The value of cluster.Status.Display.State is empty if no issues are + // found and this means no metric is created. 
+ "fleet_cluster_state": false, + } + + It( + "should have as many clusters in metrics as there are objects in the cluster", + func() { + Eventually(func() error { + var ( + clustersOut string + err error + ) + clustersOut, err = env.Kubectl.Get( + "-A", "clusters.fleet.cattle.io", + "-o", "json", + ) + Expect(err).ToNot(HaveOccurred()) + + var existingClusters clusters + err = json.Unmarshal([]byte(clustersOut), &existingClusters) + Expect(err).ToNot(HaveOccurred()) + + et := metrics.NewExporterTest(metricsURL) + + for _, cluster := range existingClusters { + for metricName, expectedExist := range expectedMetricsExist { + _, err := et.FindOneMetric( + metricName, + map[string]string{ + "name": cluster.Name, + "namespace": cluster.Namespace, + }, + ) + if expectedExist { + if err != nil { + return err + } + } else { + if err == nil { + return fmt.Errorf( + "expected metric %s not to exist, but it exists", + metricName, + ) + } + } + } + } + return nil + }).ShouldNot(HaveOccurred()) + }, + ) + + // TODO test if cluster metrics are properly removed if the cluster resource is removed. +}) diff --git a/e2e/metrics/clustergroup_test.go b/e2e/metrics/clustergroup_test.go new file mode 100644 index 0000000000..f0b48a7f7f --- /dev/null +++ b/e2e/metrics/clustergroup_test.go @@ -0,0 +1,106 @@ +package metrics_test + +import ( + "fmt" + "math/rand" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/rancher/fleet/e2e/metrics" + "github.com/rancher/fleet/e2e/testenv" +) + +var _ = Describe("Cluster Metrics", Label("clustergroup"), func() { + const ( + namespace = "fleet-local" + ) + + var ( + clusterGroupName string + ) + + expectedMetricsExist := map[string]bool{ + "fleet_cluster_group_bundle_desired_ready": true, + "fleet_cluster_group_bundle_ready": true, + "fleet_cluster_group_cluster_count": true, + "fleet_cluster_group_non_ready_cluster_count": true, + "fleet_cluster_group_resource_count_desired_ready": true, + "fleet_cluster_group_resource_count_missing": true, + "fleet_cluster_group_resource_count_modified": true, + "fleet_cluster_group_resource_count_notready": true, + "fleet_cluster_group_resource_count_orphaned": true, + "fleet_cluster_group_resource_count_ready": true, + "fleet_cluster_group_resource_count_unknown": true, + "fleet_cluster_group_resource_count_waitapplied": true, + } + + BeforeEach(func() { + clusterGroupName = testenv.AddRandomSuffix( + "test-cluster-group", + rand.NewSource(time.Now().UnixNano()), + ) + err := testenv.CreateClusterGroup( + k, + namespace, + clusterGroupName, + map[string]string{ + "name": "local", + }, + ) + Expect(err).ToNot(HaveOccurred()) + + DeferCleanup(func() { + out, err := k.Delete( + "clustergroups.fleet.cattle.io", + clusterGroupName, + "-n", namespace, + ) + Expect(out).To(ContainSubstring("deleted")) + Expect(err).ToNot(HaveOccurred()) + }) + }) + + // The cluster group is created without an UID. This UID is added shortly + // after the creation of the cluster group. This results in the cluster + // group being modified and, if not properly checked, duplicated metrics. + // This is why this test does test for duplicated metrics as well, although + // it does not look like it. 
+ It("should have all metrics for a single cluster group once", func() { + Eventually(func() (string, error) { + return env.Kubectl.Get( + "-n", namespace, + "clustergroups.fleet.cattle.io", + clusterGroupName, + "-o", "jsonpath={.metadata.name}", + ) + }).ShouldNot(ContainSubstring("not found")) + + et := metrics.NewExporterTest(metricsURL) + + Eventually(func() error { + for metricName, expectedExist := range expectedMetricsExist { + metric, err := et.FindOneMetric( + metricName, + map[string]string{ + "name": clusterGroupName, + "namespace": namespace, + }, + ) + if expectedExist { + if err != nil { + return err + } + Expect(err).ToNot(HaveOccurred()) + } else { + if err == nil { + return fmt.Errorf("expected not to exist but found %s", metric) + } + } + } + return nil + }).ShouldNot(HaveOccurred()) + }, + ) + +}) diff --git a/e2e/metrics/exporter.go b/e2e/metrics/exporter.go new file mode 100644 index 0000000000..0b8e0ea2b0 --- /dev/null +++ b/e2e/metrics/exporter.go @@ -0,0 +1,161 @@ +package metrics + +import ( + "fmt" + "net/http" + + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" +) + +type ExporterTest struct { + url string +} + +func NewExporterTest(url string) *ExporterTest { + return &ExporterTest{ + url: url, + } +} + +// getMetrics fetches the metrics from the Prometheus endpoint and returns them +// as a map of metric families. +func (et *ExporterTest) getMetrics() (map[string]*dto.MetricFamily, error) { + resp, err := http.Get(et.url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + var parser expfmt.TextParser + metrics, err := parser.TextToMetricFamilies(resp.Body) + if err != nil { + return nil, err + } + return metrics, nil +} + +// FindAll returns all metrics with the given name, resource name, resource
If no such metrics are found, an empty slice is +// returned. +// +// resourceName and resourceNamespace are the values of the "name" and +// "namespace" labels, respectively. They have to be provided. +// +// TODO use and finish or remove +func (m *ExporterTest) FindAll( + metricName, + resourceName, + resourceNamespace string, + labels map[string]string, +) ([]*dto.Metric, error) { + return nil, nil +} + +// FindOneMetric expects to find exactly one metric with the given name, resource name, +// resource namespace, and labels. If no such metric is found, or if more than +// one is found, an error is returned. +// +// `resourceName` and `resourceNamespace` are the values of the `name` and +// `namespace` labels, respectively. +// +// If labels is nil, only the name and namespace labels are checked. +func (m *ExporterTest) FindOneMetric( + metricName string, + labels map[string]string, +) (*Metric, error) { + allMetrics, err := m.getMetrics() + if err != nil { + return nil, fmt.Errorf("failed to get metrics: %w", err) + } + + // Metric name exists. + mf, ok := allMetrics[metricName] + if !ok { + return nil, fmt.Errorf("metric %q not found", metricName) + } + + var metrics []*dto.Metric + for _, metric := range mf.Metric { + m := Metric{Metric: metric} + + // Check that all labels match, if present. + match := true + for k, v := range labels { + if m.LabelValue(k) != v { + match = false + break + } + } + if match { + metrics = append(metrics, metric) + } + } + + if len(metrics) != 1 { + return nil, fmt.Errorf( + "expected to find 1 metric for %s{%s}, got %d", + metricName, + promLabels(labels), + len(metrics), + ) + } + + return &Metric{Metric: metrics[0]}, nil +} + +type promLabels map[string]string + +func (l promLabels) String() string { + r := "" + for k, v := range l { + r += fmt.Sprintf("%s=%q, ", k, v) + } + return r +} + +type Metric struct { + *dto.Metric +} + +// LabelValue returns the value of the label with the given name. 
If no such +// label is found, an empty string is returned. +func (m *Metric) LabelValue(name string) string { + for _, label := range m.Label { + if *label.Name == name { + return *label.Value + } + } + return "" +} + +func (m *Metric) MatchLabelValue(name, value string) error { + for _, label := range m.Label { + if *label.Name == name { + if *label.Value == value { + return nil + } else { + return fmt.Errorf( + "expected label %q to have value %q, got %q", + name, + value, + *label.Value, + ) + } + } + } + return fmt.Errorf("label %q not found", name) +} + +func (m *Metric) HasLabel(name string) bool { + for _, label := range m.Label { + if *label.Name == name { + return true + } + } + return false +} diff --git a/e2e/metrics/gitrepo_test.go b/e2e/metrics/gitrepo_test.go new file mode 100644 index 0000000000..cd92079a7c --- /dev/null +++ b/e2e/metrics/gitrepo_test.go @@ -0,0 +1,150 @@ +package metrics_test + +import ( + "fmt" + "math/rand" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/rancher/fleet/e2e/metrics" + "github.com/rancher/fleet/e2e/testenv" + "github.com/rancher/fleet/e2e/testenv/kubectl" +) + +var _ = Describe("GitRepo Metrics", Label("gitrepo"), func() { + const ( + objName = "metrics" + branch = "master" + ) + + var ( + // kw is the kubectl command for namespace the workload is deployed to + kw kubectl.Command + namespace string + ) + + BeforeEach(func() { + k = env.Kubectl.Namespace(env.Namespace) + namespace = testenv.NewNamespaceName( + objName, + rand.New(rand.NewSource(time.Now().UnixNano())), + ) + kw = k.Namespace(namespace) + + out, err := k.Create("ns", namespace) + Expect(err).ToNot(HaveOccurred(), out) + + err = testenv.CreateGitRepo( + kw, + namespace, + objName, + branch, + "simple-manifest", + ) + Expect(err).ToNot(HaveOccurred()) + + DeferCleanup(func() { + out, err = k.Delete("ns", namespace) + Expect(err).ToNot(HaveOccurred(), out) + }) + }) + + When("testing GitRepo metrics", func() { + gitrepoMetricNames := []string{ + "fleet_gitrepo_desired_ready_clusters", + "fleet_gitrepo_ready_clusters", + "fleet_gitrepo_resources_desired_ready", + "fleet_gitrepo_resources_missing", + "fleet_gitrepo_resources_modified", + "fleet_gitrepo_resources_not_ready", + "fleet_gitrepo_resources_orphaned", + "fleet_gitrepo_resources_ready", + "fleet_gitrepo_resources_unknown", + "fleet_gitrepo_resources_wait_applied", + } + + It("should have exactly one metric of each type for the gitrepo", func() { + et := metrics.NewExporterTest(metricsURL) + Eventually(func() error { + for _, metricName := range gitrepoMetricNames { + metric, err := et.FindOneMetric( + metricName, + map[string]string{ + "name": objName, + "namespace": namespace, + }, + ) + if err != nil { + return err + } + Expect(metric.Gauge.GetValue()).To(Equal(float64(0))) + } + return nil + }).ShouldNot(HaveOccurred()) + }) + + Context("when the GitRepo is changed", func() { + It("it should not duplicate metrics if GitRepo is updated", 
func() { + et := metrics.NewExporterTest(metricsURL) + out, err := kw.Patch( + "gitrepo", objName, + "--type=json", + "-p", `[{"op": "replace", "path": "/spec/paths", "value": ["simple-chart"]}]`, + ) + Expect(err).ToNot(HaveOccurred(), out) + Expect(out).To(ContainSubstring("gitrepo.fleet.cattle.io/metrics patched")) + + // Wait for it to be changed and fetched. + Eventually(func() (string, error) { + return kw.Get("gitrepo", objName, "-o", "jsonpath={.status.commit}") + }).ShouldNot(BeEmpty()) + + var metric *metrics.Metric + // Expect still no metrics to be duplicated. + Eventually(func() error { + for _, metricName := range gitrepoMetricNames { + metric, err = et.FindOneMetric( + metricName, + map[string]string{ + "name": objName, + "namespace": namespace, + }, + ) + if err != nil { + return err + } + if metric.LabelValue("paths") != "simple-chart" { + return fmt.Errorf("path for metric %s unchanged", metricName) + } + } + return nil + }).ShouldNot(HaveOccurred()) + }) + + It("should not keep metrics if GitRepo is deleted", Label("gitrepo-delete"), func() { + et := metrics.NewExporterTest(metricsURL) + + out, err := kw.Delete("gitrepo", objName) + Expect(err).ToNot(HaveOccurred(), out) + + Eventually(func() error { + for _, metricName := range gitrepoMetricNames { + _, err := et.FindOneMetric( + metricName, + map[string]string{ + "name": objName, + "namespace": namespace, + }, + ) + if err == nil { + return fmt.Errorf("metric %s found", metricName) + } + } + return nil + }).ShouldNot(HaveOccurred()) + }) + }) + }) +}) diff --git a/e2e/metrics/suite_test.go b/e2e/metrics/suite_test.go new file mode 100644 index 0000000000..153c0ee871 --- /dev/null +++ b/e2e/metrics/suite_test.go @@ -0,0 +1,78 @@ +package metrics_test + +import ( + "fmt" + "math/rand" + "os" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/rancher/fleet/e2e/testenv" + "github.com/rancher/fleet/e2e/testenv/kubectl" +) + +func TestE2E(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "E2E Suite for metrics") +} + +var ( + env *testenv.Env + // k is the kubectl command for the cluster registration namespace + k kubectl.Command + metricsURL string + loadBalancerName string +) + +func setupLoadBalancer() { + rs := rand.NewSource(time.Now().UnixNano()) + port := rs.Int63()%1000 + 30000 + loadBalancerName = testenv.AddRandomSuffix("fleetcontroller", rs) + + ks := k.Namespace("cattle-fleet-system") + err := testenv.ApplyTemplate( + ks, + testenv.AssetPath("metrics/fleetcontroller_service.yaml"), + map[string]interface{}{ + "Name": loadBalancerName, + "Port": port, + }, + ) + Expect(err).ToNot(HaveOccurred()) + + if os.Getenv("METRICS_URL") != "" { + metricsURL = os.Getenv("METRICS_URL") + } else { + Eventually(func() (string, error) { + ip, err := ks.Get( + "service", loadBalancerName, + "-o", "jsonpath={.status.loadBalancer.ingress[0].ip}", + ) + metricsURL = fmt.Sprintf("http://%s:%d/metrics", ip, port) + return ip, err + }).ShouldNot(BeEmpty()) + } +} + +func tearDownLoadBalancer() { + ks := k.Namespace("cattle-fleet-system") + out, err := ks.Delete("service", loadBalancerName) + Expect(err).ToNot(HaveOccurred(), out) +} + +var _ = BeforeSuite(func() { + SetDefaultEventuallyTimeout(time.Minute) + SetDefaultEventuallyPollingInterval(time.Second) + testenv.SetRoot("../..") + + setupLoadBalancer() + + env = testenv.New() +}) + +var _ = AfterSuite(func() { + tearDownLoadBalancer() +}) diff --git a/e2e/testenv/env.go b/e2e/testenv/env.go index f00317784c..013f8c02b8 100644 --- a/e2e/testenv/env.go +++ b/e2e/testenv/env.go @@ -65,3 +65,14 @@ func NewNamespaceName(name string, s rand.Source) string { } return fmt.Sprintf("test-%.20s-%.12s", name, hex.EncodeToString(p)) } + +// AddRandomSuffix adds a random suffix to a given name. 
+func AddRandomSuffix(name string, s rand.Source) string { + p := make([]byte, 6) + r := rand.New(s) // nolint:gosec // non-crypto usage + _, err := r.Read(p) + if err != nil { + panic(err) + } + return fmt.Sprintf("%s-%s", name, hex.EncodeToString(p)) +} diff --git a/e2e/testenv/kubectl/kubectl.go b/e2e/testenv/kubectl/kubectl.go index b5e0162ff9..2529be5101 100644 --- a/e2e/testenv/kubectl/kubectl.go +++ b/e2e/testenv/kubectl/kubectl.go @@ -103,3 +103,4 @@ func (c Command) exec(command string, args ...string) (string, error) { err := cmd.Run() return b.String(), err } + diff --git a/e2e/testenv/template.go b/e2e/testenv/template.go index 2974369812..819b32e945 100644 --- a/e2e/testenv/template.go +++ b/e2e/testenv/template.go @@ -15,6 +15,7 @@ import ( ) const gitrepoTemplate = "gitrepo-template.yaml" +const clustergroupTemplate = "clustergroup-template.yaml" // GitRepoData can be used with the gitrepo-template.yaml asset when no custom // GitRepo properties are required. All fields are required. @@ -35,12 +36,28 @@ func CreateGitRepo(k kubectl.Command, namespace string, name string, branch stri }) } +// CreateClusterGroup uses the template to create a clustergroup resource. +func CreateClusterGroup( + k kubectl.Command, + namespace, + name string, + labels map[string]string, +) error { + return ApplyTemplate(k, AssetPath(clustergroupTemplate), map[string]interface{}{ + "Name": name, + "Namespace": namespace, + "MatchLabels": labels, + }) +} + // ApplyTemplate templates a file and applies it to the cluster. 
func ApplyTemplate(k kubectl.Command, asset string, data interface{}) error { tmpdir, _ := os.MkdirTemp("", "fleet-") defer os.RemoveAll(tmpdir) - output := path.Join(tmpdir, RandomFilename(asset, rand.New(rand.NewSource(ginkgo.GinkgoRandomSeed())))) // nolint:gosec // test code + output := path.Join( + tmpdir, RandomFilename(asset, rand.New(rand.NewSource(ginkgo.GinkgoRandomSeed()))), + ) // nolint:gosec // test code if err := Template(output, AssetPath(asset), data); err != nil { return err } diff --git a/go.mod b/go.mod index b977d8ae36..dd1bc5739a 100644 --- a/go.mod +++ b/go.mod @@ -31,6 +31,9 @@ require ( github.com/onsi/gomega v1.32.0 github.com/otiai10/copy v1.14.0 github.com/pkg/errors v0.9.1 + github.com/prometheus/client_golang v1.19.0 + github.com/prometheus/client_model v0.6.0 + github.com/prometheus/common v0.48.0 github.com/rancher/fleet/pkg/apis v0.10.0-rc.4 github.com/rancher/lasso v0.0.0-20240325194215-0064abcb8aee github.com/rancher/wrangler/v2 v2.1.4 @@ -185,9 +188,6 @@ require ( github.com/pjbgf/sha1cd v0.3.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect - github.com/prometheus/client_golang v1.19.0 // indirect - github.com/prometheus/client_model v0.6.0 // indirect - github.com/prometheus/common v0.48.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/rogpeppe/go-internal v1.12.0 // indirect diff --git a/internal/cmd/controller/operator.go b/internal/cmd/controller/operator.go index 064bd05c87..9c453edb45 100644 --- a/internal/cmd/controller/operator.go +++ b/internal/cmd/controller/operator.go @@ -8,6 +8,7 @@ import ( "github.com/rancher/fleet/internal/cmd/controller/reconciler" "github.com/rancher/fleet/internal/cmd/controller/target" "github.com/rancher/fleet/internal/manifest" + "github.com/rancher/fleet/internal/metrics" "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" 
"k8s.io/apimachinery/pkg/runtime" @@ -29,12 +30,31 @@ func init() { //+kubebuilder:scaffold:scheme } -func start(ctx context.Context, systemNamespace string, config *rest.Config, leaderOpts LeaderElectionOptions, bindAddresses BindAddresses, disableGitops bool) error { - setupLog.Info("listening for changes on local cluster", "disableGitops", disableGitops) +func start( + ctx context.Context, + systemNamespace string, + config *rest.Config, + leaderOpts LeaderElectionOptions, + bindAddresses BindAddresses, + disableGitops bool, + disableMetrics bool, +) error { + setupLog.Info("listening for changes on local cluster", + "disableGitops", disableGitops, + "disableMetrics", disableMetrics, + ) + + var metricServerOptions metricsserver.Options + if disableMetrics { + metricServerOptions = metricsserver.Options{BindAddress: "0"} + } else { + metricServerOptions = metricsserver.Options{BindAddress: bindAddresses.Metrics} + metrics.RegisterMetrics() // enable fleet related metrics + } mgr, err := ctrl.NewManager(config, ctrl.Options{ Scheme: scheme, - Metrics: metricsserver.Options{BindAddress: bindAddresses.Metrics}, + Metrics: metricServerOptions, HealthProbeBindAddress: bindAddresses.HealthProbe, LeaderElection: true, diff --git a/internal/cmd/controller/reconciler/bundle_controller.go b/internal/cmd/controller/reconciler/bundle_controller.go index e57a172e86..40fb68646d 100644 --- a/internal/cmd/controller/reconciler/bundle_controller.go +++ b/internal/cmd/controller/reconciler/bundle_controller.go @@ -8,6 +8,7 @@ import ( "github.com/rancher/fleet/internal/cmd/controller/summary" "github.com/rancher/fleet/internal/cmd/controller/target" "github.com/rancher/fleet/internal/manifest" + "github.com/rancher/fleet/internal/metrics" fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -60,6 +61,8 @@ func (r *BundleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr bundle := &fleet.Bundle{} err := 
r.Get(ctx, req.NamespacedName, bundle) if apierrors.IsNotFound(err) { + metrics.BundleCollector.Delete(req.Name, req.Namespace) + logger.V(1).Info("Bundle not found, purging bundle deployments") if err := purgeBundleDeployments(ctx, r.Client, req.NamespacedName); err != nil { return ctrl.Result{}, err @@ -111,18 +114,21 @@ func (r *BundleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr if err := resetStatus(&bundle.Status, matchedTargets); err != nil { updateDisplay(&bundle.Status) + metrics.BundleCollector.Collect(bundle) return ctrl.Result{}, err } // this will add the defaults for a new bundledeployment if err := target.UpdatePartitions(&bundle.Status, matchedTargets); err != nil { updateDisplay(&bundle.Status) + metrics.BundleCollector.Collect(bundle) return ctrl.Result{}, err } if bundle.Status.ObservedGeneration != bundle.Generation { if err := setResourceKey(context.Background(), &bundle.Status, bundle, manifest, r.isNamespaced); err != nil { updateDisplay(&bundle.Status) + metrics.BundleCollector.Collect(bundle) return ctrl.Result{}, err } } @@ -159,6 +165,7 @@ func (r *BundleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr } updateDisplay(&bundle.Status) + metrics.BundleCollector.Collect(bundle) err = retry.RetryOnConflict(retry.DefaultRetry, func() error { t := &fleet.Bundle{} err := r.Get(ctx, req.NamespacedName, t) diff --git a/internal/cmd/controller/reconciler/bundledeployment_controller.go b/internal/cmd/controller/reconciler/bundledeployment_controller.go index bc358f2ba8..cbd25c09c0 100644 --- a/internal/cmd/controller/reconciler/bundledeployment_controller.go +++ b/internal/cmd/controller/reconciler/bundledeployment_controller.go @@ -7,6 +7,7 @@ import ( "reflect" "github.com/rancher/fleet/internal/cmd/controller/summary" + "github.com/rancher/fleet/internal/metrics" fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" "github.com/rancher/wrangler/v2/pkg/genericcondition" @@ -37,6 +38,7 @@ func (r 
*BundleDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req bd := &fleet.BundleDeployment{} err := r.Get(ctx, req.NamespacedName, bd) if err != nil { + metrics.BundleDeploymentCollector.Delete(req.Name, req.Namespace) return ctrl.Result{}, client.IgnoreNotFound(err) } // increased log level, this triggers a lot @@ -61,8 +63,9 @@ func (r *BundleDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req State: string(summary.GetDeploymentState(bd)), } + var t *fleet.BundleDeployment err = retry.RetryOnConflict(retry.DefaultRetry, func() error { - t := &fleet.BundleDeployment{} + t = &fleet.BundleDeployment{} err := r.Get(ctx, req.NamespacedName, t) if err != nil { return err @@ -72,6 +75,8 @@ func (r *BundleDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req }) if err != nil { logger.V(1).Error(err, "Reconcile failed final update to bundle deployment status", "status", bd.Status) + } else { + metrics.BundleDeploymentCollector.Collect(t) } return ctrl.Result{}, err diff --git a/internal/cmd/controller/reconciler/cluster_controller.go b/internal/cmd/controller/reconciler/cluster_controller.go index febdc444bd..96f4ebaa11 100644 --- a/internal/cmd/controller/reconciler/cluster_controller.go +++ b/internal/cmd/controller/reconciler/cluster_controller.go @@ -9,6 +9,7 @@ import ( "time" "github.com/rancher/fleet/internal/cmd/controller/summary" + "github.com/rancher/fleet/internal/metrics" fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" "github.com/rancher/fleet/pkg/durations" "github.com/sirupsen/logrus" @@ -55,6 +56,7 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct cluster := &fleet.Cluster{} err := r.Get(ctx, req.NamespacedName, cluster) if apierrors.IsNotFound(err) { + metrics.ClusterCollector.Delete(req.Name, req.Namespace) return ctrl.Result{}, nil } else if err != nil { return ctrl.Result{}, err @@ -163,6 +165,8 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req 
ctrl.Request) (ct logger.V(1).Error(err, "Reconcile failed final update to cluster status", "status", cluster.Status) } + metrics.ClusterCollector.Collect(cluster) + if allReady && cluster.Status.ResourceCounts.Ready != cluster.Status.ResourceCounts.DesiredReady { logrus.Debugf("Cluster %s/%s is not ready because not all gitrepos are ready: %d/%d, enqueue cluster again", cluster.Namespace, cluster.Name, cluster.Status.ResourceCounts.Ready, cluster.Status.ResourceCounts.DesiredReady) diff --git a/internal/cmd/controller/reconciler/clustergroup_controller.go b/internal/cmd/controller/reconciler/clustergroup_controller.go index aec0166dc9..0b5c3564b8 100644 --- a/internal/cmd/controller/reconciler/clustergroup_controller.go +++ b/internal/cmd/controller/reconciler/clustergroup_controller.go @@ -8,6 +8,7 @@ import ( "reflect" "strings" + "github.com/rancher/fleet/internal/metrics" fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" "k8s.io/apimachinery/pkg/runtime" @@ -37,6 +38,7 @@ func (r *ClusterGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request group := &fleet.ClusterGroup{} err := r.Get(ctx, req.NamespacedName, group) if err != nil { + metrics.ClusterGroupCollector.Delete(req.Name, req.Namespace) return ctrl.Result{}, client.IgnoreNotFound(err) } logger.V(1).Info("Reconciling clustergroup, updating display status field", "oldDisplay", group.Status.Display) @@ -71,6 +73,8 @@ func (r *ClusterGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request }) if err != nil { logger.V(1).Error(err, "Reconcile failed final update to cluster group status", "status", group.Status) + } else { + metrics.ClusterGroupCollector.Collect(group) } return ctrl.Result{}, err diff --git a/internal/cmd/controller/reconciler/gitrepo_controller.go b/internal/cmd/controller/reconciler/gitrepo_controller.go index 7968056a7f..81f1d3cea7 100644 --- a/internal/cmd/controller/reconciler/gitrepo_controller.go +++ 
b/internal/cmd/controller/reconciler/gitrepo_controller.go @@ -9,6 +9,7 @@ import ( grutil "github.com/rancher/fleet/internal/cmd/controller/gitrepo" "github.com/rancher/fleet/internal/cmd/controller/imagescan" + "github.com/rancher/fleet/internal/metrics" fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" "github.com/reugn/go-quartz/quartz" @@ -57,6 +58,9 @@ func (r *GitRepoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct // Clean up if apierrors.IsNotFound(err) { logger.V(1).Info("Gitrepo deleted, deleting bundle, image scans") + + metrics.GitRepoCollector.Delete(req.NamespacedName.Name, req.NamespacedName.Namespace) + if err := purgeBundles(ctx, r.Client, req.NamespacedName); err != nil { return ctrl.Result{}, err } @@ -70,6 +74,8 @@ func (r *GitRepoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } + metrics.GitRepoCollector.Collect(gitrepo) + logger = logger.WithValues("commit", gitrepo.Status.Commit) logger.V(1).Info("Reconciling GitRepo", "lastAccepted", acceptedLastUpdate(gitrepo.Status.Conditions)) diff --git a/internal/cmd/controller/root.go b/internal/cmd/controller/root.go index 7219292f91..0aa1345383 100644 --- a/internal/cmd/controller/root.go +++ b/internal/cmd/controller/root.go @@ -31,9 +31,10 @@ import ( type FleetManager struct { command.DebugConfig - Kubeconfig string `usage:"Kubeconfig file"` - Namespace string `usage:"namespace to watch" default:"cattle-fleet-system" env:"NAMESPACE"` - DisableGitops bool `usage:"disable gitops components" name:"disable-gitops"` + Kubeconfig string `usage:"Kubeconfig file"` + Namespace string `usage:"namespace to watch" default:"cattle-fleet-system" env:"NAMESPACE"` + DisableGitops bool `usage:"disable gitops components" name:"disable-gitops"` + DisableMetrics bool `usage:"disable metrics" name:"disable-metrics"` } type LeaderElectionOptions struct { @@ -128,7 +129,15 @@ func (f *FleetManager) Run(cmd *cobra.Command, args []string) error 
{ go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) // nolint:gosec // Debugging only }() - if err := start(ctx, f.Namespace, kubeconfig, leaderOpts, bindAddresses, f.DisableGitops); err != nil { + if err := start( + ctx, + f.Namespace, + kubeconfig, + leaderOpts, + bindAddresses, + f.DisableGitops, + f.DisableMetrics, + ); err != nil { return err } diff --git a/internal/cmd/controller/summary/summary.go b/internal/cmd/controller/summary/summary.go index 59c93a6dab..5584c0a21a 100644 --- a/internal/cmd/controller/summary/summary.go +++ b/internal/cmd/controller/summary/summary.go @@ -72,6 +72,8 @@ func IncrementResourceCounts(left *fleet.GitRepoResourceCounts, right fleet.GitR left.NotReady += right.NotReady } +// GetSummaryState returns the summary state of a bundle. The returned value is +// empty if the bundle is ready. func GetSummaryState(summary fleet.BundleSummary) fleet.BundleState { var state fleet.BundleState for _, nonReady := range summary.NonReadyResources { diff --git a/internal/metrics/bundle_metrics.go b/internal/metrics/bundle_metrics.go new file mode 100644 index 0000000000..98b4147ac5 --- /dev/null +++ b/internal/metrics/bundle_metrics.go @@ -0,0 +1,147 @@ +package metrics + +import ( + "fmt" + + "github.com/rancher/fleet/internal/cmd/controller/summary" + fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + bundleSubsystem = "bundle" + bundleLabels = []string{"name", "namespace", "commit", "repo", "generation", "state"} + BundleCollector = CollectorCollection{ + subsystem: bundleSubsystem, + metrics: map[string]prometheus.Collector{ + "not_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "not_ready", + Help: "Number of deployments for a specific bundle in a not ready state.", + }, + bundleLabels, + ), + "wait_applied": 
promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "wait_applied", + Help: "Number of deployments for a specific bundle in a wait applied state.", + }, + bundleLabels, + ), + "err_applied": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "err_applied", + Help: "Number of deployments for a specific bundle in an error applied state.", + }, + bundleLabels, + ), + "out_of_sync": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "out_of_sync", + Help: "Number of deployments for a specific bundle in an out of sync state.", + }, + bundleLabels, + ), + "modified": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "modified", + Help: "Number of deployments for a specific bundle in a modified state.", + }, + bundleLabels, + ), + "ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "ready", + Help: "Number of deployments for a specific bundle in a ready state.", + }, + bundleLabels, + ), + "pending": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "pending", + Help: "Number of deployments for a specific bundle in a pending state.", + }, + bundleLabels, + ), + "desired_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "desired_ready", + Help: "Number of deployments that are desired to be ready for a bundle.", + }, + bundleLabels, + ), + "state": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleSubsystem, + Name: "state", + Help: "Shows the state of this bundle based on the state label. 
A value of 1 is true 0 is false.", + }, + bundleLabels, + ), + }, + collector: collectBundleMetrics, + } +) + +func collectBundleMetrics(obj any, metrics map[string]prometheus.Collector) { + bundle, ok := obj.(*fleet.Bundle) + if !ok { + panic("unexpected object type") + } + + currentState := summary.GetSummaryState(bundle.Status.Summary) + labels := prometheus.Labels{ + "name": bundle.Name, + "namespace": bundle.Namespace, + "commit": bundle.ObjectMeta.Labels[commitLabel], + "repo": bundle.ObjectMeta.Labels[repoNameLabel], + "generation": fmt.Sprintf("%d", bundle.ObjectMeta.Generation), + "state": string(currentState), + } + + metrics["not_ready"].(*prometheus.GaugeVec).With(labels). + Set(float64(bundle.Status.Summary.NotReady)) + metrics["wait_applied"].(*prometheus.GaugeVec).With(labels). + Set(float64(bundle.Status.Summary.WaitApplied)) + metrics["err_applied"].(*prometheus.GaugeVec).With(labels). + Set(float64(bundle.Status.Summary.ErrApplied)) + metrics["out_of_sync"].(*prometheus.GaugeVec).With(labels). + Set(float64(bundle.Status.Summary.OutOfSync)) + metrics["modified"].(*prometheus.GaugeVec).With(labels). + Set(float64(bundle.Status.Summary.Modified)) + metrics["ready"].(*prometheus.GaugeVec).With(labels). + Set(float64(bundle.Status.Summary.Ready)) + metrics["pending"].(*prometheus.GaugeVec).With(labels). + Set(float64(bundle.Status.Summary.Pending)) + metrics["desired_ready"].(*prometheus.GaugeVec).With(labels). 
+ Set(float64(bundle.Status.Summary.DesiredReady)) + + for _, state := range bundleStates { + labels["state"] = string(state) + + if state == currentState { + metrics["state"].(*prometheus.GaugeVec).With(labels).Set(1) + } else { + metrics["state"].(*prometheus.GaugeVec).With(labels).Set(0) + } + } +} diff --git a/internal/metrics/bundledeployment_metrics.go b/internal/metrics/bundledeployment_metrics.go new file mode 100644 index 0000000000..92a87a7458 --- /dev/null +++ b/internal/metrics/bundledeployment_metrics.go @@ -0,0 +1,74 @@ +package metrics + +import ( + "fmt" + + "github.com/rancher/fleet/internal/cmd/controller/summary" + fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + bundleDeploymentSubsystem = "bundledeployment" + bundleDeploymentLabels = []string{ + "name", + "namespace", + "cluster_name", + "cluster_namespace", + "repo", + "commit", + "bundle", + "bundle_namespace", + "generation", + "state", + } + BundleDeploymentCollector = CollectorCollection{ + subsystem: bundleDeploymentSubsystem, + metrics: map[string]prometheus.Collector{ + "state": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: bundleDeploymentSubsystem, + Name: "state", + Help: "Shows the state of this bundle deployment based on the state label. 
" + + "A value of 1 is true 0 is false.", + }, + bundleDeploymentLabels, + ), + }, + collector: collectBundleDeploymentMetrics, + } +) + +func collectBundleDeploymentMetrics(obj any, metrics map[string]prometheus.Collector) { + bundleDep, ok := obj.(*fleet.BundleDeployment) + if !ok { + panic("unexpected object type") + } + + currentState := summary.GetDeploymentState(bundleDep) + labels := prometheus.Labels{ + "name": bundleDep.Name, + "namespace": bundleDep.Namespace, + "cluster_name": bundleDep.ObjectMeta.Labels["fleet.cattle.io/cluster"], + "cluster_namespace": bundleDep.ObjectMeta.Labels["fleet.cattle.io/cluster-namespace"], + "repo": bundleDep.ObjectMeta.Labels[repoNameLabel], + "commit": bundleDep.ObjectMeta.Labels[commitLabel], + "bundle": bundleDep.ObjectMeta.Labels["fleet.cattle.io/bundle-name"], + "bundle_namespace": bundleDep.ObjectMeta.Labels["fleet.cattle.io/bundle-namespace"], + "generation": fmt.Sprintf("%d", bundleDep.ObjectMeta.Generation), + "state": string(currentState), + } + + for _, state := range bundleStates { + labels["state"] = string(state) + + if state == currentState { + metrics["state"].(*prometheus.GaugeVec).With(labels).Set(1) + } else { + metrics["state"].(*prometheus.GaugeVec).With(labels).Set(0) + } + } +} diff --git a/internal/metrics/cluster_metrics.go b/internal/metrics/cluster_metrics.go new file mode 100644 index 0000000000..89d29815cf --- /dev/null +++ b/internal/metrics/cluster_metrics.go @@ -0,0 +1,188 @@ +package metrics + +import ( + "fmt" + + fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + clusterSubsystem = "cluster" + clusterLabels = []string{ + "name", + "namespace", + // The name as given per "management.cattle.io/cluster-name" label. This + // may but does not have to be different from `name` label and is added + // by Rancher. 
+ "cluster_name", + "cluster_display_name", + "generation", + "state", + } + + clusterNameLabel = "management.cattle.io/cluster-name" + clusterDisplayNameLabel = "management.cattle.io/cluster-display-name" + clusterStates = []string{ + string(fleet.NotReady), + string(fleet.Ready), + "WaitCheckIn", + } + + ClusterCollector = NewCollectorCollection( + clusterSubsystem, + clusterMetrics, + collectClusterMetrics, + ) + + clusterMetrics = map[string]prometheus.Collector{ + "desired_ready_git_repos": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "desired_ready_git_repos", + Help: "The desired number of GitRepos to be in a ready state.", + }, + clusterLabels, + ), + "ready_git_repos": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "ready_git_repos", + Help: "The number of GitRepos in a ready state.", + }, + clusterLabels, + ), + "resources_count_desiredready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "resources_count_desiredready", + Help: "The number of resources for the given cluster desired to be in the Ready state.", + }, + clusterLabels, + ), + "resources_count_missing": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "resources_count_missing", + Help: "The number of resources in the Missing state.", + }, + clusterLabels, + ), + "resources_count_modified": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "resources_count_modified", + Help: "The number of resources in the Modified state.", + }, + clusterLabels, + ), + "resources_count_notready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "resources_count_notready", + Help: "The number of resources in the NotReady state.", + }, 
+ clusterLabels, + ), + "resources_count_orphaned": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "resources_count_orphaned", + Help: "The number of resources in the Orphaned state.", + }, + clusterLabels, + ), + "resources_count_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "resources_count_ready", + Help: "The number of resources in the Ready state.", + }, + clusterLabels, + ), + "resources_count_unknown": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "resources_count_unknown", + Help: "The number of resources in the Unknown state.", + }, + clusterLabels, + ), + "resources_count_waitapplied": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "resources_count_waitapplied", + Help: "The number of resources in the WaitApplied state.", + }, + clusterLabels, + ), + "state": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterSubsystem, + Name: "state", + Help: "The current state of a given cluster", + }, + clusterLabels, + ), + } +) + +func collectClusterMetrics(obj any, metrics map[string]prometheus.Collector) { + cluster, ok := obj.(*fleet.Cluster) + if !ok { + panic("unexpected object type") + } + + labels := prometheus.Labels{ + "name": cluster.Name, + "namespace": cluster.Namespace, + "cluster_name": cluster.ObjectMeta.Labels[clusterNameLabel], + "cluster_display_name": cluster.ObjectMeta.Labels[clusterDisplayNameLabel], + "generation": fmt.Sprintf("%d", cluster.ObjectMeta.Generation), + "state": cluster.Status.Display.State, + } + + metrics["desired_ready_git_repos"].(*prometheus.GaugeVec). + With(labels).Set(float64(cluster.Status.DesiredReadyGitRepos)) + metrics["ready_git_repos"].(*prometheus.GaugeVec). 
+ With(labels).Set(float64(cluster.Status.ReadyGitRepos)) + metrics["resources_count_desiredready"].(*prometheus.GaugeVec). + With(labels).Set(float64(cluster.Status.ResourceCounts.DesiredReady)) + metrics["resources_count_missing"].(*prometheus.GaugeVec). + With(labels).Set(float64(cluster.Status.ResourceCounts.Missing)) + metrics["resources_count_modified"].(*prometheus.GaugeVec). + With(labels).Set(float64(cluster.Status.ResourceCounts.Modified)) + metrics["resources_count_notready"].(*prometheus.GaugeVec). + With(labels).Set(float64(cluster.Status.ResourceCounts.NotReady)) + metrics["resources_count_orphaned"].(*prometheus.GaugeVec). + With(labels).Set(float64(cluster.Status.ResourceCounts.Orphaned)) + metrics["resources_count_ready"].(*prometheus.GaugeVec). + With(labels).Set(float64(cluster.Status.ResourceCounts.Ready)) + metrics["resources_count_unknown"].(*prometheus.GaugeVec). + With(labels).Set(float64(cluster.Status.ResourceCounts.Unknown)) + metrics["resources_count_waitapplied"].(*prometheus.GaugeVec). 
+ With(labels).Set(float64(cluster.Status.ResourceCounts.WaitApplied)) + + for _, state := range clusterStates { + labels["state"] = state + + if state == cluster.Status.Display.State { + metrics["state"].(*prometheus.GaugeVec).With(labels).Set(1) + } else { + metrics["state"].(*prometheus.GaugeVec).With(labels).Set(0) + } + } +} diff --git a/internal/metrics/clustergroup_metrics.go b/internal/metrics/clustergroup_metrics.go new file mode 100644 index 0000000000..2853d255d0 --- /dev/null +++ b/internal/metrics/clustergroup_metrics.go @@ -0,0 +1,192 @@ +package metrics + +import ( + "fmt" + + fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + clusterGroupSubsystem = "cluster_group" + clusterGroupLabels = []string{"name", "namespace", "generation", "state"} + clusterGroupStates = []string{ + string(fleet.NotReady), + string(fleet.Ready), + } + ClusterGroupCollector = NewCollectorCollection( + clusterGroupSubsystem, + clusterGroupMetrics, + collectClusterGroupMetrics, + ) + clusterGroupMetrics = map[string]prometheus.Collector{ + "cluster_count": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "cluster_count", + Help: "The count of clusters in this cluster group.", + }, + clusterGroupLabels, + ), + "non_ready_cluster_count": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "non_ready_cluster_count", + Help: "The count of non ready clusters in this cluster group.", + }, + clusterGroupLabels, + ), + "resource_count_desired_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "resource_count_desired_ready", + Help: "The count of resources that are desired to be in the Ready state.", + }, + clusterGroupLabels, + ), + 
"resource_count_missing": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "resource_count_missing", + Help: "The count of resources that are in a Missing state.", + }, + clusterGroupLabels, + ), + "resource_count_modified": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "resource_count_modified", + Help: "The count of resources that are in a Modified state.", + }, + clusterGroupLabels, + ), + "resource_count_notready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "resource_count_notready", + Help: "The count of resources that are in a NotReady state.", + }, + clusterGroupLabels, + ), + "resource_count_orphaned": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "resource_count_orphaned", + Help: "The count of resources that are in an Orphaned state.", + }, + clusterGroupLabels, + ), + "resource_count_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "resource_count_ready", + Help: "The count of resources that are in a Ready state.", + }, + clusterGroupLabels, + ), + "resource_count_unknown": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "resource_count_unknown", + Help: "The count of resources that are in an Unknown state.", + }, + clusterGroupLabels, + ), + "resource_count_waitapplied": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "resource_count_waitapplied", + Help: "The count of resources that are in a WaitApplied state.", + }, + clusterGroupLabels, + ), + "bundle_desired_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: 
clusterGroupSubsystem, + Name: "bundle_desired_ready", + Help: "The count of bundles that are desired to be in a Ready state.", + }, + clusterGroupLabels, + ), + "bundle_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "bundle_ready", + Help: "The count of bundles that are in a Ready state in the Cluster Group.", + }, + clusterGroupLabels, + ), + "state": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: clusterGroupSubsystem, + Name: "state", + Help: "The current state of a given cluster group.", + }, + clusterGroupLabels, + ), + } +) + +func collectClusterGroupMetrics(obj any, metrics map[string]prometheus.Collector) { + clusterGroup, ok := obj.(*fleet.ClusterGroup) + if !ok { + panic("unexpected object type") + } + + labels := prometheus.Labels{ + "name": clusterGroup.Name, + "namespace": clusterGroup.Namespace, + "generation": fmt.Sprintf("%d", clusterGroup.ObjectMeta.Generation), + "state": clusterGroup.Status.Display.State, + } + + metrics["cluster_count"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.ClusterCount)) + metrics["non_ready_cluster_count"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.NonReadyClusterCount)) + metrics["resource_count_desired_ready"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.ResourceCounts.DesiredReady)) + metrics["resource_count_missing"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.ResourceCounts.Missing)) + metrics["resource_count_modified"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.ResourceCounts.Modified)) + metrics["resource_count_notready"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.ResourceCounts.NotReady)) + metrics["resource_count_orphaned"].(*prometheus.GaugeVec).With(labels). 
+ Set(float64(clusterGroup.Status.ResourceCounts.Orphaned)) + metrics["resource_count_ready"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.ResourceCounts.Ready)) + metrics["resource_count_unknown"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.ResourceCounts.Unknown)) + metrics["resource_count_waitapplied"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.ResourceCounts.WaitApplied)) + metrics["bundle_desired_ready"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.Summary.DesiredReady)) + metrics["bundle_ready"].(*prometheus.GaugeVec).With(labels). + Set(float64(clusterGroup.Status.Summary.Ready)) + + for _, state := range clusterGroupStates { + labels["state"] = state + + if state == clusterGroup.Status.Display.State { + metrics["state"].(*prometheus.GaugeVec).With(labels).Set(1) + } else { + metrics["state"].(*prometheus.GaugeVec).With(labels).Set(0) + } + } +} diff --git a/internal/metrics/gitrepo_metrics.go b/internal/metrics/gitrepo_metrics.go new file mode 100644 index 0000000000..551bfe73fc --- /dev/null +++ b/internal/metrics/gitrepo_metrics.go @@ -0,0 +1,149 @@ +package metrics + +import ( + "strings" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" +) + +var ( + gitRepoSubsystem = "gitrepo" + gitRepoLabels = []string{"name", "namespace", "repo", "branch", "paths"} + GitRepoCollector = NewCollectorCollection( + gitRepoSubsystem, + gitRepoMetrics, + collectGitRepoMetrics, + ) + gitRepoMetrics = map[string]prometheus.Collector{ + "resources_desired_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "resources_desired_ready", + Help: "The count of resources that are desired to be in a Ready state.", + }, + gitRepoLabels, + ), + "resources_missing": 
promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "resources_missing", + Help: "The count of resources that are in a Missing state.", + }, + gitRepoLabels, + ), + "resources_modified": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "resources_modified", + Help: "The count of resources that are in a Modified state.", + }, + gitRepoLabels, + ), + "resources_not_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "resources_not_ready", + Help: "The count of resources that are in a NotReady state.", + }, + gitRepoLabels, + ), + "resources_orphaned": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "resources_orphaned", + Help: "The count of resources that are in an Orphaned state.", + }, + gitRepoLabels, + ), + "resources_ready": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "resources_ready", + Help: "The count of resources that are in a Ready state.", + }, + gitRepoLabels, + ), + "resources_unknown": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "resources_unknown", + Help: "The count of resources that are in an Unknown state.", + }, + gitRepoLabels, + ), + "resources_wait_applied": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "resources_wait_applied", + Help: "The count of resources that are in a WaitApplied state.", + }, + gitRepoLabels, + ), + "desired_ready_clusters": promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "desired_ready_clusters", + Help: "The amount of clusters desired to be in a ready state.", + }, + gitRepoLabels, + ), + "ready_clusters": 
promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricPrefix, + Subsystem: gitRepoSubsystem, + Name: "ready_clusters", + Help: "The count of cluster in a Ready state.", + }, + gitRepoLabels, + ), + } + collectGitRepoMetrics = func( + obj any, + metrics map[string]prometheus.Collector, + ) { + gitrepo, ok := obj.(*fleet.GitRepo) + if !ok { + panic("unexpected object type") + } + + labels := prometheus.Labels{ + "name": gitrepo.Name, + "namespace": gitrepo.Namespace, + "repo": gitrepo.Spec.Repo, + "branch": gitrepo.Spec.Branch, + "paths": strings.Join(gitrepo.Spec.Paths, ";"), + } + + metrics["desired_ready_clusters"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.DesiredReadyClusters)) + metrics["ready_clusters"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.ReadyClusters)) + metrics["resources_missing"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.ResourceCounts.Missing)) + metrics["resources_modified"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.ResourceCounts.Modified)) + metrics["resources_not_ready"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.ResourceCounts.NotReady)) + metrics["resources_orphaned"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.ResourceCounts.Orphaned)) + metrics["resources_desired_ready"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.ResourceCounts.DesiredReady)) + metrics["resources_ready"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.ResourceCounts.Ready)) + metrics["resources_unknown"].(*prometheus.GaugeVec). + With(labels).Set(float64(gitrepo.Status.ResourceCounts.Unknown)) + metrics["resources_wait_applied"].(*prometheus.GaugeVec). 
+ With(labels).Set(float64(gitrepo.Status.ResourceCounts.WaitApplied)) + } +) diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go new file mode 100644 index 0000000000..567e06db38 --- /dev/null +++ b/internal/metrics/metrics.go @@ -0,0 +1,108 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/metrics" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + metricPrefix = "fleet" + commitLabel = "fleet.cattle.io/commit" + repoNameLabel = "fleet.cattle.io/repo-name" +) + +var ( + bundleStates = []fleet.BundleState{ + fleet.Ready, + fleet.NotReady, + fleet.Pending, + fleet.OutOfSync, + fleet.Modified, + fleet.WaitApplied, + fleet.ErrApplied, + } + enabled = false +) + +func RegisterMetrics() { + enabled = true + + GitRepoCollector.Register() + ClusterCollector.Register() + ClusterGroupCollector.Register() + BundleCollector.Register() + BundleDeploymentCollector.Register() +} + +// CollectorCollection implements the generic methods `Delete` and `Register` +// for a collection of Prometheus collectors. It is used to manage the lifecycle +// of a collection of Prometheus collectors. +type CollectorCollection struct { + subsystem string + metrics map[string]prometheus.Collector + collector func(obj any, metrics map[string]prometheus.Collector) +} + +// Collect collects the metrics for the given object. It deletes the metrics for +// the object if they already exist and then collects the metrics for the +// object. +// +// The metrics need to be deleted because the values of the metrics may have +// changed and this would create a new instance of those metrics, keeping the +// old one around. Metrics are deleted by their name and namespace label values. 
+func (c *CollectorCollection) Collect(obj metav1.ObjectMetaAccessor) { + if !enabled { + return + } + c.Delete(obj.GetObjectMeta().GetName(), obj.GetObjectMeta().GetNamespace()) + c.collector(obj, c.metrics) +} + +// NewCollectorCollection creates a new CollectorCollection with the given +// subsystem and metrics. It registers the metrics with the Prometheus registry. +func NewCollectorCollection( + subsystem string, + metrics map[string]prometheus.Collector, + collector func(obj any, metrics map[string]prometheus.Collector), +) *CollectorCollection { + cc := &CollectorCollection{ + subsystem: subsystem, + metrics: metrics, + collector: collector, + } + // cc.Register() + return cc +} + +// Delete deletes the metric with the given name and namespace labels. It +// returns the number of metrics deleted. It does a DeletePartialMatch on the +// metric with the given name and namespace labels. +func (c *CollectorCollection) Delete(name, namespace string) (deleted int) { + identityLabels := prometheus.Labels{ + "name": name, + "namespace": namespace, + } + for _, collector := range c.metrics { + switch metric := collector.(type) { + case *prometheus.MetricVec: + deleted += metric.DeletePartialMatch(identityLabels) + case *prometheus.CounterVec: + deleted += metric.DeletePartialMatch(identityLabels) + case *prometheus.GaugeVec: + deleted += metric.DeletePartialMatch(identityLabels) + default: + panic("unexpected metric type") + } + } + + return deleted +} + +func (c *CollectorCollection) Register() { + for _, metric := range c.metrics { + metrics.Registry.MustRegister(metric) + } +}