Skip to content

Commit

Permalink
[SURE-4340] Add Prometheus Metrics (#2172)
Browse files Browse the repository at this point in the history
* Add Prometheus metrics to fleet-controller

Refers to #2172, SURE-4340

Expose Prometheus metrics of the fleet-controller for the following
controllers:

- GitRepo
- Bundle
- BundleDeployment
- Cluster
- ClusterGroup
  • Loading branch information
p-se authored Apr 23, 2024
1 parent 8dc4d77 commit d5d1b44
Show file tree
Hide file tree
Showing 32 changed files with 2,017 additions and 15 deletions.
11 changes: 7 additions & 4 deletions .github/workflows/e2e-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,21 +106,24 @@ jobs:
# 1. Run test cases not needing infra
ginkgo --label-filter='!infra-setup' e2e/single-cluster e2e/keep-resources
# 2. Run tests requiring only the git server
# 2. Run tests for metrics
ginkgo e2e/metrics
# 3. Run tests requiring only the git server
e2e/testenv/infra/infra setup --git-server=true
ginkgo --label-filter='infra-setup && !helm-registry && !oci-registry' e2e/single-cluster/
# 3. Run tests requiring a Helm registry
# 4. Run tests requiring a Helm registry
e2e/testenv/infra/infra setup --helm-registry=true
ginkgo --label-filter='helm-registry' e2e/single-cluster
e2e/testenv/infra/infra teardown --helm-registry=true
# 4. Run tests requiring an OCI registry
# 5. Run tests requiring an OCI registry
e2e/testenv/infra/infra setup --oci-registry=true
ginkgo --label-filter='oci-registry' e2e/single-cluster
# 5. Tear down all infra
# 6. Tear down all infra
e2e/testenv/infra/infra teardown
-
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ e2e/testenv/infra/infra
^fleet$
FleetCI-RootCA
.envrc
env.multi-cluster
env.single-cluster
11 changes: 11 additions & 0 deletions charts/fleet/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ spec:
labels:
app: fleet-controller
shard: "{{ . }}"
{{- if empty . }}
shard-default: "true"
{{- end }}
spec:
containers:
- env:
Expand Down Expand Up @@ -64,6 +67,11 @@ spec:
image: '{{ template "system_default_registry" $ }}{{ $.Values.image.repository }}:{{ $.Values.image.tag }}'
name: fleet-controller
imagePullPolicy: "{{ $.Values.image.imagePullPolicy }}"
{{- if $.Values.metrics.enabled }}
ports:
- containerPort: 8080
name: metrics
{{- end }}
command:
- fleetcontroller
{{- if not $.Values.gitops.enabled }}
Expand All @@ -73,6 +81,9 @@ spec:
- --shard-id
- {{ quote . }}
{{- end }}
{{- if not $.Values.metrics.enabled }}
- --disable-metrics
{{- end }}
{{- if $.Values.debug }}
- --debug
- --debug-level
Expand Down
17 changes: 17 additions & 0 deletions charts/fleet/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{{- if .Values.metrics.enabled }}
apiVersion: v1
kind: Service
metadata:
name: monitoring-fleet-controller
labels:
app: fleet-controller
spec:
type: ClusterIP
ports:
- port: 8080
targetPort: 8080
protocol: TCP
name: metrics
selector:
app: fleet-controller
{{- end }}
3 changes: 3 additions & 0 deletions charts/fleet/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ priorityClassName: ""
gitops:
enabled: true

metrics:
enabled: true

debug: false
debugLevel: 0
propagateDebugSettingsToAgents: true
Expand Down
11 changes: 11 additions & 0 deletions e2e/assets/clustergroup-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
apiVersion: fleet.cattle.io/v1alpha1
kind: ClusterGroup
metadata:
name: {{ .Name }}
namespace: {{ .Namespace }}
spec:
selector:
matchLabels:
{{- range $key, $value := .MatchLabels}}
{{$key}}: {{$value}}
{{- end}}
13 changes: 13 additions & 0 deletions e2e/assets/metrics/fleetcontroller_service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
apiVersion: v1
kind: Service
metadata:
name: {{ .Name }}
spec:
selector:
app: fleet-controller
shard-default: "true"
ports:
- protocol: TCP
port: {{ .Port }}
targetPort: metrics
type: LoadBalancer
181 changes: 181 additions & 0 deletions e2e/metrics/bundle_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package metrics_test

import (
"fmt"
"maps"
"math/rand"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"github.com/rancher/fleet/e2e/metrics"
"github.com/rancher/fleet/e2e/testenv"
"github.com/rancher/fleet/e2e/testenv/kubectl"
fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1"
)

var _ = Describe("Bundle Metrics", Label("bundle"), func() {
const (
gitRepoName = "metrics"
branch = "master"
)

var (
// kw is the kubectl command for namespace the workload is deployed to
kw kubectl.Command
namespace string
)

BeforeEach(func() {
k = env.Kubectl.Namespace(env.Namespace)
namespace = testenv.NewNamespaceName(
gitRepoName,
rand.New(rand.NewSource(time.Now().UnixNano())),
)
kw = k.Namespace(namespace)

out, err := k.Create("ns", namespace)
Expect(err).ToNot(HaveOccurred(), out)

err = testenv.CreateGitRepo(
kw,
namespace,
gitRepoName,
branch,
"simple-manifest",
)
Expect(err).ToNot(HaveOccurred())

DeferCleanup(func() {
out, err = k.Delete("ns", namespace)
Expect(err).ToNot(HaveOccurred(), out)
})
})

When("collecting Bundle metrics", func() {
bundleMetricNames := map[string]map[string][]string{
"fleet_bundle_desired_ready": {},
"fleet_bundle_err_applied": {},
"fleet_bundle_modified": {},
"fleet_bundle_not_ready": {},
"fleet_bundle_out_of_sync": {},
"fleet_bundle_pending": {},
"fleet_bundle_ready": {},
"fleet_bundle_wait_applied": {},
"fleet_bundle_state": {
"state": []string{
string(fleet.Ready),
string(fleet.NotReady),
string(fleet.WaitApplied),
string(fleet.ErrApplied),
string(fleet.OutOfSync),
string(fleet.Pending),
string(fleet.Modified),
},
},
}

// checkMetrics checks that the metrics exist or not exist. Custom
// checks can be added by passing a function to check. This can be used
// to check for the value of the metrics.
checkMetrics := func(
gitRepoName string,
expectExists bool,
check func(metric *metrics.Metric) error,
) func() error {
return func() error {
et := metrics.NewExporterTest(metricsURL)
expectOne := func(metricName string, labels map[string]string) error {
metric, err := et.FindOneMetric(metricName, labels)
if expectExists && err != nil {
return err
} else if !expectExists && err == nil {
return fmt.Errorf("metric %s found but not expected", metricName)
}

if check != nil {
err = check(metric)
if err != nil {
return err
}
}
return nil
}

for metricName, matchLabels := range bundleMetricNames {
identityLabels := map[string]string{
"name": gitRepoName,
"namespace": namespace,
}
labels := map[string]string{}
maps.Copy(labels, identityLabels)

if len(matchLabels) > 0 {
for labelName, labelValues := range matchLabels {
for _, labelValue := range labelValues {
labels[labelName] = labelValue
err := expectOne(metricName, labels)
if err != nil {
return err
}
}
}
} else {
err := expectOne(metricName, labels)
if err != nil {
return err
}
}
}
return nil
}
}

It("should have one metric for each specified metric and label value", func() {
Eventually(checkMetrics(gitRepoName+"-simple-manifest", true, func(metric *metrics.Metric) error {
// No cluster exists in the namespace where our GitRepo has been deployed, hence
// we expect the values of the metrics to be 0.
if value := metric.Gauge.GetValue(); value != float64(0) {
return fmt.Errorf("unexpected metric value: expected 0, found %f", value)
}
return nil
})).ShouldNot(HaveOccurred())
})

When("the GitRepo (and therefore Bundle) is changed", Label("bundle-modified"), func() {
It("should not duplicate metrics if Bundle is updated", Label("bundle-update"), func() {
// et := metrics.NewExporterTest(metricsURL)
out, err := kw.Patch(
"gitrepo", gitRepoName,
"--type=json",
"-p", `[{"op": "replace", "path": "/spec/paths", "value": ["simple-chart"]}]`,
)
Expect(err).ToNot(HaveOccurred(), out)
Expect(out).To(ContainSubstring("gitrepo.fleet.cattle.io/metrics patched"))

Eventually(checkMetrics(gitRepoName+"-simple-chart", true, func(metric *metrics.Metric) error {
if metric.LabelValue("paths") == "simple-manifest" {
return fmt.Errorf("path for metric %s unchanged", metric.Metric.String())
}
return nil
})).ShouldNot(HaveOccurred())
})

It("should not keep metrics if Bundle is deleted", Label("bundle-delete"), func() {
gitRepoName := gitRepoName + "-simple-manifest"

var (
out string
err error
)
Eventually(func() error {
out, err = kw.Delete("bundle", gitRepoName)
return err
}).ShouldNot(HaveOccurred(), out)

Eventually(checkMetrics(gitRepoName, false, nil)).ShouldNot(HaveOccurred())
})
})
})
})
Loading

0 comments on commit d5d1b44

Please sign in to comment.