Skip to content

Commit

Permalink
metrics: Refactor metrics label filter logic
Browse files Browse the repository at this point in the history
Tetragon has a functionality to disable some of the high-cardinality metrics
labels. So far it worked like this:
1. Configure option.Config.MetricsLabelFilter as a set (map[string]interface{})
   of enabled labels.
2. metrics.granularLabelFilter wraps enabled labels and configurable labels in
   one global variable.
3. Define a metric using a "granular metric" wrapper, which uses
   granularLabelFilter.
4. When the metric is registered, the inner Prometheus metric is registered
   with the enabled labels only.
5. Then we update the metric like a regular Prometheus metric, using
   WithLabelValues.

Refactor this logic so that now it works like this:
1. Configure MetricsLabelFilter as before, but as a map[string]bool.
2. metrics.ProcessLabels struct implements metrics.FilteredLabels interface and
   contains label values of configurable labels. Disabled labels are set to ""
   when the struct is instantiated.
3. Define a metric using a "granular metric" wrapper, which now uses Go generics
   to specify configurable labels (FilteredMetrics type).
4. The inner Prometheus metric is now registered with all labels.
5. Then we update the metric using a slightly different implementation of
   WithLabelValues, which takes a generic FilteredLabels struct and any
   additional label values as strings.

This approach provides better types, so that it's not as easy for developers to
make a mistake and pass incorrect labels when updating a metric. Additionally,
it makes it easy to define a different set of configurable labels (another
FilteredMetrics type) and to implement dynamic metrics labels configuration (as
it doesn't require re-registering metrics when the filter changes).

Signed-off-by: Anna Kapuscinska <anna@isovalent.com>
  • Loading branch information
lambdanis committed Apr 15, 2024
1 parent 6bdd2c4 commit dd68c2a
Show file tree
Hide file tree
Showing 11 changed files with 273 additions and 264 deletions.
13 changes: 11 additions & 2 deletions pkg/metrics/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,20 @@ package consts

const MetricsNamespace = "tetragon"

var KnownMetricLabelFilters = []string{"namespace", "workload", "pod", "binary"}
// DefaultProcessLabels is a label filter in which every configurable process
// label (namespace, workload, pod, binary) is enabled. The filter maps a label
// name to whether that label is enabled.
var DefaultProcessLabels = map[string]bool{
"namespace": true,
"workload": true,
"pod": true,
"binary": true,
}

var (
ExamplePolicyLabel = "example-tracingpolicy"
ExampleKprobeLabel = "example_kprobe"
ExampleSyscallLabel = "example_syscall"
ExampleProcessLabels = []string{"example-namespace", "example-workload", "example-pod", "example-binary"}
ExampleNamespace = "example-namespace"
ExampleWorkload = "example-workload"
ExamplePod = "example-pod"
ExampleBinary = "example-binary"
ExampleProcessLabels = []string{ExampleNamespace, ExampleWorkload, ExamplePod, ExampleBinary}
)
17 changes: 8 additions & 9 deletions pkg/metrics/eventmetrics/eventmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
package eventmetrics

import (
"slices"

"github.com/cilium/tetragon/api/v1/tetragon"
"github.com/cilium/tetragon/api/v1/tetragon/codegen/helpers"
"github.com/cilium/tetragon/pkg/api/processapi"
Expand All @@ -22,7 +20,7 @@ import (
)

var (
EventsProcessed = metrics.MustNewGranularCounter(prometheus.CounterOpts{
EventsProcessed = metrics.MustNewGranularCounter[metrics.ProcessLabels](prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "events_total",
Help: "The total number of Tetragon events",
Expand All @@ -46,7 +44,7 @@ var (
ConstLabels: nil,
})

policyStats = metrics.MustNewGranularCounter(prometheus.CounterOpts{
policyStats = metrics.MustNewGranularCounter[metrics.ProcessLabels](prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "policy_events_total",
Help: "Policy events calls observed.",
Expand Down Expand Up @@ -78,12 +76,13 @@ func InitEventsMetricsForDocs(registry *prometheus.Registry) {
InitEventsMetrics(registry)

// Initialize metrics with example labels
processLabels := metrics.NewProcessLabels(consts.ExampleNamespace, consts.ExampleWorkload, consts.ExamplePod, consts.ExampleBinary)
for ev, evString := range tetragon.EventType_name {
if tetragon.EventType(ev) != tetragon.EventType_UNDEF && tetragon.EventType(ev) != tetragon.EventType_TEST {
EventsProcessed.WithLabelValues(slices.Concat([]string{evString}, consts.ExampleProcessLabels)...).Add(0)
EventsProcessed.WithLabelValues(processLabels, evString).Add(0)
}
}
policyStats.WithLabelValues(slices.Concat([]string{consts.ExamplePolicyLabel, consts.ExampleKprobeLabel}, consts.ExampleProcessLabels)...).Add(0)
policyStats.WithLabelValues(processLabels, consts.ExamplePolicyLabel, consts.ExampleKprobeLabel).Add(0)
}

func GetProcessInfo(process *tetragon.Process) (binary, pod, workload, namespace string) {
Expand Down Expand Up @@ -125,11 +124,11 @@ func handleProcessedEvent(pInfo *tracingpolicy.PolicyInfo, processedEvent interf
default:
eventType = "unknown"
}
EventsProcessed.WithLabelValues(eventType, namespace, workload, pod, binary).Inc()
processLabels := metrics.NewProcessLabels(namespace, workload, pod, binary)
EventsProcessed.WithLabelValues(processLabels, eventType).Inc()
if pInfo != nil && pInfo.Name != "" {
policyStats.
WithLabelValues(pInfo.Name, pInfo.Hook, namespace, workload, pod, binary).
Inc()
WithLabelValues(processLabels, pInfo.Name, pInfo.Hook).Inc()
}
}

Expand Down
49 changes: 49 additions & 0 deletions pkg/metrics/filteredlabels.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package metrics

import (
"github.com/cilium/tetragon/pkg/option"
)

// FilteredLabels describes a set of configurable metric labels that can be
// passed to a metric when it is updated.
type FilteredLabels interface {
// Keys returns the label names.
Keys() []string
// Values returns the label values.
// NOTE(review): presumably each value is at the same index as its name in
// Keys, as it is for ProcessLabels - confirm against the metric wrappers.
Values() []string
}

// ProcessLabels holds the values of the configurable process labels:
// namespace, workload, pod and binary. When built with NewProcessLabels, the
// value of every label that is not enabled in option.Config.MetricsLabelFilter
// is the empty string.
type ProcessLabels struct {
Namespace string
Workload string
Pod string
Binary string
}

// NewProcessLabels builds a ProcessLabels from the given label values. The
// value of every label that is not enabled in option.Config.MetricsLabelFilter
// is replaced with an empty string.
func NewProcessLabels(namespace, workload, pod, binary string) *ProcessLabels {
	enabled := option.Config.MetricsLabelFilter

	// keep passes value through when label is enabled and blanks it otherwise.
	// A label missing from the filter reads as false, i.e. disabled.
	keep := func(label, value string) string {
		if enabled[label] {
			return value
		}
		return ""
	}

	labels := new(ProcessLabels)
	labels.Namespace = keep("namespace", namespace)
	labels.Workload = keep("workload", workload)
	labels.Pod = keep("pod", pod)
	labels.Binary = keep("binary", binary)
	return labels
}

// Keys returns the names of the process labels, in the same order as the
// values returned by Values.
func (l ProcessLabels) Keys() []string {
	names := make([]string, 0, 4)
	names = append(names, "namespace", "workload", "pod", "binary")
	return names
}

// Values returns the label values ordered as namespace, workload, pod,
// binary.
func (l ProcessLabels) Values() []string {
	vals := make([]string, 4)
	vals[0] = l.Namespace
	vals[1] = l.Workload
	vals[2] = l.Pod
	vals[3] = l.Binary
	return vals
}
37 changes: 37 additions & 0 deletions pkg/metrics/filteredlabels_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package metrics

import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/cilium/tetragon/pkg/option"
)

func TestProcessLabels(t *testing.T) {
namespace := "test-namespace"
workload := "test-deployment"
pod := "test-deployment-d9jo2"
binary := "test-binary"

// check that all labels are enabled by default (this relies on other tests
// not changing option.Config.MetricsLabelFilter)
processLabels := NewProcessLabels(namespace, workload, pod, binary)
assert.Equal(t, processLabels.Values(), []string{namespace, workload, pod, binary})

// enable only namespace and binary
option.Config.MetricsLabelFilter = map[string]bool{
"namespace": true,
"binary": true,
}
// check that labels are filtered correctly
processLabels = NewProcessLabels(namespace, workload, pod, binary)
assert.Equal(t, processLabels.Values(), []string{namespace, "", "", binary})

// clean up - reset the config back to the default
option.Config.MetricsLabelFilter = consts.DefaultProcessLabels
}
Loading

0 comments on commit dd68c2a

Please sign in to comment.