Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: Refactor metrics label filter logic #2321

Merged
merged 8 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/content/en/docs/reference/helm-chart.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion docs/data/tetragon_flags.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion install/kubernetes/tetragon/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions install/kubernetes/tetragon/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ tetragon:
address: ""
# -- The port at which to expose metrics.
port: 2112
# -- The labels to include with supporting metrics.
# The possible values are "namespace", "workload", "pod" and "binary".
# -- Comma-separated list of enabled metrics labels.
# The configurable labels are: namespace, workload, pod, binary. Unknown labels will be ignored.
# Removing some labels from the list might help reduce the metrics cardinality if needed.
metricsLabelFilter: "namespace,workload,pod,binary"
serviceMonitor:
# -- Whether to create a 'ServiceMonitor' resource targeting the tetragon pods.
Expand Down
13 changes: 11 additions & 2 deletions pkg/metrics/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,20 @@ package consts

const MetricsNamespace = "tetragon"

var KnownMetricLabelFilters = []string{"namespace", "workload", "pod", "binary"}
var DefaultLabelsFilter = map[string]bool{
"namespace": true,
"workload": true,
"pod": true,
"binary": true,
}

var (
ExamplePolicyLabel = "example-tracingpolicy"
ExampleKprobeLabel = "example_kprobe"
ExampleSyscallLabel = "example_syscall"
ExampleProcessLabels = []string{"example-namespace", "example-workload", "example-pod", "example-binary"}
ExampleNamespace = "example-namespace"
ExampleWorkload = "example-workload"
ExamplePod = "example-pod"
ExampleBinary = "example-binary"
ExampleProcessLabels = []string{ExampleNamespace, ExampleWorkload, ExamplePod, ExampleBinary}
)
21 changes: 10 additions & 11 deletions pkg/metrics/eventmetrics/eventmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
package eventmetrics

import (
"slices"

"github.com/cilium/tetragon/api/v1/tetragon"
"github.com/cilium/tetragon/api/v1/tetragon/codegen/helpers"
"github.com/cilium/tetragon/pkg/api/processapi"
Expand All @@ -22,7 +20,7 @@ import (
)

var (
EventsProcessed = metrics.MustNewGranularCounter(prometheus.CounterOpts{
EventsProcessed = metrics.MustNewGranularCounter[metrics.ProcessLabels](prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "events_total",
Help: "The total number of Tetragon events",
Expand All @@ -46,7 +44,7 @@ var (
ConstLabels: nil,
})

policyStats = metrics.MustNewGranularCounter(prometheus.CounterOpts{
policyStats = metrics.MustNewGranularCounter[metrics.ProcessLabels](prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "policy_events_total",
Help: "Policy events calls observed.",
Expand All @@ -70,20 +68,21 @@ func InitHealthMetrics(registry *prometheus.Registry) {
}

func InitEventsMetrics(registry *prometheus.Registry) {
registry.MustRegister(EventsProcessed.ToProm())
registry.MustRegister(policyStats.ToProm())
registry.MustRegister(EventsProcessed)
registry.MustRegister(policyStats)
}

func InitEventsMetricsForDocs(registry *prometheus.Registry) {
InitEventsMetrics(registry)

// Initialize metrics with example labels
processLabels := metrics.NewProcessLabels(consts.ExampleNamespace, consts.ExampleWorkload, consts.ExamplePod, consts.ExampleBinary)
for ev, evString := range tetragon.EventType_name {
if tetragon.EventType(ev) != tetragon.EventType_UNDEF && tetragon.EventType(ev) != tetragon.EventType_TEST {
EventsProcessed.WithLabelValues(slices.Concat([]string{evString}, consts.ExampleProcessLabels)...).Add(0)
EventsProcessed.WithLabelValues(processLabels, evString).Add(0)
}
}
policyStats.WithLabelValues(slices.Concat([]string{consts.ExamplePolicyLabel, consts.ExampleKprobeLabel}, consts.ExampleProcessLabels)...).Add(0)
policyStats.WithLabelValues(processLabels, consts.ExamplePolicyLabel, consts.ExampleKprobeLabel).Add(0)
}

func GetProcessInfo(process *tetragon.Process) (binary, pod, workload, namespace string) {
Expand Down Expand Up @@ -125,11 +124,11 @@ func handleProcessedEvent(pInfo *tracingpolicy.PolicyInfo, processedEvent interf
default:
eventType = "unknown"
}
EventsProcessed.WithLabelValues(eventType, namespace, workload, pod, binary).Inc()
processLabels := metrics.NewProcessLabels(namespace, workload, pod, binary)
EventsProcessed.WithLabelValues(processLabels, eventType).Inc()
if pInfo != nil && pInfo.Name != "" {
policyStats.
WithLabelValues(pInfo.Name, pInfo.Hook, namespace, workload, pod, binary).
Inc()
WithLabelValues(processLabels, pInfo.Name, pInfo.Hook).Inc()
}
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/metrics/eventmetrics/eventmetrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
)

func TestHandleProcessedEvent(t *testing.T) {
assert.NoError(t, testutil.CollectAndCompare(EventsProcessed.ToProm(), strings.NewReader("")))
assert.NoError(t, testutil.CollectAndCompare(EventsProcessed, strings.NewReader("")))
handleProcessedEvent(nil, nil)
// empty process
handleProcessedEvent(nil, &tetragon.GetEventsResponse{Event: &tetragon.GetEventsResponse_ProcessKprobe{ProcessKprobe: &tetragon.ProcessKprobe{}}})
Expand Down Expand Up @@ -79,7 +79,7 @@ tetragon_events_total{binary="binary_c",namespace="namespace_c",pod="pod_c",type
tetragon_events_total{binary="binary_e",namespace="",pod="",type="PROCESS_EXIT",workload=""} 1
tetragon_events_total{binary="binary_e",namespace="namespace_e",pod="pod_e",type="PROCESS_EXIT",workload="workload_e"} 1
`)
assert.NoError(t, testutil.CollectAndCompare(EventsProcessed.ToProm(), expected))
assert.NoError(t, testutil.CollectAndCompare(EventsProcessed, expected))
}

func TestHandleOriginalEvent(t *testing.T) {
Expand Down
53 changes: 53 additions & 0 deletions pkg/metrics/filteredlabels.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package metrics

import (
"github.com/cilium/tetragon/pkg/option"
)

// FilteredLabels describes a group of metric labels as two string slices:
// the label names and the label values. In this file it is implemented by
// ProcessLabels.
type FilteredLabels interface {
// Keys returns the label names.
Keys() []string
// Values returns the label values (presumably in the same order as Keys;
// ProcessLabels follows that convention).
Values() []string
}

// ProcessLabels holds the values of the four configurable process labels:
// namespace, workload, pod and binary. Instances should be created with
// NewProcessLabels, which replaces the value of every label that is not
// enabled in option.Config.MetricsLabelFilter with an empty string.
type ProcessLabels struct {
namespace string
workload string
pod string
binary string
}

// NewProcessLabels creates a new ProcessLabels struct with the global labels
// filter applied. To have a metric respect the labels filter, we have to:
// 1. Define a granular metric with ProcessLabels type parameter (see granularmetric.go).
// 2. When calling WithLabelValues, pass a ProcessLabels struct created with NewProcessLabels.
func NewProcessLabels(namespace, workload, pod, binary string) *ProcessLabels {
if !option.Config.MetricsLabelFilter["namespace"] {
namespace = ""
}
if !option.Config.MetricsLabelFilter["workload"] {
workload = ""
}
if !option.Config.MetricsLabelFilter["pod"] {
pod = ""
}
if !option.Config.MetricsLabelFilter["binary"] {
binary = ""
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Configure MetricsLabelFilter similarly to before, but as map[string]bool
  2. metrics.ProcessLabels struct implements metrics.FilteredLabels interface and
    contains label values of configurable labels. Disabled labels are set to ""
    when the struct is instantiated.
  3. Define a metric using a "granular metric" wrapper, which now uses Go generics
    to specify configurable labels (FilteredMetrics type).
  4. The inner Prometheus metric is now registered with all labels.
  5. Then we update the metric using a slightly different implementation of
    WithLabelValues, which takes a generic FilteredLabels struct and any
    additional label values as strings.

AFAIU, the disabled labels are set to "". The thing I'm not sure I understand is where the filtering happens. Is it in .WithLabelValues()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When calling WithLabelValues() you need to pass a ProcessLabels struct. If it's created with NewProcessLabels, then disabled labels are set to "", which from Prometheus's perspective is equivalent to not having these labels at all. So filtering relies on always creating ProcessLabels with NewProcessLabels. I'll add a comment about it, as it's indeed not obvious.

return &ProcessLabels{
namespace: namespace,
workload: workload,
pod: pod,
binary: binary,
}
}

// Keys returns the names of the process labels in a fixed order:
// namespace, workload, pod, binary. The result does not depend on the
// receiver's field values; a fresh slice is returned on every call.
func (l ProcessLabels) Keys() []string {
	names := [...]string{"namespace", "workload", "pod", "binary"}
	return names[:]
}

// Values returns the stored label values in the order namespace, workload,
// pod, binary. Fields holding an empty string are returned as empty strings.
func (l ProcessLabels) Values() []string {
	vals := make([]string, 0, 4)
	vals = append(vals, l.namespace, l.workload, l.pod, l.binary)
	return vals
}
53 changes: 53 additions & 0 deletions pkg/metrics/filteredlabels_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package metrics

import (
"maps"
"testing"

"github.com/stretchr/testify/assert"

"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/cilium/tetragon/pkg/option"
)

// TestProcessLabels checks that NewProcessLabels blanks out the value of every
// label that is disabled (false) or missing in the global
// option.Config.MetricsLabelFilter map, and keeps the values of enabled ones.
//
// The steps mutate the global filter cumulatively, so their order matters.
func TestProcessLabels(t *testing.T) {
	t.Cleanup(func() {
		// reset global config back to the default
		option.Config.MetricsLabelFilter = maps.Clone(consts.DefaultLabelsFilter)
	})

	namespace := "test-namespace"
	workload := "test-deployment"
	pod := "test-deployment-d9jo2"
	binary := "test-binary"

	// by default all labels should be enabled
	processLabels := NewProcessLabels(namespace, workload, pod, binary)
	// assert.Equal takes (t, expected, actual); keeping that order makes
	// failure messages label the two values correctly.
	assert.Equal(t, []string{namespace, workload, pod, binary}, processLabels.Values())

	// disable workload and pod
	option.Config.MetricsLabelFilter["workload"] = false
	option.Config.MetricsLabelFilter["pod"] = false
	processLabels = NewProcessLabels(namespace, workload, pod, binary)
	assert.Equal(t, []string{namespace, "", "", binary}, processLabels.Values())

	// delete binary (this shouldn't really happen, we set the values to false instead)
	delete(option.Config.MetricsLabelFilter, "binary")
	processLabels = NewProcessLabels(namespace, workload, pod, binary)
	assert.Equal(t, []string{namespace, "", "", ""}, processLabels.Values())

	// disable all
	for l := range consts.DefaultLabelsFilter {
		option.Config.MetricsLabelFilter[l] = false
	}
	processLabels = NewProcessLabels(namespace, workload, pod, binary)
	assert.Equal(t, []string{"", "", "", ""}, processLabels.Values())

	// clear label filter (this shouldn't really happen, we set the values to false instead)
	option.Config.MetricsLabelFilter = map[string]bool{}
	processLabels = NewProcessLabels(namespace, workload, pod, binary)
	assert.Equal(t, []string{"", "", "", ""}, processLabels.Values())
}
Loading
Loading