Skip to content

Commit

Permalink
metrics: Add metrics label filter configuration
Browse files Browse the repository at this point in the history
Currently, metrics are all-or-nothing.
Certain labels may cause cardinality issues.

This patch introduces a new configuration option - MetricsLabelFilter.
It is an allow-list for configuring namespace, workload, pod, and binary.
Metrics that use these labels will include a label only if it is listed in the filter.

Fixes: #1037

Signed-off-by: Nick Peluso <10912027+nap32@users.noreply.github.com>
  • Loading branch information
nap32 committed Sep 12, 2023
1 parent dca73e3 commit 129a569
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 18 deletions.
18 changes: 10 additions & 8 deletions cmd/tetragon/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@ const (
keyEnableCiliumAPI = "enable-cilium-api"
keyEnableProcessAncestors = "enable-process-ancestors"

keyMetricsServer = "metrics-server"
keyServerAddress = "server-address"
keyGopsAddr = "gops-address"
keyEnableProcessCred = "enable-process-cred"
keyEnableProcessNs = "enable-process-ns"
keyConfigFile = "config-file"
keyTracingPolicy = "tracing-policy"
keyTracingPolicyDir = "tracing-policy-dir"
keyMetricsServer = "metrics-server"
keyMetricsLabelFilter = "metrics-label-filter"
keyServerAddress = "server-address"
keyGopsAddr = "gops-address"
keyEnableProcessCred = "enable-process-cred"
keyEnableProcessNs = "enable-process-ns"
keyConfigFile = "config-file"
keyTracingPolicy = "tracing-policy"
keyTracingPolicyDir = "tracing-policy-dir"

keyCpuProfile = "cpuprofile"
keyMemProfile = "memprofile"
Expand Down Expand Up @@ -114,6 +115,7 @@ func readAndSetFlags() {
option.Config.DataCacheSize = viper.GetInt(keyDataCacheSize)

option.Config.MetricsServer = viper.GetString(keyMetricsServer)
option.Config.MetricsLabelFilter = viper.GetStringMap(keyMetricsLabelFilter)
option.Config.ServerAddress = viper.GetString(keyServerAddress)

option.Config.ExportFilename = viper.GetString(keyExportFilename)
Expand Down
3 changes: 3 additions & 0 deletions install/kubernetes/templates/tetragon_configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ data:
{{- else }}
metrics-server: ""
{{- end }}
{{- if .Values.tetragon.prometheus.enabled }}
metrics-label-filter: {{.Values.tetragon.prometheus.metricsLabelFilter | join "," }}
{{- end }}
{{- if .Values.tetragon.grpc.enabled }}
server-address: {{ .Values.tetragon.grpc.address }}
{{- else }}
Expand Down
3 changes: 3 additions & 0 deletions install/kubernetes/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ tetragon:
address: ""
# -- The port at which to expose metrics.
port: 2112
# -- The labels to include on metrics that support them.
# The possible values are "namespace", "workload", "pod" and "binary".
metricsLabelFilter: ["namespace", "workload", "pod", "binary"]
serviceMonitor:
# -- Whether to create a 'ServiceMonitor' resource targeting the 'tetragon' pods.
enabled: false
Expand Down
1 change: 1 addition & 0 deletions pkg/metrics/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
package consts

// Shared constants for the metrics packages.
var (
	// MetricsNamespace is the value passed as the Namespace option when
	// Tetragon's Prometheus metrics are created.
	MetricsNamespace = "tetragon"

	// KnownMetricLabelFilters lists the label names that can be switched on or
	// off through the metrics label filter. The order is significant: label
	// filtering matches trailing arguments against this slice by position.
	KnownMetricLabelFilters = []string{"namespace", "workload", "pod", "binary"}
)
8 changes: 4 additions & 4 deletions pkg/metrics/eventmetrics/eventmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ var (
Name: "events_total",
Help: "The total number of Tetragon events",
ConstLabels: nil,
}, []string{"type", "namespace", "workload", "pod", "binary"})
}, metrics.FilterMetricLabels("type", "namespace", "workload", "pod", "binary"))
FlagCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "flags_total",
Expand All @@ -44,7 +44,7 @@ var (
Name: "policy_events_total",
Help: "Policy events calls observed.",
ConstLabels: nil,
}, []string{"policy", "hook", "namespace", "workload", "pod", "binary"})
}, metrics.FilterMetricLabels("policy", "hook", "namespace", "workload", "pod", "binary"))
)

func InitMetrics(registry *prometheus.Registry) {
Expand Down Expand Up @@ -93,10 +93,10 @@ func handleProcessedEvent(pInfo *tracingpolicy.PolicyInfo, processedEvent interf
default:
eventType = "unknown"
}
EventsProcessed.WithLabelValues(eventType, namespace, workload, pod, binary).Inc()
EventsProcessed.WithLabelValues(metrics.FilterMetricLabels(eventType, namespace, workload, pod, binary)...).Inc()
if pInfo != nil && pInfo.Name != "" {
policyStats.
WithLabelValues(pInfo.Name, pInfo.Hook, namespace, workload, pod, binary).
WithLabelValues(metrics.FilterMetricLabels(pInfo.Name, pInfo.Hook, namespace, workload, pod, binary)...).
Inc()
}
}
Expand Down
19 changes: 19 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"time"

"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/cilium/tetragon/pkg/option"
"github.com/cilium/tetragon/pkg/podhooks"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
Expand Down Expand Up @@ -142,3 +144,20 @@ func EnableMetrics(address string) {
http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}))
http.ListenAndServe(address, nil)
}

// FilterMetricLabels returns labels with the filterable trailing entries that
// are not enabled in option.Config.MetricsLabelFilter removed.
//
// IMPORTANT: the last len(consts.KnownMetricLabelFilters) arguments are matched
// by position against consts.KnownMetricLabelFilters, so the filterable labels
// must be passed last and in exactly that order. Arguments before them are
// always kept. If fewer arguments than len(consts.KnownMetricLabelFilters) are
// passed, the input is returned unfiltered.
//
// Whenever filtering is applied the result is a newly allocated slice, so the
// caller's slice is never modified.
func FilterMetricLabels(labels ...string) []string {
	offset := len(labels) - len(consts.KnownMetricLabelFilters)
	if offset < 0 {
		// Too few arguments to contain the filterable suffix; nothing to filter.
		return labels
	}
	// Build the result in fresh storage. Appending to labels[:offset] would
	// write into the caller's backing array when this function is invoked as
	// FilterMetricLabels(s...), because Go passes s through unchanged.
	result := make([]string, 0, len(labels))
	result = append(result, labels[:offset]...)
	for i, label := range consts.KnownMetricLabelFilters {
		// Only the presence of the key matters; the map value is ignored.
		if _, ok := option.Config.MetricsLabelFilter[label]; ok {
			result = append(result, labels[offset+i])
		}
	}
	return result
}
29 changes: 29 additions & 0 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,41 @@ import (
"github.com/cilium/tetragon/pkg/metrics"
"github.com/cilium/tetragon/pkg/metrics/config"
"github.com/cilium/tetragon/pkg/metrics/eventmetrics"
"github.com/cilium/tetragon/pkg/option"
)

var sampleMsgGenericTracepointUnix = tracing.MsgGenericTracepointUnix{
PolicyName: "fake-policy",
}

// TestFilterMetricLabels checks that metrics.FilterMetricLabels keeps leading
// arguments untouched and drops trailing values whose label name is not a key
// of option.Config.MetricsLabelFilter.
func TestFilterMetricLabels(t *testing.T) {
	// The filter is process-wide state. Restore whatever was configured before
	// this test so other tests in the package are unaffected.
	saved := option.Config.MetricsLabelFilter
	defer func() { option.Config.MetricsLabelFilter = saved }()

	allEnabled := map[string]interface{}{
		"namespace": nil,
		"workload":  nil,
		"pod":       nil,
		"binary":    nil,
	}
	namespaceAndWorkload := map[string]interface{}{
		"namespace": nil,
		"workload":  nil,
	}

	// Trailing values are matched by position, not by content, so the strings
	// used below (e.g. "workspace") are arbitrary placeholders.
	tests := []struct {
		filter map[string]interface{}
		in     []string
		want   []string
	}{
		// Every known label enabled: nothing is dropped.
		{allEnabled, []string{"type", "namespace", "workspace", "pod", "binary"}, []string{"type", "namespace", "workspace", "pod", "binary"}},
		{allEnabled, []string{"syscall", "namespace", "workspace", "pod", "binary"}, []string{"syscall", "namespace", "workspace", "pod", "binary"}},
		{allEnabled, []string{"namespace", "workspace", "pod", "binary"}, []string{"namespace", "workspace", "pod", "binary"}},

		// Only namespace and workload enabled: the pod and binary slots are dropped.
		{namespaceAndWorkload, []string{"type", "namespace", "workspace", "pod", "binary"}, []string{"type", "namespace", "workspace"}},
		{namespaceAndWorkload, []string{"syscall", "namespace", "workspace", "pod", "binary"}, []string{"syscall", "namespace", "workspace"}},
		{namespaceAndWorkload, []string{"namespace", "workspace", "pod", "binary"}, []string{"namespace", "workspace"}},

		// Fewer arguments than known labels: the input comes back unfiltered.
		{allEnabled, []string{"type", "syscall"}, []string{"type", "syscall"}},
	}

	for _, tc := range tests {
		option.Config.MetricsLabelFilter = tc.filter
		assert.Equal(t, tc.want, metrics.FilterMetricLabels(tc.in...))
	}
}

func TestPodDelete(t *testing.T) {
reg := metrics.GetRegistry()
config.InitAllMetrics(reg)
Expand Down
4 changes: 2 additions & 2 deletions pkg/metrics/syscallmetrics/syscallmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ var (
Name: "syscalls_total",
Help: "System calls observed.",
ConstLabels: nil,
}, []string{"syscall", "namespace", "workload", "pod", "binary"})
}, metrics.FilterMetricLabels("syscall", "namespace", "workload", "pod", "binary"))
)

func InitMetrics(registry *prometheus.Registry) {
Expand Down Expand Up @@ -46,7 +46,7 @@ func Handle(event interface{}) {
}

if syscall != "" {
syscallStats.WithLabelValues(syscall, namespace, workload, pod, binary).Inc()
syscallStats.WithLabelValues(metrics.FilterMetricLabels(syscall, namespace, workload, pod, binary)...).Inc()
}
}

Expand Down
19 changes: 15 additions & 4 deletions pkg/option/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/spf13/viper"
)

Expand Down Expand Up @@ -46,10 +47,11 @@ type config struct {
ProcessCacheSize int
DataCacheSize int

MetricsServer string
ServerAddress string
TracingPolicy string
TracingPolicyDir string
MetricsServer string
MetricsLabelFilter map[string]interface{}
ServerAddress string
TracingPolicy string
TracingPolicyDir string

ExportFilename string
ExportFileMaxSizeMB int
Expand Down Expand Up @@ -93,6 +95,15 @@ var (

// LogOpts contains logger parameters
LogOpts: make(map[string]string),

// Default to logging metrics with the greatest granularity.
MetricsLabelFilter: func() map[string]interface{} {
result := make(map[string]interface{})
for _, label := range consts.KnownMetricLabelFilters {
result[label] = nil
}
return result
}(),
}
)

Expand Down

0 comments on commit 129a569

Please sign in to comment.