Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: Add metrics label filter configuration #1444

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions cmd/tetragon/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package main

import (
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/config"
"github.com/cilium/tetragon/pkg/option"

"github.com/spf13/viper"
Expand All @@ -31,13 +32,14 @@ const (
keyEnableCiliumAPI = "enable-cilium-api"
keyEnableProcessAncestors = "enable-process-ancestors"

keyMetricsServer = "metrics-server"
keyServerAddress = "server-address"
keyGopsAddr = "gops-address"
keyEnableProcessCred = "enable-process-cred"
keyEnableProcessNs = "enable-process-ns"
keyTracingPolicy = "tracing-policy"
keyTracingPolicyDir = "tracing-policy-dir"
keyMetricsServer = "metrics-server"
keyMetricsLabelFilter = "metrics-label-filter"
keyServerAddress = "server-address"
keyGopsAddr = "gops-address"
keyEnableProcessCred = "enable-process-cred"
keyEnableProcessNs = "enable-process-ns"
keyTracingPolicy = "tracing-policy"
keyTracingPolicyDir = "tracing-policy-dir"

keyCpuProfile = "cpuprofile"
keyMemProfile = "memprofile"
Expand Down Expand Up @@ -113,6 +115,7 @@ func readAndSetFlags() {
option.Config.DataCacheSize = viper.GetInt(keyDataCacheSize)

option.Config.MetricsServer = viper.GetString(keyMetricsServer)
option.Config.MetricsLabelFilter = config.ParseMetricsLabelFilter(viper.GetString(keyMetricsLabelFilter))
option.Config.ServerAddress = viper.GetString(keyServerAddress)

option.Config.ExportFilename = viper.GetString(keyExportFilename)
Expand Down
1 change: 1 addition & 0 deletions docs/content/en/docs/reference/helm-chart.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ To use [the values available](#values), with `helm install` or `helm upgrade`, u
| tetragon.processCacheSize | int | `65536` | |
| tetragon.prometheus.address | string | `""` | The address at which to expose metrics. Set it to "" to expose on all available interfaces. |
| tetragon.prometheus.enabled | bool | `true` | Whether to enable exposing Tetragon metrics. |
| tetragon.prometheus.metricsLabelFilter | string | `"namespace,workload,pod,binary"` | The labels to include with supporting metrics. The possible values are "namespace", "workload", "pod" and "binary". |
| tetragon.prometheus.port | int | `2112` | The port at which to expose metrics. |
| tetragon.prometheus.serviceMonitor.enabled | bool | `false` | Whether to create a 'ServiceMonitor' resource targeting the 'tetragon' pods. |
| tetragon.prometheus.serviceMonitor.labelsOverride | object | `{}` | The set of labels to place on the 'ServiceMonitor' resource. |
Expand Down
1 change: 1 addition & 0 deletions install/kubernetes/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ Helm chart for Tetragon
| tetragon.processCacheSize | int | `65536` | |
| tetragon.prometheus.address | string | `""` | The address at which to expose metrics. Set it to "" to expose on all available interfaces. |
| tetragon.prometheus.enabled | bool | `true` | Whether to enable exposing Tetragon metrics. |
| tetragon.prometheus.metricsLabelFilter | string | `"namespace,workload,pod,binary"` | The labels to include with supporting metrics. The possible values are "namespace", "workload", "pod" and "binary". |
| tetragon.prometheus.port | int | `2112` | The port at which to expose metrics. |
| tetragon.prometheus.serviceMonitor.enabled | bool | `false` | Whether to create a 'ServiceMonitor' resource targeting the 'tetragon' pods. |
| tetragon.prometheus.serviceMonitor.labelsOverride | object | `{}` | The set of labels to place on the 'ServiceMonitor' resource. |
Expand Down
3 changes: 3 additions & 0 deletions install/kubernetes/templates/tetragon_configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ data:
{{- else }}
metrics-server: ""
{{- end }}
{{- if .Values.tetragon.prometheus.enabled }}
metrics-label-filter: {{ .Values.tetragon.prometheus.metricsLabelFilter }}
{{- end }}
{{- if .Values.tetragon.grpc.enabled }}
server-address: {{ .Values.tetragon.grpc.address }}
{{- else }}
Expand Down
3 changes: 3 additions & 0 deletions install/kubernetes/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ tetragon:
address: ""
# -- The port at which to expose metrics.
port: 2112
# -- The labels to include with supporting metrics.
nap32 marked this conversation as resolved.
Show resolved Hide resolved
# The possible values are "namespace", "workload", "pod" and "binary".
metricsLabelFilter: "namespace,workload,pod,binary"
serviceMonitor:
# -- Whether to create a 'ServiceMonitor' resource targeting the 'tetragon' pods.
enabled: false
Expand Down
10 changes: 10 additions & 0 deletions pkg/metrics/config/initmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
grpcmetrics "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"

"strings"
)

func InitAllMetrics(registry *prometheus.Registry) {
Expand All @@ -47,3 +49,11 @@ func InitAllMetrics(registry *prometheus.Registry) {
registry.MustRegister(grpcmetrics.NewServerMetrics())
version.InitMetrics(registry)
}

// ParseMetricsLabelFilter converts a comma-separated list of label names into
// a set keyed by label name (the map values are always nil).
//
// Whitespace around each name is trimmed and empty entries are skipped, so an
// empty input yields an empty set instead of a set containing "", and
// "namespace, pod" enables both "namespace" and "pod".
func ParseMetricsLabelFilter(labels string) map[string]interface{} {
	result := make(map[string]interface{})
	for _, label := range strings.Split(labels, ",") {
		label = strings.TrimSpace(label)
		if label == "" {
			// strings.Split returns [""] for an empty input and produces
			// empty items for stray commas; neither names a label.
			continue
		}
		result[label] = nil
	}
	return result
}
1 change: 1 addition & 0 deletions pkg/metrics/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
package consts

var (
	// MetricsNamespace is the namespace value used for Tetragon's Prometheus
	// metrics.
	MetricsNamespace = "tetragon"

	// KnownMetricLabelFilters lists the metric labels that can be switched on
	// or off through the metrics label filter. Code that filters label values
	// relies on this exact order.
	KnownMetricLabelFilters = []string{"namespace", "workload", "pod", "binary"}
)
nap32 marked this conversation as resolved.
Show resolved Hide resolved
18 changes: 9 additions & 9 deletions pkg/metrics/eventmetrics/eventmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ import (
)

var (
EventsProcessed = metrics.NewCounterVecWithPod(prometheus.CounterOpts{
EventsProcessed = metrics.MustNewGranularCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "events_total",
Help: "The total number of Tetragon events",
ConstLabels: nil,
}, []string{"type", "namespace", "workload", "pod", "binary"})
}, []string{"type"})
FlagCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "flags_total",
Expand All @@ -39,19 +39,19 @@ var (
ConstLabels: nil,
})

policyStats = metrics.NewCounterVecWithPod(prometheus.CounterOpts{
policyStats = metrics.MustNewGranularCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "policy_events_total",
Help: "Policy events calls observed.",
ConstLabels: nil,
}, []string{"policy", "hook", "namespace", "workload", "pod", "binary"})
}, []string{"policy", "hook"})
)

func InitMetrics(registry *prometheus.Registry) {
registry.MustRegister(EventsProcessed)
registry.MustRegister(EventsProcessed.ToProm())
registry.MustRegister(FlagCount)
registry.MustRegister(NotifyOverflowedEvents)
registry.MustRegister(policyStats)
registry.MustRegister(policyStats.ToProm())
}

func GetProcessInfo(process *tetragon.Process) (binary, pod, workload, namespace string) {
Expand Down Expand Up @@ -93,10 +93,10 @@ func handleProcessedEvent(pInfo *tracingpolicy.PolicyInfo, processedEvent interf
default:
eventType = "unknown"
}
EventsProcessed.WithLabelValues(eventType, namespace, workload, pod, binary).Inc()
EventsProcessed.ToProm().WithLabelValues(metrics.FilterMetricLabels(eventType, namespace, workload, pod, binary)...).Inc()
if pInfo != nil && pInfo.Name != "" {
policyStats.
WithLabelValues(pInfo.Name, pInfo.Hook, namespace, workload, pod, binary).
policyStats.ToProm().
WithLabelValues(metrics.FilterMetricLabels(pInfo.Name, pInfo.Hook, namespace, workload, pod, binary)...).
Inc()
}
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/metrics/eventmetrics/eventmetrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
)

func TestHandleProcessedEvent(t *testing.T) {
assert.NoError(t, testutil.CollectAndCompare(EventsProcessed, strings.NewReader("")))
assert.NoError(t, testutil.CollectAndCompare(EventsProcessed.ToProm(), strings.NewReader("")))
handleProcessedEvent(nil, nil)
// empty process
handleProcessedEvent(nil, &tetragon.GetEventsResponse{Event: &tetragon.GetEventsResponse_ProcessKprobe{ProcessKprobe: &tetragon.ProcessKprobe{}}})
Expand Down Expand Up @@ -79,7 +79,7 @@ tetragon_events_total{binary="binary_c",namespace="namespace_c",pod="pod_c",type
tetragon_events_total{binary="binary_e",namespace="",pod="",type="PROCESS_EXIT",workload=""} 1
tetragon_events_total{binary="binary_e",namespace="namespace_e",pod="pod_e",type="PROCESS_EXIT",workload="workload_e"} 1
`)
assert.NoError(t, testutil.CollectAndCompare(EventsProcessed, expected))
assert.NoError(t, testutil.CollectAndCompare(EventsProcessed.ToProm(), expected))
}

func TestHandleOriginalEvent(t *testing.T) {
Expand Down
49 changes: 49 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
package metrics

import (
"fmt"
"net/http"
"sync"
"time"

"golang.org/x/exp/slices"

"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/cilium/tetragon/pkg/option"
"github.com/cilium/tetragon/pkg/podhooks"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
Expand All @@ -27,6 +32,33 @@ var (
deleteDelay = 1 * time.Minute
)

// GranularCounter describes a Prometheus counter vector whose label set is
// only finalized on the first call to ToProm, after the configured metrics
// label filter has been applied to it.
type GranularCounter struct {
// counter is the underlying vector; it is nil until ToProm has run once.
counter *prometheus.CounterVec
// CounterOpts holds the options handed to NewCounterVecWithPod by ToProm.
CounterOpts prometheus.CounterOpts
// labels starts as the caller's labels followed by
// consts.KnownMetricLabelFilters and is replaced by the filtered list in ToProm.
labels []string
// register ensures the counter vector is created exactly once.
register sync.Once
}

// MustNewGranularCounter returns a GranularCounter for opts whose label set is
// labels followed by consts.KnownMetricLabelFilters. The Prometheus counter
// itself is not created here; that happens on the first call to ToProm.
//
// It panics if labels contains any entry of consts.KnownMetricLabelFilters,
// because those labels are appended by this constructor.
func MustNewGranularCounter(opts prometheus.CounterOpts, labels []string) *GranularCounter {
	for _, label := range labels {
		if slices.Contains(consts.KnownMetricLabelFilters, label) {
			panic(fmt.Sprintf("labels passed to GranularCounter can't contain any of the following: %v. These labels are added by Tetragon.", consts.KnownMetricLabelFilters))
		}
	}
	// Assemble the combined list in a fresh slice. Appending straight onto the
	// caller's slice would write into its backing array whenever it has spare
	// capacity, silently sharing (and later mutating) the caller's data.
	combined := make([]string, 0, len(labels)+len(consts.KnownMetricLabelFilters))
	combined = append(combined, labels...)
	combined = append(combined, consts.KnownMetricLabelFilters...)
	return &GranularCounter{
		CounterOpts: opts,
		labels:      combined,
	}
}

// ToProm returns the Prometheus counter vector backing this GranularCounter.
// The first call filters the stored labels with FilterMetricLabels and creates
// the vector through NewCounterVecWithPod; later calls return the same vector.
func (m *GranularCounter) ToProm() *prometheus.CounterVec {
	build := func() {
		filtered := FilterMetricLabels(m.labels...)
		m.labels = filtered
		m.counter = NewCounterVecWithPod(m.CounterOpts, filtered)
	}
	m.register.Do(build)
	return m.counter
}

// NewCounterVecWithPod is a wrapper around prometheus.NewCounterVec that also registers the metric
// to be cleaned up when a pod is deleted. It should be used only to register metrics that have
// "pod" and "namespace" labels.
Expand Down Expand Up @@ -142,3 +174,20 @@ func EnableMetrics(address string) {
http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}))
http.ListenAndServe(address, nil)
}

// The FilterMetricLabels func takes in string arguments and returns a slice of those strings omitting the labels it is not configured for.
// IMPORTANT! The filtered metric labels must be passed last and in the exact order of consts.KnownMetricLabelFilters.
func FilterMetricLabels(labels ...string) []string {
offset := len(labels) - len(consts.KnownMetricLabelFilters)
lambdanis marked this conversation as resolved.
Show resolved Hide resolved
if offset < 0 {
logger.GetLogger().WithField("labels", labels).Debug("Not enough labels provided to metrics.FilterMetricLabels.")
return labels
}
result := labels[:offset]
for i, label := range consts.KnownMetricLabelFilters {
if _, ok := option.Config.MetricsLabelFilter[label]; ok {
result = append(result, labels[offset+i])
}
}
return result
nap32 marked this conversation as resolved.
Show resolved Hide resolved
}
29 changes: 29 additions & 0 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,41 @@ import (
"github.com/cilium/tetragon/pkg/metrics"
"github.com/cilium/tetragon/pkg/metrics/config"
"github.com/cilium/tetragon/pkg/metrics/eventmetrics"
"github.com/cilium/tetragon/pkg/option"
)

var sampleMsgGenericTracepointUnix = tracing.MsgGenericTracepointUnix{
PolicyName: "fake-policy",
}

// TestFilterMetricLabels checks that FilterMetricLabels keeps the leading
// values, keeps only the trailing values whose label is enabled in the
// configured filter, and returns its input untouched when too few values are
// supplied.
func TestFilterMetricLabels(t *testing.T) {
	// useFilter installs a label filter that enables exactly the given labels.
	useFilter := func(enabled ...string) {
		filter := make(map[string]interface{}, len(enabled))
		for _, name := range enabled {
			filter[name] = nil
		}
		option.Config.MetricsLabelFilter = filter
	}
	// expect asserts the filtered form of the given label values.
	expect := func(want []string, given ...string) {
		assert.Equal(t, want, metrics.FilterMetricLabels(given...))
	}

	// Every known label enabled: nothing is dropped.
	useFilter("namespace", "workload", "pod", "binary")
	expect([]string{"type", "namespace", "workspace", "pod", "binary"}, "type", "namespace", "workspace", "pod", "binary")
	expect([]string{"syscall", "namespace", "workspace", "pod", "binary"}, "syscall", "namespace", "workspace", "pod", "binary")
	expect([]string{"namespace", "workspace", "pod", "binary"}, "namespace", "workspace", "pod", "binary")

	// Only namespace and workload enabled: the pod and binary values are dropped.
	useFilter("namespace", "workload")
	expect([]string{"type", "namespace", "workspace"}, "type", "namespace", "workspace", "pod", "binary")
	expect([]string{"syscall", "namespace", "workspace"}, "syscall", "namespace", "workspace", "pod", "binary")
	expect([]string{"namespace", "workspace"}, "namespace", "workspace", "pod", "binary")

	// Fewer values than known labels: the input comes back unchanged.
	useFilter("namespace", "workload", "pod", "binary")
	expect([]string{"type", "syscall"}, "type", "syscall")
}

func TestPodDelete(t *testing.T) {
reg := metrics.GetRegistry()
config.InitAllMetrics(reg)
Expand Down
10 changes: 6 additions & 4 deletions pkg/metrics/syscallmetrics/syscallmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ import (
)

var (
syscallStats = metrics.NewCounterVecWithPod(prometheus.CounterOpts{
syscallStats = metrics.MustNewGranularCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "syscalls_total",
Help: "System calls observed.",
ConstLabels: nil,
}, []string{"syscall", "namespace", "workload", "pod", "binary"})
}, []string{"syscall"})
)

func InitMetrics(registry *prometheus.Registry) {
registry.MustRegister(syscallStats)
registry.MustRegister(syscallStats.ToProm())
}

func Handle(event interface{}) {
Expand All @@ -46,7 +46,9 @@ func Handle(event interface{}) {
}

if syscall != "" {
syscallStats.WithLabelValues(syscall, namespace, workload, pod, binary).Inc()
syscallStats.ToProm().
WithLabelValues(metrics.FilterMetricLabels(syscall, namespace, workload, pod, binary)...).
Inc()
}
}

Expand Down
19 changes: 15 additions & 4 deletions pkg/option/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/spf13/viper"
)

Expand Down Expand Up @@ -46,10 +47,11 @@ type config struct {
ProcessCacheSize int
DataCacheSize int

MetricsServer string
ServerAddress string
TracingPolicy string
TracingPolicyDir string
MetricsServer string
MetricsLabelFilter map[string]interface{}
ServerAddress string
TracingPolicy string
TracingPolicyDir string

ExportFilename string
ExportFileMaxSizeMB int
Expand Down Expand Up @@ -93,6 +95,15 @@ var (

// LogOpts contains logger parameters
LogOpts: make(map[string]string),

// Default to logging metrics with the greatest granularity.
MetricsLabelFilter: func() map[string]interface{} {
result := make(map[string]interface{})
for _, label := range consts.KnownMetricLabelFilters {
result[label] = nil
}
return result
}(),
}
)

Expand Down