-
Notifications
You must be signed in to change notification settings - Fork 381
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
metrics: Delete metrics for deleted pods
Some of the exposed metrics have a "pod" label, which contains the name of the monitored pod. Until now, when a pod was deleted, Tetragon kept exposing stale metrics for it. This caused a continuous increase in memory usage in the Tetragon agent as well as in the metrics scraper. This commit fixes the issue. Now, if metrics and the k8s API are both enabled, an additional pod hook is registered that deletes the metrics associated with a pod when that pod is deleted. Signed-off-by: Anna Kapuscinska <anna@isovalent.com>
- Loading branch information
Showing
5 changed files
with
165 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// Copyright Authors of Tetragon | ||
|
||
package metrics | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/prometheus/client_golang/prometheus" | ||
io_prometheus_client "github.com/prometheus/client_model/go" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
corev1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
|
||
"github.com/cilium/tetragon/api/v1/tetragon" | ||
"github.com/cilium/tetragon/pkg/grpc/tracing" | ||
"github.com/cilium/tetragon/pkg/metrics/eventmetrics" | ||
) | ||
|
||
var sampleMsgGenericTracepointUnix = tracing.MsgGenericTracepointUnix{ | ||
PolicyName: "fake-policy", | ||
} | ||
|
||
func TestPodDelete(t *testing.T) { | ||
reg := prometheus.NewRegistry() | ||
InitAllMetrics(reg) | ||
|
||
// Process four events, each one with different combination of pod/namespace. | ||
// These events should be counted by multiple metrics with a "pod" label: | ||
// * tetragon_events_total | ||
// * tetragon_policy_events_total | ||
// * tetragon_syscalls_total | ||
for _, namespace := range []string{"fake-namespace", "other-namespace"} { | ||
for _, pod := range []string{"fake-pod", "other-pod"} { | ||
event := tetragon.GetEventsResponse{ | ||
Event: &tetragon.GetEventsResponse_ProcessTracepoint{ | ||
ProcessTracepoint: &tetragon.ProcessTracepoint{ | ||
Subsys: "raw_syscalls", | ||
Event: "sys_enter", | ||
Process: &tetragon.Process{ | ||
Pod: &tetragon.Pod{ | ||
Namespace: namespace, | ||
Name: pod, | ||
}, | ||
}, | ||
Args: []*tetragon.KprobeArgument{ | ||
{ | ||
Arg: &tetragon.KprobeArgument_LongArg{ | ||
LongArg: 0, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
eventmetrics.ProcessEvent(&sampleMsgGenericTracepointUnix, &event) | ||
} | ||
} | ||
checkMetricSeriesCount(t, reg, 4) | ||
|
||
// Exactly one timeseries should be deleted for each metric (matching both | ||
// pod name and namespace). | ||
DeleteMetricsForPod(&corev1.Pod{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "fake-pod", | ||
Namespace: "fake-namespace", | ||
}, | ||
}) | ||
checkMetricSeriesCount(t, reg, 3) | ||
} | ||
|
||
func checkMetricSeriesCount(t *testing.T, registry *prometheus.Registry, seriesCount int) { | ||
metricFamilies, err := registry.Gather() | ||
require.NoError(t, err) | ||
|
||
metricNameToSeries := map[string]*io_prometheus_client.MetricFamily{} | ||
for _, metricFamily := range metricFamilies { | ||
metricNameToSeries[*metricFamily.Name] = metricFamily | ||
} | ||
for _, metric := range []string{"tetragon_events_total", "tetragon_policy_events_total", "tetragon_syscalls_total"} { | ||
metricFamily := metricNameToSeries[metric] | ||
require.NotNil(t, metricFamily) | ||
assert.Len(t, metricFamily.Metric, seriesCount) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters