From fbaf52a53dd680ced262763450bc023c8ee4a13c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 11 Sep 2023 13:13:25 -0700 Subject: [PATCH] tetragon: Add metric to report rate limited events Tetragon can set a maximum rate limit for pushing events. When this is done, we also track the number of events that were dropped due to the rate limit. When drops occur we generate a summary event to report how many drops happened. This adds a metric for the drops as well, so they can be tracked easily from Prometheus. Signed-off-by: John Fastabend --- pkg/metrics/config/initmetrics.go | 2 ++ .../ratelimitmetrics/ratelimitmetrics.go | 22 +++++++++++++++++++ pkg/ratelimit/ratelimit.go | 2 ++ 3 files changed, 26 insertions(+) create mode 100644 pkg/metrics/ratelimitmetrics/ratelimitmetrics.go diff --git a/pkg/metrics/config/initmetrics.go b/pkg/metrics/config/initmetrics.go index cd0da5b637b..107c5715989 100644 --- a/pkg/metrics/config/initmetrics.go +++ b/pkg/metrics/config/initmetrics.go @@ -13,6 +13,7 @@ import ( "github.com/cilium/tetragon/pkg/metrics/opcodemetrics" pfmetrics "github.com/cilium/tetragon/pkg/metrics/policyfilter" "github.com/cilium/tetragon/pkg/metrics/processexecmetrics" + "github.com/cilium/tetragon/pkg/metrics/ratelimitmetrics" "github.com/cilium/tetragon/pkg/metrics/ringbufmetrics" "github.com/cilium/tetragon/pkg/metrics/ringbufqueuemetrics" "github.com/cilium/tetragon/pkg/metrics/syscallmetrics" @@ -39,6 +40,7 @@ func InitAllMetrics(registry *prometheus.Registry) { watchermetrics.InitMetrics(registry) observer.InitMetrics(registry) tracing.InitMetrics(registry) + ratelimitmetrics.InitMetrics(registry) registry.MustRegister(collectors.NewGoCollector()) registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) diff --git a/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go b/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go new file mode 100644 index 00000000000..caa64296294 --- /dev/null +++ 
b/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package ratelimitmetrics + +import ( + "github.com/cilium/tetragon/pkg/metrics/consts" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + RateLimitDropped = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: consts.MetricsNamespace, + Name: "ratelimit_dropped_total", + Help: "The total number of rate limit Tetragon drops", + ConstLabels: nil, + }) +) + +func InitMetrics(registry *prometheus.Registry) { + registry.MustRegister(RateLimitDropped) +} diff --git a/pkg/ratelimit/ratelimit.go b/pkg/ratelimit/ratelimit.go index 52c5512a4e2..4043952258b 100644 --- a/pkg/ratelimit/ratelimit.go +++ b/pkg/ratelimit/ratelimit.go @@ -11,6 +11,7 @@ import ( "github.com/cilium/tetragon/api/v1/tetragon" "github.com/cilium/tetragon/pkg/encoder" "github.com/cilium/tetragon/pkg/logger" + "github.com/cilium/tetragon/pkg/metrics/ratelimitmetrics" "github.com/cilium/tetragon/pkg/reader/node" "golang.org/x/time/rate" "google.golang.org/protobuf/types/known/timestamppb" @@ -77,4 +78,5 @@ func (r *RateLimiter) reportRateLimitInfo(encoder encoder.EventEncoder) { func (r *RateLimiter) Drop() { atomic.AddUint64(&r.dropped, 1) + ratelimitmetrics.RateLimitDropped.Inc() }