Skip to content

Commit

Permalink
tetragon: Add events buffer queue metrics
Browse files Browse the repository at this point in the history
Add the following metric counters for the events buffer queue:

  eventsQueueReceived - number of records received from the perf event reader goroutine
  eventsQueueLost     - number of records lost because the RBQueue channel was full

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
  • Loading branch information
olsajiri committed Aug 16, 2023
1 parent 3f3d565 commit f5e3f16
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 0 deletions.
37 changes: 37 additions & 0 deletions pkg/metrics/eventsqueuemetrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package eventsqueuemetrics

import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
)

// Events-queue metrics. Both values are only ever incremented, so they are
// declared as Prometheus counters rather than gauges: this exposes them with
// TYPE counter and lets rate()/increase() handle process restarts correctly.
// They stay vectors (with no variable labels) so the accessors in this
// package can keep calling WithLabelValues().
var (
	// eventsQueueReceived counts events taken off the events queue; it is
	// incremented through ReceivedInc.
	eventsQueueReceived = prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace:   consts.MetricsNamespace,
		Name:        "eventsqueue_stats_received",
		Help:        "The total number of Tetragon events queue received.",
		ConstLabels: nil,
	}, nil)
	// eventsQueueLost counts events dropped because the events queue was
	// full; it is incremented through LostInc.
	eventsQueueLost = prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace:   consts.MetricsNamespace,
		Name:        "eventsqueue_stats_lost",
		Help:        "The total number of Tetragon events queue lost.",
		ConstLabels: nil,
	}, nil)
)

// InitMetrics registers the events-queue metrics with the given registry.
// It panics if registration of either collector fails, as MustRegister does.
func InitMetrics(registry *prometheus.Registry) {
	// MustRegister is variadic and registers collectors in argument order.
	registry.MustRegister(
		eventsQueueReceived,
		eventsQueueLost,
	)
}

// ReceivedInc increments the events-queue "received" metric by one.
func ReceivedInc() {
	// The vector has no variable labels, so the empty label set selects
	// its single child metric.
	received := eventsQueueReceived.WithLabelValues()
	received.Inc()
}

// LostInc increments the events-queue "lost" metric by one.
func LostInc() {
	// The vector has no variable labels, so the empty label set selects
	// its single child metric.
	lost := eventsQueueLost.WithLabelValues()
	lost.Inc()
}
2 changes: 2 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/cilium/tetragon/pkg/metrics/errormetrics"
"github.com/cilium/tetragon/pkg/metrics/eventcachemetrics"
"github.com/cilium/tetragon/pkg/metrics/eventmetrics"
"github.com/cilium/tetragon/pkg/metrics/eventsqueuemetrics"
"github.com/cilium/tetragon/pkg/metrics/kprobemetrics"
"github.com/cilium/tetragon/pkg/metrics/mapmetrics"
"github.com/cilium/tetragon/pkg/metrics/opcodemetrics"
Expand All @@ -34,6 +35,7 @@ func InitAllMetrics(registry *prometheus.Registry) {
pfmetrics.InitMetrics(registry)
processexecmetrics.InitMetrics(registry)
ringbufmetrics.InitMetrics(registry)
eventsqueuemetrics.InitMetrics(registry)
syscallmetrics.InitMetrics(registry)
watchermetrics.InitMetrics(registry)
observer.InitMetrics(registry)
Expand Down
3 changes: 3 additions & 0 deletions pkg/observer/observer.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/cilium/tetragon/pkg/bpf"
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/errormetrics"
"github.com/cilium/tetragon/pkg/metrics/eventsqueuemetrics"
"github.com/cilium/tetragon/pkg/metrics/opcodemetrics"
"github.com/cilium/tetragon/pkg/metrics/ringbufmetrics"
"github.com/cilium/tetragon/pkg/option"
Expand Down Expand Up @@ -242,6 +243,7 @@ func (k *Observer) RunEvents(stopCtx context.Context, ready func()) error {
case eventsQueue <- &record:
default:
// eventsQueue channel is full, drop the event
eventsqueuemetrics.LostInc()
}
k.recvCntr++
ringbufmetrics.PerfEventReceived.Inc()
Expand All @@ -263,6 +265,7 @@ func (k *Observer) RunEvents(stopCtx context.Context, ready func()) error {
select {
case event := <-eventsQueue:
k.receiveEvent(event.RawSample)
eventsqueuemetrics.ReceivedInc()
case <-stopCtx.Done():
k.log.WithError(stopCtx.Err()).Infof("Listening for events completed.")
k.log.Debugf("Unprocessed events in RB queue: %d", len(eventsQueue))
Expand Down

0 comments on commit f5e3f16

Please sign in to comment.