From d153fda1d8b75c7e424f355c27d0a5bb7838ba70 Mon Sep 17 00:00:00 2001 From: hugoShaka Date: Thu, 9 Jan 2025 17:45:31 -0500 Subject: [PATCH] Do not error when facing duplicate metrics + log with component --- lib/service/service.go | 22 ++++++++++++++++++++-- lib/service/service_test.go | 13 ------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/lib/service/service.go b/lib/service/service.go index 088aeef2c2308..9e83774d36496 100644 --- a/lib/service/service.go +++ b/lib/service/service.go @@ -3416,6 +3416,16 @@ func (process *TeleportProcess) initUploaderService() error { return nil } +// promHTTPLogAdapter adapts a slog.Logger into a promhttp.Logger. +type promHTTPLogAdapter struct { + *slog.Logger +} + +// Println implements the promhttp.Logger interface. +func (l promHTTPLogAdapter) Println(v ...interface{}) { + l.Error(fmt.Sprint(v...)) +} + // initMetricsService starts the metrics service currently serving metrics for // prometheus consumption func (process *TeleportProcess) initMetricsService() error { @@ -3424,12 +3434,20 @@ func (process *TeleportProcess) initMetricsService() error { // We gather metrics both from the in-process registry (preferred metrics registration method) // and the global registry (used by some Teleport services and many dependencies). gatherers := prometheus.Gatherers{ - prometheus.DefaultGatherer, process.metricsRegistry, + prometheus.DefaultGatherer, } metricsHandler := promhttp.InstrumentMetricHandler( - process.metricsRegistry, promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{}), + process.metricsRegistry, promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{ + // Errors can happen if metrics are registered with identical names in both the local and the global registry. + // In this case, we log the error but continue collecting metrics. The first collected metric will win + // (the one from the local metrics registry takes precedence). + // As we move more things to the local registry, especially in other tools like tbot, we will have less + // conflicts in tests. + ErrorHandling: promhttp.ContinueOnError, + ErrorLog: promHTTPLogAdapter{process.logger.With(teleport.ComponentKey, teleport.ComponentMetrics)}, + }), ) mux.Handle("/metrics", metricsHandler) diff --git a/lib/service/service_test.go b/lib/service/service_test.go index 773642d7a7b74..4c08a87689145 100644 --- a/lib/service/service_test.go +++ b/lib/service/service_test.go @@ -2020,19 +2020,6 @@ func TestMetricsService(t *testing.T) { metricsURL.Path = "/metrics" resp, err := http.Get(metricsURL.String()) require.NoError(t, err) - // WIP: the test is failing in the CI only. This piece of code can be - // ignored and will be removed from the PR once I know why the HTTP status - // code is 500. - if resp.StatusCode != 200 { - t.Log("Non 200 status code received") - body, err := io.ReadAll(resp.Body) - if err != nil { - t.Logf("Cannot read body: %s", err.Error()) - } else { - t.Logf("Body: %s", string(body)) - } - t.Logf("Header: %#v", resp.Header) - } require.Equal(t, http.StatusOK, resp.StatusCode) body, err := io.ReadAll(resp.Body)