diff --git a/lib/service/service.go b/lib/service/service.go index 3e2754107fdd5..11a1b4c39ef22 100644 --- a/lib/service/service.go +++ b/lib/service/service.go @@ -665,7 +665,7 @@ type TeleportProcess struct { // conflicts. // // Both the metricsRegistry and the default global registry are gathered by - // Telpeort's metric service. + // Teleport's metric service. metricsRegistry *prometheus.Registry } diff --git a/lib/service/service_test.go b/lib/service/service_test.go index 52e59387ff580..46efb8e64d0fc 100644 --- a/lib/service/service_test.go +++ b/lib/service/service_test.go @@ -23,9 +23,11 @@ import ( "crypto/tls" "errors" "fmt" + "io" "log/slog" "net" "net/http" + "net/url" "os" "path/filepath" "strings" @@ -39,6 +41,8 @@ import ( "github.com/google/uuid" "github.com/gravitational/trace" "github.com/jonboulle/clockwork" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" "google.golang.org/grpc" @@ -1887,7 +1891,7 @@ func TestAgentRolloutController(t *testing.T) { dataDir := makeTempDir(t) cfg := servicecfg.MakeDefaultConfig() - // We use a real clock because too many sevrices are using the clock and it's not possible to accurately wait for + // We use a real clock because too many services are using the clock and it's not possible to accurately wait for // each one of them to reach the point where they wait for the clock to advance. If we add a WaitUntil(X waiters) // check, this will break the next time we add a new waiter. 
cfg.Clock = clockwork.NewRealClock() @@ -1906,7 +1910,7 @@ func TestAgentRolloutController(t *testing.T) { process, err := NewTeleport(cfg) require.NoError(t, err) - // Test setup: start the Teleport auth and wait for it to beocme ready + // Test setup: start the Teleport auth and wait for it to become ready require.NoError(t, process.Start()) // Test setup: wait for every service to start @@ -1949,6 +1953,84 @@ func TestAgentRolloutController(t *testing.T) { }, 5*time.Second, 10*time.Millisecond) } +func TestMetricsService(t *testing.T) { + t.Parallel() + // Test setup: create a loopback TCP listener for the metrics server on an OS-assigned port, get its file descriptor. + + // Note: this code is copied from integrations/helpers/NewListenerOn() to avoid including helpers in a production + // build and avoid a cyclic dependency. + metricsListener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + t.Cleanup(func() { + assert.NoError(t, metricsListener.Close()) + }) + require.IsType(t, &net.TCPListener{}, metricsListener) + metricsListenerFile, err := metricsListener.(*net.TCPListener).File() + require.NoError(t, err) + + // Test setup: configure a new Teleport process with auth and metrics enabled; proxy, SSH and debug service disabled. + dataDir := makeTempDir(t) + cfg := servicecfg.MakeDefaultConfig() + cfg.DataDir = dataDir + cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}) + cfg.Auth.Enabled = true + cfg.Proxy.Enabled = false + cfg.SSH.Enabled = false + cfg.DebugService.Enabled = false + cfg.Auth.StorageConfig.Params["path"] = dataDir + cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"} + cfg.Metrics.Enabled = true + + // Configure the metrics server to use the listener we previously created. 
+ cfg.Metrics.ListenAddr = &utils.NetAddr{AddrNetwork: "tcp", Addr: metricsListener.Addr().String()} + cfg.FileDescriptors = []*servicecfg.FileDescriptor{ + {Type: string(ListenerMetrics), Address: metricsListener.Addr().String(), File: metricsListenerFile}, + } + + // Create and start the Teleport service. + process, err := NewTeleport(cfg) + require.NoError(t, err) + require.NoError(t, process.Start()) + t.Cleanup(func() { + assert.NoError(t, process.Close()) + assert.NoError(t, process.Wait()) + }) + + // Test setup: create one test metric per registry (process-local and global), with a random nonce in each name. + nonce := strings.ReplaceAll(uuid.New().String(), "-", "") + localMetric := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "test", + Name: "local_metric_" + nonce, + }) + globalMetric := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "test", + Name: "global_metric_" + nonce, + }) + require.NoError(t, process.metricsRegistry.Register(localMetric)) + require.NoError(t, prometheus.Register(globalMetric)) + + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + t.Cleanup(cancel) + _, err = process.WaitForEvent(ctx, MetricsReady) + require.NoError(t, err) + + // Test execution: fetch /metrics from the listener address. NOTE(review): resp.Body is not closed if the status check fails. + metricsURL, err := url.Parse("http://" + metricsListener.Addr().String()) + require.NoError(t, err) + metricsURL.Path = "/metrics" + resp, err := http.Get(metricsURL.String()) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + require.NoError(t, resp.Body.Close()) + + // Test validation: check that the metrics server served both the local and global registry. + require.Contains(t, string(body), "local_metric_"+nonce) + require.Contains(t, string(body), "global_metric_"+nonce) +} + // makeTempDir makes a temp dir with a shorter name than t.TempDir() in order to // avoid https://github.com/golang/go/issues/62614. func makeTempDir(t *testing.T) string {