Skip to content

Commit

Permalink
server: actually check that catalog doesn't contain non-existent metrics
Browse files Browse the repository at this point in the history
TestChartCatalogMetrics is our main enforcement mechanism around the
hygiene of the chart catalog. Concretely, it checks that all metrics
are present in the catalog. However, its attempt to check the other
direction, namely that any metric mentioned in the catalog actually
(still) exists was thwarted because `GenerateCatalog` silently drops
such metrics (and so the test didn't ever get to see the stale entries).

This commit adds a `strict` flag to `GenerateCatalog` and removes all
stale metrics from the catalog.

Release note: None
  • Loading branch information
tbg committed May 17, 2022
1 parent d63a369 commit 3cb6d73
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 257 deletions.
127 changes: 127 additions & 0 deletions pkg/ccl/serverccl/chart_catalog_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
//

package serverccl_test

import (
"context"
"sort"
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/ts/catalog"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
io_prometheus_client "github.com/prometheus/client_model/go"
)

// TestChartCatalogMetric ensures that all metrics are included in at least one
// chart, and that every metric included in a chart is still part of the metrics
// registry.
//
// This test lives in CCL code so that it can pick up the full set of metrics,
// including those registered from CCL code.
func TestChartCatalogMetrics(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
s := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{})
defer s.Stopper().Stop(context.Background())

metricsMetadata := s.Server(0).MetricsRecorder().GetMetricsMetadata()

chartCatalog, err := catalog.GenerateCatalog(metricsMetadata, true /* strict */)

if err != nil {
t.Fatal(err)
}

// Each metric in metricsMetadata should have at least one entry in
// chartCatalog, which we track by deleting the metric from metricsMetadata.
for _, v := range chartCatalog {
deleteSeenMetrics(&v, metricsMetadata, t)
}

if len(metricsMetadata) > 0 {
var metricNames []string
for metricName := range metricsMetadata {
metricNames = append(metricNames, metricName)
}
sort.Strings(metricNames)
t.Errorf(`The following metrics need to be added to the chart catalog
(pkg/ts/catalog/chart_catalog.go): %v`, metricNames)
}

internalTSDBMetricNamesWithoutPrefix := map[string]struct{}{}
for _, name := range catalog.AllInternalTimeseriesMetricNames() {
name = strings.TrimPrefix(name, "cr.node.")
name = strings.TrimPrefix(name, "cr.store.")
internalTSDBMetricNamesWithoutPrefix[name] = struct{}{}
}
walkAllSections(chartCatalog, func(cs *catalog.ChartSection) {
for _, chart := range cs.Charts {
for _, metric := range chart.Metrics {
if *metric.MetricType.Enum() != io_prometheus_client.MetricType_HISTOGRAM {
continue
}
// We have a histogram. Make sure that it is properly represented in
// AllInternalTimeseriesMetricNames(). It's not a complete check but good enough in
// practice. Ideally we wouldn't require `histogramMetricsNames` and
// the associated manual step when adding a histogram. See:
// https://github.com/cockroachdb/cockroach/issues/64373
_, ok := internalTSDBMetricNamesWithoutPrefix[metric.Name+"-p50"]
if !ok {
t.Errorf("histogram %s needs to be added to `catalog.histogramMetricsNames` manually",
metric.Name)
}
}
}
})
}

// deleteSeenMetrics removes all metrics in a section from the metricMetadata map.
func deleteSeenMetrics(c *catalog.ChartSection, metadata map[string]metric.Metadata, t *testing.T) {
// if c.Title == "SQL" {
// t.Log(c)
// }
for _, x := range c.Charts {
if x.Title == "Connections" || x.Title == "Byte I/O" {
t.Log(x)
}

for _, metric := range x.Metrics {
if metric.Name == "sql.new_conns" || metric.Name == "sql.bytesin" {
t.Logf("found %v\n", metric.Name)
}
_, ok := metadata[metric.Name]
if ok {
delete(metadata, metric.Name)
}
}
}

for _, x := range c.Subsections {
deleteSeenMetrics(x, metadata, t)
}
}

// walkAllSections invokes the visitor on each of the ChartSections nestled under
// the input one.
func walkAllSections(chartCatalog []catalog.ChartSection, visit func(c *catalog.ChartSection)) {
for _, c := range chartCatalog {
visit(&c)
for _, ic := range c.Subsections {
visit(ic)
}
}
}
2 changes: 1 addition & 1 deletion pkg/server/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ func (s *adminServer) ChartCatalog(
) (*serverpb.ChartCatalogResponse, error) {
metricsMetadata := s.server.recorder.GetMetricsMetadata()

chartCatalog, err := catalog.GenerateCatalog(metricsMetadata)
chartCatalog, err := catalog.GenerateCatalog(metricsMetadata, false /* strict */)
if err != nil {
return nil, serverError(ctx, err)
}
Expand Down
134 changes: 3 additions & 131 deletions pkg/server/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,13 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logpb"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/logtags"
"github.com/kr/pretty"
io_prometheus_client "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -913,7 +911,9 @@ func TestChartCatalogGen(t *testing.T) {

metricsMetadata := s.recorder.GetMetricsMetadata()

chartCatalog, err := catalog.GenerateCatalog(metricsMetadata)
// NB: strict mode verifies that all metrics mentioned in the catalog
// exist.
chartCatalog, err := catalog.GenerateCatalog(metricsMetadata, true /* strict */)

if err != nil {
t.Fatal(err)
Expand All @@ -932,134 +932,6 @@ func TestChartCatalogGen(t *testing.T) {
}
}

// walkAllSections invokes the visitor on each of the ChartSections nestled under
// the input one.
func walkAllSections(chartCatalog []catalog.ChartSection, visit func(c *catalog.ChartSection)) {
for _, c := range chartCatalog {
visit(&c)
for _, ic := range c.Subsections {
visit(ic)
}
}
}

// findUndefinedMetrics finds metrics listed in pkg/ts/catalog/chart_catalog.go
// that are not defined. This is most likely caused by a metric being removed.
func findUndefinedMetrics(c *catalog.ChartSection, metadata map[string]metric.Metadata) []string {
var undefinedMetrics []string
for _, ic := range c.Charts {
for _, metric := range ic.Metrics {
_, ok := metadata[metric.Name]
if !ok {
undefinedMetrics = append(undefinedMetrics, metric.Name)
}
}
}

for _, x := range c.Subsections {
undefinedMetrics = append(undefinedMetrics, findUndefinedMetrics(x, metadata)...)
}

return undefinedMetrics
}

// deleteSeenMetrics removes all metrics in a section from the metricMetadata map.
func deleteSeenMetrics(c *catalog.ChartSection, metadata map[string]metric.Metadata, t *testing.T) {
// if c.Title == "SQL" {
// t.Log(c)
// }
for _, x := range c.Charts {
if x.Title == "Connections" || x.Title == "Byte I/O" {
t.Log(x)
}

for _, metric := range x.Metrics {
if metric.Name == "sql.new_conns" || metric.Name == "sql.bytesin" {
t.Logf("found %v\n", metric.Name)
}
_, ok := metadata[metric.Name]
if ok {
delete(metadata, metric.Name)
}
}
}

for _, x := range c.Subsections {
deleteSeenMetrics(x, metadata, t)
}
}

// TestChartCatalogMetric ensures that all metrics are included in at least one
// chart, and that every metric included in a chart is still part of the metrics
// registry.
func TestChartCatalogMetrics(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
s := startServer(t)
defer s.Stopper().Stop(context.Background())

metricsMetadata := s.recorder.GetMetricsMetadata()

chartCatalog, err := catalog.GenerateCatalog(metricsMetadata)

if err != nil {
t.Fatal(err)
}

// Each metric referenced in the chartCatalog must have a definition in metricsMetadata
var undefinedMetrics []string
for _, cs := range chartCatalog {
undefinedMetrics = append(undefinedMetrics, findUndefinedMetrics(&cs, metricsMetadata)...)
}

if len(undefinedMetrics) > 0 {
t.Fatalf(`The following metrics need are no longer present and need to be removed
from the chart catalog (pkg/ts/catalog/chart_catalog.go):%v`, undefinedMetrics)
}

// Each metric in metricsMetadata should have at least one entry in
// chartCatalog, which we track by deleting the metric from metricsMetadata.
for _, v := range chartCatalog {
deleteSeenMetrics(&v, metricsMetadata, t)
}

if len(metricsMetadata) > 0 {
var metricNames []string
for metricName := range metricsMetadata {
metricNames = append(metricNames, metricName)
}
sort.Strings(metricNames)
t.Errorf(`The following metrics need to be added to the chart catalog
(pkg/ts/catalog/chart_catalog.go): %v`, metricNames)
}

internalTSDBMetricNamesWithoutPrefix := map[string]struct{}{}
for _, name := range catalog.AllInternalTimeseriesMetricNames() {
name = strings.TrimPrefix(name, "cr.node.")
name = strings.TrimPrefix(name, "cr.store.")
internalTSDBMetricNamesWithoutPrefix[name] = struct{}{}
}
walkAllSections(chartCatalog, func(cs *catalog.ChartSection) {
for _, chart := range cs.Charts {
for _, metric := range chart.Metrics {
if *metric.MetricType.Enum() != io_prometheus_client.MetricType_HISTOGRAM {
continue
}
// We have a histogram. Make sure that it is properly represented in
// AllInternalTimeseriesMetricNames(). It's not a complete check but good enough in
// practice. Ideally we wouldn't require `histogramMetricsNames` and
// the associated manual step when adding a histogram. See:
// https://github.com/cockroachdb/cockroach/issues/64373
_, ok := internalTSDBMetricNamesWithoutPrefix[metric.Name+"-p50"]
if !ok {
t.Errorf("histogram %s needs to be added to `catalog.histogramMetricsNames` manually",
metric.Name)
}
}
}
})
}

func TestHotRangesResponse(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down
32 changes: 25 additions & 7 deletions pkg/ts/catalog/catalog_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
package catalog

import (
"fmt"
"regexp"
"strings"

Expand Down Expand Up @@ -44,6 +45,7 @@ const (
ReplicationLayer = `Replication Layer`
StorageLayer = `Storage Layer`
Timeseries = `Timeseries`
Tenants = `Tenants`
Jobs = `Jobs`
)

Expand Down Expand Up @@ -251,33 +253,46 @@ var derKey = map[DescribeDerivative]tspb.TimeSeriesQueryDerivative{

// GenerateCatalog creates an array of ChartSections, which is served at
// /_admin/v1/chartcatalog.
func GenerateCatalog(metadata map[string]metric.Metadata) ([]ChartSection, error) {
//
// In strict mode, any metric that is present in the chart catalog but for
// which no metadata is found triggers an error. In non-strict mode, such
// metrics are simply ignored.
func GenerateCatalog(metadata map[string]metric.Metadata, strict bool) ([]ChartSection, error) {
var strictBuf *strings.Builder
if strict {
strictBuf = new(strings.Builder)
}

if !catalogGenerated {
if strictBuf != nil || !catalogGenerated {
for _, sd := range charts {

if err := createIndividualCharts(metadata, sd); err != nil {
if err := createIndividualCharts(metadata, sd, strictBuf); err != nil {
return nil, err
}
}
catalogGenerated = true
}

if strictBuf != nil && strictBuf.Len() > 0 {
return nil, errors.Errorf("%s", strictBuf)
}

return chartCatalog, nil
}

// createIndividualChart creates IndividualCharts, and ultimately places them
// in the appropriate place in chartCatalog based on the hierarchy described
// in sd.Organization.
func createIndividualCharts(metadata map[string]metric.Metadata, sd sectionDescription) error {
func createIndividualCharts(
metadata map[string]metric.Metadata, sd sectionDescription, strictBuf *strings.Builder,
) error {

var ics []*IndividualChart

for _, cd := range sd.Charts {

ic := new(IndividualChart)

if err := ic.addMetrics(cd, metadata); err != nil {
if err := ic.addMetrics(cd, metadata, strictBuf); err != nil {
return err
}

Expand Down Expand Up @@ -328,7 +343,7 @@ func createIndividualCharts(metadata map[string]metric.Metadata, sd sectionDescr
// addMetrics sets the IndividualChart's Metric values by looking up the
// chartDescription metrics in the metadata map.
func (ic *IndividualChart) addMetrics(
cd chartDescription, metadata map[string]metric.Metadata,
cd chartDescription, metadata map[string]metric.Metadata, strictBuf *strings.Builder,
) error {
for _, x := range cd.Metrics {

Expand All @@ -338,6 +353,9 @@ func (ic *IndividualChart) addMetrics(
// insecure nodes do not metadata related to SSL certificates, so those metrics
// should not be added to any charts.
if !ok {
if strictBuf != nil {
fmt.Fprintf(strictBuf, "metadata missing for metric %s\n", x)
}
continue
}

Expand Down
Loading

0 comments on commit 3cb6d73

Please sign in to comment.