From 4d21ac23e642391a5c7f75962cd5c939679f7dd2 Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Sat, 22 Apr 2023 03:14:19 +0800 Subject: [PATCH 1/8] Implement bucket limit for native histograms Signed-off-by: Jeanette Tan --- config/config.go | 3 ++ scrape/scrape.go | 44 +++++++++++++++++++---- scrape/scrape_test.go | 82 +++++++++++++++++++++++++------------------ scrape/target.go | 26 ++++++++++++++ storage/interface.go | 1 + 5 files changed, 116 insertions(+), 40 deletions(-) diff --git a/config/config.go b/config/config.go index 5c51d5a0d85..31ebb288e30 100644 --- a/config/config.go +++ b/config/config.go @@ -489,6 +489,9 @@ type ScrapeConfig struct { // More than this label value length post metric-relabeling will cause the // scrape to fail. LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"` + // More than this many buckets in a native histogram will cause the scrape to + // fail. + NativeHistogramBucketLimit uint `yaml:"bucket_limit,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. diff --git a/scrape/scrape.go b/scrape/scrape.go index 5c649e729ab..7c24c1070b5 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -191,6 +191,12 @@ var ( }, []string{"scrape_job"}, ) + targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_histogram_exceeded_bucket_limit_total", + Help: "Total number of native histograms rejected due to exceeding the bucket limit.", + }, + ) ) func init() { @@ -216,6 +222,7 @@ func init() { targetScrapeExemplarOutOfOrder, targetScrapePoolExceededLabelLimits, targetSyncFailed, + targetScrapeNativeHistogramBucketLimit, ) } @@ -256,6 +263,7 @@ type scrapeLoopOptions struct { target *Target scraper scraper sampleLimit int + bucketLimit int labelLimits *labelLimits honorLabels bool honorTimestamps bool @@ -319,6 +327,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, jitterSeed jitterSeed, opts.honorTimestamps, opts.sampleLimit, + opts.bucketLimit, opts.labelLimits, opts.interval, opts.timeout, @@ -412,6 +421,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { timeout = time.Duration(sp.config.ScrapeTimeout) bodySizeLimit = int64(sp.config.BodySizeLimit) sampleLimit = int(sp.config.SampleLimit) + bucketLimit = int(sp.config.NativeHistogramBucketLimit) labelLimits = &labelLimits{ labelLimit: int(sp.config.LabelLimit), labelNameLengthLimit: int(sp.config.LabelNameLengthLimit), @@ -446,6 +456,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { target: t, scraper: s, sampleLimit: sampleLimit, + bucketLimit: bucketLimit, labelLimits: labelLimits, honorLabels: honorLabels, honorTimestamps: honorTimestamps, @@ -530,6 +541,7 @@ func (sp *scrapePool) sync(targets []*Target) { timeout = time.Duration(sp.config.ScrapeTimeout) bodySizeLimit = int64(sp.config.BodySizeLimit) sampleLimit = int(sp.config.SampleLimit) + bucketLimit = int(sp.config.NativeHistogramBucketLimit) labelLimits = &labelLimits{ labelLimit: int(sp.config.LabelLimit), labelNameLengthLimit: int(sp.config.LabelNameLengthLimit), @@ -559,6 +571,7 @@ func (sp *scrapePool) sync(targets []*Target) { target: t, scraper: s, sampleLimit: sampleLimit, + bucketLimit: bucketLimit, labelLimits: labelLimits, honorLabels: honorLabels, honorTimestamps: honorTimestamps, @@ -731,7 +744,7 @@ func mutateReportSampleLabels(lset labels.Labels, target *Target) labels.Labels } // appender returns an appender for ingested samples from the target. -func appender(app storage.Appender, limit int) storage.Appender { +func appender(app storage.Appender, limit, bucketLimit int) storage.Appender { app = &timeLimitAppender{ Appender: app, maxTime: timestamp.FromTime(time.Now().Add(maxAheadTime)), @@ -744,6 +757,13 @@ func appender(app storage.Appender, limit int) storage.Appender { limit: limit, } } + + if bucketLimit > 0 { + app = &bucketLimitAppender{ + Appender: app, + limit: bucketLimit, + } + } return app } @@ -872,6 +892,7 @@ type scrapeLoop struct { forcedErr error forcedErrMtx sync.Mutex sampleLimit int + bucketLimit int labelLimits *labelLimits interval time.Duration timeout time.Duration @@ -1152,6 +1173,7 @@ func newScrapeLoop(ctx context.Context, jitterSeed uint64, honorTimestamps bool, sampleLimit int, + bucketLimit int, labelLimits *labelLimits, interval time.Duration, timeout time.Duration, @@ -1195,6 +1217,7 @@ func newScrapeLoop(ctx context.Context, appenderCtx: appenderCtx, honorTimestamps: honorTimestamps, sampleLimit: sampleLimit, + bucketLimit: bucketLimit, labelLimits: labelLimits, interval: interval, timeout: timeout, @@ -1462,10 +1485,11 @@ func (sl *scrapeLoop) getCache() *scrapeCache { } type appendErrors struct { - numOutOfOrder int - numDuplicates int - numOutOfBounds int - numExemplarOutOfOrder int + numOutOfOrder int + numDuplicates int + numOutOfBounds int + numExemplarOutOfOrder int + numHistogramBucketLimit int } func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { @@ -1510,7 +1534,7 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, } // Take an appender with limits. - app = appender(app, sl.sampleLimit) + app = appender(app, sl.sampleLimit, sl.bucketLimit) defer func() { if err != nil { @@ -1693,6 +1717,9 @@ loop: if appErrs.numExemplarOutOfOrder > 0 { level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) } + if appErrs.numHistogramBucketLimit > 0 { + level.Warn(sl.l).Log("msg", "Error on ingesting native histograms that exceeded bucket limit", "num_dropped", appErrs.numHistogramBucketLimit) + } if err == nil { sl.cache.forEachStale(func(lset labels.Labels) bool { // Series no longer exposed, mark it stale. @@ -1735,6 +1762,11 @@ func (sl *scrapeLoop) checkAddError(ce *cacheEntry, met []byte, tp *int64, err e level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met)) targetScrapeSampleOutOfBounds.Inc() return false, nil + case storage.ErrHistogramBucketLimit: + appErrs.numHistogramBucketLimit++ + level.Debug(sl.l).Log("msg", "Exceeded bucket limit for native histograms", "series", string(met)) + targetScrapeNativeHistogramBucketLimit.Inc() + return false, nil case errSampleLimit: // Keep on parsing output if we hit the limit, so we report the correct // total number of samples scraped. diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 2a2ca094850..c815bf5c601 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -487,7 +487,7 @@ func TestScrapePoolAppender(t *testing.T) { appl, ok := loop.(*scrapeLoop) require.True(t, ok, "Expected scrapeLoop but got %T", loop) - wrapped := appender(appl.appender(context.Background()), 0) + wrapped := appender(appl.appender(context.Background()), 0, 0) tl, ok := wrapped.(*timeLimitAppender) require.True(t, ok, "Expected timeLimitAppender but got %T", wrapped) @@ -503,7 +503,7 @@ func TestScrapePoolAppender(t *testing.T) { appl, ok = loop.(*scrapeLoop) require.True(t, ok, "Expected scrapeLoop but got %T", loop) - wrapped = appender(appl.appender(context.Background()), sampleLimit) + wrapped = appender(appl.appender(context.Background()), sampleLimit, 0) sl, ok := wrapped.(*limitAppender) require.True(t, ok, "Expected limitAppender but got %T", wrapped) @@ -513,6 +513,20 @@ func TestScrapePoolAppender(t *testing.T) { _, ok = tl.Appender.(nopAppender) require.True(t, ok, "Expected base appender but got %T", tl.Appender) + + wrapped = appender(appl.appender(context.Background()), sampleLimit, 100) + + bl, ok := wrapped.(*bucketLimitAppender) + require.True(t, ok, "Expected bucketLimitAppender but got %T", wrapped) + + sl, ok = bl.Appender.(*limitAppender) + require.True(t, ok, "Expected limitAppender but got %T", bl) + + tl, ok = sl.Appender.(*timeLimitAppender) + require.True(t, ok, "Expected timeLimitAppender but got %T", sl.Appender) + + _, ok = tl.Appender.(nopAppender) + require.True(t, ok, "Expected base appender but got %T", tl.Appender) } func TestScrapePoolRaces(t *testing.T) { @@ -610,7 +624,7 @@ func TestScrapeLoopStopBeforeRun(t *testing.T) { nopMutator, nil, nil, 0, true, - 0, + 0, 0, nil, 1, 0, @@ -682,7 +696,7 @@ func TestScrapeLoopStop(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 10*time.Millisecond, time.Hour, @@ -758,7 +772,7 @@ func TestScrapeLoopRun(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, time.Second, time.Hour, @@ -813,7 +827,7 @@ func TestScrapeLoopRun(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, time.Second, 100*time.Millisecond, @@ -872,7 +886,7 @@ func TestScrapeLoopForcedErr(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, time.Second, time.Hour, @@ -930,7 +944,7 @@ func TestScrapeLoopMetadata(t *testing.T) { cache, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -987,7 +1001,7 @@ func simpleTestScrapeLoop(t testing.TB) (context.Context, *scrapeLoop) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -1047,7 +1061,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -1125,7 +1139,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 10*time.Millisecond, time.Hour, @@ -1188,7 +1202,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 10*time.Millisecond, time.Hour, @@ -1254,7 +1268,7 @@ func TestScrapeLoopCache(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 10*time.Millisecond, time.Hour, @@ -1337,7 +1351,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 10*time.Millisecond, time.Hour, @@ -1451,7 +1465,7 @@ func TestScrapeLoopAppend(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -1546,7 +1560,7 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { return mutateSampleLabels(l, &Target{labels: labels.FromStrings(tc.targetLabels...)}, false, nil) }, nil, - func(ctx context.Context) storage.Appender { return app }, nil, 0, true, 0, nil, 0, 0, false, false, nil, false, + func(ctx context.Context) storage.Appender { return app }, nil, 0, true, 0, 0, nil, 0, 0, false, false, nil, false, ) slApp := sl.appender(context.Background()) _, _, _, err := sl.append(slApp, []byte(tc.exposedLabels), "", time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC)) @@ -1577,7 +1591,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -1635,7 +1649,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { nil, 0, true, - app.limit, + app.limit, 0, nil, 0, 0, @@ -1712,7 +1726,7 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -1760,7 +1774,7 @@ func TestScrapeLoopAppendStaleness(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -1811,7 +1825,7 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -1922,7 +1936,7 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000 nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -1987,7 +2001,7 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -2039,7 +2053,7 @@ func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 10*time.Millisecond, time.Hour, @@ -2075,7 +2089,7 @@ func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 10*time.Millisecond, time.Hour, @@ -2124,7 +2138,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -2169,7 +2183,7 @@ func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -2441,7 +2455,7 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) { func(ctx context.Context) storage.Appender { return capp }, nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -2482,7 +2496,7 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) { func(ctx context.Context) storage.Appender { return capp }, nil, 0, false, - 0, + 0, 0, nil, 0, 0, @@ -2522,7 +2536,7 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -2580,7 +2594,7 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -2843,7 +2857,7 @@ func TestScrapeAddFast(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 0, 0, @@ -2929,7 +2943,7 @@ func TestScrapeReportSingleAppender(t *testing.T) { nil, 0, true, - 0, + 0, 0, nil, 10*time.Millisecond, time.Hour, @@ -3131,7 +3145,7 @@ func TestScrapeLoopLabelLimit(t *testing.T) { nil, 0, true, - 0, + 0, 0, &test.labelLimits, 0, 0, diff --git a/scrape/target.go b/scrape/target.go index 6c470311865..f916e454903 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -27,6 +27,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" "github.com/prometheus/prometheus/model/textparse" @@ -355,6 +356,31 @@ func (app *timeLimitAppender) Append(ref storage.SeriesRef, lset labels.Labels, return ref, nil } +// bucketLimitAppender limits the number of total appended samples in a batch. +type bucketLimitAppender struct { + storage.Appender + + limit int +} + +func (app *bucketLimitAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + if len(h.PositiveBuckets)+len(h.NegativeBuckets) > app.limit { + return 0, storage.ErrHistogramBucketLimit + } + } + if fh != nil { + if len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > app.limit { + return 0, storage.ErrHistogramBucketLimit + } + } + ref, err := app.Appender.AppendHistogram(ref, lset, t, h, fh) + if err != nil { + return 0, err + } + return ref, nil +} + // PopulateLabels builds a label set from the given label set and scrape configuration. // It returns a label set before relabeling was applied as the second return value. // Returns the original discovered label set found before relabelling was applied if the target is dropped during relabeling. diff --git a/storage/interface.go b/storage/interface.go index b282f1fc628..18119f25477 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -46,6 +46,7 @@ var ( ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") + ErrHistogramBucketLimit = errors.New("histogram bucket limit exceeded") ) // SeriesRef is a generic series reference. In prometheus it is either a From d3ad158a660de06201d28a384cc2175e0488bab2 Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Sat, 22 Apr 2023 03:14:19 +0800 Subject: [PATCH 2/8] Update docs and comments Signed-off-by: Jeanette Tan --- config/config.go | 4 ++-- docs/configuration/configuration.md | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/config/config.go b/config/config.go index 31ebb288e30..956e1c6c651 100644 --- a/config/config.go +++ b/config/config.go @@ -489,8 +489,8 @@ type ScrapeConfig struct { // More than this label value length post metric-relabeling will cause the // scrape to fail. LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"` - // More than this many buckets in a native histogram will cause the scrape to - // fail. + // More than this many buckets in a native histogram will cause the histogram + // to be ignored, but it will not make the whole scrape fail. NativeHistogramBucketLimit uint `yaml:"bucket_limit,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index f27f8256a54..f51227320df 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -376,6 +376,11 @@ metric_relabel_configs: # 0 means no limit. This is an experimental feature, this behaviour could # change in the future. [ target_limit: | default = 0 ] + +# Limit on total number of positive and negative buckets allowed in a native +# histogram. If this is exceeded, the histogram will be ignored, but this will +# not make the scrape fail. 0 means no limit. +[ sample_limit: | default = 0 ] ``` Where `` must be unique across all scrape configurations. From 071426f72f3546149b8d6d6ae3c77bf044e8fc93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Sat, 22 Apr 2023 03:14:19 +0800 Subject: [PATCH 3/8] Add unit test for bucket limit appender MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors textparser test to use a common test utility to create protobuf representation from MetricFamily Signed-off-by: György Krajcsovits --- model/textparse/protobufparse_test.go | 12 +--- scrape/scrape_test.go | 90 +++++++++++++++++++++++++++ util/testutil/protobuf.go | 50 +++++++++++++++ 3 files changed, 143 insertions(+), 9 deletions(-) create mode 100644 util/testutil/protobuf.go diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index 90c6a90f326..fb8669197e3 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -15,7 +15,6 @@ package textparse import ( "bytes" - "encoding/binary" "errors" "io" "testing" @@ -26,8 +25,9 @@ import ( "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/testutil" - dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" + dto "github.com/prometheus/client_model/go" ) func TestProtobufParse(t *testing.T) { @@ -449,7 +449,6 @@ metric: < `, } - varintBuf := make([]byte, binary.MaxVarintLen32) inputBuf := &bytes.Buffer{} for _, tmf := range textMetricFamilies { @@ -457,13 +456,8 @@ metric: < // From text to proto message. require.NoError(t, proto.UnmarshalText(tmf, pb)) // From proto message to binary protobuf. - protoBuf, err := proto.Marshal(pb) + err := testutil.AddMetricFamilyToProtobuf(inputBuf, pb) require.NoError(t, err) - - // Write first length, then binary protobuf. - varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) - inputBuf.Write(varintBuf[:varintLength]) - inputBuf.Write(protoBuf) } exp := []struct { diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index c815bf5c601..b5cabea52ef 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -30,6 +30,7 @@ import ( "github.com/go-kit/log" "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -1709,6 +1710,95 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { require.Equal(t, 0, seriesAdded) } +func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { + resApp := &collectResultAppender{} + app := &bucketLimitAppender{Appender: resApp, limit: 2} + + sl := newScrapeLoop(context.Background(), + nil, nil, nil, + func(l labels.Labels) labels.Labels { + if l.Has("deleteme") { + return labels.EmptyLabels() + } + return l + }, + nopMutator, + func(ctx context.Context) storage.Appender { return app }, + nil, + 0, + true, + app.limit, 0, + nil, + 0, + 0, + false, + false, + nil, + false, + ) + + metric := dto.Metric{} + err := targetScrapeNativeHistogramBucketLimit.Write(&metric) + require.NoError(t, err) + beforeMetricValue := metric.GetCounter().GetValue() + + nativeHistogram := prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "testing", + Name: "example_native_histogram", + Help: "This is used for testing", + ConstLabels: map[string]string{"some": "value"}, + NativeHistogramBucketFactor: 1.1, // 10% increase from bucket to bucket + NativeHistogramMaxBucketNumber: 100, // intentionally higher than the limit we'll use in the scraper + }) + registry := prometheus.NewRegistry() + registry.Register(nativeHistogram) + nativeHistogram.Observe(1.0) + nativeHistogram.Observe(10.0) // in different bucket since > 1*1.1 + + gathered, err := registry.Gather() + require.NoError(t, err) + require.NotEmpty(t, gathered) + + histogramMetricFamily := gathered[0] + msg, err := testutil.MetricFamilyToProtobuf(histogramMetricFamily) + require.NoError(t, err) + + now := time.Now() + total, added, seriesAdded, err := sl.append(app, msg, "application/vnd.google.protobuf", now) + require.NoError(t, err) + require.Equal(t, 1, total) + require.Equal(t, 1, added) + require.Equal(t, 1, seriesAdded) + + err = targetScrapeNativeHistogramBucketLimit.Write(&metric) + require.NoError(t, err) + metricValue := metric.GetCounter().GetValue() + require.Equal(t, beforeMetricValue, metricValue) + beforeMetricValue = metricValue + + nativeHistogram.Observe(100.0) // in different bucket since > 10*1.1 + + gathered, err = registry.Gather() + require.NoError(t, err) + require.NotEmpty(t, gathered) + + histogramMetricFamily = gathered[0] + msg, err = testutil.MetricFamilyToProtobuf(histogramMetricFamily) + require.NoError(t, err) + + now = time.Now() + total, added, seriesAdded, err = sl.append(app, msg, "application/vnd.google.protobuf", now) + require.NoError(t, err) + require.Equal(t, 1, total) + require.Equal(t, 1, added) + require.Equal(t, 0, seriesAdded) + + err = targetScrapeNativeHistogramBucketLimit.Write(&metric) + require.NoError(t, err) + metricValue = metric.GetCounter().GetValue() + require.Equal(t, beforeMetricValue+1, metricValue) +} + func TestScrapeLoop_ChangingMetricString(t *testing.T) { // This is a regression test for the scrape loop cache not properly maintaining // IDs when the string representation of a metric changes across a scrape. Thus diff --git a/util/testutil/protobuf.go b/util/testutil/protobuf.go new file mode 100644 index 00000000000..8650f137301 --- /dev/null +++ b/util/testutil/protobuf.go @@ -0,0 +1,50 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "bytes" + "encoding/binary" + + "github.com/gogo/protobuf/proto" + dto "github.com/prometheus/client_model/go" +) + +// Write a MetricFamily into a protobuf +func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) { + buffer := &bytes.Buffer{} + err := AddMetricFamilyToProtobuf(buffer, metricFamily) + if err != nil { + return nil, err + } + return buffer.Bytes(), nil +} + +// Append a MetricFamily protobuf representation to a buffer +func AddMetricFamilyToProtobuf(buffer *bytes.Buffer, metricFamily *dto.MetricFamily) error { + protoBuf, err := proto.Marshal(metricFamily) + if err != nil { + return err + } + + varintBuf := make([]byte, binary.MaxVarintLen32) + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + + _, err = buffer.Write(varintBuf[:varintLength]) + if err != nil { + return err + } + _, err = buffer.Write(protoBuf) + return err +} From 2ad39baa726958d9688ae2094bcedf7002c46c56 Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Sat, 22 Apr 2023 03:14:19 +0800 Subject: [PATCH 4/8] Treat bucket limit like sample limit and make it fail the whole scrape and return an error Signed-off-by: Jeanette Tan --- config/config.go | 4 +-- docs/configuration/configuration.md | 4 +-- scrape/scrape.go | 40 ++++++++++++++++------------- scrape/scrape_test.go | 7 +++-- scrape/target.go | 9 ++++--- storage/interface.go | 1 - 6 files changed, 37 insertions(+), 28 deletions(-) diff --git a/config/config.go b/config/config.go index 956e1c6c651..31ebb288e30 100644 --- a/config/config.go +++ b/config/config.go @@ -489,8 +489,8 @@ type ScrapeConfig struct { // More than this label value length post metric-relabeling will cause the // scrape to fail. LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"` - // More than this many buckets in a native histogram will cause the histogram - // to be ignored, but it will not make the whole scrape fail. + // More than this many buckets in a native histogram will cause the scrape to + // fail. NativeHistogramBucketLimit uint `yaml:"bucket_limit,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index f51227320df..cfb11e21e46 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -378,8 +378,8 @@ metric_relabel_configs: [ target_limit: | default = 0 ] # Limit on total number of positive and negative buckets allowed in a native -# histogram. If this is exceeded, the histogram will be ignored, but this will -# not make the scrape fail. 0 means no limit. +# histogram. If this is exceeded, the entire scrape will be treated as failed. +# 0 means no limit. [ sample_limit: | default = 0 ] ``` diff --git a/scrape/scrape.go b/scrape/scrape.go index 7c24c1070b5..179adbb2a2b 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -194,7 +194,7 @@ var ( targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter( prometheus.CounterOpts{ Name: "prometheus_target_scrapes_histogram_exceeded_bucket_limit_total", - Help: "Total number of native histograms rejected due to exceeding the bucket limit.", + Help: "Total number of scrapes that hit the native histograms bucket limit and were rejected.", }, ) ) @@ -1485,11 +1485,10 @@ func (sl *scrapeLoop) getCache() *scrapeCache { } type appendErrors struct { - numOutOfOrder int - numDuplicates int - numOutOfBounds int - numExemplarOutOfOrder int - numHistogramBucketLimit int + numOutOfOrder int + numDuplicates int + numOutOfBounds int + numExemplarOutOfOrder int } func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { @@ -1506,6 +1505,7 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, defTime = timestamp.FromTime(ts) appErrs = appendErrors{} sampleLimitErr error + bucketLimitErr error e exemplar.Exemplar // escapes to heap so hoisted out of loop meta metadata.Metadata metadataChanged bool @@ -1655,7 +1655,7 @@ loop: } else { ref, err = app.Append(ref, lset, t, val) } - sampleAdded, err = sl.checkAddError(ce, met, parsedTimestamp, err, &sampleLimitErr, &appErrs) + sampleAdded, err = sl.checkAddError(ce, met, parsedTimestamp, err, &sampleLimitErr, &bucketLimitErr, &appErrs) if err != nil { if err != storage.ErrNotFound { level.Debug(sl.l).Log("msg", "Unexpected error", "series", string(met), "err", err) @@ -1669,7 +1669,7 @@ loop: sl.cache.trackStaleness(hash, lset) } sl.cache.addRef(met, ref, lset, hash) - if sampleAdded && sampleLimitErr == nil { + if sampleAdded && sampleLimitErr == nil && bucketLimitErr == nil { seriesAdded++ } } @@ -1705,6 +1705,13 @@ loop: // We only want to increment this once per scrape, so this is Inc'd outside the loop. targetScrapeSampleLimit.Inc() } + if bucketLimitErr != nil { + if err == nil { + err = bucketLimitErr // if sample limit is hit, that error takes precedence + } + // We only want to increment this once per scrape, so this is Inc'd outside the loop. + targetScrapeNativeHistogramBucketLimit.Inc() + } if appErrs.numOutOfOrder > 0 { level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) } @@ -1717,9 +1724,6 @@ loop: if appErrs.numExemplarOutOfOrder > 0 { level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) } - if appErrs.numHistogramBucketLimit > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting native histograms that exceeded bucket limit", "num_dropped", appErrs.numHistogramBucketLimit) - } if err == nil { sl.cache.forEachStale(func(lset labels.Labels) bool { // Series no longer exposed, mark it stale. @@ -1737,8 +1741,8 @@ loop: } // Adds samples to the appender, checking the error, and then returns the # of samples added, -// whether the caller should continue to process more samples, and any sample limit errors. -func (sl *scrapeLoop) checkAddError(ce *cacheEntry, met []byte, tp *int64, err error, sampleLimitErr *error, appErrs *appendErrors) (bool, error) { +// whether the caller should continue to process more samples, and any sample or bucket limit errors. +func (sl *scrapeLoop) checkAddError(ce *cacheEntry, met []byte, tp *int64, err error, sampleLimitErr, bucketLimitErr *error, appErrs *appendErrors) (bool, error) { switch errors.Cause(err) { case nil: if tp == nil && ce != nil { @@ -1762,16 +1766,16 @@ func (sl *scrapeLoop) checkAddError(ce *cacheEntry, met []byte, tp *int64, err e level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met)) targetScrapeSampleOutOfBounds.Inc() return false, nil - case storage.ErrHistogramBucketLimit: - appErrs.numHistogramBucketLimit++ - level.Debug(sl.l).Log("msg", "Exceeded bucket limit for native histograms", "series", string(met)) - targetScrapeNativeHistogramBucketLimit.Inc() - return false, nil case errSampleLimit: // Keep on parsing output if we hit the limit, so we report the correct // total number of samples scraped. *sampleLimitErr = err return false, nil + case errBucketLimit: + // Keep on parsing output if we hit the limit, so we report the correct + // total number of samples scraped. + *bucketLimitErr = err + return false, nil default: return false, err } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index b5cabea52ef..8822ed96de4 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -1788,7 +1788,10 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { now = time.Now() total, added, seriesAdded, err = sl.append(app, msg, "application/vnd.google.protobuf", now) - require.NoError(t, err) + if err != errBucketLimit { + t.Fatalf("Did not see expected histogram bucket limit error: %s", err) + } + require.NoError(t, app.Rollback()) require.Equal(t, 1, total) require.Equal(t, 1, added) require.Equal(t, 0, seriesAdded) @@ -3010,7 +3013,7 @@ func TestReuseCacheRace(*testing.T) { func TestCheckAddError(t *testing.T) { var appErrs appendErrors sl := scrapeLoop{l: log.NewNopLogger()} - sl.checkAddError(nil, nil, nil, storage.ErrOutOfOrderSample, nil, &appErrs) + sl.checkAddError(nil, nil, nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) require.Equal(t, 1, appErrs.numOutOfOrder) } diff --git a/scrape/target.go b/scrape/target.go index f916e454903..a655e85413c 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -314,7 +314,10 @@ func (ts Targets) Len() int { return len(ts) } func (ts Targets) Less(i, j int) bool { return ts[i].URL().String() < ts[j].URL().String() } func (ts Targets) Swap(i, j int) { ts[i], ts[j] = ts[j], ts[i] } -var errSampleLimit = errors.New("sample limit exceeded") +var ( + errSampleLimit = errors.New("sample limit exceeded") + errBucketLimit = errors.New("histogram bucket limit exceeded") +) // limitAppender limits the number of total appended samples in a batch. type limitAppender struct { @@ -366,12 +369,12 @@ type bucketLimitAppender struct { func (app *bucketLimitAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { if h != nil { if len(h.PositiveBuckets)+len(h.NegativeBuckets) > app.limit { - return 0, storage.ErrHistogramBucketLimit + return 0, errBucketLimit } } if fh != nil { if len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > app.limit { - return 0, storage.ErrHistogramBucketLimit + return 0, errBucketLimit } } ref, err := app.Appender.AppendHistogram(ref, lset, t, h, fh) diff --git a/storage/interface.go b/storage/interface.go index 18119f25477..b282f1fc628 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -46,7 +46,6 @@ var ( ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") - ErrHistogramBucketLimit = errors.New("histogram bucket limit exceeded") ) // SeriesRef is a generic series reference. In prometheus it is either a From dfabc69303a92804883f597570569b0e95fd9836 Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Tue, 25 Apr 2023 01:41:04 +0800 Subject: [PATCH 5/8] Add tests according to code review Signed-off-by: Jeanette Tan --- scrape/scrape_test.go | 38 +++++++++++++++------------ scrape/target_test.go | 61 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 16 deletions(-) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 8822ed96de4..745765ee8f5 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -1742,18 +1742,24 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { require.NoError(t, err) beforeMetricValue := metric.GetCounter().GetValue() - nativeHistogram := prometheus.NewHistogram(prometheus.HistogramOpts{ - Namespace: "testing", - Name: "example_native_histogram", - Help: "This is used for testing", - ConstLabels: map[string]string{"some": "value"}, - NativeHistogramBucketFactor: 1.1, // 10% increase from bucket to bucket - NativeHistogramMaxBucketNumber: 100, // intentionally higher than the limit we'll use in the scraper - }) + nativeHistogram := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "testing", + Name: "example_native_histogram", + Help: "This is used for testing", + ConstLabels: map[string]string{"some": "value"}, + NativeHistogramBucketFactor: 1.1, // 10% increase from bucket to bucket + NativeHistogramMaxBucketNumber: 100, // intentionally higher than the limit we'll use in the scraper + }, + []string{"size"}, + ) registry := prometheus.NewRegistry() registry.Register(nativeHistogram) - nativeHistogram.Observe(1.0) - nativeHistogram.Observe(10.0) // in different bucket since > 1*1.1 + nativeHistogram.WithLabelValues("S").Observe(1.0) + nativeHistogram.WithLabelValues("M").Observe(1.0) + nativeHistogram.WithLabelValues("L").Observe(1.0) + nativeHistogram.WithLabelValues("M").Observe(10.0) + nativeHistogram.WithLabelValues("L").Observe(10.0) // in different bucket since > 1*1.1 gathered, err := registry.Gather() require.NoError(t, err) @@ -1766,9 +1772,9 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { now := time.Now() total, added, seriesAdded, err := sl.append(app, msg, "application/vnd.google.protobuf", now) require.NoError(t, err) - require.Equal(t, 1, total) - require.Equal(t, 1, added) - require.Equal(t, 1, seriesAdded) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 3, seriesAdded) err = targetScrapeNativeHistogramBucketLimit.Write(&metric) require.NoError(t, err) @@ -1776,7 +1782,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { require.Equal(t, beforeMetricValue, metricValue) beforeMetricValue = metricValue - nativeHistogram.Observe(100.0) // in different bucket since > 10*1.1 + nativeHistogram.WithLabelValues("L").Observe(100.0) // in different bucket since > 10*1.1 gathered, err = registry.Gather() require.NoError(t, err) @@ -1792,8 +1798,8 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { t.Fatalf("Did not see expected histogram bucket limit error: %s", err) } require.NoError(t, app.Rollback()) - require.Equal(t, 1, total) - require.Equal(t, 1, added) + require.Equal(t, 3, total) + require.Equal(t, 3, added) require.Equal(t, 0, seriesAdded) err = targetScrapeNativeHistogramBucketLimit.Write(&metric) diff --git a/scrape/target_test.go b/scrape/target_test.go index 9d25df41490..12d3b5a4d7f 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -31,6 +31,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" ) @@ -488,3 +489,63 @@ scrape_configs: }) } } + +func TestBucketLimitAppender(t *testing.T) { + example := histogram.Histogram{ + Schema: 0, + Count: 21, + Sum: 33, + ZeroThreshold: 0.001, + ZeroCount: 3, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 3}, + }, + PositiveBuckets: []int64{3, 0, 0}, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 3}, + }, + NegativeBuckets: []int64{3, 0, 0}, + } + + cases := []struct { + h histogram.Histogram + limit int + expectError bool + }{ + { + h: example, + limit: 3, + expectError: true, + }, + { + h: example, + limit: 10, + expectError: false, + }, + } + + resApp := &collectResultAppender{} + + for _, c := range cases { + for _, floatHisto := range []bool{true, false} { + t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { + app := &bucketLimitAppender{Appender: resApp, limit: c.limit} + ts := int64(10 * time.Minute / time.Millisecond) + h := c.h + lbls := labels.FromStrings("__name__", "sparse_histogram_series") + var err error + if floatHisto { + _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat()) + } else { + _, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil) + } + if c.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + }) + } + } +} From e9b2d874431ca464f55ea11685e3278e4c19045a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Thu, 4 May 2023 08:16:37 +0200 Subject: [PATCH 6/8] Revert change to model/textparse/protobufparse_test.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- model/textparse/protobufparse_test.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index fb8669197e3..90c6a90f326 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -15,6 +15,7 @@ package textparse import ( "bytes" + "encoding/binary" "errors" "io" "testing" @@ -25,9 +26,8 @@ import ( "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/util/testutil" - dto "github.com/prometheus/client_model/go" + dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) func TestProtobufParse(t *testing.T) { @@ -449,6 +449,7 @@ metric: < `, } + varintBuf := make([]byte, binary.MaxVarintLen32) inputBuf := &bytes.Buffer{} for _, tmf := range textMetricFamilies { @@ -456,8 +457,13 @@ metric: < // From text to proto message. require.NoError(t, proto.UnmarshalText(tmf, pb)) // From proto message to binary protobuf. - err := testutil.AddMetricFamilyToProtobuf(inputBuf, pb) + protoBuf, err := proto.Marshal(pb) require.NoError(t, err) + + // Write first length, then binary protobuf. + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + inputBuf.Write(varintBuf[:varintLength]) + inputBuf.Write(protoBuf) } exp := []struct { From 19a4f314f53a80e653de6fcbf6b90d70a35fc93d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Thu, 4 May 2023 08:36:44 +0200 Subject: [PATCH 7/8] Refactor testutil/protobuf.go into scrape package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renamed to clientprotobuf.go and added comments to indicate the intended usage. Signed-off-by: György Krajcsovits --- util/testutil/protobuf.go => scrape/clientprotobuf.go | 9 ++++++--- scrape/scrape_test.go | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) rename util/testutil/protobuf.go => scrape/clientprotobuf.go (78%) diff --git a/util/testutil/protobuf.go b/scrape/clientprotobuf.go similarity index 78% rename from util/testutil/protobuf.go rename to scrape/clientprotobuf.go index 8650f137301..bf165e03437 100644 --- a/util/testutil/protobuf.go +++ b/scrape/clientprotobuf.go @@ -11,17 +11,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -package testutil +package scrape import ( "bytes" "encoding/binary" "github.com/gogo/protobuf/proto" + // Intentionally using client model to simulate client in tests. dto "github.com/prometheus/client_model/go" ) -// Write a MetricFamily into a protobuf +// Write a MetricFamily into a protobuf. +// This function is intended for testing scraping by providing protobuf serialized input. func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) { buffer := &bytes.Buffer{} err := AddMetricFamilyToProtobuf(buffer, metricFamily) @@ -31,7 +33,8 @@ func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) { return buffer.Bytes(), nil } -// Append a MetricFamily protobuf representation to a buffer +// Append a MetricFamily protobuf representation to a buffer. +// This function is intended for testing scraping by providing protobuf serialized input. func AddMetricFamilyToProtobuf(buffer *bytes.Buffer, metricFamily *dto.MetricFamily) error { protoBuf, err := proto.Marshal(metricFamily) if err != nil { diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 745765ee8f5..fc3ad926eb6 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -1766,7 +1766,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { require.NotEmpty(t, gathered) histogramMetricFamily := gathered[0] - msg, err := testutil.MetricFamilyToProtobuf(histogramMetricFamily) + msg, err := MetricFamilyToProtobuf(histogramMetricFamily) require.NoError(t, err) now := time.Now() @@ -1789,7 +1789,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { require.NotEmpty(t, gathered) histogramMetricFamily = gathered[0] - msg, err = testutil.MetricFamilyToProtobuf(histogramMetricFamily) + msg, err = MetricFamilyToProtobuf(histogramMetricFamily) require.NoError(t, err) now = time.Now() From 40240c9c1cb290fe95f1e61886b23fab860aeacd Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Fri, 5 May 2023 02:29:50 +0800 Subject: [PATCH 8/8] Update according to code review Signed-off-by: Jeanette Tan --- config/config.go | 2 +- docs/configuration/configuration.md | 8 ++++---- scrape/clientprotobuf.go | 1 + scrape/scrape.go | 14 +++++++------- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/config/config.go b/config/config.go index 31ebb288e30..1b6a6cf6b60 100644 --- a/config/config.go +++ b/config/config.go @@ -491,7 +491,7 @@ type ScrapeConfig struct { LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"` // More than this many buckets in a native histogram will cause the scrape to // fail. - NativeHistogramBucketLimit uint `yaml:"bucket_limit,omitempty"` + NativeHistogramBucketLimit uint `yaml:"native_histogram_bucket_limit,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index cfb11e21e46..0a8c4a5cdf7 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -377,10 +377,10 @@ metric_relabel_configs: # change in the future. [ target_limit: | default = 0 ] -# Limit on total number of positive and negative buckets allowed in a native -# histogram. If this is exceeded, the entire scrape will be treated as failed. -# 0 means no limit. -[ sample_limit: | default = 0 ] +# Limit on total number of positive and negative buckets allowed in a single +# native histogram. If this is exceeded, the entire scrape will be treated as +# failed. 0 means no limit. +[ native_histogram_bucket_limit: | default = 0 ] ``` Where `` must be unique across all scrape configurations. diff --git a/scrape/clientprotobuf.go b/scrape/clientprotobuf.go index bf165e03437..2213268d59c 100644 --- a/scrape/clientprotobuf.go +++ b/scrape/clientprotobuf.go @@ -18,6 +18,7 @@ import ( "encoding/binary" "github.com/gogo/protobuf/proto" + // Intentionally using client model to simulate client in tests. dto "github.com/prometheus/client_model/go" ) diff --git a/scrape/scrape.go b/scrape/scrape.go index 179adbb2a2b..f094ee82576 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -193,8 +193,8 @@ var ( ) targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter( prometheus.CounterOpts{ - Name: "prometheus_target_scrapes_histogram_exceeded_bucket_limit_total", - Help: "Total number of scrapes that hit the native histograms bucket limit and were rejected.", + Name: "prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total", + Help: "Total number of scrapes that hit the native histogram bucket limit and were rejected.", }, ) ) @@ -744,17 +744,17 @@ func mutateReportSampleLabels(lset labels.Labels, target *Target) labels.Labels } // appender returns an appender for ingested samples from the target. -func appender(app storage.Appender, limit, bucketLimit int) storage.Appender { +func appender(app storage.Appender, sampleLimit, bucketLimit int) storage.Appender { app = &timeLimitAppender{ Appender: app, maxTime: timestamp.FromTime(time.Now().Add(maxAheadTime)), } - // The limit is applied after metrics are potentially dropped via relabeling. - if limit > 0 { + // The sampleLimit is applied after metrics are potentially dropped via relabeling. + if sampleLimit > 0 { app = &limitAppender{ Appender: app, - limit: limit, + limit: sampleLimit, } } @@ -1707,7 +1707,7 @@ loop: } if bucketLimitErr != nil { if err == nil { - err = bucketLimitErr // if sample limit is hit, that error takes precedence + err = bucketLimitErr // If sample limit is hit, that error takes precedence. } // We only want to increment this once per scrape, so this is Inc'd outside the loop. targetScrapeNativeHistogramBucketLimit.Inc()