Skip to content

Commit

Permalink
scrape: make the initial scrape offset configurable
Browse files Browse the repository at this point in the history
This change changes the `NoJitter` boolean configuration to a
configurable `InitialScrapeOffset` option. This will let serverless
setups configure more freely how it will want to scrape workloads.

This was needed because in some deployments in serverless environments
the scraper (colocated with the workload) might not want to wait an
entire scrape interval (since the target could be short-lived) but also
might not want to scrape immediately when the target is not ready.

Making this configurable lets the scrape controller (OpenTelemetry
Contrib's prometheusreceiver) choose what is appropriate for it
depending on its environment.

Signed-off-by: Ridwan Sharif <ridwanmsharif@google.com>
  • Loading branch information
ridwanmsharif authored and bwplotka committed Oct 20, 2023
1 parent 8600c14 commit 842a791
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 47 deletions.
16 changes: 11 additions & 5 deletions scrape/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,18 @@ type Options struct {
// Optional HTTP client options to use when scraping.
HTTPClientOptions []config_util.HTTPClientOption

// IgnoreJitter causes all targets managed by this manager to be scraped
// as soon as they are discovered. By default, all targets have offset,
// so we spread the scraping load evenly within Prometheus server.
// InitialScrapeOffset controls how long after startup we should scrape all
// targets. By default, all targets have an offset so we spread the
// scraping load evenly within the Prometheus server. Configuring this will
// make it so all targets have the same configured offset, which may be
// undesirable as load is no longer evenly spread. This is useful however
// in serverless deployments where we're sensitive to the initial offsets
// and would like them to be small and configurable.
//
// NOTE(bwplotka): This option is experimental and not used by Prometheus.
// It was created for serverless flavors of OpenTelemetry contrib's prometheusreceiver.
IgnoreJitter bool
// It was created for serverless flavors of OpenTelemetry contrib's
// prometheusreceiver.
InitialScrapeOffset *time.Duration
}

// Manager maintains a set of scrape pools and manages start/stop cycles
Expand Down
102 changes: 62 additions & 40 deletions scrape/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ package scrape

import (
"context"
"errors"
"net/http"
"net/http/httptest"
"net/url"
Expand All @@ -34,7 +33,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
"github.com/prometheus/prometheus/util/runutil"
)

func TestPopulateLabels(t *testing.T) {
Expand Down Expand Up @@ -711,47 +709,88 @@ scrape_configs:
}

func TestManagerStopAfterScrapeAttempt(t *testing.T) {
noOffset := 0 * time.Nanosecond
largeOffset := 99 * time.Hour
oneSecondOffset := 1 * time.Second
tenSecondOffset := 10 * time.Second
for _, tcase := range []struct {
name string
noJitter bool
stop func(m *Manager)
name string
// initialScrapeOffset defines how long to wait before scraping all targets.
initialScrapeOffset *time.Duration
// stopDelay defines how long the scrape loop should run before the stopFunc is run.
stopDelay time.Duration
// stopFunc controls how the manager should be stopped.
stopFunc func(m *Manager)
expectedSamples int
}{
{
name: "no scrape stop, no jitter",
noJitter: true,
stop: func(m *Manager) { m.Stop() },
expectedSamples: 1,
name: "no scrape on stop, no jitter",
initialScrapeOffset: &noOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.Stop() },
expectedSamples: 1,
},
{
name: "no scrape on stop, with jitter",
stop: func(m *Manager) { m.Stop() },
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.Stop() },
expectedSamples: 0,
},
{
name: "scrape on stop, no jitter",
noJitter: true,
stop: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 2,
name: "scrape on stop, no jitter",
initialScrapeOffset: &noOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 2,
},
{
name: "scrape on stop, but initial sample is fresh enough, no jitter",
noJitter: true,
stop: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now().Add(-1 * time.Hour)) },
expectedSamples: 1,
name: "scrape on stop, but initial sample is fresh enough, no jitter",
initialScrapeOffset: &noOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now().Add(-1 * time.Hour)) },
expectedSamples: 1,
},
{
name: "scrape on stop, with jitter",
stop: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 1,
},
{
name: "scrape on stop, with large offset",
initialScrapeOffset: &largeOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 1,
},
{
name: "scrape on stop after 5s, with offset of 1s",
initialScrapeOffset: &oneSecondOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 2,
},
{
name: "scrape on stop after 5s, with offset of 10s",
initialScrapeOffset: &tenSecondOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 1,
},
{
name: "no scrape on stop, with offset of 10s",
initialScrapeOffset: &tenSecondOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.Stop() },
expectedSamples: 0,
},
} {
t.Run(tcase.name, func(t *testing.T) {
app := &collectResultAppender{}

// Setup scrape manager.
scrapeManager := NewManager(&Options{
IgnoreJitter: tcase.noJitter,
InitialScrapeOffset: tcase.initialScrapeOffset,

// Extremely high value to turn it off. We don't want to wait minimum 5s, so
// we reload manually.
Expand Down Expand Up @@ -795,27 +834,10 @@ func TestManagerStopAfterScrapeAttempt(t *testing.T) {
})
scrapeManager.reload()

// At this point the first sample is scheduled to be scraped after the initial
// jitter in the background scrape loop go-routine
//
// With jitter the first sample will appear after long time,
// given the extremely long scrape interval configured. We stop right
// away and expect only the last sample due to stop.
//
// With no jitter setting, we expect the first to be added straight away--wait
// for it, before stopping.
if tcase.noJitter {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error {
if countFloatSamples(app, "expected_metric") < 1 {
return errors.New("expected more then one expected_metric sample")
}
return nil
}), "after 5 seconds")
}
// Wait for the defined stop delay, before stopping.
time.Sleep(tcase.stopDelay)

tcase.stop(scrapeManager)
tcase.stopFunc(scrapeManager)

require.Equal(t, tcase.expectedSamples, countFloatSamples(app, "expected_metric"))
})
Expand Down
4 changes: 2 additions & 2 deletions scrape/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -1239,8 +1239,8 @@ func (sl *scrapeLoop) run(errc chan<- error) {
defer close(sl.stopAfterScrapeAttemptCh)

jitterDelayTime := sl.scraper.offset(sl.interval, sl.offsetSeed)
if sl.opts.IgnoreJitter {
jitterDelayTime = 0 * time.Second
if sl.opts.InitialScrapeOffset != nil {
jitterDelayTime = *sl.opts.InitialScrapeOffset
}

select {
Expand Down

0 comments on commit 842a791

Please sign in to comment.