Skip to content

Commit

Permalink
scrape: make the initial scrape offset configurable
Browse files Browse the repository at this point in the history
This change changes the `NoJitter` boolean configuration to a
configurable `InitialScrapeOffset` option. This will let serverless
setups configure more freely how it will want to scrape workloads.

This was needed because in some deployments in serverless environments
the scraper (colocated with the workload) might not want to wait an
entire scrape interval (since the target could be short-lived) but also
might not want to scrape immediately when the target is not ready.

Making this configurable lets the scrape controller (OpenTelemetry
Contrib's prometheusreceiver) choose what is appropriate for it
depending on its environment.

Signed-off-by: Ridwan Sharif <ridwanmsharif@google.com>
  • Loading branch information
ridwanmsharif authored and bwplotka committed Oct 20, 2023
1 parent 8600c14 commit 842a791
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 47 deletions.
16 changes: 11 additions & 5 deletions scrape/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,18 @@ type Options struct {
// Optional HTTP client options to use when scraping.
HTTPClientOptions []config_util.HTTPClientOption

// IgnoreJitter causes all targets managed by this manager to be scraped
// as soon as they are discovered. By default, all targets have offset,
// so we spread the scraping load evenly within Prometheus server.
// InitialScrapeOffset controls how long after startup we should scrape all
// targets. By default, all targets have an offset so we spread the
// scraping load evenly within the Prometheus server. Configuring this will
// make it so all targets have the same configured offset, which may be
// undesirable as load is no longer evenly spread. This is useful however
// in serverless deployments where we're sensitive to the initial offsets
// and would like them to be small and configurable.
//
// NOTE(bwplotka): This option is experimental and not used by Prometheus.
// It was created for serverless flavors of OpenTelemetry contrib's prometheusreceiver.
IgnoreJitter bool
// It was created for serverless flavors of OpenTelemetry contrib's
// prometheusreceiver.
InitialScrapeOffset *time.Duration
}

// Manager maintains a set of scrape pools and manages start/stop cycles
Expand Down
102 changes: 62 additions & 40 deletions scrape/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ package scrape

import (
"context"
"errors"
"net/http"
"net/http/httptest"
"net/url"
Expand All @@ -34,7 +33,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
"github.com/prometheus/prometheus/util/runutil"
)

func TestPopulateLabels(t *testing.T) {
Expand Down Expand Up @@ -711,47 +709,88 @@ scrape_configs:
}

func TestManagerStopAfterScrapeAttempt(t *testing.T) {
noOffset := 0 * time.Nanosecond
largeOffset := 99 * time.Hour
oneSecondOffset := 1 * time.Second
tenSecondOffset := 10 * time.Second
for _, tcase := range []struct {
name string
noJitter bool
stop func(m *Manager)
name string
// initialScrapeOffset defines how long to wait before scraping all targets.
initialScrapeOffset *time.Duration
// stopDelay defines how long the scrape loop should run before the stopFunc is run.
stopDelay time.Duration
// stopFunc controls how the manager should be stopped.
stopFunc func(m *Manager)
expectedSamples int
}{
{
name: "no scrape stop, no jitter",
noJitter: true,
stop: func(m *Manager) { m.Stop() },
expectedSamples: 1,
name: "no scrape on stop, no jitter",
initialScrapeOffset: &noOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.Stop() },
expectedSamples: 1,
},
{
name: "no scrape on stop, with jitter",
stop: func(m *Manager) { m.Stop() },
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.Stop() },
expectedSamples: 0,
},
{
name: "scrape on stop, no jitter",
noJitter: true,
stop: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 2,
name: "scrape on stop, no jitter",
initialScrapeOffset: &noOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 2,
},
{
name: "scrape on stop, but initial sample is fresh enough, no jitter",
noJitter: true,
stop: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now().Add(-1 * time.Hour)) },
expectedSamples: 1,
name: "scrape on stop, but initial sample is fresh enough, no jitter",
initialScrapeOffset: &noOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now().Add(-1 * time.Hour)) },
expectedSamples: 1,
},
{
name: "scrape on stop, with jitter",
stop: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 1,
},
{
name: "scrape on stop, with large offset",
initialScrapeOffset: &largeOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 1,
},
{
name: "scrape on stop after 5s, with offset of 1s",
initialScrapeOffset: &oneSecondOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 2,
},
{
name: "scrape on stop after 5s, with offset of 10s",
initialScrapeOffset: &tenSecondOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.StopAfterScrapeAttempt(time.Now()) },
expectedSamples: 1,
},
{
name: "no scrape on stop, with offset of 10s",
initialScrapeOffset: &tenSecondOffset,
stopDelay: 5 * time.Second,
stopFunc: func(m *Manager) { m.Stop() },
expectedSamples: 0,
},
} {
t.Run(tcase.name, func(t *testing.T) {
app := &collectResultAppender{}

// Setup scrape manager.
scrapeManager := NewManager(&Options{
IgnoreJitter: tcase.noJitter,
InitialScrapeOffset: tcase.initialScrapeOffset,

// Extremely high value to turn it off. We don't want to wait minimum 5s, so
// we reload manually.
Expand Down Expand Up @@ -795,27 +834,10 @@ func TestManagerStopAfterScrapeAttempt(t *testing.T) {
})
scrapeManager.reload()

// At this point the first sample is scheduled to be scraped after the initial
// jitter in the background scrape loop go-routine
//
// With jitter the first sample will appear after long time,
// given the extremely long scrape interval configured. We stop right
// away and expect only the last sample due to stop.
//
// With no jitter setting, we expect the first to be added straight away--wait
// for it, before stopping.
if tcase.noJitter {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error {
if countFloatSamples(app, "expected_metric") < 1 {
return errors.New("expected more then one expected_metric sample")
}
return nil
}), "after 5 seconds")
}
// Wait for the defined stop delay, before stopping.
time.Sleep(tcase.stopDelay)

tcase.stop(scrapeManager)
tcase.stopFunc(scrapeManager)

require.Equal(t, tcase.expectedSamples, countFloatSamples(app, "expected_metric"))
})
Expand Down
4 changes: 2 additions & 2 deletions scrape/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -1239,8 +1239,8 @@ func (sl *scrapeLoop) run(errc chan<- error) {
defer close(sl.stopAfterScrapeAttemptCh)

jitterDelayTime := sl.scraper.offset(sl.interval, sl.offsetSeed)
if sl.opts.IgnoreJitter {
jitterDelayTime = 0 * time.Second
if sl.opts.InitialScrapeOffset != nil {
jitterDelayTime = *sl.opts.InitialScrapeOffset
}

select {
Expand Down

0 comments on commit 842a791

Please sign in to comment.