From 7d501729c37124018a1e852a7732396f7d89bd7c Mon Sep 17 00:00:00 2001 From: Dylan Guedes Date: Thu, 13 Jan 2022 05:55:17 -0300 Subject: [PATCH] Add missing forked gateway tests (#5118) * Add missing tests for `storegateway`. * Fork cortex tsdb testutil package. * Fix import lint. --- pkg/storage/tsdb/testutil/block_mock.go | 68 + pkg/storage/tsdb/testutil/objstore.go | 26 + .../bucket_index_metadata_fetcher_test.go | 323 +++++ .../bucket_store_inmemory_server.go | 66 + pkg/storegateway/bucket_store_metrics_test.go | 510 ++++++++ pkg/storegateway/bucket_stores_test.go | 616 +++++++++ pkg/storegateway/chunk_bytes_pool_test.go | 37 + pkg/storegateway/gateway_ring_test.go | 72 ++ pkg/storegateway/gateway_test.go | 1132 +++++++++++++++++ .../metadata_fetcher_filters_test.go | 107 ++ .../metadata_fetcher_metrics_test.go | 111 ++ pkg/storegateway/partitioner_test.go | 58 + pkg/storegateway/sharding_strategy_test.go | 670 ++++++++++ 13 files changed, 3796 insertions(+) create mode 100644 pkg/storage/tsdb/testutil/block_mock.go create mode 100644 pkg/storage/tsdb/testutil/objstore.go create mode 100644 pkg/storegateway/bucket_index_metadata_fetcher_test.go create mode 100644 pkg/storegateway/bucket_store_inmemory_server.go create mode 100644 pkg/storegateway/bucket_store_metrics_test.go create mode 100644 pkg/storegateway/bucket_stores_test.go create mode 100644 pkg/storegateway/chunk_bytes_pool_test.go create mode 100644 pkg/storegateway/gateway_ring_test.go create mode 100644 pkg/storegateway/gateway_test.go create mode 100644 pkg/storegateway/metadata_fetcher_filters_test.go create mode 100644 pkg/storegateway/metadata_fetcher_metrics_test.go create mode 100644 pkg/storegateway/partitioner_test.go create mode 100644 pkg/storegateway/sharding_strategy_test.go diff --git a/pkg/storage/tsdb/testutil/block_mock.go b/pkg/storage/tsdb/testutil/block_mock.go new file mode 100644 index 0000000000000..a2a931aa59cd7 --- /dev/null +++ b/pkg/storage/tsdb/testutil/block_mock.go @@ 
-0,0 +1,68 @@ +package testutil + +import ( + "context" + "crypto/rand" + "encoding/json" + "fmt" + "strings" + "testing" + "time" + + "github.com/oklog/ulid" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/objstore" +) + +func MockStorageBlock(t testing.TB, bucket objstore.Bucket, userID string, minT, maxT int64) tsdb.BlockMeta { + // Generate a block ID whose timestamp matches the maxT (for simplicity we assume it + // has been compacted and shipped in zero time, even if not realistic). + id := ulid.MustNew(uint64(maxT), rand.Reader) + + meta := tsdb.BlockMeta{ + Version: 1, + ULID: id, + MinTime: minT, + MaxTime: maxT, + Compaction: tsdb.BlockMetaCompaction{ + Level: 1, + Sources: []ulid.ULID{id}, + }, + } + + metaContent, err := json.Marshal(meta) + if err != nil { + panic("failed to marshal mocked block meta") + } + + metaContentReader := strings.NewReader(string(metaContent)) + metaPath := fmt.Sprintf("%s/%s/meta.json", userID, id.String()) + require.NoError(t, bucket.Upload(context.Background(), metaPath, metaContentReader)) + + // Upload an empty index, just to make sure the meta.json is not the only object in the block location. 
+ indexPath := fmt.Sprintf("%s/%s/index", userID, id.String()) + require.NoError(t, bucket.Upload(context.Background(), indexPath, strings.NewReader(""))) + + return meta +} + +func MockStorageDeletionMark(t testing.TB, bucket objstore.Bucket, userID string, meta tsdb.BlockMeta) *metadata.DeletionMark { + mark := metadata.DeletionMark{ + ID: meta.ULID, + DeletionTime: time.Now().Add(-time.Minute).Unix(), + Version: metadata.DeletionMarkVersion1, + } + + markContent, err := json.Marshal(mark) + if err != nil { + panic("failed to marshal mocked block meta") + } + + markContentReader := strings.NewReader(string(markContent)) + markPath := fmt.Sprintf("%s/%s/%s", userID, meta.ULID.String(), metadata.DeletionMarkFilename) + require.NoError(t, bucket.Upload(context.Background(), markPath, markContentReader)) + + return &mark +} diff --git a/pkg/storage/tsdb/testutil/objstore.go b/pkg/storage/tsdb/testutil/objstore.go new file mode 100644 index 0000000000000..94d12aed64782 --- /dev/null +++ b/pkg/storage/tsdb/testutil/objstore.go @@ -0,0 +1,26 @@ +package testutil + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/objstore" + + "github.com/cortexproject/cortex/pkg/storage/bucket/filesystem" +) + +func PrepareFilesystemBucket(t testing.TB) (objstore.Bucket, string) { + storageDir, err := ioutil.TempDir(os.TempDir(), "bucket") + require.NoError(t, err) + + t.Cleanup(func() { + require.NoError(t, os.RemoveAll(storageDir)) + }) + + bkt, err := filesystem.NewBucketClient(filesystem.Config{Directory: storageDir}) + require.NoError(t, err) + + return objstore.BucketWithMetrics("test", bkt, nil), storageDir +} diff --git a/pkg/storegateway/bucket_index_metadata_fetcher_test.go b/pkg/storegateway/bucket_index_metadata_fetcher_test.go new file mode 100644 index 0000000000000..617cfb343dd49 --- /dev/null +++ b/pkg/storegateway/bucket_index_metadata_fetcher_test.go @@ -0,0 +1,323 @@ +package storegateway + +import 
( + "bytes" + "context" + "path" + "strings" + "testing" + "time" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" + "github.com/go-kit/log" + "github.com/grafana/dskit/concurrency" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + + tsdb_testutil "github.com/grafana/loki/pkg/storage/tsdb/testutil" +) + +func TestBucketIndexMetadataFetcher_Fetch(t *testing.T) { + const userID = "user-1" + + bkt, _ := tsdb_testutil.PrepareFilesystemBucket(t) + reg := prometheus.NewPedanticRegistry() + ctx := context.Background() + now := time.Now() + logs := &concurrency.SyncBuffer{} + logger := log.NewLogfmtLogger(logs) + + // Create a bucket index. + block1 := &bucketindex.Block{ID: ulid.MustNew(1, nil)} + block2 := &bucketindex.Block{ID: ulid.MustNew(2, nil)} + block3 := &bucketindex.Block{ID: ulid.MustNew(3, nil)} + mark1 := &bucketindex.BlockDeletionMark{ID: block1.ID, DeletionTime: now.Add(-time.Hour).Unix()} // Below the ignore delay threshold. + mark2 := &bucketindex.BlockDeletionMark{ID: block2.ID, DeletionTime: now.Add(-3 * time.Hour).Unix()} // Above the ignore delay threshold. + + require.NoError(t, bucketindex.WriteIndex(ctx, bkt, userID, nil, &bucketindex.Index{ + Version: bucketindex.IndexVersion1, + Blocks: bucketindex.Blocks{block1, block2, block3}, + BlockDeletionMarks: bucketindex.BlockDeletionMarks{mark1, mark2}, + UpdatedAt: now.Unix(), + })) + + // Create a metadata fetcher with filters. 
+ filters := []block.MetadataFilter{ + NewIgnoreDeletionMarkFilter(logger, bucket.NewUserBucketClient(userID, bkt, nil), 2*time.Hour, 1), + } + + fetcher := NewBucketIndexMetadataFetcher(userID, bkt, NewNoShardingStrategy(), nil, logger, reg, filters, nil) + metas, partials, err := fetcher.Fetch(ctx) + require.NoError(t, err) + assert.Equal(t, map[ulid.ULID]*metadata.Meta{ + block1.ID: block1.ThanosMeta(userID), + block3.ID: block3.ThanosMeta(userID), + }, metas) + assert.Empty(t, partials) + assert.Empty(t, logs) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP blocks_meta_modified Number of blocks whose metadata changed + # TYPE blocks_meta_modified gauge + blocks_meta_modified{modified="replica-label-removed"} 0 + + # HELP blocks_meta_sync_failures_total Total blocks metadata synchronization failures + # TYPE blocks_meta_sync_failures_total counter + blocks_meta_sync_failures_total 0 + + # HELP blocks_meta_synced Number of block metadata synced + # TYPE blocks_meta_synced gauge + blocks_meta_synced{state="corrupted-bucket-index"} 0 + blocks_meta_synced{state="corrupted-meta-json"} 0 + blocks_meta_synced{state="duplicate"} 0 + blocks_meta_synced{state="failed"} 0 + blocks_meta_synced{state="label-excluded"} 0 + blocks_meta_synced{state="loaded"} 2 + blocks_meta_synced{state="marked-for-deletion"} 1 + blocks_meta_synced{state="marked-for-no-compact"} 0 + blocks_meta_synced{state="no-bucket-index"} 0 + blocks_meta_synced{state="no-meta-json"} 0 + blocks_meta_synced{state="time-excluded"} 0 + blocks_meta_synced{state="too-fresh"} 0 + + # HELP blocks_meta_syncs_total Total blocks metadata synchronization attempts + # TYPE blocks_meta_syncs_total counter + blocks_meta_syncs_total 1 + `), + "blocks_meta_modified", + "blocks_meta_sync_failures_total", + "blocks_meta_synced", + "blocks_meta_syncs_total", + )) +} + +func TestBucketIndexMetadataFetcher_Fetch_NoBucketIndex(t *testing.T) { + const userID = "user-1" + + bkt, _ := 
tsdb_testutil.PrepareFilesystemBucket(t) + reg := prometheus.NewPedanticRegistry() + ctx := context.Background() + logs := &concurrency.SyncBuffer{} + logger := log.NewLogfmtLogger(logs) + + fetcher := NewBucketIndexMetadataFetcher(userID, bkt, NewNoShardingStrategy(), nil, logger, reg, nil, nil) + metas, partials, err := fetcher.Fetch(ctx) + require.NoError(t, err) + assert.Empty(t, metas) + assert.Empty(t, partials) + assert.Empty(t, logs) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP blocks_meta_modified Number of blocks whose metadata changed + # TYPE blocks_meta_modified gauge + blocks_meta_modified{modified="replica-label-removed"} 0 + + # HELP blocks_meta_sync_failures_total Total blocks metadata synchronization failures + # TYPE blocks_meta_sync_failures_total counter + blocks_meta_sync_failures_total 0 + + # HELP blocks_meta_synced Number of block metadata synced + # TYPE blocks_meta_synced gauge + blocks_meta_synced{state="corrupted-bucket-index"} 0 + blocks_meta_synced{state="corrupted-meta-json"} 0 + blocks_meta_synced{state="duplicate"} 0 + blocks_meta_synced{state="failed"} 0 + blocks_meta_synced{state="label-excluded"} 0 + blocks_meta_synced{state="loaded"} 0 + blocks_meta_synced{state="marked-for-deletion"} 0 + blocks_meta_synced{state="marked-for-no-compact"} 0 + blocks_meta_synced{state="no-bucket-index"} 1 + blocks_meta_synced{state="no-meta-json"} 0 + blocks_meta_synced{state="time-excluded"} 0 + blocks_meta_synced{state="too-fresh"} 0 + + # HELP blocks_meta_syncs_total Total blocks metadata synchronization attempts + # TYPE blocks_meta_syncs_total counter + blocks_meta_syncs_total 1 + `), + "blocks_meta_modified", + "blocks_meta_sync_failures_total", + "blocks_meta_synced", + "blocks_meta_syncs_total", + )) +} + +func TestBucketIndexMetadataFetcher_Fetch_CorruptedBucketIndex(t *testing.T) { + const userID = "user-1" + + bkt, _ := tsdb_testutil.PrepareFilesystemBucket(t) + reg := 
prometheus.NewPedanticRegistry() + ctx := context.Background() + logs := &concurrency.SyncBuffer{} + logger := log.NewLogfmtLogger(logs) + + // Upload a corrupted bucket index. + require.NoError(t, bkt.Upload(ctx, path.Join(userID, bucketindex.IndexCompressedFilename), strings.NewReader("invalid}!"))) + + fetcher := NewBucketIndexMetadataFetcher(userID, bkt, NewNoShardingStrategy(), nil, logger, reg, nil, nil) + metas, partials, err := fetcher.Fetch(ctx) + require.NoError(t, err) + assert.Empty(t, metas) + assert.Empty(t, partials) + assert.Regexp(t, "corrupted bucket index found", logs) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP blocks_meta_modified Number of blocks whose metadata changed + # TYPE blocks_meta_modified gauge + blocks_meta_modified{modified="replica-label-removed"} 0 + + # HELP blocks_meta_sync_failures_total Total blocks metadata synchronization failures + # TYPE blocks_meta_sync_failures_total counter + blocks_meta_sync_failures_total 0 + + # HELP blocks_meta_synced Number of block metadata synced + # TYPE blocks_meta_synced gauge + blocks_meta_synced{state="corrupted-bucket-index"} 1 + blocks_meta_synced{state="corrupted-meta-json"} 0 + blocks_meta_synced{state="duplicate"} 0 + blocks_meta_synced{state="failed"} 0 + blocks_meta_synced{state="label-excluded"} 0 + blocks_meta_synced{state="loaded"} 0 + blocks_meta_synced{state="marked-for-deletion"} 0 + blocks_meta_synced{state="marked-for-no-compact"} 0 + blocks_meta_synced{state="no-bucket-index"} 0 + blocks_meta_synced{state="no-meta-json"} 0 + blocks_meta_synced{state="time-excluded"} 0 + blocks_meta_synced{state="too-fresh"} 0 + + # HELP blocks_meta_syncs_total Total blocks metadata synchronization attempts + # TYPE blocks_meta_syncs_total counter + blocks_meta_syncs_total 1 + `), + "blocks_meta_modified", + "blocks_meta_sync_failures_total", + "blocks_meta_synced", + "blocks_meta_syncs_total", + )) +} + +func 
TestBucketIndexMetadataFetcher_Fetch_ShouldResetGaugeMetrics(t *testing.T) { + const userID = "user-1" + + bkt, _ := tsdb_testutil.PrepareFilesystemBucket(t) + reg := prometheus.NewPedanticRegistry() + ctx := context.Background() + now := time.Now() + logger := log.NewNopLogger() + strategy := &mockShardingStrategy{} + strategy.On("FilterUsers", mock.Anything, mock.Anything).Return([]string{userID}) + + // Corrupted bucket index. + require.NoError(t, bkt.Upload(ctx, path.Join(userID, bucketindex.IndexCompressedFilename), strings.NewReader("invalid}!"))) + + fetcher := NewBucketIndexMetadataFetcher(userID, bkt, strategy, nil, logger, reg, nil, nil) + metas, _, err := fetcher.Fetch(ctx) + require.NoError(t, err) + assert.Len(t, metas, 0) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP blocks_meta_synced Number of block metadata synced + # TYPE blocks_meta_synced gauge + blocks_meta_synced{state="corrupted-bucket-index"} 1 + blocks_meta_synced{state="corrupted-meta-json"} 0 + blocks_meta_synced{state="duplicate"} 0 + blocks_meta_synced{state="failed"} 0 + blocks_meta_synced{state="label-excluded"} 0 + blocks_meta_synced{state="loaded"} 0 + blocks_meta_synced{state="marked-for-deletion"} 0 + blocks_meta_synced{state="marked-for-no-compact"} 0 + blocks_meta_synced{state="no-bucket-index"} 0 + blocks_meta_synced{state="no-meta-json"} 0 + blocks_meta_synced{state="time-excluded"} 0 + blocks_meta_synced{state="too-fresh"} 0 + `), "blocks_meta_synced")) + + // No bucket index. 
+ require.NoError(t, bucketindex.DeleteIndex(ctx, bkt, userID, nil)) + + metas, _, err = fetcher.Fetch(ctx) + require.NoError(t, err) + assert.Len(t, metas, 0) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP blocks_meta_synced Number of block metadata synced + # TYPE blocks_meta_synced gauge + blocks_meta_synced{state="corrupted-bucket-index"} 0 + blocks_meta_synced{state="corrupted-meta-json"} 0 + blocks_meta_synced{state="duplicate"} 0 + blocks_meta_synced{state="failed"} 0 + blocks_meta_synced{state="label-excluded"} 0 + blocks_meta_synced{state="loaded"} 0 + blocks_meta_synced{state="marked-for-deletion"} 0 + blocks_meta_synced{state="marked-for-no-compact"} 0 + blocks_meta_synced{state="no-bucket-index"} 1 + blocks_meta_synced{state="no-meta-json"} 0 + blocks_meta_synced{state="time-excluded"} 0 + blocks_meta_synced{state="too-fresh"} 0 + `), "blocks_meta_synced")) + + // Create a bucket index. + block1 := &bucketindex.Block{ID: ulid.MustNew(1, nil)} + block2 := &bucketindex.Block{ID: ulid.MustNew(2, nil)} + block3 := &bucketindex.Block{ID: ulid.MustNew(3, nil)} + + require.NoError(t, bucketindex.WriteIndex(ctx, bkt, userID, nil, &bucketindex.Index{ + Version: bucketindex.IndexVersion1, + Blocks: bucketindex.Blocks{block1, block2, block3}, + UpdatedAt: now.Unix(), + })) + + metas, _, err = fetcher.Fetch(ctx) + require.NoError(t, err) + assert.Len(t, metas, 3) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP blocks_meta_synced Number of block metadata synced + # TYPE blocks_meta_synced gauge + blocks_meta_synced{state="corrupted-bucket-index"} 0 + blocks_meta_synced{state="corrupted-meta-json"} 0 + blocks_meta_synced{state="duplicate"} 0 + blocks_meta_synced{state="failed"} 0 + blocks_meta_synced{state="label-excluded"} 0 + blocks_meta_synced{state="loaded"} 3 + blocks_meta_synced{state="marked-for-deletion"} 0 + blocks_meta_synced{state="marked-for-no-compact"} 0 + 
blocks_meta_synced{state="no-bucket-index"} 0 + blocks_meta_synced{state="no-meta-json"} 0 + blocks_meta_synced{state="time-excluded"} 0 + blocks_meta_synced{state="too-fresh"} 0 + `), "blocks_meta_synced")) + + // Remove the tenant from the shard. + strategy = &mockShardingStrategy{} + strategy.On("FilterUsers", mock.Anything, mock.Anything).Return([]string{}) + fetcher.strategy = strategy + + metas, _, err = fetcher.Fetch(ctx) + require.NoError(t, err) + assert.Len(t, metas, 0) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP blocks_meta_synced Number of block metadata synced + # TYPE blocks_meta_synced gauge + blocks_meta_synced{state="corrupted-bucket-index"} 0 + blocks_meta_synced{state="corrupted-meta-json"} 0 + blocks_meta_synced{state="duplicate"} 0 + blocks_meta_synced{state="failed"} 0 + blocks_meta_synced{state="label-excluded"} 0 + blocks_meta_synced{state="loaded"} 0 + blocks_meta_synced{state="marked-for-deletion"} 0 + blocks_meta_synced{state="marked-for-no-compact"} 0 + blocks_meta_synced{state="no-bucket-index"} 0 + blocks_meta_synced{state="no-meta-json"} 0 + blocks_meta_synced{state="time-excluded"} 0 + blocks_meta_synced{state="too-fresh"} 0 + `), "blocks_meta_synced")) +} diff --git a/pkg/storegateway/bucket_store_inmemory_server.go b/pkg/storegateway/bucket_store_inmemory_server.go new file mode 100644 index 0000000000000..ff02afb44a725 --- /dev/null +++ b/pkg/storegateway/bucket_store_inmemory_server.go @@ -0,0 +1,66 @@ +package storegateway + +import ( + "context" + + "github.com/gogo/protobuf/types" + "github.com/pkg/errors" + "github.com/prometheus/prometheus/storage" + "github.com/thanos-io/thanos/pkg/store/hintspb" + "github.com/thanos-io/thanos/pkg/store/storepb" +) + +// bucketStoreSeriesServer is an fake in-memory gRPC server used to +// call Thanos BucketStore.Series() without having to go through the +// gRPC networking stack. 
+type bucketStoreSeriesServer struct { + // This field just exist to pseudo-implement the unused methods of the interface. + storepb.Store_SeriesServer + + ctx context.Context + + SeriesSet []*storepb.Series + Warnings storage.Warnings + Hints hintspb.SeriesResponseHints +} + +func newBucketStoreSeriesServer(ctx context.Context) *bucketStoreSeriesServer { + return &bucketStoreSeriesServer{ctx: ctx} +} + +func (s *bucketStoreSeriesServer) Send(r *storepb.SeriesResponse) error { + if r.GetWarning() != "" { + s.Warnings = append(s.Warnings, errors.New(r.GetWarning())) + } + + if rawHints := r.GetHints(); rawHints != nil { + // We expect only 1 hints entry so we just keep 1. + if err := types.UnmarshalAny(rawHints, &s.Hints); err != nil { + return errors.Wrap(err, "failed to unmarshal series hints") + } + } + + if recvSeries := r.GetSeries(); recvSeries != nil { + // Thanos uses a pool for the chunks and may use other pools in the future. + // Given we need to retain the reference after the pooled slices are recycled, + // we need to do a copy here. We prefer to stay on the safest side at this stage + // so we do a marshal+unmarshal to copy the whole series. 
+ recvSeriesData, err := recvSeries.Marshal() + if err != nil { + return errors.Wrap(err, "marshal received series") + } + + copiedSeries := &storepb.Series{} + if err = copiedSeries.Unmarshal(recvSeriesData); err != nil { + return errors.Wrap(err, "unmarshal received series") + } + + s.SeriesSet = append(s.SeriesSet, copiedSeries) + } + + return nil +} + +func (s *bucketStoreSeriesServer) Context() context.Context { + return s.ctx +} diff --git a/pkg/storegateway/bucket_store_metrics_test.go b/pkg/storegateway/bucket_store_metrics_test.go new file mode 100644 index 0000000000000..2990c20a5c6a3 --- /dev/null +++ b/pkg/storegateway/bucket_store_metrics_test.go @@ -0,0 +1,510 @@ +package storegateway + +import ( + "bytes" + "fmt" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" +) + +func TestBucketStoreMetrics(t *testing.T) { + mainReg := prometheus.NewPedanticRegistry() + + tsdbMetrics := NewBucketStoreMetrics() + mainReg.MustRegister(tsdbMetrics) + + tsdbMetrics.AddUserRegistry("user1", populateMockedBucketStoreMetrics(5328)) + tsdbMetrics.AddUserRegistry("user2", populateMockedBucketStoreMetrics(6908)) + tsdbMetrics.AddUserRegistry("user3", populateMockedBucketStoreMetrics(10283)) + + //noinspection ALL + err := testutil.GatherAndCompare(mainReg, bytes.NewBufferString(` + # HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks. + # TYPE cortex_bucket_store_blocks_loaded gauge + cortex_bucket_store_blocks_loaded 22519 + + # HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts. + # TYPE cortex_bucket_store_block_loads_total counter + cortex_bucket_store_block_loads_total 45038 + + # HELP cortex_bucket_store_block_load_failures_total Total number of failed remote block loading attempts. 
+ # TYPE cortex_bucket_store_block_load_failures_total counter + cortex_bucket_store_block_load_failures_total 67557 + + # HELP cortex_bucket_store_block_drops_total Total number of local blocks that were dropped. + # TYPE cortex_bucket_store_block_drops_total counter + cortex_bucket_store_block_drops_total 90076 + + # HELP cortex_bucket_store_block_drop_failures_total Total number of local blocks that failed to be dropped. + # TYPE cortex_bucket_store_block_drop_failures_total counter + cortex_bucket_store_block_drop_failures_total 112595 + + # HELP cortex_bucket_store_series_blocks_queried Number of blocks in a bucket store that were touched to satisfy a query. + # TYPE cortex_bucket_store_series_blocks_queried summary + cortex_bucket_store_series_blocks_queried_sum 1.283583e+06 + cortex_bucket_store_series_blocks_queried_count 9 + + # HELP cortex_bucket_store_series_data_fetched How many items of a data type in a block were fetched for a single series request. + # TYPE cortex_bucket_store_series_data_fetched summary + cortex_bucket_store_series_data_fetched_sum{data_type="fetched-a"} 202671 + cortex_bucket_store_series_data_fetched_count{data_type="fetched-a"} 3 + cortex_bucket_store_series_data_fetched_sum{data_type="fetched-b"} 225190 + cortex_bucket_store_series_data_fetched_count{data_type="fetched-b"} 3 + cortex_bucket_store_series_data_fetched_sum{data_type="fetched-c"} 247709 + cortex_bucket_store_series_data_fetched_count{data_type="fetched-c"} 3 + + # HELP cortex_bucket_store_series_data_size_fetched_bytes Size of all items of a data type in a block were fetched for a single series request. 
+ # TYPE cortex_bucket_store_series_data_size_fetched_bytes summary + cortex_bucket_store_series_data_size_fetched_bytes_sum{data_type="size-fetched-a"} 337785 + cortex_bucket_store_series_data_size_fetched_bytes_count{data_type="size-fetched-a"} 3 + cortex_bucket_store_series_data_size_fetched_bytes_sum{data_type="size-fetched-b"} 360304 + cortex_bucket_store_series_data_size_fetched_bytes_count{data_type="size-fetched-b"} 3 + cortex_bucket_store_series_data_size_fetched_bytes_sum{data_type="size-fetched-c"} 382823 + cortex_bucket_store_series_data_size_fetched_bytes_count{data_type="size-fetched-c"} 3 + + # HELP cortex_bucket_store_series_data_size_touched_bytes Size of all items of a data type in a block were touched for a single series request. + # TYPE cortex_bucket_store_series_data_size_touched_bytes summary + cortex_bucket_store_series_data_size_touched_bytes_sum{data_type="size-touched-a"} 270228 + cortex_bucket_store_series_data_size_touched_bytes_count{data_type="size-touched-a"} 3 + cortex_bucket_store_series_data_size_touched_bytes_sum{data_type="size-touched-b"} 292747 + cortex_bucket_store_series_data_size_touched_bytes_count{data_type="size-touched-b"} 3 + cortex_bucket_store_series_data_size_touched_bytes_sum{data_type="size-touched-c"} 315266 + cortex_bucket_store_series_data_size_touched_bytes_count{data_type="size-touched-c"} 3 + + # HELP cortex_bucket_store_series_data_touched How many items of a data type in a block were touched for a single series request. 
+ # TYPE cortex_bucket_store_series_data_touched summary + cortex_bucket_store_series_data_touched_sum{data_type="touched-a"} 135114 + cortex_bucket_store_series_data_touched_count{data_type="touched-a"} 3 + cortex_bucket_store_series_data_touched_sum{data_type="touched-b"} 157633 + cortex_bucket_store_series_data_touched_count{data_type="touched-b"} 3 + cortex_bucket_store_series_data_touched_sum{data_type="touched-c"} 180152 + cortex_bucket_store_series_data_touched_count{data_type="touched-c"} 3 + + # HELP cortex_bucket_store_series_get_all_duration_seconds Time it takes until all per-block prepares and preloads for a query are finished. + # TYPE cortex_bucket_store_series_get_all_duration_seconds histogram + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="0.001"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="0.3"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="0.6"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="1"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="3"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="6"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="9"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="20"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="30"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="60"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="90"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="120"} 0 + cortex_bucket_store_series_get_all_duration_seconds_bucket{le="+Inf"} 9 + cortex_bucket_store_series_get_all_duration_seconds_sum 1.486254e+06 + cortex_bucket_store_series_get_all_duration_seconds_count 9 + + # HELP 
cortex_bucket_store_series_merge_duration_seconds Time it takes to merge sub-results from all queried blocks into a single result. + # TYPE cortex_bucket_store_series_merge_duration_seconds histogram + cortex_bucket_store_series_merge_duration_seconds_bucket{le="0.001"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="0.3"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="0.6"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="1"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="3"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="6"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="9"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="20"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="30"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="60"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="90"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="120"} 0 + cortex_bucket_store_series_merge_duration_seconds_bucket{le="+Inf"} 9 + cortex_bucket_store_series_merge_duration_seconds_sum 1.688925e+06 + cortex_bucket_store_series_merge_duration_seconds_count 9 + + # HELP cortex_bucket_store_series_refetches_total Total number of cases where the built-in max series size was not enough to fetch series from index, resulting in refetch. + # TYPE cortex_bucket_store_series_refetches_total counter + cortex_bucket_store_series_refetches_total 743127 + + # HELP cortex_bucket_store_series_result_series Number of series observed in the final result of a query. 
+ # TYPE cortex_bucket_store_series_result_series summary + cortex_bucket_store_series_result_series_sum 1.238545e+06 + cortex_bucket_store_series_result_series_count 6 + + # HELP cortex_bucket_store_queries_dropped_total Number of queries that were dropped due to the max chunks per query limit. + # TYPE cortex_bucket_store_queries_dropped_total counter + cortex_bucket_store_queries_dropped_total 698089 + + # HELP cortex_bucket_store_cached_postings_compressions_total Number of postings compressions and decompressions when storing to index cache. + # TYPE cortex_bucket_store_cached_postings_compressions_total counter + cortex_bucket_store_cached_postings_compressions_total{op="encode"} 1125950 + cortex_bucket_store_cached_postings_compressions_total{op="decode"} 1148469 + + # HELP cortex_bucket_store_cached_postings_compression_errors_total Number of postings compression and decompression errors. + # TYPE cortex_bucket_store_cached_postings_compression_errors_total counter + cortex_bucket_store_cached_postings_compression_errors_total{op="encode"} 1170988 + cortex_bucket_store_cached_postings_compression_errors_total{op="decode"} 1193507 + + # HELP cortex_bucket_store_cached_postings_compression_time_seconds Time spent compressing and decompressing postings when storing to / reading from postings cache. + # TYPE cortex_bucket_store_cached_postings_compression_time_seconds counter + cortex_bucket_store_cached_postings_compression_time_seconds{op="encode"} 1216026 + cortex_bucket_store_cached_postings_compression_time_seconds{op="decode"} 1238545 + + # HELP cortex_bucket_store_cached_postings_original_size_bytes_total Original size of postings stored into cache. + # TYPE cortex_bucket_store_cached_postings_original_size_bytes_total counter + cortex_bucket_store_cached_postings_original_size_bytes_total 1261064 + + # HELP cortex_bucket_store_cached_postings_compressed_size_bytes_total Compressed size of postings stored into cache. 
+ # TYPE cortex_bucket_store_cached_postings_compressed_size_bytes_total counter + cortex_bucket_store_cached_postings_compressed_size_bytes_total 1283583 + + # HELP cortex_bucket_store_cached_series_fetch_duration_seconds Time it takes to fetch series to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses. + # TYPE cortex_bucket_store_cached_series_fetch_duration_seconds histogram + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="0.001"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="0.3"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="0.6"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="1"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="3"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="6"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="9"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="20"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="30"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="60"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="90"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="120"} 0 + cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="+Inf"} 3 + cortex_bucket_store_cached_series_fetch_duration_seconds_sum 1.306102e+06 + cortex_bucket_store_cached_series_fetch_duration_seconds_count 3 + + # HELP cortex_bucket_store_cached_postings_fetch_duration_seconds Time it takes to fetch postings to respond a request sent to store-gateway. 
It includes both the time to fetch it from cache and from storage in case of cache misses. + # TYPE cortex_bucket_store_cached_postings_fetch_duration_seconds histogram + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="0.001"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="0.3"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="0.6"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="1"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="3"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="6"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="9"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="20"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="30"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="60"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="90"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="120"} 0 + cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="+Inf"} 3 + cortex_bucket_store_cached_postings_fetch_duration_seconds_sum 1.328621e+06 + cortex_bucket_store_cached_postings_fetch_duration_seconds_count 3 + + # HELP cortex_bucket_store_indexheader_lazy_load_duration_seconds Duration of the index-header lazy loading in seconds. 
+ # TYPE cortex_bucket_store_indexheader_lazy_load_duration_seconds histogram + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="0.02"} 0 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="0.05"} 0 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="0.2"} 0 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="0.5"} 0 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="1"} 3 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="2"} 3 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="5"} 3 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{le="+Inf"} 3 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum 1.9500000000000002 + cortex_bucket_store_indexheader_lazy_load_duration_seconds_count 3 + + # HELP cortex_bucket_store_indexheader_lazy_load_failed_total Total number of failed index-header lazy load operations. + # TYPE cortex_bucket_store_indexheader_lazy_load_failed_total counter + cortex_bucket_store_indexheader_lazy_load_failed_total 1.373659e+06 + + # HELP cortex_bucket_store_indexheader_lazy_load_total Total number of index-header lazy load operations. + # TYPE cortex_bucket_store_indexheader_lazy_load_total counter + cortex_bucket_store_indexheader_lazy_load_total 1.35114e+06 + + # HELP cortex_bucket_store_indexheader_lazy_unload_failed_total Total number of failed index-header lazy unload operations. + # TYPE cortex_bucket_store_indexheader_lazy_unload_failed_total counter + cortex_bucket_store_indexheader_lazy_unload_failed_total 1.418697e+06 + + # HELP cortex_bucket_store_indexheader_lazy_unload_total Total number of index-header lazy unload operations. 
+ # TYPE cortex_bucket_store_indexheader_lazy_unload_total counter + cortex_bucket_store_indexheader_lazy_unload_total 1.396178e+06 +`)) + require.NoError(t, err) +} + +func BenchmarkMetricsCollections10(b *testing.B) { + benchmarkMetricsCollection(b, 10) +} + +func BenchmarkMetricsCollections100(b *testing.B) { + benchmarkMetricsCollection(b, 100) +} + +func BenchmarkMetricsCollections1000(b *testing.B) { + benchmarkMetricsCollection(b, 1000) +} + +func BenchmarkMetricsCollections10000(b *testing.B) { + benchmarkMetricsCollection(b, 10000) +} + +func benchmarkMetricsCollection(b *testing.B, users int) { + mainReg := prometheus.NewRegistry() + + tsdbMetrics := NewBucketStoreMetrics() + mainReg.MustRegister(tsdbMetrics) + + base := 123456.0 + for i := 0; i < users; i++ { + tsdbMetrics.AddUserRegistry(fmt.Sprintf("user-%d", i), populateMockedBucketStoreMetrics(base*float64(i))) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = mainReg.Gather() + } +} + +func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry { + reg := prometheus.NewRegistry() + m := newMockedBucketStoreMetrics(reg) + + m.blocksLoaded.Add(1 * base) + m.blockLoads.Add(2 * base) + m.blockLoadFailures.Add(3 * base) + m.blockDrops.Add(4 * base) + m.blockDropFailures.Add(5 * base) + m.seriesDataTouched.WithLabelValues("touched-a").Observe(6 * base) + m.seriesDataTouched.WithLabelValues("touched-b").Observe(7 * base) + m.seriesDataTouched.WithLabelValues("touched-c").Observe(8 * base) + + m.seriesDataFetched.WithLabelValues("fetched-a").Observe(9 * base) + m.seriesDataFetched.WithLabelValues("fetched-b").Observe(10 * base) + m.seriesDataFetched.WithLabelValues("fetched-c").Observe(11 * base) + + m.seriesDataSizeTouched.WithLabelValues("size-touched-a").Observe(12 * base) + m.seriesDataSizeTouched.WithLabelValues("size-touched-b").Observe(13 * base) + m.seriesDataSizeTouched.WithLabelValues("size-touched-c").Observe(14 * base) + + 
m.seriesDataSizeFetched.WithLabelValues("size-fetched-a").Observe(15 * base) + m.seriesDataSizeFetched.WithLabelValues("size-fetched-b").Observe(16 * base) + m.seriesDataSizeFetched.WithLabelValues("size-fetched-c").Observe(17 * base) + + m.seriesBlocksQueried.Observe(18 * base) + m.seriesBlocksQueried.Observe(19 * base) + m.seriesBlocksQueried.Observe(20 * base) + + m.seriesGetAllDuration.Observe(21 * base) + m.seriesGetAllDuration.Observe(22 * base) + m.seriesGetAllDuration.Observe(23 * base) + + m.seriesMergeDuration.Observe(24 * base) + m.seriesMergeDuration.Observe(25 * base) + m.seriesMergeDuration.Observe(26 * base) + + m.resultSeriesCount.Observe(27 * base) + m.resultSeriesCount.Observe(28 * base) + + m.chunkSizeBytes.Observe(29 * base) + m.chunkSizeBytes.Observe(30 * base) + + m.queriesDropped.WithLabelValues("chunks").Add(31 * base) + m.queriesDropped.WithLabelValues("series").Add(0) + + m.seriesRefetches.Add(33 * base) + + m.cachedPostingsCompressions.WithLabelValues("encode").Add(50 * base) + m.cachedPostingsCompressions.WithLabelValues("decode").Add(51 * base) + + m.cachedPostingsCompressionErrors.WithLabelValues("encode").Add(52 * base) + m.cachedPostingsCompressionErrors.WithLabelValues("decode").Add(53 * base) + + m.cachedPostingsCompressionTimeSeconds.WithLabelValues("encode").Add(54 * base) + m.cachedPostingsCompressionTimeSeconds.WithLabelValues("decode").Add(55 * base) + + m.cachedPostingsOriginalSizeBytes.Add(56 * base) + m.cachedPostingsCompressedSizeBytes.Add(57 * base) + + m.seriesFetchDuration.Observe(58 * base) + m.postingsFetchDuration.Observe(59 * base) + + m.indexHeaderLazyLoadCount.Add(60 * base) + m.indexHeaderLazyLoadFailedCount.Add(61 * base) + m.indexHeaderLazyUnloadCount.Add(62 * base) + m.indexHeaderLazyUnloadFailedCount.Add(63 * base) + m.indexHeaderLazyLoadDuration.Observe(0.65) + + return reg +} + +// copied from Thanos, pkg/store/bucket.go +type mockedBucketStoreMetrics struct { + blocksLoaded prometheus.Gauge + blockLoads 
prometheus.Counter + blockLoadFailures prometheus.Counter + blockDrops prometheus.Counter + blockDropFailures prometheus.Counter + seriesDataTouched *prometheus.SummaryVec + seriesDataFetched *prometheus.SummaryVec + seriesDataSizeTouched *prometheus.SummaryVec + seriesDataSizeFetched *prometheus.SummaryVec + seriesBlocksQueried prometheus.Summary + seriesGetAllDuration prometheus.Histogram + seriesMergeDuration prometheus.Histogram + seriesRefetches prometheus.Counter + resultSeriesCount prometheus.Summary + chunkSizeBytes prometheus.Histogram + queriesDropped *prometheus.CounterVec + + cachedPostingsCompressions *prometheus.CounterVec + cachedPostingsCompressionErrors *prometheus.CounterVec + cachedPostingsCompressionTimeSeconds *prometheus.CounterVec + cachedPostingsOriginalSizeBytes prometheus.Counter + cachedPostingsCompressedSizeBytes prometheus.Counter + + seriesFetchDuration prometheus.Histogram + postingsFetchDuration prometheus.Histogram + + indexHeaderLazyLoadCount prometheus.Counter + indexHeaderLazyLoadFailedCount prometheus.Counter + indexHeaderLazyUnloadCount prometheus.Counter + indexHeaderLazyUnloadFailedCount prometheus.Counter + indexHeaderLazyLoadDuration prometheus.Histogram +} + +func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMetrics { + var m mockedBucketStoreMetrics + + m.blockLoads = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_block_loads_total", + Help: "Total number of remote block loading attempts.", + }) + m.blockLoadFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_block_load_failures_total", + Help: "Total number of failed remote block loading attempts.", + }) + m.blockDrops = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_block_drops_total", + Help: "Total number of local blocks that were dropped.", + }) + m.blockDropFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + 
Name: "thanos_bucket_store_block_drop_failures_total", + Help: "Total number of local blocks that failed to be dropped.", + }) + m.blocksLoaded = promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + Name: "thanos_bucket_store_blocks_loaded", + Help: "Number of currently loaded blocks.", + }) + + m.seriesDataTouched = promauto.With(reg).NewSummaryVec(prometheus.SummaryOpts{ + Name: "thanos_bucket_store_series_data_touched", + Help: "How many items of a data type in a block were touched for a single series request.", + }, []string{"data_type"}) + m.seriesDataFetched = promauto.With(reg).NewSummaryVec(prometheus.SummaryOpts{ + Name: "thanos_bucket_store_series_data_fetched", + Help: "How many items of a data type in a block were fetched for a single series request.", + }, []string{"data_type"}) + + m.seriesDataSizeTouched = promauto.With(reg).NewSummaryVec(prometheus.SummaryOpts{ + Name: "thanos_bucket_store_series_data_size_touched_bytes", + Help: "Size of all items of a data type in a block were touched for a single series request.", + }, []string{"data_type"}) + m.seriesDataSizeFetched = promauto.With(reg).NewSummaryVec(prometheus.SummaryOpts{ + Name: "thanos_bucket_store_series_data_size_fetched_bytes", + Help: "Size of all items of a data type in a block were fetched for a single series request.", + }, []string{"data_type"}) + + m.seriesBlocksQueried = promauto.With(reg).NewSummary(prometheus.SummaryOpts{ + Name: "thanos_bucket_store_series_blocks_queried", + Help: "Number of blocks in a bucket store that were touched to satisfy a query.", + }) + m.seriesGetAllDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_series_get_all_duration_seconds", + Help: "Time it takes until all per-block prepares and preloads for a query are finished.", + Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120}, + }) + m.seriesMergeDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: 
"thanos_bucket_store_series_merge_duration_seconds", + Help: "Time it takes to merge sub-results from all queried blocks into a single result.", + Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120}, + }) + m.resultSeriesCount = promauto.With(reg).NewSummary(prometheus.SummaryOpts{ + Name: "thanos_bucket_store_series_result_series", + Help: "Number of series observed in the final result of a query.", + }) + + m.chunkSizeBytes = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_sent_chunk_size_bytes", + Help: "Size in bytes of the chunks for the single series, which is adequate to the gRPC message size sent to querier.", + Buckets: []float64{ + 32, 256, 512, 1024, 32 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 32 * 1024 * 1024, 256 * 1024 * 1024, 512 * 1024 * 1024, + }, + }) + + m.queriesDropped = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "thanos_bucket_store_queries_dropped_total", + Help: "Number of queries that were dropped due to the limit.", + }, []string{"reason"}) + m.seriesRefetches = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_series_refetches_total", + Help: fmt.Sprintf("Total number of cases where %v bytes was not enough was to fetch series from index, resulting in refetch.", 64*1024), + }) + + m.cachedPostingsCompressions = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "thanos_bucket_store_cached_postings_compressions_total", + Help: "Number of postings compressions before storing to index cache.", + }, []string{"op"}) + m.cachedPostingsCompressionErrors = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "thanos_bucket_store_cached_postings_compression_errors_total", + Help: "Number of postings compression errors.", + }, []string{"op"}) + m.cachedPostingsCompressionTimeSeconds = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: 
"thanos_bucket_store_cached_postings_compression_time_seconds_total", + Help: "Time spent compressing postings before storing them into postings cache.", + }, []string{"op"}) + m.cachedPostingsOriginalSizeBytes = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_cached_postings_original_size_bytes_total", + Help: "Original size of postings stored into cache.", + }) + m.cachedPostingsCompressedSizeBytes = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_cached_postings_compressed_size_bytes_total", + Help: "Compressed size of postings stored into cache.", + }) + + m.seriesFetchDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_cached_series_fetch_duration_seconds", + Help: "Time it takes to fetch series from a bucket to respond a query. It also includes the time it takes to cache fetch and store operations.", + Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120}, + }) + m.postingsFetchDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_cached_postings_fetch_duration_seconds", + Help: "Time it takes to fetch postings from a bucket to respond a query. 
It also includes the time it takes to cache fetch and store operations.", + Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120}, + }) + + m.indexHeaderLazyLoadCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_indexheader_lazy_load_total", + Help: "Total number of index-header lazy load operations.", + }) + m.indexHeaderLazyLoadFailedCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_indexheader_lazy_load_failed_total", + Help: "Total number of failed index-header lazy load operations.", + }) + m.indexHeaderLazyUnloadCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_indexheader_lazy_unload_total", + Help: "Total number of index-header lazy unload operations.", + }) + m.indexHeaderLazyUnloadFailedCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_bucket_store_indexheader_lazy_unload_failed_total", + Help: "Total number of failed index-header lazy unload operations.", + }) + m.indexHeaderLazyLoadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_indexheader_lazy_load_duration_seconds", + Help: "Duration of the index-header lazy loading in seconds.", + Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5}, + }) + + return &m +} diff --git a/pkg/storegateway/bucket_stores_test.go b/pkg/storegateway/bucket_stores_test.go new file mode 100644 index 0000000000000..482b052bebe4a --- /dev/null +++ b/pkg/storegateway/bucket_stores_test.go @@ -0,0 +1,616 @@ +package storegateway + +import ( + "context" + "errors" + "fmt" + "io" + "io/ioutil" + "math" + "os" + "path/filepath" + "sort" + "strings" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/grafana/dskit/flagext" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + 
"github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + thanos_metadata "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/extprom" + "github.com/thanos-io/thanos/pkg/objstore" + "github.com/thanos-io/thanos/pkg/store" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/store/storepb" + "github.com/weaveworks/common/logging" + "go.uber.org/atomic" + "google.golang.org/grpc/metadata" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + "github.com/cortexproject/cortex/pkg/storage/bucket/filesystem" + cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/util" +) + +func TestBucketStores_InitialSync(t *testing.T) { + userToMetric := map[string]string{ + "user-1": "series_1", + "user-2": "series_2", + } + + ctx := context.Background() + cfg, cleanup := prepareStorageConfig(t) + defer cleanup() + + storageDir, err := ioutil.TempDir(os.TempDir(), "storage-*") + require.NoError(t, err) + + for userID, metricName := range userToMetric { + generateStorageBlock(t, storageDir, userID, metricName, 10, 100, 15) + } + + bucket, err := filesystem.NewBucketClient(filesystem.Config{Directory: storageDir}) + require.NoError(t, err) + + reg := prometheus.NewPedanticRegistry() + stores, err := NewBucketStores(cfg, NewNoShardingStrategy(), bucket, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), reg) + require.NoError(t, err) + + // Query series before the initial sync. 
+ for userID, metricName := range userToMetric { + seriesSet, warnings, err := querySeries(stores, userID, metricName, 20, 40) + require.NoError(t, err) + assert.Empty(t, warnings) + assert.Empty(t, seriesSet) + } + + require.NoError(t, stores.InitialSync(ctx)) + + // Query series after the initial sync. + for userID, metricName := range userToMetric { + seriesSet, warnings, err := querySeries(stores, userID, metricName, 20, 40) + require.NoError(t, err) + assert.Empty(t, warnings) + require.Len(t, seriesSet, 1) + assert.Equal(t, []labelpb.ZLabel{{Name: labels.MetricName, Value: metricName}}, seriesSet[0].Labels) + } + + // Query series of another user. + seriesSet, warnings, err := querySeries(stores, "user-1", "series_2", 20, 40) + require.NoError(t, err) + assert.Empty(t, warnings) + assert.Empty(t, seriesSet) + + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks. + # TYPE cortex_bucket_store_blocks_loaded gauge + cortex_bucket_store_blocks_loaded 2 + + # HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts. + # TYPE cortex_bucket_store_block_loads_total counter + cortex_bucket_store_block_loads_total 2 + + # HELP cortex_bucket_store_block_load_failures_total Total number of failed remote block loading attempts. + # TYPE cortex_bucket_store_block_load_failures_total counter + cortex_bucket_store_block_load_failures_total 0 + + # HELP cortex_bucket_stores_gate_queries_concurrent_max Number of maximum concurrent queries allowed. + # TYPE cortex_bucket_stores_gate_queries_concurrent_max gauge + cortex_bucket_stores_gate_queries_concurrent_max 100 + + # HELP cortex_bucket_stores_gate_queries_in_flight Number of queries that are currently in flight. 
+ # TYPE cortex_bucket_stores_gate_queries_in_flight gauge + cortex_bucket_stores_gate_queries_in_flight 0 + `), + "cortex_bucket_store_blocks_loaded", + "cortex_bucket_store_block_loads_total", + "cortex_bucket_store_block_load_failures_total", + "cortex_bucket_stores_gate_queries_concurrent_max", + "cortex_bucket_stores_gate_queries_in_flight", + )) + + assert.Greater(t, testutil.ToFloat64(stores.syncLastSuccess), float64(0)) +} + +func TestBucketStores_InitialSyncShouldRetryOnFailure(t *testing.T) { + ctx := context.Background() + cfg, cleanup := prepareStorageConfig(t) + defer cleanup() + + storageDir, err := ioutil.TempDir(os.TempDir(), "storage-*") + require.NoError(t, err) + + // Generate a block for the user in the storage. + generateStorageBlock(t, storageDir, "user-1", "series_1", 10, 100, 15) + + bucket, err := filesystem.NewBucketClient(filesystem.Config{Directory: storageDir}) + require.NoError(t, err) + + // Wrap the bucket to fail the 1st Get() request. + bucket = &failFirstGetBucket{Bucket: bucket} + + reg := prometheus.NewPedanticRegistry() + stores, err := NewBucketStores(cfg, NewNoShardingStrategy(), bucket, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), reg) + require.NoError(t, err) + + // Initial sync should succeed even if a transient error occurs. + require.NoError(t, stores.InitialSync(ctx)) + + // Query series after the initial sync. 
+ seriesSet, warnings, err := querySeries(stores, "user-1", "series_1", 20, 40) + require.NoError(t, err) + assert.Empty(t, warnings) + require.Len(t, seriesSet, 1) + assert.Equal(t, []labelpb.ZLabel{{Name: labels.MetricName, Value: "series_1"}}, seriesSet[0].Labels) + + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_blocks_meta_syncs_total Total blocks metadata synchronization attempts + # TYPE cortex_blocks_meta_syncs_total counter + cortex_blocks_meta_syncs_total 2 + + # HELP cortex_blocks_meta_sync_failures_total Total blocks metadata synchronization failures + # TYPE cortex_blocks_meta_sync_failures_total counter + cortex_blocks_meta_sync_failures_total 1 + + # HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks. + # TYPE cortex_bucket_store_blocks_loaded gauge + cortex_bucket_store_blocks_loaded 1 + + # HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts. + # TYPE cortex_bucket_store_block_loads_total counter + cortex_bucket_store_block_loads_total 1 + + # HELP cortex_bucket_store_block_load_failures_total Total number of failed remote block loading attempts. 
+ # TYPE cortex_bucket_store_block_load_failures_total counter + cortex_bucket_store_block_load_failures_total 0 + `), + "cortex_blocks_meta_syncs_total", + "cortex_blocks_meta_sync_failures_total", + "cortex_bucket_store_block_loads_total", + "cortex_bucket_store_block_load_failures_total", + "cortex_bucket_store_blocks_loaded", + )) + + assert.Greater(t, testutil.ToFloat64(stores.syncLastSuccess), float64(0)) +} + +func TestBucketStores_SyncBlocks(t *testing.T) { + const ( + userID = "user-1" + metricName = "series_1" + ) + + ctx := context.Background() + cfg, cleanup := prepareStorageConfig(t) + defer cleanup() + + storageDir, err := ioutil.TempDir(os.TempDir(), "storage-*") + require.NoError(t, err) + + bucket, err := filesystem.NewBucketClient(filesystem.Config{Directory: storageDir}) + require.NoError(t, err) + + reg := prometheus.NewPedanticRegistry() + stores, err := NewBucketStores(cfg, NewNoShardingStrategy(), bucket, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), reg) + require.NoError(t, err) + + // Run an initial sync to discover 1 block. + generateStorageBlock(t, storageDir, userID, metricName, 10, 100, 15) + require.NoError(t, stores.InitialSync(ctx)) + + // Query a range for which we have no samples. + seriesSet, warnings, err := querySeries(stores, userID, metricName, 150, 180) + require.NoError(t, err) + assert.Empty(t, warnings) + assert.Empty(t, seriesSet) + + // Generate another block and sync blocks again. 
+ generateStorageBlock(t, storageDir, userID, metricName, 100, 200, 15) + require.NoError(t, stores.SyncBlocks(ctx)) + + seriesSet, warnings, err = querySeries(stores, userID, metricName, 150, 180) + require.NoError(t, err) + assert.Empty(t, warnings) + assert.Len(t, seriesSet, 1) + assert.Equal(t, []labelpb.ZLabel{{Name: labels.MetricName, Value: metricName}}, seriesSet[0].Labels) + + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks. + # TYPE cortex_bucket_store_blocks_loaded gauge + cortex_bucket_store_blocks_loaded 2 + + # HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts. + # TYPE cortex_bucket_store_block_loads_total counter + cortex_bucket_store_block_loads_total 2 + + # HELP cortex_bucket_store_block_load_failures_total Total number of failed remote block loading attempts. + # TYPE cortex_bucket_store_block_load_failures_total counter + cortex_bucket_store_block_load_failures_total 0 + + # HELP cortex_bucket_stores_gate_queries_concurrent_max Number of maximum concurrent queries allowed. + # TYPE cortex_bucket_stores_gate_queries_concurrent_max gauge + cortex_bucket_stores_gate_queries_concurrent_max 100 + + # HELP cortex_bucket_stores_gate_queries_in_flight Number of queries that are currently in flight. 
+ # TYPE cortex_bucket_stores_gate_queries_in_flight gauge + cortex_bucket_stores_gate_queries_in_flight 0 + `), + "cortex_bucket_store_blocks_loaded", + "cortex_bucket_store_block_loads_total", + "cortex_bucket_store_block_load_failures_total", + "cortex_bucket_stores_gate_queries_concurrent_max", + "cortex_bucket_stores_gate_queries_in_flight", + )) + + assert.Greater(t, testutil.ToFloat64(stores.syncLastSuccess), float64(0)) +} + +func TestBucketStores_syncUsersBlocks(t *testing.T) { + allUsers := []string{"user-1", "user-2", "user-3"} + + tests := map[string]struct { + shardingStrategy ShardingStrategy + expectedStores int32 + }{ + "when sharding is disabled all users should be synced": { + shardingStrategy: NewNoShardingStrategy(), + expectedStores: 3, + }, + "when sharding is enabled only stores for filtered users should be created": { + shardingStrategy: func() ShardingStrategy { + s := &mockShardingStrategy{} + s.On("FilterUsers", mock.Anything, allUsers).Return([]string{"user-1", "user-2"}) + return s + }(), + expectedStores: 2, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + cfg, cleanup := prepareStorageConfig(t) + cfg.BucketStore.TenantSyncConcurrency = 2 + defer cleanup() + + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", allUsers, nil) + + stores, err := NewBucketStores(cfg, testData.shardingStrategy, bucketClient, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), nil) + require.NoError(t, err) + + // Sync user stores and count the number of times the callback is called. 
+ var storesCount atomic.Int32 + err = stores.syncUsersBlocks(context.Background(), func(ctx context.Context, bs *store.BucketStore) error { + storesCount.Inc() + return nil + }) + + assert.NoError(t, err) + bucketClient.AssertNumberOfCalls(t, "Iter", 1) + assert.Equal(t, storesCount.Load(), testData.expectedStores) + }) + } +} + +func TestBucketStores_Series_ShouldCorrectlyQuerySeriesSpanningMultipleChunks(t *testing.T) { + for _, lazyLoadingEnabled := range []bool{true, false} { + t.Run(fmt.Sprintf("lazy loading enabled = %v", lazyLoadingEnabled), func(t *testing.T) { + testBucketStoresSeriesShouldCorrectlyQuerySeriesSpanningMultipleChunks(t, lazyLoadingEnabled) + }) + } +} + +func testBucketStoresSeriesShouldCorrectlyQuerySeriesSpanningMultipleChunks(t *testing.T, lazyLoadingEnabled bool) { + const ( + userID = "user-1" + metricName = "series_1" + ) + + ctx := context.Background() + cfg, cleanup := prepareStorageConfig(t) + cfg.BucketStore.IndexHeaderLazyLoadingEnabled = lazyLoadingEnabled + cfg.BucketStore.IndexHeaderLazyLoadingIdleTimeout = time.Minute + defer cleanup() + + storageDir, err := ioutil.TempDir(os.TempDir(), "storage-*") + require.NoError(t, err) + + // Generate a single block with 1 series and a lot of samples. 
+ generateStorageBlock(t, storageDir, userID, metricName, 0, 10000, 1) + + bucket, err := filesystem.NewBucketClient(filesystem.Config{Directory: storageDir}) + require.NoError(t, err) + + reg := prometheus.NewPedanticRegistry() + stores, err := NewBucketStores(cfg, NewNoShardingStrategy(), bucket, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), reg) + require.NoError(t, err) + require.NoError(t, stores.InitialSync(ctx)) + + tests := map[string]struct { + reqMinTime int64 + reqMaxTime int64 + expectedSamples int + }{ + "query the entire block": { + reqMinTime: math.MinInt64, + reqMaxTime: math.MaxInt64, + expectedSamples: 10000, + }, + "query the beginning of the block": { + reqMinTime: 0, + reqMaxTime: 100, + expectedSamples: store.MaxSamplesPerChunk, + }, + "query the middle of the block": { + reqMinTime: 4000, + reqMaxTime: 4050, + expectedSamples: store.MaxSamplesPerChunk, + }, + "query the end of the block": { + reqMinTime: 9800, + reqMaxTime: 10000, + expectedSamples: (store.MaxSamplesPerChunk * 2) + (10000 % store.MaxSamplesPerChunk), + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Query a range for which we have no samples. + seriesSet, warnings, err := querySeries(stores, userID, metricName, testData.reqMinTime, testData.reqMaxTime) + require.NoError(t, err) + assert.Empty(t, warnings) + assert.Len(t, seriesSet, 1) + + // Count returned samples. 
+ samples, err := readSamplesFromChunks(seriesSet[0].Chunks) + require.NoError(t, err) + assert.Equal(t, testData.expectedSamples, len(samples)) + }) + } +} + +func prepareStorageConfig(t *testing.T) (cortex_tsdb.BlocksStorageConfig, func()) { + tmpDir, err := ioutil.TempDir(os.TempDir(), "blocks-sync-*") + require.NoError(t, err) + + cfg := cortex_tsdb.BlocksStorageConfig{} + flagext.DefaultValues(&cfg) + cfg.BucketStore.SyncDir = tmpDir + + cleanup := func() { + require.NoError(t, os.RemoveAll(tmpDir)) + } + + return cfg, cleanup +} + +func generateStorageBlock(t *testing.T, storageDir, userID string, metricName string, minT, maxT int64, step int) { + // Create a directory for the user (if doesn't already exist). + userDir := filepath.Join(storageDir, userID) + if _, err := os.Stat(userDir); err != nil { + require.NoError(t, os.Mkdir(userDir, os.ModePerm)) + } + + // Create a temporary directory where the TSDB is opened, + // then it will be snapshotted to the storage directory. + tmpDir, err := ioutil.TempDir(os.TempDir(), "tsdb-*") + require.NoError(t, err) + defer func() { + require.NoError(t, os.RemoveAll(tmpDir)) + }() + + db, err := tsdb.Open(tmpDir, log.NewNopLogger(), nil, tsdb.DefaultOptions(), nil) + require.NoError(t, err) + defer func() { + require.NoError(t, db.Close()) + }() + + series := labels.Labels{labels.Label{Name: labels.MetricName, Value: metricName}} + + app := db.Appender(context.Background()) + for ts := minT; ts < maxT; ts += int64(step) { + _, err = app.Append(0, series, ts, 1) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + + // Snapshot TSDB to the storage directory. 
+ require.NoError(t, db.Snapshot(userDir, true)) +} + +func querySeries(stores *BucketStores, userID, metricName string, minT, maxT int64) ([]*storepb.Series, storage.Warnings, error) { + req := &storepb.SeriesRequest{ + MinTime: minT, + MaxTime: maxT, + Matchers: []storepb.LabelMatcher{{ + Type: storepb.LabelMatcher_EQ, + Name: labels.MetricName, + Value: metricName, + }}, + PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT, + } + + ctx := setUserIDToGRPCContext(context.Background(), userID) + srv := newBucketStoreSeriesServer(ctx) + err := stores.Series(req, srv) + + return srv.SeriesSet, srv.Warnings, err +} + +func mockLoggingLevel() logging.Level { + level := logging.Level{} + err := level.Set("info") + if err != nil { + panic(err) + } + + return level +} + +func setUserIDToGRPCContext(ctx context.Context, userID string) context.Context { + // We have to store it in the incoming metadata because we have to emulate the + // case it's coming from a gRPC request, while here we're running everything in-memory. 
+ return metadata.NewIncomingContext(ctx, metadata.Pairs(cortex_tsdb.TenantIDExternalLabel, userID)) +} + +func TestBucketStores_deleteLocalFilesForExcludedTenants(t *testing.T) { + const ( + user1 = "user-1" + user2 = "user-2" + ) + + userToMetric := map[string]string{ + user1: "series_1", + user2: "series_2", + } + + ctx := context.Background() + cfg, cleanup := prepareStorageConfig(t) + defer cleanup() + + storageDir, err := ioutil.TempDir(os.TempDir(), "storage-*") + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, os.RemoveAll(storageDir)) + }) + + for userID, metricName := range userToMetric { + generateStorageBlock(t, storageDir, userID, metricName, 10, 100, 15) + } + + bucket, err := filesystem.NewBucketClient(filesystem.Config{Directory: storageDir}) + require.NoError(t, err) + + sharding := userShardingStrategy{} + + reg := prometheus.NewPedanticRegistry() + stores, err := NewBucketStores(cfg, &sharding, bucket, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), reg) + require.NoError(t, err) + + // Perform sync. + sharding.users = []string{user1, user2} + require.NoError(t, stores.InitialSync(ctx)) + require.Equal(t, []string{user1, user2}, getUsersInDir(t, cfg.BucketStore.SyncDir)) + + metricNames := []string{"cortex_bucket_store_block_drops_total", "cortex_bucket_store_block_loads_total", "cortex_bucket_store_blocks_loaded"} + + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_bucket_store_block_drops_total Total number of local blocks that were dropped. + # TYPE cortex_bucket_store_block_drops_total counter + cortex_bucket_store_block_drops_total 0 + # HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts. + # TYPE cortex_bucket_store_block_loads_total counter + cortex_bucket_store_block_loads_total 2 + # HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks. 
+ # TYPE cortex_bucket_store_blocks_loaded gauge + cortex_bucket_store_blocks_loaded 2 + `), metricNames...)) + + // Single user left in shard. + sharding.users = []string{user1} + require.NoError(t, stores.SyncBlocks(ctx)) + require.Equal(t, []string{user1}, getUsersInDir(t, cfg.BucketStore.SyncDir)) + + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_bucket_store_block_drops_total Total number of local blocks that were dropped. + # TYPE cortex_bucket_store_block_drops_total counter + cortex_bucket_store_block_drops_total 1 + # HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts. + # TYPE cortex_bucket_store_block_loads_total counter + cortex_bucket_store_block_loads_total 2 + # HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks. + # TYPE cortex_bucket_store_blocks_loaded gauge + cortex_bucket_store_blocks_loaded 1 + `), metricNames...)) + + // No users left in this shard. + sharding.users = nil + require.NoError(t, stores.SyncBlocks(ctx)) + require.Equal(t, []string(nil), getUsersInDir(t, cfg.BucketStore.SyncDir)) + + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_bucket_store_block_drops_total Total number of local blocks that were dropped. + # TYPE cortex_bucket_store_block_drops_total counter + cortex_bucket_store_block_drops_total 2 + # HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts. + # TYPE cortex_bucket_store_block_loads_total counter + cortex_bucket_store_block_loads_total 2 + # HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks. + # TYPE cortex_bucket_store_blocks_loaded gauge + cortex_bucket_store_blocks_loaded 0 + `), metricNames...)) + + // We can always get user back. 
+ sharding.users = []string{user1} + require.NoError(t, stores.SyncBlocks(ctx)) + require.Equal(t, []string{user1}, getUsersInDir(t, cfg.BucketStore.SyncDir)) + + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` + # HELP cortex_bucket_store_block_drops_total Total number of local blocks that were dropped. + # TYPE cortex_bucket_store_block_drops_total counter + cortex_bucket_store_block_drops_total 2 + # HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts. + # TYPE cortex_bucket_store_block_loads_total counter + cortex_bucket_store_block_loads_total 3 + # HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks. + # TYPE cortex_bucket_store_blocks_loaded gauge + cortex_bucket_store_blocks_loaded 1 + `), metricNames...)) +} + +func getUsersInDir(t *testing.T, dir string) []string { + fs, err := ioutil.ReadDir(dir) + require.NoError(t, err) + + var result []string + for _, fi := range fs { + if fi.IsDir() { + result = append(result, fi.Name()) + } + } + sort.Strings(result) + return result +} + +type userShardingStrategy struct { + users []string +} + +func (u *userShardingStrategy) FilterUsers(ctx context.Context, userIDs []string) []string { + return u.users +} + +func (u *userShardingStrategy) FilterBlocks(ctx context.Context, userID string, metas map[ulid.ULID]*thanos_metadata.Meta, loaded map[ulid.ULID]struct{}, synced *extprom.TxGaugeVec) error { + if util.StringsContain(u.users, userID) { + return nil + } + + for k := range metas { + delete(metas, k) + } + return nil +} + +// failFirstGetBucket is an objstore.Bucket wrapper which fails the first Get() request with a mocked error. 
+type failFirstGetBucket struct { + objstore.Bucket + + firstGet atomic.Bool +} + +func (f *failFirstGetBucket) Get(ctx context.Context, name string) (io.ReadCloser, error) { + if f.firstGet.CAS(false, true) { + return nil, errors.New("Get() request mocked error") + } + + return f.Bucket.Get(ctx, name) +} diff --git a/pkg/storegateway/chunk_bytes_pool_test.go b/pkg/storegateway/chunk_bytes_pool_test.go new file mode 100644 index 0000000000000..182bb04c0f2ea --- /dev/null +++ b/pkg/storegateway/chunk_bytes_pool_test.go @@ -0,0 +1,37 @@ +package storegateway + +import ( + "bytes" + "fmt" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/store" + + cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" +) + +func TestChunkBytesPool_Get(t *testing.T) { + reg := prometheus.NewPedanticRegistry() + p, err := newChunkBytesPool(cortex_tsdb.ChunkPoolDefaultMinBucketSize, cortex_tsdb.ChunkPoolDefaultMaxBucketSize, 0, reg) + require.NoError(t, err) + + _, err = p.Get(store.EstimatedMaxChunkSize - 1) + require.NoError(t, err) + + _, err = p.Get(store.EstimatedMaxChunkSize + 1) + require.NoError(t, err) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(fmt.Sprintf(` + # HELP cortex_bucket_store_chunk_pool_requested_bytes_total Total bytes requested to chunk bytes pool. + # TYPE cortex_bucket_store_chunk_pool_requested_bytes_total counter + cortex_bucket_store_chunk_pool_requested_bytes_total %d + + # HELP cortex_bucket_store_chunk_pool_returned_bytes_total Total bytes returned by the chunk bytes pool. 
+ # TYPE cortex_bucket_store_chunk_pool_returned_bytes_total counter + cortex_bucket_store_chunk_pool_returned_bytes_total %d + `, store.EstimatedMaxChunkSize*2, store.EstimatedMaxChunkSize*3)))) +} diff --git a/pkg/storegateway/gateway_ring_test.go b/pkg/storegateway/gateway_ring_test.go new file mode 100644 index 0000000000000..b621a566adf7c --- /dev/null +++ b/pkg/storegateway/gateway_ring_test.go @@ -0,0 +1,72 @@ +package storegateway + +import ( + "testing" + "time" + + "github.com/grafana/dskit/ring" + "github.com/stretchr/testify/assert" +) + +func TestIsHealthyForStoreGatewayOperations(t *testing.T) { + t.Parallel() + + tests := map[string]struct { + instance *ring.InstanceDesc + timeout time.Duration + ownerSyncExpected bool + ownerReadExpected bool + readExpected bool + }{ + "ACTIVE instance with last keepalive newer than timeout": { + instance: &ring.InstanceDesc{State: ring.ACTIVE, Timestamp: time.Now().Add(-30 * time.Second).Unix()}, + timeout: time.Minute, + ownerSyncExpected: true, + ownerReadExpected: true, + readExpected: true, + }, + "ACTIVE instance with last keepalive older than timeout": { + instance: &ring.InstanceDesc{State: ring.ACTIVE, Timestamp: time.Now().Add(-90 * time.Second).Unix()}, + timeout: time.Minute, + ownerSyncExpected: false, + ownerReadExpected: false, + readExpected: false, + }, + "JOINING instance with last keepalive newer than timeout": { + instance: &ring.InstanceDesc{State: ring.JOINING, Timestamp: time.Now().Add(-30 * time.Second).Unix()}, + timeout: time.Minute, + ownerSyncExpected: true, + ownerReadExpected: false, + readExpected: false, + }, + "LEAVING instance with last keepalive newer than timeout": { + instance: &ring.InstanceDesc{State: ring.LEAVING, Timestamp: time.Now().Add(-30 * time.Second).Unix()}, + timeout: time.Minute, + ownerSyncExpected: true, + ownerReadExpected: false, + readExpected: false, + }, + "PENDING instance with last keepalive newer than timeout": { + instance: &ring.InstanceDesc{State: 
ring.PENDING, Timestamp: time.Now().Add(-30 * time.Second).Unix()}, + timeout: time.Minute, + ownerSyncExpected: false, + ownerReadExpected: false, + readExpected: false, + }, + } + + for testName, testData := range tests { + testData := testData + + t.Run(testName, func(t *testing.T) { + actual := testData.instance.IsHealthy(BlocksOwnerSync, testData.timeout, time.Now()) + assert.Equal(t, testData.ownerSyncExpected, actual) + + actual = testData.instance.IsHealthy(BlocksOwnerRead, testData.timeout, time.Now()) + assert.Equal(t, testData.ownerReadExpected, actual) + + actual = testData.instance.IsHealthy(BlocksRead, testData.timeout, time.Now()) + assert.Equal(t, testData.readExpected, actual) + }) + } +} diff --git a/pkg/storegateway/gateway_test.go b/pkg/storegateway/gateway_test.go new file mode 100644 index 0000000000000..ab8a982692bb0 --- /dev/null +++ b/pkg/storegateway/gateway_test.go @@ -0,0 +1,1132 @@ +package storegateway + +import ( + "context" + "fmt" + "io/ioutil" + "math" + "math/rand" + "net/http" + "os" + "path" + "path/filepath" + "sort" + "strconv" + "strings" + "testing" + "time" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + "github.com/cortexproject/cortex/pkg/storage/bucket/filesystem" + cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" + "github.com/cortexproject/cortex/pkg/util" + "github.com/cortexproject/cortex/pkg/util/validation" + "github.com/go-kit/log" + "github.com/grafana/dskit/flagext" + "github.com/grafana/dskit/kv/consul" + "github.com/grafana/dskit/ring" + "github.com/grafana/dskit/services" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + 
"github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/extprom" + "github.com/thanos-io/thanos/pkg/objstore" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/store/storepb" + "google.golang.org/grpc/status" + + tsdb_testutil "github.com/grafana/loki/pkg/storage/tsdb/testutil" + "github.com/grafana/loki/pkg/util/test" +) + +func TestConfig_Validate(t *testing.T) { + tests := map[string]struct { + setup func(cfg *Config, limits *validation.Limits) + expected error + }{ + "should pass by default": { + setup: func(cfg *Config, limits *validation.Limits) {}, + expected: nil, + }, + "should fail if the sharding strategy is invalid": { + setup: func(cfg *Config, limits *validation.Limits) { + cfg.ShardingEnabled = true + cfg.ShardingStrategy = "xxx" + }, + expected: errInvalidShardingStrategy, + }, + "should fail if the sharding strategy is shuffle-sharding and shard size has not been set": { + setup: func(cfg *Config, limits *validation.Limits) { + cfg.ShardingEnabled = true + cfg.ShardingStrategy = util.ShardingStrategyShuffle + }, + expected: errInvalidTenantShardSize, + }, + "should pass if the sharding strategy is shuffle-sharding and shard size has been set": { + setup: func(cfg *Config, limits *validation.Limits) { + cfg.ShardingEnabled = true + cfg.ShardingStrategy = util.ShardingStrategyShuffle + limits.StoreGatewayTenantShardSize = 3 + }, + expected: nil, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + cfg := &Config{} + limits := &validation.Limits{} + flagext.DefaultValues(cfg, limits) + testData.setup(cfg, limits) + + assert.Equal(t, testData.expected, cfg.Validate(*limits)) + }) + } +} + +func TestStoreGateway_InitialSyncWithDefaultShardingEnabled(t *testing.T) { + tests := map[string]struct { + initialExists bool + initialState ring.InstanceState + initialTokens 
ring.Tokens + }{ + "instance not in the ring": { + initialExists: false, + }, + "instance already in the ring with PENDING state and has no tokens": { + initialExists: true, + initialState: ring.PENDING, + initialTokens: ring.Tokens{}, + }, + "instance already in the ring with JOINING state and has some tokens": { + initialExists: true, + initialState: ring.JOINING, + initialTokens: ring.Tokens{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }, + "instance already in the ring with ACTIVE state and has all tokens": { + initialExists: true, + initialState: ring.ACTIVE, + initialTokens: generateSortedTokens(RingNumTokens), + }, + "instance already in the ring with LEAVING state and has all tokens": { + initialExists: true, + initialState: ring.LEAVING, + initialTokens: generateSortedTokens(RingNumTokens), + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + ctx := context.Background() + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingEnabled = true + storageCfg := mockStorageConfig(t) + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + bucketClient := &bucket.ClientMock{} + + // Setup the initial instance state in the ring. 
+ if testData.initialExists { + require.NoError(t, ringStore.CAS(ctx, RingKey, func(in interface{}) (interface{}, bool, error) { + ringDesc := ring.GetOrCreateRingDesc(in) + ringDesc.AddIngester(gatewayCfg.ShardingRing.InstanceID, gatewayCfg.ShardingRing.InstanceAddr, "", testData.initialTokens, testData.initialState, time.Now()) + return ringDesc, true, nil + })) + } + + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, ringStore, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), nil) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + assert.False(t, g.ringLifecycler.IsRegistered()) + + bucketClient.MockIterWithCallback("", []string{"user-1", "user-2"}, nil, func() { + // During the initial sync, we expect the instance to always be in the JOINING + // state within the ring. + assert.True(t, g.ringLifecycler.IsRegistered()) + assert.Equal(t, ring.JOINING, g.ringLifecycler.GetState()) + assert.Equal(t, RingNumTokens, len(g.ringLifecycler.GetTokens())) + assert.Subset(t, g.ringLifecycler.GetTokens(), testData.initialTokens) + }) + bucketClient.MockIter("user-1/", []string{}, nil) + bucketClient.MockIter("user-2/", []string{}, nil) + + // Once successfully started, the instance should be ACTIVE in the ring. 
+ require.NoError(t, services.StartAndAwaitRunning(ctx, g)) + + assert.True(t, g.ringLifecycler.IsRegistered()) + assert.Equal(t, ring.ACTIVE, g.ringLifecycler.GetState()) + assert.Equal(t, RingNumTokens, len(g.ringLifecycler.GetTokens())) + assert.Subset(t, g.ringLifecycler.GetTokens(), testData.initialTokens) + + assert.NotNil(t, g.stores.getStore("user-1")) + assert.NotNil(t, g.stores.getStore("user-2")) + assert.Nil(t, g.stores.getStore("user-unknown")) + }) + } +} + +func TestStoreGateway_InitialSyncWithShardingDisabled(t *testing.T) { + ctx := context.Background() + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingEnabled = false + storageCfg := mockStorageConfig(t) + bucketClient := &bucket.ClientMock{} + + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, nil, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), nil) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + + bucketClient.MockIter("", []string{"user-1", "user-2"}, nil) + bucketClient.MockIter("user-1/", []string{}, nil) + bucketClient.MockIter("user-2/", []string{}, nil) + + require.NoError(t, services.StartAndAwaitRunning(ctx, g)) + assert.NotNil(t, g.stores.getStore("user-1")) + assert.NotNil(t, g.stores.getStore("user-2")) + assert.Nil(t, g.stores.getStore("user-unknown")) +} + +func TestStoreGateway_InitialSyncFailure(t *testing.T) { + ctx := context.Background() + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingEnabled = true + storageCfg := mockStorageConfig(t) + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + bucketClient := &bucket.ClientMock{} + + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, ringStore, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), nil) + require.NoError(t, err) + + bucketClient.MockIter("", []string{}, errors.New("network error")) + + 
 require.NoError(t, g.StartAsync(ctx)) + err = g.AwaitRunning(ctx) + assert.Error(t, err) + assert.Equal(t, services.Failed, g.State()) + + // We expect a clean shutdown, including unregistering the instance from the ring. + assert.False(t, g.ringLifecycler.IsRegistered()) +} + +// TestStoreGateway_InitialSyncWithWaitRingStability tests the store-gateway cold start case. +// When several store-gateways start up at once, we expect each store-gateway to only load +// their own blocks, regardless of which store-gateway joined the ring first or last (even if starting +// at the same time, they will join the ring at a slightly different time). +func TestStoreGateway_InitialSyncWithWaitRingStability(t *testing.T) { + bucketClient, storageDir := tsdb_testutil.PrepareFilesystemBucket(t) + + // This test uses real TSDB blocks. 24h time range, 2h block range period, + // 2 users = total (24 / 2) * 2 = 24 blocks. + numUsers := 2 + numBlocks := numUsers * 12 + now := time.Now() + mockTSDB(t, path.Join(storageDir, "user-1"), 24, 12, now.Add(-24*time.Hour).Unix()*1000, now.Unix()*1000) + mockTSDB(t, path.Join(storageDir, "user-2"), 24, 12, now.Add(-24*time.Hour).Unix()*1000, now.Unix()*1000) + + // Write the bucket index. + for _, userID := range []string{"user-1", "user-2"} { + createBucketIndex(t, bucketClient, userID) + } + + tests := map[string]struct { + shardingStrategy string + tenantShardSize int // Used only when the sharding strategy is shuffle-sharding. 
+ replicationFactor int + numGateways int + expectedBlocksLoaded int + }{ + "default sharding strategy, 1 gateway, RF = 1": { + shardingStrategy: util.ShardingStrategyDefault, + replicationFactor: 1, + numGateways: 1, + expectedBlocksLoaded: numBlocks, + }, + "default sharding strategy, 2 gateways, RF = 1": { + shardingStrategy: util.ShardingStrategyDefault, + replicationFactor: 1, + numGateways: 2, + expectedBlocksLoaded: numBlocks, // blocks are sharded across gateways + }, + "default sharding strategy, 3 gateways, RF = 2": { + shardingStrategy: util.ShardingStrategyDefault, + replicationFactor: 2, + numGateways: 3, + expectedBlocksLoaded: 2 * numBlocks, // blocks are replicated 2 times + }, + "default sharding strategy, 5 gateways, RF = 3": { + shardingStrategy: util.ShardingStrategyDefault, + replicationFactor: 3, + numGateways: 5, + expectedBlocksLoaded: 3 * numBlocks, // blocks are replicated 3 times + }, + "shuffle sharding strategy, 1 gateway, RF = 1, SS = 1": { + shardingStrategy: util.ShardingStrategyShuffle, + tenantShardSize: 1, + replicationFactor: 1, + numGateways: 1, + expectedBlocksLoaded: numBlocks, + }, + "shuffle sharding strategy, 5 gateways, RF = 2, SS = 3": { + shardingStrategy: util.ShardingStrategyShuffle, + tenantShardSize: 3, + replicationFactor: 2, + numGateways: 5, + expectedBlocksLoaded: 2 * numBlocks, // blocks are replicated 2 times + }, + "shuffle sharding strategy, 20 gateways, RF = 3, SS = 3": { + shardingStrategy: util.ShardingStrategyShuffle, + tenantShardSize: 3, + replicationFactor: 3, + numGateways: 20, + expectedBlocksLoaded: 3 * numBlocks, // blocks are replicated 3 times + }, + } + + for testName, testData := range tests { + for _, bucketIndexEnabled := range []bool{true, false} { + t.Run(fmt.Sprintf("%s (bucket index enabled = %v)", testName, bucketIndexEnabled), func(t *testing.T) { + // Randomise the seed but log it in case we need to reproduce the test on failure. 
+ seed := time.Now().UnixNano() + rand.Seed(seed) + t.Log("random generator seed:", seed) + + ctx := context.Background() + ringStore, closer := consul.NewInMemoryClientWithConfig(ring.GetCodec(), consul.Config{ + MaxCasRetries: 20, + CasRetryDelay: 500 * time.Millisecond, + }, log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + // Create the configured number of gateways. + var gateways []*StoreGateway + registries := util.NewUserRegistries() + + for i := 1; i <= testData.numGateways; i++ { + instanceID := fmt.Sprintf("gateway-%d", i) + + storageCfg := mockStorageConfig(t) + storageCfg.BucketStore.SyncInterval = time.Hour // Do not trigger the periodic sync in this test. We want the initial sync only. + storageCfg.BucketStore.BucketIndex.Enabled = bucketIndexEnabled + + limits := defaultLimitsConfig() + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingRing.ReplicationFactor = testData.replicationFactor + gatewayCfg.ShardingRing.InstanceID = instanceID + gatewayCfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i) + gatewayCfg.ShardingRing.RingCheckPeriod = time.Hour // Do not check the ring topology changes in this test. We want the initial sync only. + gatewayCfg.ShardingRing.WaitStabilityMinDuration = 2 * time.Second + gatewayCfg.ShardingRing.WaitStabilityMaxDuration = 30 * time.Second + gatewayCfg.ShardingEnabled = true + gatewayCfg.ShardingStrategy = testData.shardingStrategy + limits.StoreGatewayTenantShardSize = testData.tenantShardSize + + overrides, err := validation.NewOverrides(limits, nil) + require.NoError(t, err) + + reg := prometheus.NewPedanticRegistry() + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, ringStore, overrides, mockLoggingLevel(), log.NewNopLogger(), reg) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + + gateways = append(gateways, g) + registries.AddUserRegistry(instanceID, reg) + } + + // Start all gateways concurrently. 
+ for _, g := range gateways { + require.NoError(t, g.StartAsync(ctx)) + } + + // Wait until all gateways are running. + for _, g := range gateways { + require.NoError(t, g.AwaitRunning(ctx)) + } + + // At this point we expect that all gateways have done the initial sync and + // they have synched only their own blocks, because they waited for a stable + // ring before starting the initial sync. + metrics := registries.BuildMetricFamiliesPerUser() + assert.Equal(t, float64(testData.expectedBlocksLoaded), metrics.GetSumOfGauges("cortex_bucket_store_blocks_loaded")) + assert.Equal(t, float64(2*testData.numGateways), metrics.GetSumOfGauges("cortex_bucket_stores_tenants_discovered")) + + if testData.shardingStrategy == util.ShardingStrategyShuffle { + assert.Equal(t, float64(testData.tenantShardSize*numBlocks), metrics.GetSumOfGauges("cortex_blocks_meta_synced")) + assert.Equal(t, float64(testData.tenantShardSize*numUsers), metrics.GetSumOfGauges("cortex_bucket_stores_tenants_synced")) + } else { + assert.Equal(t, float64(testData.numGateways*numBlocks), metrics.GetSumOfGauges("cortex_blocks_meta_synced")) + assert.Equal(t, float64(testData.numGateways*numUsers), metrics.GetSumOfGauges("cortex_bucket_stores_tenants_synced")) + } + + // We expect that all gateways have only run the initial sync and not the periodic one. + assert.Equal(t, float64(testData.numGateways), metrics.GetSumOfCounters("cortex_storegateway_bucket_sync_total")) + }) + } + } +} + +func TestStoreGateway_BlocksSyncWithDefaultSharding_RingTopologyChangedAfterScaleUp(t *testing.T) { + const ( + numUsers = 2 + numBlocks = numUsers * 12 + shardingStrategy = util.ShardingStrategyDefault + replicationFactor = 3 + numInitialGateways = 4 + numScaleUpGateways = 6 + expectedBlocksLoaded = 3 * numBlocks // blocks are replicated 3 times + ) + + bucketClient, storageDir := tsdb_testutil.PrepareFilesystemBucket(t) + + // This tests uses real TSDB blocks. 
24h time range, 2h block range period, + // 2 users = total (24 / 12) * 2 = 24 blocks. + now := time.Now() + mockTSDB(t, path.Join(storageDir, "user-1"), 24, 12, now.Add(-24*time.Hour).Unix()*1000, now.Unix()*1000) + mockTSDB(t, path.Join(storageDir, "user-2"), 24, 12, now.Add(-24*time.Hour).Unix()*1000, now.Unix()*1000) + + // Write the bucket index. + for _, userID := range []string{"user-1", "user-2"} { + createBucketIndex(t, bucketClient, userID) + } + + // Randomise the seed but log it in case we need to reproduce the test on failure. + seed := time.Now().UnixNano() + rand.Seed(seed) + t.Log("random generator seed:", seed) + + ctx := context.Background() + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + // Create the configured number of gateways. + var initialGateways []*StoreGateway + initialRegistries := util.NewUserRegistries() + allRegistries := util.NewUserRegistries() + + createStoreGateway := func(id int, waitStabilityMin time.Duration) (*StoreGateway, string, *prometheus.Registry) { + instanceID := fmt.Sprintf("gateway-%d", id) + + storageCfg := mockStorageConfig(t) + storageCfg.BucketStore.SyncInterval = time.Hour // Do not trigger the periodic sync in this test. We want it to be triggered by ring topology changed. + storageCfg.BucketStore.BucketIndex.Enabled = true + + limits := defaultLimitsConfig() + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingRing.ReplicationFactor = replicationFactor + gatewayCfg.ShardingRing.InstanceID = instanceID + gatewayCfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", id) + gatewayCfg.ShardingRing.RingCheckPeriod = 100 * time.Millisecond // Check it continuously. Topology will change on scale up. 
+ gatewayCfg.ShardingRing.WaitStabilityMinDuration = waitStabilityMin + gatewayCfg.ShardingRing.WaitStabilityMaxDuration = 30 * time.Second + gatewayCfg.ShardingEnabled = true + gatewayCfg.ShardingStrategy = shardingStrategy + + overrides, err := validation.NewOverrides(limits, nil) + require.NoError(t, err) + + reg := prometheus.NewPedanticRegistry() + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, ringStore, overrides, mockLoggingLevel(), log.NewNopLogger(), reg) + require.NoError(t, err) + + return g, instanceID, reg + } + + for i := 1; i <= numInitialGateways; i++ { + g, instanceID, reg := createStoreGateway(i, 2*time.Second) + initialGateways = append(initialGateways, g) + initialRegistries.AddUserRegistry(instanceID, reg) + allRegistries.AddUserRegistry(instanceID, reg) + } + + // Start all gateways concurrently. + for _, g := range initialGateways { + require.NoError(t, g.StartAsync(ctx)) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + } + + // Wait until all gateways are running. + for _, g := range initialGateways { + require.NoError(t, g.AwaitRunning(ctx)) + } + + // At this point we expect that all gateways have done the initial sync and + // they have synched only their own blocks. + metrics := initialRegistries.BuildMetricFamiliesPerUser() + assert.Equal(t, float64(expectedBlocksLoaded), metrics.GetSumOfGauges("cortex_bucket_store_blocks_loaded")) + assert.Equal(t, float64(2*numInitialGateways), metrics.GetSumOfGauges("cortex_bucket_stores_tenants_discovered")) + + assert.Equal(t, float64(numInitialGateways*numBlocks), metrics.GetSumOfGauges("cortex_blocks_meta_synced")) + assert.Equal(t, float64(numInitialGateways*numUsers), metrics.GetSumOfGauges("cortex_bucket_stores_tenants_synced")) + + // Scale up store-gateways. 
+ var scaleUpGateways []*StoreGateway + scaleUpRegistries := util.NewUserRegistries() + numAllGateways := numInitialGateways + numScaleUpGateways + + for i := numInitialGateways + 1; i <= numAllGateways; i++ { + g, instanceID, reg := createStoreGateway(i, 10*time.Second) // Intentionally high "wait stability min duration". + scaleUpGateways = append(scaleUpGateways, g) + scaleUpRegistries.AddUserRegistry(instanceID, reg) + allRegistries.AddUserRegistry(instanceID, reg) + } + + // Start all new gateways concurrently. + for _, g := range scaleUpGateways { + require.NoError(t, g.StartAsync(ctx)) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + } + + // Since we configured the new store-gateways with a high "wait stability min duration", we expect + them to join the ring at start up (with JOINING state) but then wait at least the min duration + before syncing blocks and becoming ACTIVE. This gives us enough time to check how the initial + store-gateways behave with regards to blocks syncing while other replicas are JOINING. + + // Wait until all the initial store-gateways see all new store-gateways too. + test.Poll(t, 5*time.Second, float64(numAllGateways*numInitialGateways), func() interface{} { + metrics := initialRegistries.BuildMetricFamiliesPerUser() + return metrics.GetSumOfGauges("cortex_ring_members") + }) + + // We expect each block to be available for querying on at least 1 initial store-gateway. 
+ for _, userID := range []string{"user-1", "user-2"} { + idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, log.NewNopLogger()) + require.NoError(t, err) + + for _, block := range idx.Blocks { + queried := false + + for _, g := range initialGateways { + req := &storepb.SeriesRequest{MinTime: math.MinInt64, MaxTime: math.MaxInt64} + srv := newBucketStoreSeriesServer(setUserIDToGRPCContext(ctx, userID)) + require.NoError(t, g.Series(req, srv)) + + for _, b := range srv.Hints.QueriedBlocks { + if b.Id == block.ID.String() { + queried = true + } + } + } + + assert.True(t, queried, "block %s has been successfully queried on initial store-gateways", block.ID.String()) + } + } + + // Wait until all new gateways are running. + for _, g := range scaleUpGateways { + require.NoError(t, g.AwaitRunning(ctx)) + } + + // At this point the new store-gateways are expected to be ACTIVE in the ring and all the initial + // store-gateways should unload blocks they don't own anymore. + test.Poll(t, 5*time.Second, float64(expectedBlocksLoaded), func() interface{} { + metrics := allRegistries.BuildMetricFamiliesPerUser() + return metrics.GetSumOfGauges("cortex_bucket_store_blocks_loaded") + }) +} + +func TestStoreGateway_ShouldSupportLoadRingTokensFromFile(t *testing.T) { + tests := map[string]struct { + storedTokens ring.Tokens + expectedNumTokens int + }{ + "stored tokens are less than the configured ones": { + storedTokens: generateSortedTokens(RingNumTokens - 10), + expectedNumTokens: RingNumTokens, + }, + "stored tokens are equal to the configured ones": { + storedTokens: generateSortedTokens(RingNumTokens), + expectedNumTokens: RingNumTokens, + }, + "stored tokens are more then the configured ones": { + storedTokens: generateSortedTokens(RingNumTokens + 10), + expectedNumTokens: RingNumTokens + 10, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + tokensFile, err := ioutil.TempFile(os.TempDir(), "tokens-*") + 
require.NoError(t, err) + defer os.Remove(tokensFile.Name()) //nolint:errcheck + + // Store some tokens to the file. + require.NoError(t, testData.storedTokens.StoreToFile(tokensFile.Name())) + + ctx := context.Background() + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingEnabled = true + gatewayCfg.ShardingRing.TokensFilePath = tokensFile.Name() + + storageCfg := mockStorageConfig(t) + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{}, nil) + + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, ringStore, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), nil) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + assert.False(t, g.ringLifecycler.IsRegistered()) + + require.NoError(t, services.StartAndAwaitRunning(ctx, g)) + assert.True(t, g.ringLifecycler.IsRegistered()) + assert.Equal(t, ring.ACTIVE, g.ringLifecycler.GetState()) + assert.Len(t, g.ringLifecycler.GetTokens(), testData.expectedNumTokens) + assert.Subset(t, g.ringLifecycler.GetTokens(), testData.storedTokens) + }) + } +} + +func TestStoreGateway_SyncOnRingTopologyChanged(t *testing.T) { + registeredAt := time.Now() + + tests := map[string]struct { + setupRing func(desc *ring.Desc) + updateRing func(desc *ring.Desc) + expectedSync bool + }{ + "should sync when an instance is added to the ring": { + setupRing: func(desc *ring.Desc) { + desc.AddIngester("instance-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt) + }, + updateRing: func(desc *ring.Desc) { + desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt) + }, + expectedSync: true, + }, + "should sync when an instance is removed from the ring": { + setupRing: func(desc *ring.Desc) { + desc.AddIngester("instance-1", "127.0.0.1", "", 
ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt) + desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt) + }, + updateRing: func(desc *ring.Desc) { + desc.RemoveIngester("instance-1") + }, + expectedSync: true, + }, + "should sync when an instance changes state": { + setupRing: func(desc *ring.Desc) { + desc.AddIngester("instance-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt) + desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.JOINING, registeredAt) + }, + updateRing: func(desc *ring.Desc) { + instance := desc.Ingesters["instance-2"] + instance.State = ring.ACTIVE + desc.Ingesters["instance-2"] = instance + }, + expectedSync: true, + }, + "should sync when an healthy instance becomes unhealthy": { + setupRing: func(desc *ring.Desc) { + desc.AddIngester("instance-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt) + desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt) + }, + updateRing: func(desc *ring.Desc) { + instance := desc.Ingesters["instance-2"] + instance.Timestamp = time.Now().Add(-time.Hour).Unix() + desc.Ingesters["instance-2"] = instance + }, + expectedSync: true, + }, + "should sync when an unhealthy instance becomes healthy": { + setupRing: func(desc *ring.Desc) { + desc.AddIngester("instance-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt) + + instance := desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt) + instance.Timestamp = time.Now().Add(-time.Hour).Unix() + desc.Ingesters["instance-2"] = instance + }, + updateRing: func(desc *ring.Desc) { + instance := desc.Ingesters["instance-2"] + instance.Timestamp = time.Now().Unix() + desc.Ingesters["instance-2"] = instance + }, + expectedSync: true, + }, + "should NOT sync when an instance updates the heartbeat": { + setupRing: func(desc *ring.Desc) { + desc.AddIngester("instance-1", 
"127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt) + desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt) + }, + updateRing: func(desc *ring.Desc) { + instance := desc.Ingesters["instance-2"] + instance.Timestamp = time.Now().Add(time.Second).Unix() + desc.Ingesters["instance-2"] = instance + }, + expectedSync: false, + }, + "should NOT sync when an instance is auto-forgotten in the ring but was already unhealthy in the previous state": { + setupRing: func(desc *ring.Desc) { + desc.AddIngester("instance-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt) + desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt) + + // Set it already unhealthy. + instance := desc.Ingesters["instance-2"] + instance.Timestamp = time.Now().Add(-time.Hour).Unix() + desc.Ingesters["instance-2"] = instance + }, + updateRing: func(desc *ring.Desc) { + // Remove the unhealthy instance from the ring. + desc.RemoveIngester("instance-2") + }, + expectedSync: false, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + ctx := context.Background() + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingEnabled = true + gatewayCfg.ShardingRing.RingCheckPeriod = 100 * time.Millisecond + + storageCfg := mockStorageConfig(t) + storageCfg.BucketStore.SyncInterval = time.Hour // Do not trigger the periodic sync in this test. + + reg := prometheus.NewPedanticRegistry() + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{}, nil) + + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, ringStore, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), reg) + require.NoError(t, err) + + // Store the initial ring state before starting the gateway. 
+ require.NoError(t, ringStore.CAS(ctx, RingKey, func(in interface{}) (interface{}, bool, error) { + ringDesc := ring.GetOrCreateRingDesc(in) + testData.setupRing(ringDesc) + return ringDesc, true, nil + })) + + require.NoError(t, services.StartAndAwaitRunning(ctx, g)) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + + // Assert on the initial state. + regs := util.NewUserRegistries() + regs.AddUserRegistry("test", reg) + metrics := regs.BuildMetricFamiliesPerUser() + assert.Equal(t, float64(1), metrics.GetSumOfCounters("cortex_storegateway_bucket_sync_total")) + + // Change the ring topology. + require.NoError(t, ringStore.CAS(ctx, RingKey, func(in interface{}) (interface{}, bool, error) { + ringDesc := ring.GetOrCreateRingDesc(in) + testData.updateRing(ringDesc) + return ringDesc, true, nil + })) + + // Assert whether the sync triggered or not. + if testData.expectedSync { + test.Poll(t, time.Second, float64(2), func() interface{} { + metrics := regs.BuildMetricFamiliesPerUser() + return metrics.GetSumOfCounters("cortex_storegateway_bucket_sync_total") + }) + } else { + // Give some time to the store-gateway to trigger the sync (if any). 
+ time.Sleep(250 * time.Millisecond) + + metrics := regs.BuildMetricFamiliesPerUser() + assert.Equal(t, float64(1), metrics.GetSumOfCounters("cortex_storegateway_bucket_sync_total")) + } + }) + } +} + +func TestStoreGateway_RingLifecyclerShouldAutoForgetUnhealthyInstances(t *testing.T) { + const unhealthyInstanceID = "unhealthy-id" + const heartbeatTimeout = time.Minute + + ctx := context.Background() + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingEnabled = true + gatewayCfg.ShardingRing.HeartbeatPeriod = 100 * time.Millisecond + gatewayCfg.ShardingRing.HeartbeatTimeout = heartbeatTimeout + + storageCfg := mockStorageConfig(t) + + ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + bucketClient := &bucket.ClientMock{} + bucketClient.MockIter("", []string{}, nil) + + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, ringStore, defaultLimitsOverrides(t), mockLoggingLevel(), log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, g)) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + + // Add an unhealthy instance to the ring. + require.NoError(t, ringStore.CAS(ctx, RingKey, func(in interface{}) (interface{}, bool, error) { + ringDesc := ring.GetOrCreateRingDesc(in) + + instance := ringDesc.AddIngester(unhealthyInstanceID, "1.1.1.1", "", generateSortedTokens(RingNumTokens), ring.ACTIVE, time.Now()) + instance.Timestamp = time.Now().Add(-(ringAutoForgetUnhealthyPeriods + 1) * heartbeatTimeout).Unix() + ringDesc.Ingesters[unhealthyInstanceID] = instance + + return ringDesc, true, nil + })) + + // Ensure the unhealthy instance is removed from the ring. 
+ test.Poll(t, time.Second, false, func() interface{} { + d, err := ringStore.Get(ctx, RingKey) + if err != nil { + return err + } + + _, ok := ring.GetOrCreateRingDesc(d).Ingesters[unhealthyInstanceID] + return ok + }) +} + +func TestStoreGateway_SeriesQueryingShouldRemoveExternalLabels(t *testing.T) { + ctx := context.Background() + logger := log.NewNopLogger() + userID := "user-1" + + storageDir, err := ioutil.TempDir(os.TempDir(), "") + require.NoError(t, err) + defer os.RemoveAll(storageDir) //nolint:errcheck + + // Generate 2 TSDB blocks with the same exact series (and data points). + numSeries := 2 + now := time.Now() + minT := now.Add(-1*time.Hour).Unix() * 1000 + maxT := now.Unix() * 1000 + step := (maxT - minT) / int64(numSeries) + mockTSDB(t, path.Join(storageDir, userID), numSeries, 0, minT, maxT) + mockTSDB(t, path.Join(storageDir, userID), numSeries, 0, minT, maxT) + + bucketClient, err := filesystem.NewBucketClient(filesystem.Config{Directory: storageDir}) + require.NoError(t, err) + + createBucketIndex(t, bucketClient, userID) + + // Find the created blocks (we expect 2). + var blockIDs []string + require.NoError(t, bucketClient.Iter(ctx, "user-1/", func(key string) error { + if _, ok := block.IsBlockDir(key); ok { + blockIDs = append(blockIDs, strings.TrimSuffix(strings.TrimPrefix(key, userID+"/"), "/")) + } + return nil + })) + require.Len(t, blockIDs, 2) + + // Inject different external labels for each block. 
+ for idx, blockID := range blockIDs { + meta := metadata.Thanos{ + Labels: map[string]string{ + cortex_tsdb.TenantIDExternalLabel: userID, + cortex_tsdb.IngesterIDExternalLabel: fmt.Sprintf("ingester-%d", idx), + cortex_tsdb.ShardIDExternalLabel: fmt.Sprintf("shard-%d", idx), + }, + Source: metadata.TestSource, + } + + _, err := metadata.InjectThanos(logger, filepath.Join(storageDir, userID, blockID), meta, nil) + require.NoError(t, err) + } + + for _, bucketIndexEnabled := range []bool{true, false} { + t.Run(fmt.Sprintf("bucket index enabled = %v", bucketIndexEnabled), func(t *testing.T) { + // Create a store-gateway used to query back the series from the blocks. + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingEnabled = false + storageCfg := mockStorageConfig(t) + storageCfg.BucketStore.BucketIndex.Enabled = bucketIndexEnabled + + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, nil, defaultLimitsOverrides(t), mockLoggingLevel(), logger, nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, g)) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + + // Query back all series. + req := &storepb.SeriesRequest{ + MinTime: minT, + MaxTime: maxT, + Matchers: []storepb.LabelMatcher{ + {Type: storepb.LabelMatcher_RE, Name: "__name__", Value: ".*"}, + }, + } + + srv := newBucketStoreSeriesServer(setUserIDToGRPCContext(ctx, userID)) + err = g.Series(req, srv) + require.NoError(t, err) + assert.Empty(t, srv.Warnings) + assert.Len(t, srv.SeriesSet, numSeries) + + for seriesID := 0; seriesID < numSeries; seriesID++ { + actual := srv.SeriesSet[seriesID] + + // Ensure Cortex external labels have been removed. + assert.Equal(t, []labelpb.ZLabel{{Name: "series_id", Value: strconv.Itoa(seriesID)}}, actual.Labels) + + // Ensure samples have been correctly queried. The Thanos store also deduplicate samples + // in most cases, but it's not strictly required guaranteeing deduplication at this stage. 
+ samples, err := readSamplesFromChunks(actual.Chunks) + require.NoError(t, err) + assert.Equal(t, []sample{ + {ts: minT + (step * int64(seriesID)), value: float64(seriesID)}, + }, samples) + } + }) + } +} + +func TestStoreGateway_SeriesQueryingShouldEnforceMaxChunksPerQueryLimit(t *testing.T) { + const chunksQueried = 10 + + tests := map[string]struct { + limit int + expectedErr error + }{ + "no limit enforced if zero": { + limit: 0, + expectedErr: nil, + }, + "should return NO error if the actual number of queried chunks is <= limit": { + limit: chunksQueried, + expectedErr: nil, + }, + "should return error if the actual number of queried chunks is > limit": { + limit: chunksQueried - 1, + expectedErr: status.Error(http.StatusUnprocessableEntity, fmt.Sprintf("exceeded chunks limit: rpc error: code = Code(422) desc = limit %d violated (got %d)", chunksQueried-1, chunksQueried)), + }, + } + + ctx := context.Background() + logger := log.NewNopLogger() + userID := "user-1" + + storageDir, err := ioutil.TempDir(os.TempDir(), "") + require.NoError(t, err) + defer os.RemoveAll(storageDir) //nolint:errcheck + + // Generate 1 TSDB block with chunksQueried series. Since each mocked series contains only 1 sample, + // it will also only have 1 chunk. + now := time.Now() + minT := now.Add(-1*time.Hour).Unix() * 1000 + maxT := now.Unix() * 1000 + mockTSDB(t, path.Join(storageDir, userID), chunksQueried, 0, minT, maxT) + + bucketClient, err := filesystem.NewBucketClient(filesystem.Config{Directory: storageDir}) + require.NoError(t, err) + + // Prepare the request to query back all series (1 chunk per series in this test). + req := &storepb.SeriesRequest{ + MinTime: minT, + MaxTime: maxT, + Matchers: []storepb.LabelMatcher{ + {Type: storepb.LabelMatcher_RE, Name: "__name__", Value: ".*"}, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Customise the limits. 
+ limits := defaultLimitsConfig() + limits.MaxChunksPerQueryFromStore = testData.limit + overrides, err := validation.NewOverrides(limits, nil) + require.NoError(t, err) + + // Create a store-gateway used to query back the series from the blocks. + gatewayCfg := mockGatewayConfig() + gatewayCfg.ShardingEnabled = false + storageCfg := mockStorageConfig(t) + + g, err := newStoreGateway(gatewayCfg, storageCfg, bucketClient, nil, overrides, mockLoggingLevel(), logger, nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, g)) + defer services.StopAndAwaitTerminated(ctx, g) //nolint:errcheck + + // Query back all the series (1 chunk per series in this test). + srv := newBucketStoreSeriesServer(setUserIDToGRPCContext(ctx, userID)) + err = g.Series(req, srv) + + if testData.expectedErr != nil { + require.Error(t, err) + assert.IsType(t, testData.expectedErr, err) + s1, ok := status.FromError(errors.Cause(err)) + assert.True(t, ok) + s2, ok := status.FromError(errors.Cause(testData.expectedErr)) + assert.True(t, ok) + assert.True(t, strings.Contains(s1.Message(), s2.Message())) + assert.Equal(t, s1.Code(), s2.Code()) + } else { + require.NoError(t, err) + assert.Empty(t, srv.Warnings) + assert.Len(t, srv.SeriesSet, chunksQueried) + } + }) + } +} + +func mockGatewayConfig() Config { + cfg := Config{} + flagext.DefaultValues(&cfg) + + cfg.ShardingRing.InstanceID = "test" + cfg.ShardingRing.InstanceAddr = "127.0.0.1" + cfg.ShardingRing.WaitStabilityMinDuration = 0 + cfg.ShardingRing.WaitStabilityMaxDuration = 0 + + return cfg +} + +func mockStorageConfig(t *testing.T) cortex_tsdb.BlocksStorageConfig { + tmpDir, err := ioutil.TempDir(os.TempDir(), "store-gateway-test-*") + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, os.RemoveAll(tmpDir)) + }) + + cfg := cortex_tsdb.BlocksStorageConfig{} + flagext.DefaultValues(&cfg) + + cfg.BucketStore.ConsistencyDelay = 0 + cfg.BucketStore.SyncDir = tmpDir + + return cfg +} + +// mockTSDB 
create 1+ TSDB blocks storing numSeries of series, each series +// with 1 sample and its timestamp evenly distributed between minT and maxT. +// If numBlocks > 0, then it uses numSeries only to find the distribution of +// samples. +func mockTSDB(t *testing.T, dir string, numSeries, numBlocks int, minT, maxT int64) { + // Create a new TSDB on a temporary directory. The blocks + // will be then snapshotted to the input dir. + tempDir, err := ioutil.TempDir(os.TempDir(), "tsdb") + require.NoError(t, err) + defer os.RemoveAll(tempDir) //nolint:errcheck + + db, err := tsdb.Open(tempDir, nil, nil, &tsdb.Options{ + MinBlockDuration: 2 * time.Hour.Milliseconds(), + MaxBlockDuration: 2 * time.Hour.Milliseconds(), + RetentionDuration: 15 * 24 * time.Hour.Milliseconds(), + }, nil) + require.NoError(t, err) + + db.DisableCompactions() + + step := (maxT - minT) / int64(numSeries) + addSample := func(i int) { + lbls := labels.Labels{labels.Label{Name: "series_id", Value: strconv.Itoa(i)}} + + app := db.Appender(context.Background()) + _, err := app.Append(0, lbls, minT+(step*int64(i)), float64(i)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + require.NoError(t, db.Compact()) + } + if numBlocks > 0 { + i := 0 + // Snapshot adds another block. Hence numBlocks-1. + for len(db.Blocks()) < numBlocks-1 { + addSample(i) + i++ + } + } else { + for i := 0; i < numSeries; i++ { + addSample(i) + } + } + + require.NoError(t, db.Snapshot(dir, true)) + + require.NoError(t, db.Close()) +} + +func generateSortedTokens(numTokens int) ring.Tokens { + tokens := ring.GenerateTokens(numTokens, nil) + + // Ensure generated tokens are sorted. 
+ sort.Slice(tokens, func(i, j int) bool { + return tokens[i] < tokens[j] + }) + + return ring.Tokens(tokens) +} + +func readSamplesFromChunks(rawChunks []storepb.AggrChunk) ([]sample, error) { + var samples []sample + + for _, rawChunk := range rawChunks { + c, err := chunkenc.FromData(chunkenc.EncXOR, rawChunk.Raw.Data) + if err != nil { + return nil, err + } + + it := c.Iterator(nil) + for it.Next() { + if it.Err() != nil { + return nil, it.Err() + } + + ts, v := it.At() + samples = append(samples, sample{ + ts: ts, + value: v, + }) + } + + if it.Err() != nil { + return nil, it.Err() + } + } + + return samples, nil +} + +type sample struct { + ts int64 + value float64 +} + +func defaultLimitsConfig() validation.Limits { + limits := validation.Limits{} + flagext.DefaultValues(&limits) + return limits +} + +func defaultLimitsOverrides(t *testing.T) *validation.Overrides { + overrides, err := validation.NewOverrides(defaultLimitsConfig(), nil) + require.NoError(t, err) + + return overrides +} + +type mockShardingStrategy struct { + mock.Mock +} + +func (m *mockShardingStrategy) FilterUsers(ctx context.Context, userIDs []string) []string { + args := m.Called(ctx, userIDs) + return args.Get(0).([]string) +} + +func (m *mockShardingStrategy) FilterBlocks(ctx context.Context, userID string, metas map[ulid.ULID]*metadata.Meta, loaded map[ulid.ULID]struct{}, synced *extprom.TxGaugeVec) error { + args := m.Called(ctx, userID, metas, loaded, synced) + return args.Error(0) +} + +func createBucketIndex(t *testing.T, bkt objstore.Bucket, userID string) *bucketindex.Index { + updater := bucketindex.NewUpdater(bkt, userID, nil, log.NewNopLogger()) + idx, _, err := updater.UpdateIndex(context.Background(), nil) + require.NoError(t, err) + require.NoError(t, bucketindex.WriteIndex(context.Background(), bkt, userID, nil, idx)) + + return idx +} diff --git a/pkg/storegateway/metadata_fetcher_filters_test.go b/pkg/storegateway/metadata_fetcher_filters_test.go new file mode 100644 
index 0000000000000..7df4d4888822f --- /dev/null +++ b/pkg/storegateway/metadata_fetcher_filters_test.go @@ -0,0 +1,107 @@ +package storegateway + +import ( + "bytes" + "context" + "encoding/json" + "path" + "testing" + "time" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + promtest "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/extprom" + "github.com/thanos-io/thanos/pkg/objstore" + + tsdb_testutil "github.com/grafana/loki/pkg/storage/tsdb/testutil" +) + +func TestIgnoreDeletionMarkFilter_Filter(t *testing.T) { + testIgnoreDeletionMarkFilter(t, false) +} + +func TestIgnoreDeletionMarkFilter_FilterWithBucketIndex(t *testing.T) { + testIgnoreDeletionMarkFilter(t, true) +} + +func testIgnoreDeletionMarkFilter(t *testing.T, bucketIndexEnabled bool) { + const userID = "user-1" + + now := time.Now() + ctx := context.Background() + logger := log.NewNopLogger() + + // Create a bucket backed by filesystem. 
+ bkt, _ := tsdb_testutil.PrepareFilesystemBucket(t) + bkt = bucketindex.BucketWithGlobalMarkers(bkt) + userBkt := bucket.NewUserBucketClient(userID, bkt, nil) + + shouldFetch := &metadata.DeletionMark{ + ID: ulid.MustNew(1, nil), + DeletionTime: now.Add(-15 * time.Hour).Unix(), + Version: 1, + } + + shouldIgnore := &metadata.DeletionMark{ + ID: ulid.MustNew(2, nil), + DeletionTime: now.Add(-60 * time.Hour).Unix(), + Version: 1, + } + + var buf bytes.Buffer + require.NoError(t, json.NewEncoder(&buf).Encode(&shouldFetch)) + require.NoError(t, userBkt.Upload(ctx, path.Join(shouldFetch.ID.String(), metadata.DeletionMarkFilename), &buf)) + require.NoError(t, json.NewEncoder(&buf).Encode(&shouldIgnore)) + require.NoError(t, userBkt.Upload(ctx, path.Join(shouldIgnore.ID.String(), metadata.DeletionMarkFilename), &buf)) + require.NoError(t, userBkt.Upload(ctx, path.Join(ulid.MustNew(3, nil).String(), metadata.DeletionMarkFilename), bytes.NewBufferString("not a valid deletion-mark.json"))) + + // Create the bucket index if required. 
+ var idx *bucketindex.Index + if bucketIndexEnabled { + var err error + + u := bucketindex.NewUpdater(bkt, userID, nil, logger) + idx, _, err = u.UpdateIndex(ctx, nil) + require.NoError(t, err) + require.NoError(t, bucketindex.WriteIndex(ctx, bkt, userID, nil, idx)) + } + + inputMetas := map[ulid.ULID]*metadata.Meta{ + ulid.MustNew(1, nil): {}, + ulid.MustNew(2, nil): {}, + ulid.MustNew(3, nil): {}, + ulid.MustNew(4, nil): {}, + } + + expectedMetas := map[ulid.ULID]*metadata.Meta{ + ulid.MustNew(1, nil): {}, + ulid.MustNew(3, nil): {}, + ulid.MustNew(4, nil): {}, + } + + expectedDeletionMarks := map[ulid.ULID]*metadata.DeletionMark{ + ulid.MustNew(1, nil): shouldFetch, + ulid.MustNew(2, nil): shouldIgnore, + } + + synced := extprom.NewTxGaugeVec(nil, prometheus.GaugeOpts{Name: "synced"}, []string{"state"}) + f := NewIgnoreDeletionMarkFilter(logger, objstore.WithNoopInstr(userBkt), 48*time.Hour, 32) + + if bucketIndexEnabled { + require.NoError(t, f.FilterWithBucketIndex(ctx, inputMetas, idx, synced)) + } else { + require.NoError(t, f.Filter(ctx, inputMetas, synced)) + } + + assert.Equal(t, 1.0, promtest.ToFloat64(synced.WithLabelValues(block.MarkedForDeletionMeta))) + assert.Equal(t, expectedMetas, inputMetas) + assert.Equal(t, expectedDeletionMarks, f.DeletionMarkBlocks()) +} diff --git a/pkg/storegateway/metadata_fetcher_metrics_test.go b/pkg/storegateway/metadata_fetcher_metrics_test.go new file mode 100644 index 0000000000000..ea028cb07cd3f --- /dev/null +++ b/pkg/storegateway/metadata_fetcher_metrics_test.go @@ -0,0 +1,111 @@ +package storegateway + +import ( + "bytes" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" +) + +func TestMetadataFetcherMetrics(t *testing.T) { + mainReg := prometheus.NewPedanticRegistry() + + metrics := NewMetadataFetcherMetrics() + 
mainReg.MustRegister(metrics) + + metrics.AddUserRegistry("user1", populateMetadataFetcherMetrics(3)) + metrics.AddUserRegistry("user2", populateMetadataFetcherMetrics(5)) + metrics.AddUserRegistry("user3", populateMetadataFetcherMetrics(7)) + + //noinspection ALL + err := testutil.GatherAndCompare(mainReg, bytes.NewBufferString(` + # HELP cortex_blocks_meta_sync_duration_seconds Duration of the blocks metadata synchronization in seconds + # TYPE cortex_blocks_meta_sync_duration_seconds histogram + cortex_blocks_meta_sync_duration_seconds_bucket{le="0.01"} 0 + cortex_blocks_meta_sync_duration_seconds_bucket{le="1"} 0 + cortex_blocks_meta_sync_duration_seconds_bucket{le="10"} 3 + cortex_blocks_meta_sync_duration_seconds_bucket{le="100"} 3 + cortex_blocks_meta_sync_duration_seconds_bucket{le="1000"} 3 + cortex_blocks_meta_sync_duration_seconds_bucket{le="+Inf"} 3 + cortex_blocks_meta_sync_duration_seconds_sum 9 + cortex_blocks_meta_sync_duration_seconds_count 3 + + # HELP cortex_blocks_meta_sync_failures_total Total blocks metadata synchronization failures + # TYPE cortex_blocks_meta_sync_failures_total counter + cortex_blocks_meta_sync_failures_total 30 + + # HELP cortex_blocks_meta_syncs_total Total blocks metadata synchronization attempts + # TYPE cortex_blocks_meta_syncs_total counter + cortex_blocks_meta_syncs_total 15 + + # HELP cortex_blocks_meta_sync_consistency_delay_seconds Configured consistency delay in seconds. + # TYPE cortex_blocks_meta_sync_consistency_delay_seconds gauge + cortex_blocks_meta_sync_consistency_delay_seconds 300 + + # HELP cortex_blocks_meta_synced Reflects current state of synced blocks (over all tenants). 
+ # TYPE cortex_blocks_meta_synced gauge + cortex_blocks_meta_synced{state="corrupted-meta-json"} 75 + cortex_blocks_meta_synced{state="loaded"} 90 + cortex_blocks_meta_synced{state="too-fresh"} 105 +`)) + require.NoError(t, err) +} + +func populateMetadataFetcherMetrics(base float64) *prometheus.Registry { + reg := prometheus.NewRegistry() + m := newMetadataFetcherMetricsMock(reg) + + m.syncs.Add(base * 1) + m.syncFailures.Add(base * 2) + m.syncDuration.Observe(3) + m.syncConsistencyDelay.Set(300) + + m.synced.WithLabelValues("corrupted-meta-json").Set(base * 5) + m.synced.WithLabelValues("loaded").Set(base * 6) + m.synced.WithLabelValues("too-fresh").Set(base * 7) + + return reg +} + +type metadataFetcherMetricsMock struct { + syncs prometheus.Counter + syncFailures prometheus.Counter + syncDuration prometheus.Histogram + syncConsistencyDelay prometheus.Gauge + synced *prometheus.GaugeVec +} + +func newMetadataFetcherMetricsMock(reg prometheus.Registerer) *metadataFetcherMetricsMock { + var m metadataFetcherMetricsMock + + m.syncs = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Subsystem: "blocks_meta", + Name: "syncs_total", + Help: "Total blocks metadata synchronization attempts", + }) + m.syncFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Subsystem: "blocks_meta", + Name: "sync_failures_total", + Help: "Total blocks metadata synchronization failures", + }) + m.syncDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Subsystem: "blocks_meta", + Name: "sync_duration_seconds", + Help: "Duration of the blocks metadata synchronization in seconds", + Buckets: []float64{0.01, 1, 10, 100, 1000}, + }) + m.syncConsistencyDelay = promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + Name: "consistency_delay_seconds", + Help: "Configured consistency delay in seconds.", + }) + m.synced = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: "blocks_meta", + Name: "synced", + Help: "Number of block metadata synced", 
+ }, []string{"state"}) + + return &m +} diff --git a/pkg/storegateway/partitioner_test.go b/pkg/storegateway/partitioner_test.go new file mode 100644 index 0000000000000..ab659374973c1 --- /dev/null +++ b/pkg/storegateway/partitioner_test.go @@ -0,0 +1,58 @@ +package storegateway + +import ( + "bytes" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/store" +) + +func TestGapBasedPartitioner_Partition(t *testing.T) { + reg := prometheus.NewPedanticRegistry() + p := newGapBasedPartitioner(10, reg) + + parts := p.Partition(5, func(i int) (uint64, uint64) { + switch i { + case 0: + return 10, 12 + case 1: + return 15, 18 + case 2: + return 22, 27 + case 3: + return 38, 41 + case 4: + return 50, 52 + default: + return 0, 0 + } + }) + + expected := []store.Part{ + {Start: 10, End: 27, ElemRng: [2]int{0, 3}}, + {Start: 38, End: 52, ElemRng: [2]int{3, 5}}, + } + require.Equal(t, expected, parts) + + assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP cortex_bucket_store_partitioner_requested_bytes_total Total size of byte ranges required to fetch from the storage before they are passed to the partitioner. + # TYPE cortex_bucket_store_partitioner_requested_bytes_total counter + cortex_bucket_store_partitioner_requested_bytes_total 15 + + # HELP cortex_bucket_store_partitioner_requested_ranges_total Total number of byte ranges required to fetch from the storage before they are passed to the partitioner. + # TYPE cortex_bucket_store_partitioner_requested_ranges_total counter + cortex_bucket_store_partitioner_requested_ranges_total 5 + + # HELP cortex_bucket_store_partitioner_expanded_bytes_total Total size of byte ranges returned by the partitioner after they've been combined together to reduce the number of bucket API calls. 
+ # TYPE cortex_bucket_store_partitioner_expanded_bytes_total counter + cortex_bucket_store_partitioner_expanded_bytes_total 31 + + # HELP cortex_bucket_store_partitioner_expanded_ranges_total Total number of byte ranges returned by the partitioner after they've been combined together to reduce the number of bucket API calls. + # TYPE cortex_bucket_store_partitioner_expanded_ranges_total counter + cortex_bucket_store_partitioner_expanded_ranges_total 2 + `))) +} diff --git a/pkg/storegateway/sharding_strategy_test.go b/pkg/storegateway/sharding_strategy_test.go new file mode 100644 index 0000000000000..9ba549f62752e --- /dev/null +++ b/pkg/storegateway/sharding_strategy_test.go @@ -0,0 +1,670 @@ +package storegateway + +import ( + "context" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/grafana/dskit/kv/consul" + "github.com/grafana/dskit/ring" + "github.com/grafana/dskit/services" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thanos-io/thanos/pkg/block/metadata" + "github.com/thanos-io/thanos/pkg/extprom" + + cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" +) + +func TestDefaultShardingStrategy(t *testing.T) { + // The following block IDs have been picked to have increasing hash values + // in order to simplify the tests. 
+ block1 := ulid.MustNew(1, nil) // hash: 283204220 + block2 := ulid.MustNew(2, nil) // hash: 444110359 + block3 := ulid.MustNew(5, nil) // hash: 2931974232 + block4 := ulid.MustNew(6, nil) // hash: 3092880371 + numAllBlocks := 4 + + block1Hash := cortex_tsdb.HashBlockID(block1) + block2Hash := cortex_tsdb.HashBlockID(block2) + block3Hash := cortex_tsdb.HashBlockID(block3) + block4Hash := cortex_tsdb.HashBlockID(block4) + + registeredAt := time.Now() + + tests := map[string]struct { + replicationFactor int + zoneAwarenessEnabled bool + setupRing func(*ring.Desc) + expectedBlocks map[string][]ulid.ULID + }{ + "one ACTIVE instance in the ring with replication factor = 1": { + replicationFactor: 1, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{0}, ring.ACTIVE, registeredAt) + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block2, block3, block4}, + "127.0.0.2": {}, + }, + }, + "two ACTIVE instances in the ring with replication factor = 1": { + replicationFactor: 1, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1, block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block3}, + "127.0.0.2": {block2, block4}, + }, + }, + "one ACTIVE instance in the ring with replication factor = 2": { + replicationFactor: 2, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{0}, ring.ACTIVE, registeredAt) + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block2, block3, block4}, + "127.0.0.2": {}, + }, + }, + "two ACTIVE instances in the ring with replication factor = 2": { + replicationFactor: 2, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, 
registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1, block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block2, block3, block4}, + "127.0.0.2": {block1, block2, block3, block4}, + }, + }, + "multiple ACTIVE instances in the ring with replication factor = 2": { + replicationFactor: 2, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-3", "127.0.0.3", "", []uint32{block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block3 /* replicated: */, block2, block4}, + "127.0.0.2": {block2 /* replicated: */, block1}, + "127.0.0.3": {block4 /* replicated: */, block3}, + }, + }, + "multiple ACTIVE instances in the ring with replication factor = 2 and zone-awareness enabled": { + replicationFactor: 2, + zoneAwarenessEnabled: true, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "zone-a", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "zone-a", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-3", "127.0.0.3", "zone-b", []uint32{block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block3, block4}, + "127.0.0.2": {block2}, + "127.0.0.3": {block1, block2, block3, block4}, + }, + }, + "one unhealthy instance in the ring with replication factor = 1": { + replicationFactor: 1, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + + 
r.Ingesters["instance-3"] = ring.InstanceDesc{ + Addr: "127.0.0.3", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block4Hash + 1}, + } + }, + expectedBlocks: map[string][]ulid.ULID{ + // No shard has the blocks of the unhealthy instance. + "127.0.0.1": {block1, block3}, + "127.0.0.2": {block2}, + "127.0.0.3": {}, + }, + }, + "one unhealthy instance in the ring with replication factor = 2": { + replicationFactor: 2, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + + r.Ingesters["instance-3"] = ring.InstanceDesc{ + Addr: "127.0.0.3", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block4Hash + 1}, + } + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block3 /* replicated: */, block2, block4}, + "127.0.0.2": {block2 /* replicated: */, block1}, + "127.0.0.3": {}, + }, + }, + "two unhealthy instances in the ring with replication factor = 2": { + replicationFactor: 2, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1}, ring.ACTIVE, registeredAt) + + r.Ingesters["instance-2"] = ring.InstanceDesc{ + Addr: "127.0.0.2", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block2Hash + 1, block3Hash + 1}, + } + + r.Ingesters["instance-3"] = ring.InstanceDesc{ + Addr: "127.0.0.3", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block4Hash + 1}, + } + }, + expectedBlocks: map[string][]ulid.ULID{ + // There may be some blocks missing depending if there are shared blocks + // between the two unhealthy nodes. 
+ "127.0.0.1": {block1 /* replicated: */, block4}, + "127.0.0.2": {}, + "127.0.0.3": {}, + }, + }, + "two unhealthy instances in the ring with replication factor = 3": { + replicationFactor: 3, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + + r.Ingesters["instance-3"] = ring.InstanceDesc{ + Addr: "127.0.0.3", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block3Hash + 1}, + } + + r.Ingesters["instance-4"] = ring.InstanceDesc{ + Addr: "127.0.0.4", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block4Hash + 1}, + } + }, + expectedBlocks: map[string][]ulid.ULID{ + // There may be some blocks missing depending if there are shared blocks + // between the two unhealthy nodes. + "127.0.0.1": {block1 /* replicated: */, block3, block4}, + "127.0.0.2": {block2 /* replicated: */, block1, block4}, + "127.0.0.3": {}, + "127.0.0.4": {}, + }, + }, + "LEAVING instance in the ring should continue to keep its shard blocks but they should also be replicated to another instance": { + replicationFactor: 1, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-3", "127.0.0.3", "", []uint32{block4Hash + 1}, ring.LEAVING, registeredAt) + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block3 /* replicated: */, block4}, + "127.0.0.2": {block2}, + "127.0.0.3": {block4}, + }, + }, + "JOINING instance in the ring should get its shard blocks and they should not be replicated to another instance": { + replicationFactor: 1, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", 
"127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-3", "127.0.0.3", "", []uint32{block4Hash + 1}, ring.JOINING, registeredAt) + }, + expectedBlocks: map[string][]ulid.ULID{ + "127.0.0.1": {block1, block3}, + "127.0.0.2": {block2}, + "127.0.0.3": {block4}, + }, + }, + } + + for testName, testData := range tests { + testName := testName + testData := testData + + t.Run(testName, func(t *testing.T) { + t.Parallel() + + ctx := context.Background() + store, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + // Initialize the ring state. + require.NoError(t, store.CAS(ctx, "test", func(in interface{}) (interface{}, bool, error) { + d := ring.NewDesc() + testData.setupRing(d) + return d, true, nil + })) + + cfg := ring.Config{ + ReplicationFactor: testData.replicationFactor, + HeartbeatTimeout: time.Minute, + ZoneAwarenessEnabled: testData.zoneAwarenessEnabled, + } + + r, err := ring.NewWithStoreClientAndStrategy(cfg, "test", "test", store, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), nil, nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, r)) + defer services.StopAndAwaitTerminated(ctx, r) //nolint:errcheck + + // Wait until the ring client has synced. 
+ require.NoError(t, ring.WaitInstanceState(ctx, r, "instance-1", ring.ACTIVE)) + + for instanceAddr, expectedBlocks := range testData.expectedBlocks { + filter := NewDefaultShardingStrategy(r, instanceAddr, log.NewNopLogger()) + synced := extprom.NewTxGaugeVec(nil, prometheus.GaugeOpts{}, []string{"state"}) + synced.WithLabelValues(shardExcludedMeta).Set(0) + + metas := map[ulid.ULID]*metadata.Meta{ + block1: {}, + block2: {}, + block3: {}, + block4: {}, + } + + err = filter.FilterBlocks(ctx, "user-1", metas, map[ulid.ULID]struct{}{}, synced) + require.NoError(t, err) + + var actualBlocks []ulid.ULID + for id := range metas { + actualBlocks = append(actualBlocks, id) + } + + assert.ElementsMatch(t, expectedBlocks, actualBlocks) + + // Assert on the metric used to keep track of the blocks filtered out. + synced.Submit() + assert.Equal(t, float64(numAllBlocks-len(testData.expectedBlocks[instanceAddr])), testutil.ToFloat64(synced)) + } + }) + } +} + +func TestShuffleShardingStrategy(t *testing.T) { + // The following block IDs have been picked to have increasing hash values + // in order to simplify the tests. 
+ block1 := ulid.MustNew(1, nil) // hash: 283204220 + block2 := ulid.MustNew(2, nil) // hash: 444110359 + block3 := ulid.MustNew(5, nil) // hash: 2931974232 + block4 := ulid.MustNew(6, nil) // hash: 3092880371 + numAllBlocks := 4 + + block1Hash := cortex_tsdb.HashBlockID(block1) + block2Hash := cortex_tsdb.HashBlockID(block2) + block3Hash := cortex_tsdb.HashBlockID(block3) + block4Hash := cortex_tsdb.HashBlockID(block4) + + userID := "user-A" + registeredAt := time.Now() + + type usersExpectation struct { + instanceID string + instanceAddr string + users []string + } + + type blocksExpectation struct { + instanceID string + instanceAddr string + blocks []ulid.ULID + } + + tests := map[string]struct { + replicationFactor int + limits ShardingLimits + setupRing func(*ring.Desc) + expectedUsers []usersExpectation + expectedBlocks []blocksExpectation + }{ + "one ACTIVE instance in the ring with RF = 1 and SS = 1": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 1}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{0}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: nil}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, block3, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{}}, + }, + }, + "one ACTIVE instance in the ring with RF = 2 and SS = 1 (should still sync blocks on the only available instance)": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 1}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{0}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", 
users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: nil}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, block3, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{}}, + }, + }, + "one ACTIVE instance in the ring with RF = 2 and SS = 2 (should still sync blocks on the only available instance)": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 2}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{0}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: nil}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, block3, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{}}, + }, + }, + "two ACTIVE instances in the ring with RF = 1 and SS = 1 (should sync blocks on 1 instance because of the shard size)": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 1}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1, block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: nil}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, block3, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: 
[]ulid.ULID{}}, + }, + }, + "two ACTIVE instances in the ring with RF = 1 and SS = 2 (should sync blocks on 2 instances because of the shard size)": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 2}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1, block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block3}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{block2, block4}}, + }, + }, + "two ACTIVE instances in the ring with RF = 2 and SS = 1 (should sync blocks on 1 instance because of the shard size)": { + replicationFactor: 2, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 1}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1, block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: nil}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, block3, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{}}, + }, + }, + "two ACTIVE instances in the ring with RF = 2 and SS = 2 (should sync all blocks on 2 instances)": { + replicationFactor: 2, + 
limits: &shardingLimitsMock{storeGatewayTenantShardSize: 2}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1, block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, block3, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, + }, + "multiple ACTIVE instances in the ring with RF = 2 and SS = 3": { + replicationFactor: 2, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 3}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-3", "127.0.0.3", "", []uint32{block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: []string{userID}}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block3 /* replicated: */, block2, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{block2 /* replicated: */, block1}}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", blocks: []ulid.ULID{block4 /* replicated: */, block3}}, + 
}, + }, + "one unhealthy instance in the ring with RF = 1 and SS = 3": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 3}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + + r.Ingesters["instance-3"] = ring.InstanceDesc{ + Addr: "127.0.0.3", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block4Hash + 1}, + } + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: []string{userID}}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + // No shard has the blocks of the unhealthy instance. + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block3}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{block2}}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", blocks: []ulid.ULID{}}, + }, + }, + "one unhealthy instance in the ring with RF = 2 and SS = 3": { + replicationFactor: 2, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 3}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + + r.Ingesters["instance-3"] = ring.InstanceDesc{ + Addr: "127.0.0.3", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block4Hash + 1}, + } + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", 
instanceAddr: "127.0.0.2", users: []string{userID}}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block3 /* replicated: */, block2, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{block2 /* replicated: */, block1}}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", blocks: []ulid.ULID{}}, + }, + }, + "one unhealthy instance in the ring with RF = 2 and SS = 2": { + replicationFactor: 2, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 2}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block4Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + + r.Ingesters["instance-3"] = ring.InstanceDesc{ + Addr: "127.0.0.3", + Timestamp: time.Now().Add(-time.Hour).Unix(), + State: ring.ACTIVE, + Tokens: []uint32{block3Hash + 1}, + } + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: nil}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, block3, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{ /* no blocks because not belonging to the shard */ }}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", blocks: []ulid.ULID{ /* no blocks because unhealthy */ }}, + }, + }, + "LEAVING instance in the ring should continue to keep its shard blocks but they should also be replicated to another instance": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 2}, + 
setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-3", "127.0.0.3", "", []uint32{block4Hash + 1}, ring.LEAVING, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: nil}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, block3 /* replicated: */, block4}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{ /* no blocks because not belonging to the shard */ }}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", blocks: []ulid.ULID{block4}}, + }, + }, + "JOINING instance in the ring should get its shard blocks and they should not be replicated to another instance": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 2}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-3", "127.0.0.3", "", []uint32{block4Hash + 1}, ring.JOINING, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: nil}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block2, 
block3}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{ /* no blocks because not belonging to the shard */ }}, + {instanceID: "instance-3", instanceAddr: "127.0.0.3", blocks: []ulid.ULID{block4}}, + }, + }, + "SS = 0 disables shuffle sharding": { + replicationFactor: 1, + limits: &shardingLimitsMock{storeGatewayTenantShardSize: 0}, + setupRing: func(r *ring.Desc) { + r.AddIngester("instance-1", "127.0.0.1", "", []uint32{block1Hash + 1, block3Hash + 1}, ring.ACTIVE, registeredAt) + r.AddIngester("instance-2", "127.0.0.2", "", []uint32{block2Hash + 1, block4Hash + 1}, ring.ACTIVE, registeredAt) + }, + expectedUsers: []usersExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", users: []string{userID}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", users: []string{userID}}, + }, + expectedBlocks: []blocksExpectation{ + {instanceID: "instance-1", instanceAddr: "127.0.0.1", blocks: []ulid.ULID{block1, block3}}, + {instanceID: "instance-2", instanceAddr: "127.0.0.2", blocks: []ulid.ULID{block2, block4}}, + }, + }, + } + + for testName, testData := range tests { + testName := testName + testData := testData + + t.Run(testName, func(t *testing.T) { + t.Parallel() + + ctx := context.Background() + store, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + // Initialize the ring state. 
+ require.NoError(t, store.CAS(ctx, "test", func(in interface{}) (interface{}, bool, error) { + d := ring.NewDesc() + testData.setupRing(d) + return d, true, nil + })) + + cfg := ring.Config{ + ReplicationFactor: testData.replicationFactor, + HeartbeatTimeout: time.Minute, + SubringCacheDisabled: true, + } + + r, err := ring.NewWithStoreClientAndStrategy(cfg, "test", "test", store, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), nil, nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, r)) + defer services.StopAndAwaitTerminated(ctx, r) //nolint:errcheck + + // Wait until the ring client has synced. + require.NoError(t, ring.WaitInstanceState(ctx, r, "instance-1", ring.ACTIVE)) + + // Assert on filter users. + for _, expected := range testData.expectedUsers { + filter := NewShuffleShardingStrategy(r, expected.instanceID, expected.instanceAddr, testData.limits, log.NewNopLogger()) + assert.Equal(t, expected.users, filter.FilterUsers(ctx, []string{userID})) + } + + // Assert on filter blocks. + for _, expected := range testData.expectedBlocks { + filter := NewShuffleShardingStrategy(r, expected.instanceID, expected.instanceAddr, testData.limits, log.NewNopLogger()) + synced := extprom.NewTxGaugeVec(nil, prometheus.GaugeOpts{}, []string{"state"}) + synced.WithLabelValues(shardExcludedMeta).Set(0) + + metas := map[ulid.ULID]*metadata.Meta{ + block1: {}, + block2: {}, + block3: {}, + block4: {}, + } + + err = filter.FilterBlocks(ctx, userID, metas, map[ulid.ULID]struct{}{}, synced) + require.NoError(t, err) + + var actualBlocks []ulid.ULID + for id := range metas { + actualBlocks = append(actualBlocks, id) + } + + assert.ElementsMatch(t, expected.blocks, actualBlocks) + + // Assert on the metric used to keep track of the blocks filtered out. 
				// Assert on the metric used to keep track of the blocks filtered out.
				synced.Submit()
				assert.Equal(t, float64(numAllBlocks-len(expected.blocks)), testutil.ToFloat64(synced))
			}
		})
	}
}

// shardingLimitsMock is a ShardingLimits implementation that returns a fixed,
// per-test store-gateway shard size for every tenant.
type shardingLimitsMock struct {
	// storeGatewayTenantShardSize is the shard size returned for all tenants.
	storeGatewayTenantShardSize int
}

// StoreGatewayTenantShardSize returns the configured shard size regardless of
// the tenant ID passed in.
func (m *shardingLimitsMock) StoreGatewayTenantShardSize(_ string) int {
	return m.storeGatewayTenantShardSize
}