From 70082c936da8b08fdbd70255bef820c960941383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Mon, 19 Feb 2024 19:13:52 +0200 Subject: [PATCH] cache: attach object storage hash to iter key (#6880) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attach object storage hash to the iter key so that it would be possible to reuse the same cache storage e.g. Redis for different buckets. Without this, the results are funny to say the least if you accidentally attempt to do that. Thus, let's add the hash to reduce the possibility of an accident for our users. Signed-off-by: Giedrius Statkevičius --- pkg/cache/caching_bucket_config.go | 8 ++++--- pkg/store/cache/cachekey/cachekey.go | 26 +++++++++++++++++------ pkg/store/cache/cachekey/cachekey_test.go | 26 ++++++++++++++++++++--- pkg/store/cache/caching_bucket.go | 2 +- pkg/store/cache/caching_bucket_factory.go | 6 +++++- pkg/store/cache/caching_bucket_test.go | 2 +- 6 files changed, 55 insertions(+), 15 deletions(-) diff --git a/pkg/cache/caching_bucket_config.go b/pkg/cache/caching_bucket_config.go index e17b2f27b1c..0d60a00f751 100644 --- a/pkg/cache/caching_bucket_config.go +++ b/pkg/cache/caching_bucket_config.go @@ -64,8 +64,9 @@ type OperationConfig struct { // Operation-specific configs. type IterConfig struct { OperationConfig - TTL time.Duration - Codec IterCodec + TTL time.Duration + Codec IterCodec + ConfigHash string } type ExistsConfig struct { @@ -105,11 +106,12 @@ func newOperationConfig(cache Cache, matcher func(string) bool) OperationConfig } // CacheIter configures caching of "Iter" operation for matching directories. -func (cfg *CachingBucketConfig) CacheIter(configName string, cache Cache, matcher func(string) bool, ttl time.Duration, codec IterCodec) { +func (cfg *CachingBucketConfig) CacheIter(configName string, cache Cache, matcher func(string) bool, ttl time.Duration, codec IterCodec, configHash string) { cfg.iter[configName] = &IterConfig{ OperationConfig: newOperationConfig(cache, matcher), TTL: ttl, Codec: codec, + ConfigHash: configHash, } } diff --git a/pkg/store/cache/cachekey/cachekey.go b/pkg/store/cache/cachekey/cachekey.go index 0393dbdb028..6128b67c788 100644 --- a/pkg/store/cache/cachekey/cachekey.go +++ b/pkg/store/cache/cachekey/cachekey.go @@ -29,15 +29,21 @@ const ( ) type BucketCacheKey struct { - Verb VerbType - Name string - Start int64 - End int64 + Verb VerbType + Name string + Start int64 + End int64 + ObjectStorageConfigHash string } // String returns the string representation of BucketCacheKey. func (ck BucketCacheKey) String() string { if ck.Start == 0 && ck.End == 0 { + // Let's add object storage configuration hash to the iter verbs + // so that it would be possible to re-use the same cache storage. + if ck.Verb == IterVerb || ck.Verb == IterRecursiveVerb { + return string(ck.Verb) + ":" + ck.Name + ":" + ck.ObjectStorageConfigHash + } return string(ck.Verb) + ":" + ck.Name } @@ -72,7 +78,8 @@ func ParseBucketCacheKey(key string) (BucketCacheKey, error) { return BucketCacheKey{}, ErrInvalidBucketCacheKeyVerb } - if verb == SubrangeVerb { + switch verb { + case SubrangeVerb: if len(slice) != 4 { return BucketCacheKey{}, ErrInvalidBucketCacheKeyFormat } @@ -89,7 +96,14 @@ func ParseBucketCacheKey(key string) (BucketCacheKey, error) { ck.Start = start ck.End = end - } else { + case IterRecursiveVerb, IterVerb: + if len(slice) == 3 { + ck.ObjectStorageConfigHash = slice[2] + } + if len(slice) > 3 { + return BucketCacheKey{}, ErrInvalidBucketCacheKeyFormat + } + default: if len(slice) != 2 { return BucketCacheKey{}, ErrInvalidBucketCacheKeyFormat } diff --git a/pkg/store/cache/cachekey/cachekey_test.go b/pkg/store/cache/cachekey/cachekey_test.go index a63287d13a0..1757bfaee54 100644 --- a/pkg/store/cache/cachekey/cachekey_test.go +++ b/pkg/store/cache/cachekey/cachekey_test.go @@ -71,6 +71,24 @@ func TestParseBucketCacheKey(t *testing.T) { expected: BucketCacheKey{}, expectedErr: ErrInvalidBucketCacheKeyFormat, }, + // Iter could have object storage hash attached to it. + { + key: "iter::asdasdsa", + expected: BucketCacheKey{ + Verb: IterVerb, + Name: "", + ObjectStorageConfigHash: "asdasdsa", + }, + }, + // Iter recursive could have object storage hash attached to it. + { + key: "iter-recursive:foo/:asdasdsa", + expected: BucketCacheKey{ + Verb: IterRecursiveVerb, + Name: "foo/", + ObjectStorageConfigHash: "asdasdsa", + }, + }, // Key must always have a name. { key: "iter", @@ -116,8 +134,10 @@ func TestParseBucketCacheKey(t *testing.T) { } for _, tc := range testcases { - res, err := ParseBucketCacheKey(tc.key) - testutil.Equals(t, tc.expectedErr, err) - testutil.Equals(t, tc.expected, res) + t.Run(tc.key, func(t *testing.T) { + res, err := ParseBucketCacheKey(tc.key) + testutil.Equals(t, tc.expectedErr, err) + testutil.Equals(t, tc.expected, res) + }) } } diff --git a/pkg/store/cache/caching_bucket.go b/pkg/store/cache/caching_bucket.go index 796036a9a29..11faa03d6a6 100644 --- a/pkg/store/cache/caching_bucket.go +++ b/pkg/store/cache/caching_bucket.go @@ -133,7 +133,7 @@ func (cb *CachingBucket) Iter(ctx context.Context, dir string, f func(string) er cb.operationRequests.WithLabelValues(objstore.OpIter, cfgName).Inc() - iterVerb := cachekey.BucketCacheKey{Verb: cachekey.IterVerb, Name: dir} + iterVerb := cachekey.BucketCacheKey{Verb: cachekey.IterVerb, Name: dir, ObjectStorageConfigHash: cfg.ConfigHash} opts := objstore.ApplyIterOptions(options...) if opts.Recursive { iterVerb.Verb = cachekey.IterRecursiveVerb diff --git a/pkg/store/cache/caching_bucket_factory.go b/pkg/store/cache/caching_bucket_factory.go index 28bbf79dcf9..18abc90af39 100644 --- a/pkg/store/cache/caching_bucket_factory.go +++ b/pkg/store/cache/caching_bucket_factory.go @@ -4,10 +4,12 @@ package storecache import ( + "fmt" "regexp" "strings" "time" + "github.com/cespare/xxhash/v2" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/pkg/errors" @@ -82,6 +84,8 @@ func NewCachingBucketFromYaml(yamlContent []byte, bucket objstore.Bucket, logger return nil, errors.Wrap(err, "parsing config YAML file") } + cfgHash := string(fmt.Sprintf("%d", xxhash.Sum64(yamlContent))) + backendConfig, err := yaml.Marshal(config.BackendConfig) if err != nil { return nil, errors.Wrap(err, "marshal content of cache backend configuration") @@ -97,7 +101,7 @@ func NewCachingBucketFromYaml(yamlContent []byte, bucket objstore.Bucket, logger cfg.CacheGet("meta.jsons", nil, isMetaFile, int(config.MetafileMaxSize), config.MetafileContentTTL, config.MetafileExistsTTL, config.MetafileDoesntExistTTL) // Cache Iter requests for root. - cfg.CacheIter("blocks-iter", nil, isBlocksRootDir, config.BlocksIterTTL, JSONIterCodec{}) + cfg.CacheIter("blocks-iter", nil, isBlocksRootDir, config.BlocksIterTTL, JSONIterCodec{}, cfgHash) switch strings.ToUpper(string(config.Type)) { case string(MemcachedBucketCacheProvider): diff --git a/pkg/store/cache/caching_bucket_test.go b/pkg/store/cache/caching_bucket_test.go index 8644b84cd85..2d23289cb4a 100644 --- a/pkg/store/cache/caching_bucket_test.go +++ b/pkg/store/cache/caching_bucket_test.go @@ -400,7 +400,7 @@ func TestCachedIter(t *testing.T) { const cfgName = "dirs" cfg := thanoscache.NewCachingBucketConfig() - cfg.CacheIter(cfgName, cache, func(string) bool { return true }, 5*time.Minute, JSONIterCodec{}) + cfg.CacheIter(cfgName, cache, func(string) bool { return true }, 5*time.Minute, JSONIterCodec{}, "") cb, err := NewCachingBucket(inmem, cfg, nil, nil) testutil.Ok(t, err)