From 39c958eb027c6610bc94e6579deda44450a3fb71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Serv=C3=A9n=20Mar=C3=ADn?= Date: Thu, 31 Oct 2019 17:51:39 +0100 Subject: [PATCH] cmd/thanos/compact: add bucket UI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit enhances the compact component so that it runs the bucket UI whenever the --wait flag is also passed. In order to reduce the overhead of running the UI in addition to the compactor, this commit also refactors the compactor and bucket commands a bit in order to re-use a single meta fetcher. Signed-off-by: Lucas Servén Marín --- CHANGELOG.md | 1 + cmd/thanos/bucket.go | 47 +++++------ cmd/thanos/compact.go | 31 ++++++- docs/components/compact.md | 166 ++++++++++++++++++++++--------------- 4 files changed, 151 insertions(+), 94 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1185e4f3d4..9098f88925 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#2265](https://github.com/thanos-io/thanos/pull/2265) Compactor: Add `--wait-interval` to specify compaction wait interval between consecutive compact runs when `--wait` enabled. - [#2250](https://github.com/thanos-io/thanos/pull/2250) Compactor: Enable vertical compaction for offline deduplication (Experimental). Uses `--deduplication.replica-label` flag to specify the replica label to deduplicate on (Hidden). Please note that this uses a NAIVE algorithm for merging (no smart replica deduplication, just chaining samples together). This works well for deduplication of blocks with **precisely the same samples** like produced by Receiver replication. We plan to add a smarter algorithm in the following weeks. +- [#1714](https://github.com/thanos-io/thanos/pull/1714) Compactor: Run the bucket web UI in the compact component when it is run as a long-lived process (`--wait`). 
### Changed diff --git a/cmd/thanos/bucket.go b/cmd/thanos/bucket.go index 2f321ef9ac..1b19fece34 100644 --- a/cmd/thanos/bucket.go +++ b/cmd/thanos/bucket.go @@ -328,8 +328,6 @@ func registerBucketWeb(m map[string]setupFunc, root *kingpin.CmdClause, name str label := cmd.Flag("label", "Prometheus label to use as timeline title").String() m[name+" web"] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - ctx, cancel := context.WithCancel(context.Background()) - comp := component.Bucket httpProbe := prober.NewHTTP() statusProber := prober.Combine( @@ -365,10 +363,26 @@ func registerBucketWeb(m map[string]setupFunc, root *kingpin.CmdClause, name str level.Warn(logger).Log("msg", "Refresh interval should be at least 2 times the timeout") } + confContentYaml, err := objStoreConfig.Content() + if err != nil { + return err + } + + bkt, err := client.NewBucket(logger, confContentYaml, reg, component.Bucket.String()) + if err != nil { + return errors.Wrap(err, "bucket client") + } + + fetcher, err := block.NewMetaFetcher(logger, fetcherConcurrency, bkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg), nil) + if err != nil { + return err + } + + ctx, cancel := context.WithCancel(context.Background()) g.Add(func() error { statusProber.Ready() - - return refresh(ctx, logger, bucketUI, *interval, *timeout, name, reg, objStoreConfig) + defer runutil.CloseWithLogOnErr(logger, bkt, "bucket client") + return refresh(ctx, logger, bucketUI, *interval, *timeout, fetcher) }, func(error) { cancel() }) @@ -432,7 +446,6 @@ func registerBucketReplicate(m map[string]setupFunc, root *kingpin.CmdClause, na } func registerBucketDownsample(m map[string]setupFunc, root *kingpin.CmdClause, name string, objStoreConfig *extflag.PathOrContent) { - comp := component.Downsample cmd := root.Command(comp.String(), "continuously downsamples blocks in an object store bucket") @@ -446,30 +459,14 @@ func 
registerBucketDownsample(m map[string]setupFunc, root *kingpin.CmdClause, n } } -// refresh metadata from remote storage periodically and update UI. -func refresh(ctx context.Context, logger log.Logger, bucketUI *ui.Bucket, duration time.Duration, timeout time.Duration, name string, reg *prometheus.Registry, objStoreConfig *extflag.PathOrContent) error { - confContentYaml, err := objStoreConfig.Content() - if err != nil { - return err - } - - bkt, err := client.NewBucket(logger, confContentYaml, reg, name) - if err != nil { - return errors.Wrap(err, "bucket client") - } - - fetcher, err := block.NewMetaFetcher(logger, fetcherConcurrency, bkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg), nil) - if err != nil { - return err - } - - defer runutil.CloseWithLogOnErr(logger, bkt, "bucket client") +// refresh metadata from remote storage periodically and update the UI. +func refresh(ctx context.Context, logger log.Logger, bucketUI *ui.Bucket, duration time.Duration, timeout time.Duration, fetcher *block.MetaFetcher) error { return runutil.Repeat(duration, ctx.Done(), func() error { return runutil.RetryWithLog(logger, time.Minute, ctx.Done(), func() error { iterCtx, iterCancel := context.WithTimeout(ctx, timeout) defer iterCancel() - blocks, err := download(iterCtx, logger, bkt, fetcher) + blocks, err := download(iterCtx, logger, fetcher) if err != nil { bucketUI.Set("[]", err) return err @@ -486,7 +483,7 @@ func refresh(ctx context.Context, logger log.Logger, bucketUI *ui.Bucket, durati }) } -func download(ctx context.Context, logger log.Logger, bkt objstore.Bucket, fetcher *block.MetaFetcher) ([]metadata.Meta, error) { +func download(ctx context.Context, logger log.Logger, fetcher *block.MetaFetcher) ([]metadata.Meta, error) { level.Info(logger).Log("msg", "synchronizing block metadata") metas, _, err := fetcher.Fetch(ctx) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 6fe4ddbd18..704b387a77 100644 --- a/cmd/thanos/compact.go +++ 
b/cmd/thanos/compact.go @@ -20,6 +20,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/common/route" "github.com/prometheus/prometheus/tsdb" "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/indexheader" @@ -29,11 +30,13 @@ import ( "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/extflag" "github.com/thanos-io/thanos/pkg/extprom" + extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http" "github.com/thanos-io/thanos/pkg/objstore" "github.com/thanos-io/thanos/pkg/objstore/client" "github.com/thanos-io/thanos/pkg/prober" "github.com/thanos-io/thanos/pkg/runutil" httpserver "github.com/thanos-io/thanos/pkg/server/http" + "github.com/thanos-io/thanos/pkg/ui" "gopkg.in/alecthomas/kingpin.v2" ) @@ -107,7 +110,7 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application) { wait := cmd.Flag("wait", "Do not exit after all compactions have been processed and wait for new work."). Short('w').Bool() - waitInterval := cmd.Flag("wait-interval", "Wait interval between consecutive compaction runs. Only works when --wait flag specified."). + waitInterval := cmd.Flag("wait-interval", "Wait interval between consecutive compaction runs and bucket refreshes. Only works when --wait flag specified."). Default("5m").Duration() generateMissingIndexCacheFiles := cmd.Flag("index.generate-missing-cache-file", "If enabled, on startup compactor runs an on-off job that scans all the blocks to find all blocks with missing index cache file. It generates those if needed and upload."). @@ -141,6 +144,15 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application) { selectorRelabelConf := regSelectorRelabelFlags(cmd) + webExternalPrefix := cmd.Flag("web.external-prefix", "Static prefix for all HTML links and redirect URLs in the bucket web UI interface. 
Actual endpoints are still served on / or the web.route-prefix. This allows thanos bucket web UI to be served behind a reverse proxy that strips a URL sub-path.").Default("").String() + webPrefixHeaderName := cmd.Flag("web.prefix-header", "Name of HTTP request header used for dynamic prefixing of UI links and redirects. This option is ignored if web.external-prefix argument is set. Security risk: enable this option only if a reverse proxy in front of thanos is resetting the header. The --web.prefix-header=X-Forwarded-Prefix option can be useful, for example, if Thanos UI is served via Traefik reverse proxy with PathPrefixStrip option enabled, which sends the stripped prefix value in X-Forwarded-Prefix header. This allows thanos UI to be served on a sub-path.").Default("").String() + flagsMap := map[string]string{ + "web.external-prefix": *webExternalPrefix, + "web.prefix-header": *webPrefixHeaderName, + } + + label := cmd.Flag("bucket-web-label", "Prometheus label to use as timeline title in the bucket web UI").String() + m[component.Compact.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error { return runCompact(g, logger, reg, *httpAddr, @@ -166,6 +178,8 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application) { *dedupReplicaLabels, selectorRelabelConf, *waitInterval, + *label, + flagsMap, ) } } @@ -193,6 +207,8 @@ func runCompact( dedupReplicaLabels []string, selectorRelabelConf *extflag.PathOrContent, waitInterval time.Duration, + label string, + flagsMap map[string]string, ) error { halted := promauto.With(reg).NewGauge(prometheus.GaugeOpts{ Name: "thanos_compactor_halted", @@ -442,6 +458,19 @@ func runCompact( cancel() }) + if wait { + router := route.New() + bucketUI := ui.NewBucketUI(logger, label, flagsMap) + bucketUI.Register(router, extpromhttp.NewInstrumentationMiddleware(reg)) + srv.Handle("/", router) + + g.Add(func() error { + return refresh(ctx, 
logger, bucketUI, waitInterval, time.Minute, metaFetcher) + }, func(error) { + cancel() + }) + } + level.Info(logger).Log("msg", "starting compact node") statusProber.Ready() return nil diff --git a/docs/components/compact.md b/docs/components/compact.md index bff5f29931..2fd3d80883 100644 --- a/docs/components/compact.md +++ b/docs/components/compact.md @@ -82,83 +82,113 @@ usage: thanos compact [] continuously compacts blocks in an object store bucket Flags: - -h, --help Show context-sensitive help (also try --help-long - and --help-man). - --version Show application version. - --log.level=info Log filtering level. - --log.format=logfmt Log format to use. Possible options: logfmt or - json. + -h, --help Show context-sensitive help (also try + --help-long and --help-man). + --version Show application version. + --log.level=info Log filtering level. + --log.format=logfmt Log format to use. Possible options: logfmt or + json. --tracing.config-file= - Path to YAML file with tracing configuration. See - format details: - https://thanos.io/tracing.md/#configuration + Path to YAML file with tracing configuration. + See format details: + https://thanos.io/tracing.md/#configuration --tracing.config= - Alternative to 'tracing.config-file' flag (lower - priority). Content of YAML file with tracing - configuration. See format details: - https://thanos.io/tracing.md/#configuration + Alternative to 'tracing.config-file' flag (lower + priority). Content of YAML file with tracing + configuration. See format details: + https://thanos.io/tracing.md/#configuration --http-address="0.0.0.0:10902" - Listen host:port for HTTP endpoints. - --http-grace-period=2m Time to wait after an interrupt received for HTTP - Server. - --data-dir="./data" Data directory in which to cache blocks and - process compactions. + Listen host:port for HTTP endpoints. + --http-grace-period=2m Time to wait after an interrupt received for + HTTP Server. 
+ --data-dir="./data" Data directory in which to cache blocks and + process compactions. --objstore.config-file= - Path to YAML file that contains object store - configuration. See format details: - https://thanos.io/storage.md/#configuration + Path to YAML file that contains object store + configuration. See format details: + https://thanos.io/storage.md/#configuration --objstore.config= - Alternative to 'objstore.config-file' flag (lower - priority). Content of YAML file that contains - object store configuration. See format details: - https://thanos.io/storage.md/#configuration - --consistency-delay=30m Minimum age of fresh (non-compacted) blocks - before they are being processed. Malformed blocks - older than the maximum of consistency-delay and - 48h0m0s will be removed. + Alternative to 'objstore.config-file' flag + (lower priority). Content of YAML file that + contains object store configuration. See format + details: + https://thanos.io/storage.md/#configuration + --consistency-delay=30m Minimum age of fresh (non-compacted) blocks + before they are being processed. Malformed + blocks older than the maximum of + consistency-delay and 48h0m0s will be removed. --retention.resolution-raw=0d - How long to retain raw samples in bucket. Setting - this to 0d will retain samples of this resolution - forever + How long to retain raw samples in bucket. + Setting this to 0d will retain samples of this + resolution forever --retention.resolution-5m=0d - How long to retain samples of resolution 1 (5 - minutes) in bucket. Setting this to 0d will - retain samples of this resolution forever + How long to retain samples of resolution 1 (5 + minutes) in bucket. Setting this to 0d will + retain samples of this resolution forever --retention.resolution-1h=0d - How long to retain samples of resolution 2 (1 - hour) in bucket. 
Setting this to 0d will retain - samples of this resolution forever - -w, --wait Do not exit after all compactions have been - processed and wait for new work. - --wait-interval=5m Wait interval between consecutive compaction - runs. Only works when --wait flag specified. - --downsampling.disable Disables downsampling. This is not recommended as - querying long time ranges without non-downsampled - data is not efficient and useful e.g it is not - possible to render all samples for a human eye - anyway + How long to retain samples of resolution 2 (1 + hour) in bucket. Setting this to 0d will retain + samples of this resolution forever + -w, --wait Do not exit after all compactions have been + processed and wait for new work. + --wait-interval=5m Wait interval between consecutive compaction + runs and bucket refreshes. Only works when + --wait flag specified. + --downsampling.disable Disables downsampling. This is not recommended + as querying long time ranges without + non-downsampled data is not efficient and useful + e.g it is not possible to render all samples for + a human eye anyway --block-sync-concurrency=20 - Number of goroutines to use when syncing block - metadata from object storage. - --compact.concurrency=1 Number of goroutines to use when compacting - groups. + Number of goroutines to use when syncing block + metadata from object storage. + --compact.concurrency=1 Number of goroutines to use when compacting + groups. + --delete-delay=48h Time before a block marked for deletion is + deleted from bucket. If delete-delay is non + zero, blocks will be marked for deletion and + compactor component will delete blocks marked + for deletion from the bucket. If delete-delay is + 0, blocks will be deleted straight away. Note + that deleting blocks immediately can cause query + failures, if store gateway still has the block + loaded, or compactor is ignoring the deletion + because it's compacting the block at the same + time. 
--selector.relabel-config-file= - Path to YAML file that contains relabeling - configuration that allows selecting blocks. It - follows native Prometheus relabel-config syntax. - See format details: - https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + Path to YAML file that contains relabeling + configuration that allows selecting blocks. It + follows native Prometheus relabel-config syntax. + See format details: + https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config --selector.relabel-config= - Alternative to 'selector.relabel-config-file' - flag (lower priority). Content of YAML file that - contains relabeling configuration that allows - selecting blocks. It follows native Prometheus - relabel-config syntax. See format details: - https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config - --delete-delay=48h Time before a block marked for deletion is deleted from bucket. - If delete-delay is non zero, blocks will be marked for deletion and compactor component will delete blocks marked for deletion from the bucket. - If delete-delay is 0, blocks will be deleted straight away. - Use this if you want to get rid of or move the block immediately. - Note that deleting blocks immediately can cause query failures, if store gateway still has the block - loaded, or compactor is ignoring the deletion because it's compacting the block at the same time. + Alternative to 'selector.relabel-config-file' + flag (lower priority). Content of YAML file that + contains relabeling configuration that allows + selecting blocks. It follows native Prometheus + relabel-config syntax. See format details: + https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + --web.external-prefix="" Static prefix for all HTML links and redirect + URLs in the bucket web UI interface. Actual + endpoints are still served on / or the + web.route-prefix. 
This allows thanos bucket web + UI to be served behind a reverse proxy that + strips a URL sub-path. + --web.prefix-header="" Name of HTTP request header used for dynamic + prefixing of UI links and redirects. This option + is ignored if web.external-prefix argument is + set. Security risk: enable this option only if a + reverse proxy in front of thanos is resetting + the header. The + --web.prefix-header=X-Forwarded-Prefix option + can be useful, for example, if Thanos UI is + served via Traefik reverse proxy with + PathPrefixStrip option enabled, which sends the + stripped prefix value in X-Forwarded-Prefix + header. This allows thanos UI to be served on a + sub-path. + --bucket-web-label=BUCKET-WEB-LABEL + Prometheus label to use as timeline title in the + bucket web UI + ```