diff --git a/CHANGELOG.md b/CHANGELOG.md index ebb383375c..b101365484 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,62 +2,63 @@ ## master / unreleased + +## 0.6.0 / 2020-01-28 + Note that the ruler flags need to be changed in this upgrade. You're moving from a single node ruler to something that might need to be sharded. -If you are running with a high `-ruler.num-workers` and if you're not able to execute all your rules in `-ruler.evaluation-interval`, then you'll need to shard. -Further, if you're using the configs service, we've upgraded the migration library and this requires some manual intervention. See full -instructions below to upgrade your Postgres. +Further, if you're using the configs service, we've upgraded the migration library and this requires some manual intervention. See full instructions below to upgrade your PostgreSQL. -* [CHANGE] Remove unnecessary configs/flags from the ruler ring config to align with the pattern used in the distributor ring. #1987 - * Ruler ring related flags are now all prefixed with `ruler.ring.` as opposed to just `ruler.` - * Changed the default value for `-ruler.ring.prefix` from `collectors/` to `rulers/` in order to not clash with other keys (ie. ring) stored in the same key-value store. * [CHANGE] The frontend component now does not cache results if it finds a `Cache-Control` header and if one of its values is `no-store`. #1974 * [CHANGE] Flags changed with transition to upstream Prometheus rules manager: - * `ruler.client-timeout` is now `ruler.configs.client-timeout` in order to match `ruler.configs.url` - * `ruler.group-timeout`has been removed - * `ruler.num-workers` has been removed - * `ruler.rule-path` has been added to specify where the prometheus rule manager will sync rule files - * `ruler.storage.type` has beem added to specify the rule store backend type, currently only the configdb. - * `ruler.poll-interval` has been added to specify the interval in which to poll new rule groups. 
+ * `-ruler.client-timeout` is now `ruler.configs.client-timeout` in order to match `ruler.configs.url`. + * `-ruler.group-timeout` has been removed. + * `-ruler.num-workers` has been removed. + * `-ruler.rule-path` has been added to specify where the prometheus rule manager will sync rule files. + * `-ruler.storage.type` has been added to specify the rule store backend type, currently only the configdb. + * `-ruler.poll-interval` has been added to specify the interval in which to poll new rule groups. + * `-ruler.evaluation-interval` default value has changed from `15s` to `1m` to match the default evaluation interval in Prometheus. + * Ruler sharding requires a ring which can be configured via the ring flags prefixed by `ruler.ring.`. #1987 * [CHANGE] Use relative links from /ring page to make it work when used behind reverse proxy. #1896 * [CHANGE] Deprecated `-distributor.limiter-reload-period` flag. #1766 * [CHANGE] Ingesters now write only normalised tokens to the ring, although they can still read denormalised tokens used by other ingesters. `-ingester.normalise-tokens` is now deprecated, and ignored. If you want to switch back to using denormalised tokens, you need to downgrade to Cortex 0.4.0. Previous versions don't handle claiming tokens from normalised ingesters correctly. #1809 * [CHANGE] Overrides mechanism has been renamed to "runtime config", and is now separate from limits. Runtime config is simply a file that is reloaded by Cortex every couple of seconds. Limits and now also multi KV use this mechanism.
New arguments were introduced: `-runtime-config.file` (defaults to empty) and `-runtime-config.reload-period` (defaults to 10 seconds), which replace previously used `-limits.per-user-override-config` and `-limits.per-user-override-period` options. Old options are still used if `-runtime-config.file` is not specified. This change is also reflected in YAML configuration, where old `limits.per_tenant_override_config` and `limits.per_tenant_override_period` fields are replaced with `runtime_config.file` and `runtime_config.period` respectively. #1749 * [CHANGE] Cortex now rejects data with duplicate labels. Previously, such data was accepted, with duplicate labels removed with only one value left. #1964 * [CHANGE] Changed the default value for `-distributor.ha-tracker.prefix` from `collectors/` to `ha-tracker/` in order to not clash with other keys (ie. ring) stored in the same key-value store. #1940 +* [FEATURE] Write-Ahead-Log added in ingesters for more data reliability against ingester crashes. #1103 + * `--ingester.wal-enabled`: Setting this to `true` enables writing to WAL during ingestion. + * `--ingester.wal-dir`: Directory where the WAL data should be stored and/or recovered from. + * `--ingester.checkpoint-enabled`: Set this to `true` to enable checkpointing of in-memory chunks to disk. + * `--ingester.checkpoint-duration`: This is the interval at which checkpoints should be created. + * `--ingester.recover-from-wal`: Set this to `true` to recover data from an existing WAL. + * For more information, please check out the ["Ingesters with WAL" guide](https://cortexmetrics.io/docs/guides/ingesters-with-wal/). * [FEATURE] The distributor can now drop labels from samples (similar to the removal of the replica label for HA ingestion) per user via the `distributor.drop-label` flag. #1726 * [FEATURE] Added flag `debug.mutex-profile-fraction` to enable mutex profiling #1969 * [FEATURE] Added `global` ingestion rate limiter strategy.
Deprecated `-distributor.limiter-reload-period` flag. #1766 * [FEATURE] Added support for Microsoft Azure blob storage to be used for storing chunk data. #1913 * [FEATURE] Added readiness probe endpoint`/ready` to queriers. #1934 -* [FEATURE] EXPERIMENTAL: Added `/series` API endpoint support with TSDB blocks storage. #1830 * [FEATURE] Added "multi" KV store that can interact with two other KV stores, primary one for all reads and writes, and secondary one, which only receives writes. Primary/secondary store can be modified in runtime via runtime-config mechanism (previously "overrides"). #1749 -* [FEATURE] EXPERIMENTAL: Added TSDB blocks `compactor` component, which iterates over users blocks stored in the bucket and compact them according to the configured block ranges. #1942 +* [FEATURE] Added support to store ring tokens to a file and read it back on startup, instead of generating/fetching the tokens to/from the ring. This feature can be enabled with the flag `-ingester.tokens-file-path`. #1750 +* [FEATURE] Experimental TSDB: Added `/series` API endpoint support with TSDB blocks storage. #1830 +* [FEATURE] Experimental TSDB: Added TSDB blocks `compactor` component, which iterates over users blocks stored in the bucket and compact them according to the configured block ranges. #1942 * [ENHANCEMENT] metric `cortex_ingester_flush_reasons` gets a new `reason` value: `Spread`, when `-ingester.spread-flushes` option is enabled. #1978 * [ENHANCEMENT] Added `password` and `enable_tls` options to redis cache configuration. Enables usage of Microsoft Azure Cache for Redis service. #1923 -* [ENHANCEMENT] Experimental TSDB: Open existing TSDB on startup to prevent ingester from becoming ready before it can accept writes. 
#1917 - * `--experimental.tsdb.max-tsdb-opening-concurrency-on-startup` -* [ENHANCEMENT] Experimental TSDB: Added `cortex_ingester_shipper_dir_syncs_total`, `cortex_ingester_shipper_dir_sync_failures_total`, `cortex_ingester_shipper_uploads_total` and `cortex_ingester_shipper_upload_failures_total` metrics from TSDB shipper component. #1983 +* [ENHANCEMENT] Upgraded Kubernetes API version for deployments from `extensions/v1beta1` to `apps/v1`. #1941 +* [ENHANCEMENT] Experimental TSDB: Open existing TSDB on startup to prevent ingester from becoming ready before it can accept writes. The max concurrency is set via `--experimental.tsdb.max-tsdb-opening-concurrency-on-startup`. #1917 * [ENHANCEMENT] Experimental TSDB: Querier now exports aggregate metrics from Thanos bucket store and in memory index cache (many metrics to list, but all have `cortex_querier_bucket_store_` or `cortex_querier_blocks_index_cache_` prefix). #1996 * [ENHANCEMENT] Experimental TSDB: Improved multi-tenant bucket store. #1991 * Allowed to configure the blocks sync interval via `-experimental.tsdb.bucket-store.sync-interval` (0 disables the sync) * Limited the number of tenants concurrently synched by `-experimental.tsdb.bucket-store.block-sync-concurrency` * Renamed `cortex_querier_sync_seconds` metric to `cortex_querier_blocks_sync_seconds` * Track `cortex_querier_blocks_sync_seconds` metric for the initial sync too - * Fixed race condition * [BUGFIX] Fixed unnecessary CAS operations done by the HA tracker when the jitter is enabled. #1861 -* [BUGFIX] Fixed #1904 ingesters getting stuck in a LEAVING state after coming up from an ungraceful exit. #1921 +* [BUGFIX] Fixed ingesters getting stuck in a LEAVING state after coming up from an ungraceful exit. #1921 * [BUGFIX] Reduce memory usage when ingester Push() errors. #1922 -* [BUGFIX] TSDB: Fixed handling of out of order/bound samples in ingesters with the experimental TSDB blocks storage. 
#1864 -* [BUGFIX] TSDB: Fixed querying ingesters in `LEAVING` state with the experimental TSDB blocks storage. #1854 -* [BUGFIX] TSDB: Fixed error handling in the series to chunks conversion with the experimental TSDB blocks storage. #1837 -* [BUGFIX] TSDB: Fixed TSDB creation conflict with blocks transfer in a `JOINING` ingester with the experimental TSDB blocks storage. #1818 -* [BUGFIX] TSDB: `experimental.tsdb.ship-interval` of <=0 treated as disabled instead of allowing panic. #1975 -* [BUGFIX] TSDB: Fixed `cortex_ingester_queried_samples` and `cortex_ingester_queried_series` metrics when using block storage. #1981 -* [BUGFIX] TSDB: Fixed `cortex_ingester_memory_series` and `cortex_ingester_memory_users` metrics when using with the experimental TSDB blocks storage. #1982 -* [BUGFIX] TSDB: Fixed `cortex_ingester_memory_series_created_total` and `cortex_ingester_memory_series_removed_total` metrics when using TSDB blocks storage. #1990 * [BUGFIX] Table Manager: Fixed calculation of expected tables and creation of tables from next active schema considering grace period. #1976 +* [BUGFIX] Experimental TSDB: Fixed ingesters consistency during hand-over when using experimental TSDB blocks storage. #1854 #1818 +* [BUGFIX] Experimental TSDB: Fixed metrics when using experimental TSDB blocks storage. #1981 #1982 #1990 #1983 +* [BUGFIX] Experimental memberlist: Use the advertised address when sending packets to other peers of the Gossip memberlist. 
#1857 -### Upgrading Postgres (if you're using configs service) +### Upgrading PostgreSQL (if you're using configs service) Reference: https://github.com/golang-migrate/migrate/tree/master/database/postgres#upgrading-from-v1 @@ -69,6 +70,11 @@ Reference: https://github.com/golang-migrate/migrate/tree/master/database/postgr migrate -path /cmd/cortex/migrations -database postgres://localhost:5432/database force 2 ``` +### Known issues + +- The `cortex_prometheus_rule_group_last_evaluation_timestamp_seconds` metric, tracked by the ruler, is not unregistered for rule groups not being used anymore. This issue will be fixed in the next Cortex release (see [2033](https://github.com/cortexproject/cortex/issues/2033)). + + ## 0.4.0 / 2019-12-02 * [CHANGE] The frontend component has been refactored to be easier to re-use. When upgrading the frontend, cache entries will be discarded and re-created with the new protobuf schema. #1734 diff --git a/RELEASE.md b/RELEASE.md index 7d679d52e0..8682af85a2 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -12,8 +12,8 @@ Our goal is to provide a new minor release every 4 weeks. This is a new process | v0.2.0 | 2019-08-28 | Goutham Veeramachaneni (Github: @gouthamve) | | v0.3.0 | 2019-10-09 | Bryan Boreham (@bboreham) | | v0.4.0 | 2019-11-13 | Tom Wilkie (@tomwilkie) | -| v0.5.0 | 2020-01-08 | _Abandoned_ | -| v0.6.0 | 2020-01-20 | **searching for a volunteer** | +| v0.5.0 | 2020-01-08 | _Abandoned_ | +| v0.6.0 | 2020-01-22 | Marco Pracucci (@pracucci) | ## Release shepherd responsibilities @@ -59,6 +59,8 @@ Entries in the `CHANGELOG.md` are meant to be in this order: * `[ENHANCEMENT]` * `[BUGFIX]` +To quickly look for the list of PRs missing a reference in the `CHANGELOG.md` you can run `./tools/release/check-changelog.sh LAST-RELEASE-TAG...master`. + ### Draft the new release Tag the new release with a tag named `v<major>.<minor>.<patch>`, e.g. `v0.1.3`. Note the `v` prefix.
diff --git a/VERSION b/VERSION index 1d0ba9ea18..a918a2aa18 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.4.0 +0.6.0 diff --git a/pkg/ingester/flush.go b/pkg/ingester/flush.go index 49b229229e..3b73e1ffa7 100644 --- a/pkg/ingester/flush.go +++ b/pkg/ingester/flush.go @@ -193,6 +193,9 @@ func (i *Ingester) sweepSeries(userID string, fp model.Fingerprint, series *memo } func (i *Ingester) shouldFlushSeries(series *memorySeries, fp model.Fingerprint, immediate bool) flushReason { + if len(series.chunkDescs) == 0 { + return noFlush + } if immediate { return reasonImmediate } @@ -203,12 +206,9 @@ func (i *Ingester) shouldFlushSeries(series *memorySeries, fp model.Fingerprint, return series.chunkDescs[0].flushReason } return reasonMultipleChunksInSeries - } else if len(series.chunkDescs) > 0 { - // Otherwise look in more detail at the first chunk - return i.shouldFlushChunk(series.chunkDescs[0], fp, series.isStale()) } - - return noFlush + // Otherwise look in more detail at the first chunk + return i.shouldFlushChunk(series.chunkDescs[0], fp, series.isStale()) } func (i *Ingester) shouldFlushChunk(c *desc, fp model.Fingerprint, lastValueIsStale bool) flushReason { @@ -290,11 +290,14 @@ func (i *Ingester) flushUserSeries(flushQueueIndex int, userID string, fp model. return nil } - // Assume we're going to flush everything, and maybe don't flush the head chunk if it doesn't need it. + // shouldFlushSeries() has told us we have at least one chunk chunks := series.chunkDescs - if immediate || (len(chunks) > 0 && i.shouldFlushChunk(series.head(), fp, series.isStale()) != noFlush) { + if immediate { series.closeHead(reasonImmediate) + } else if chunkReason := i.shouldFlushChunk(series.head(), fp, series.isStale()); chunkReason != noFlush { + series.closeHead(chunkReason) } else { + // The head chunk doesn't need flushing; step back by one. 
chunks = chunks[:len(chunks)-1] } diff --git a/tools/release/check-changelog.sh b/tools/release/check-changelog.sh new file mode 100755 index 0000000000..6ba9c30e18 --- /dev/null +++ b/tools/release/check-changelog.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Expect as input parameter the commits range to analyze. +if [ $# -ne 1 ]; then + echo "Usage: $0 range" + echo "" + echo " range The commit range to compare as documented at:" + echo " https://git-scm.com/docs/gitrevisions" + echo "" + echo "Example:" + echo " $0 v0.4.0...master" + echo "" + exit 1 +fi + +# Find all merged PRs. +GIT_LOG=$(git log --pretty=format:"%s" $1) +PR_LIST=$(echo "$GIT_LOG" | grep -Eo '#[0-9]+') +PR_LIST_COUNT=$(echo "$PR_LIST" | wc -l | grep -Eo '[0-9]+') +PR_AUTHORS_COUNT=$(git log --pretty=format:"%an" $1 | sort | uniq -i | wc -l | grep -Eo '[0-9]+') +echo "Found ${PR_LIST_COUNT} PRs from ${PR_AUTHORS_COUNT} authors." +echo "" + +# For each PR check if it's mentioned in the changelog. +echo "List of missing PR in the CHANGELOG.md:" +for PR in $PR_LIST; do + grep -q "$PR" CHANGELOG.md + if [ $? -eq 0 ]; then + continue + fi + + # Print 1 line for the missing PR + echo -n "- ${PR}: " + echo "$GIT_LOG" | grep "$PR" +done