From 975d051dc72198a1562f2af20f97417c1c870b4f Mon Sep 17 00:00:00 2001 From: lanmengran1 Date: Thu, 23 Sep 2021 10:37:00 +0800 Subject: [PATCH] extend_replication_when_instance_heartbeat_timeout Signed-off-by: lanmengran1 --- CHANGELOG.md | 1 + pkg/ring/ring.go | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed16d325da..fa89c0d894 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ * [BUGFIX] Querier: fixed panic when querying exemplars and using `-distributor.shard-by-all-labels=false`. #4473 * [BUGFIX] Querier: honor querier minT,maxT if `nil` SelectHints are passed to Select(). #4413 * [BUGFIX] Compactor: fixed panic while collecting Prometheus metrics. #4483 +* [BUGFIX] Ring: extend replication when instance heartbeat times out. #4493 ## 1.10.0 / 2021-08-03 diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 01ef9d24fe..0ee5db417c 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -375,10 +375,15 @@ func (r *Ring) Get(key uint32, op Operation, bufDescs []InstanceDesc, bufHosts, distinctHosts = append(distinctHosts, info.InstanceID) instance := r.ringDesc.Ingesters[info.InstanceID] - + state := instance.State + // Heartbeat unhealthy instance also needs to extend replication. + // Since the ring will not update instance state automatically, changing the state to LEFT instead. + if !instance.IsHeartbeatHealthy(r.cfg.HeartbeatTimeout, time.Now()) { + state = LEFT + } // Check whether the replica set should be extended given we're including // this instance. - if op.ShouldExtendReplicaSetOnState(instance.State) { + if op.ShouldExtendReplicaSetOnState(state) { n++ } else if r.cfg.ZoneAwarenessEnabled && info.Zone != "" { // We should only add the zone if we are not going to extend,