From 437dee295f78f3f4ab42880103ab03520f59f28b Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 8 Apr 2020 03:10:56 +0200 Subject: [PATCH] [BUGFIX] `DiffRemoteAndLocalState` incorrectly sync changes with remote. When comparing FederationState between local and remote states, the update was comparing the lastIndex, but this index on remote could be lower than max index retrieved and stored from remote, to be exact, I found it could be zero forever in some case (probably a race condition, but was not able to pinpoint it). This fix is not fixing the initial root cause: the ModifiedIndex is set to 0 in remote DC, but it is a useful workaround as it fixes the convergence if the remote server is not behaving as it should. In that case, it would break sync of updates and the remote object would be never synced. To be sure we don't forget anything, I also added a test over `isSame()` to be sure the objects are identical. This was the reason for the test `TestReplication_FederationStates()` to fail randomly. This is also probably the cause for some of the integration tests to fail randomly as well. With this fix, the command: ``` i=0; while /usr/local/bin/go test -timeout 30s github.com/hashicorp/consul/agent/consul -run '^(TestReplication_FederationStates)$'; do go clean -testcache; i=$((i + 1)); printf "$i "; done ``` That used to break on my machine in less than 20 runs is now running 150+ times without any issue. Might also fix #7575 --- agent/consul/federation_state_replication.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/agent/consul/federation_state_replication.go b/agent/consul/federation_state_replication.go index 788b953b5541..f677a964c73b 100644 --- a/agent/consul/federation_state_replication.go +++ b/agent/consul/federation_state_replication.go @@ -75,7 +75,7 @@ func (r *FederationStateReplicator) DiffRemoteAndLocalState(localRaw interface{} for localIdx, remoteIdx = 0, 0; localIdx < len(local) && remoteIdx < len(remote); { if local[localIdx].Datacenter == remote[remoteIdx].Datacenter { // fedState is in both the local and remote state - need to check raft indices - if remote[remoteIdx].ModifyIndex > lastRemoteIndex { + if (remote[remoteIdx].ModifyIndex > lastRemoteIndex) || !remote[remoteIdx].IsSame(local[localIdx]) { updates = append(updates, remote[remoteIdx]) } // increment both indices when equal @@ -171,7 +171,10 @@ func (r *FederationStateReplicator) PerformUpdates(ctx context.Context, updatesR state2 := &dup // Keep track of the raft modify index at the primary - state2.PrimaryModifyIndex = state.ModifyIndex + if state.ModifyIndex != 0 { + // For some reason, ModifyIndex can be zero from remote + state2.PrimaryModifyIndex = state.ModifyIndex + } req := structs.FederationStateRequest{ Op: structs.FederationStateUpsert,