diff --git a/src/core/Akka.Cluster.Tests/SBR/SplitBrainResolverSpec.cs b/src/core/Akka.Cluster.Tests/SBR/SplitBrainResolverSpec.cs index b49c73fce8e..e5d7cb67cdb 100644 --- a/src/core/Akka.Cluster.Tests/SBR/SplitBrainResolverSpec.cs +++ b/src/core/Akka.Cluster.Tests/SBR/SplitBrainResolverSpec.cs @@ -1231,6 +1231,25 @@ public void LeaseMajority_must_down_indirectly_connected_when_combined_with_clea strategy3.NodesToDown(reverseDecision3).Should().BeEquivalentTo(new[] { MemberB, MemberC, MemberD, MemberE }.Select(m => m.UniqueAddress)); } + [Fact] + public void LeaseMajority_must_down_indirectly_connected_when_combined_with_clean_partition_A_B_C_D__E_F___A_B_C_D() + { + var setup = new LeaseMajoritySetup(this); + var memberELeaving = Leaving(MemberE); + var memberFDown = Downed(MemberF); + setup.Side1 = ImmutableHashSet.Create(MemberA, MemberB, MemberC, MemberD); + setup.Side2 = ImmutableHashSet.Create(memberELeaving, memberFDown); + + // trouble when indirectly connected happens before clean partition + setup.IndirectlyConnected = ImmutableHashSet.Create((memberELeaving, memberFDown)); + + // from side1 of the partition, majority + setup.AssertDowningSide(setup.Side1, new[] { memberELeaving }); + + // from side2 of the partition, minority + setup.AssertDowningSide(setup.Side2, new[] { MemberA, MemberB, MemberC, MemberD, memberELeaving }); + } + [Fact] public void Strategy_must_add_and_remove_members_with_default_Member_ordering() { @@ -1805,6 +1824,58 @@ public void Split_Brain_Resolver_must_down_indirectly_connected_when_combined_wi setup.Stop(); } + [Fact] + public void Split_Brain_Resolver_must_down_indirectly_connected_when_combined_with_partition_and_exiting_A_B_C_D__E_Fexiting___A_B_C_D() + { + var setup = new SetupKeepMajority(this, TimeSpan.Zero, MemberA.UniqueAddress, null); + setup.MemberUp(MemberA, MemberB, MemberC, MemberD, MemberE, MemberF); + var memberFExiting = Exiting(MemberF); + setup.A.Tell(new ClusterEvent.MemberExited(memberFExiting)); + setup.Leader(MemberA); + // indirectly connected: memberF + // partition: memberA, memberB, memberC, memberD | memberE, memberF + setup.ReachabilityChanged( + (MemberA, MemberE), + (MemberA, memberFExiting), + (MemberB, MemberE), + (MemberB, memberFExiting), + (MemberC, MemberE), + (MemberC, memberFExiting), + (MemberD, MemberE), + (MemberD, memberFExiting), + (MemberE, memberFExiting)); + setup.Tick(); + // keep fully connected members + setup.ExpectDownCalled(MemberE); + setup.Stop(); + } + + [Fact] + public void Split_Brain_Resolver_must_down_indirectly_connected_when_combined_with_partition_and_exiting_A_B_C_D__Eexiting_F___A_B_C_D() + { + var setup = new SetupKeepMajority(this, TimeSpan.Zero, MemberA.UniqueAddress, null); + setup.MemberUp(MemberA, MemberB, MemberC, MemberD, MemberE, MemberF); + var memberEExiting = Exiting(MemberE); + setup.A.Tell(new ClusterEvent.MemberExited(memberEExiting)); + setup.Leader(MemberA); + // indirectly connected: memberF + // partition: memberA, memberB, memberC, memberD | memberE, memberF + setup.ReachabilityChanged( + (MemberA, memberEExiting), + (MemberA, MemberF), + (MemberB, memberEExiting), + (MemberB, MemberF), + (MemberC, memberEExiting), + (MemberC, MemberF), + (MemberD, memberEExiting), + (MemberD, MemberF), + (MemberE, MemberF)); + setup.Tick(); + // keep fully connected members + setup.ExpectDownCalled(MemberF); + setup.Stop(); + } + [Fact] public void Split_Brain_Resolver_must_down_all_in_self_data_centers() { diff --git a/src/core/Akka.Cluster/SBR/DowningStrategy.cs b/src/core/Akka.Cluster/SBR/DowningStrategy.cs index 2509e328e8a..556a52e5ea5 100644 --- a/src/core/Akka.Cluster/SBR/DowningStrategy.cs +++ b/src/core/Akka.Cluster/SBR/DowningStrategy.cs @@ -219,41 +219,40 @@ private ImmutableHashSet IndirectlyConnectedFromIntersectionOfObs public ImmutableHashSet UnreachableButNotIndirectlyConnected => Unreachable.Except(IndirectlyConnected); - private ImmutableHashSet AdditionalNodesToDownWhenIndirectlyConnected + private ImmutableHashSet AdditionalNodesToDownWhenIndirectlyConnected(ImmutableHashSet downable) { - get + if (UnreachableButNotIndirectlyConnected.IsEmpty) return ImmutableHashSet.Empty; + + var originalUnreachable = Unreachable; + var originalReachability = Reachability; + try + { + var intersectionOfObserversAndSubjects = IndirectlyConnectedFromIntersectionOfObserversAndSubjects; + var haveSeenCurrentGossip = IndirectlyConnectedFromSeenCurrentGossip; + Reachability = Reachability.FilterRecords( + r => + // we only retain records for addresses that are still downable + downable.Contains(r.Observer) && downable.Contains(r.Subject) && + // remove records between the indirectly connected + !(intersectionOfObserversAndSubjects.Contains(r.Observer) && + intersectionOfObserversAndSubjects.Contains(r.Subject) || + haveSeenCurrentGossip.Contains(r.Observer) && haveSeenCurrentGossip.Contains(r.Subject))); + Unreachable = Reachability.AllUnreachableOrTerminated; + var additionalDecision = Decide(); + + if (additionalDecision.IsIndirectlyConnected) + throw new InvalidOperationException( + $"SBR double {additionalDecision} decision, downing all instead. " + + $"originalReachability: [{originalReachability}], filtered reachability [{Reachability}], " + + $"still indirectlyConnected: [{string.Join(", ", IndirectlyConnected)}], seenBy: [{string.Join(", ", SeenBy)}]" + ); + + return NodesToDown(additionalDecision); + } + finally { - if (UnreachableButNotIndirectlyConnected.IsEmpty) return ImmutableHashSet.Empty; - - var originalUnreachable = Unreachable; - var originalReachability = Reachability; - try - { - var intersectionOfObserversAndSubjects = IndirectlyConnectedFromIntersectionOfObserversAndSubjects; - var haveSeenCurrentGossip = IndirectlyConnectedFromSeenCurrentGossip; - // remove records between the indirectly connected - Reachability = Reachability.FilterRecords( - r => - !(intersectionOfObserversAndSubjects.Contains(r.Observer) && - intersectionOfObserversAndSubjects.Contains(r.Subject) || - haveSeenCurrentGossip.Contains(r.Observer) && haveSeenCurrentGossip.Contains(r.Subject))); - Unreachable = Reachability.AllUnreachableOrTerminated; - var additionalDecision = Decide(); - - if (additionalDecision.IsIndirectlyConnected) - throw new InvalidOperationException( - $"SBR double {additionalDecision} decision, downing all instead. " + - $"originalReachability: [{originalReachability}], filtered reachability [{Reachability}], " + - $"still indirectlyConnected: [{string.Join(", ", IndirectlyConnected)}], seenBy: [{string.Join(", ", SeenBy)}]" - ); - - return NodesToDown(additionalDecision); - } - finally - { - Unreachable = originalUnreachable; - Reachability = originalReachability; - } + Unreachable = originalUnreachable; + Reachability = originalReachability; } } @@ -384,8 +383,8 @@ public ImmutableHashSet NodesToDown(IDecision decision = null) decision = decision ?? Decide(); var downable = Members - .Union(Joining) .Where(m => m.Status != MemberStatus.Down && m.Status != MemberStatus.Exiting) + .Union(Joining) .Select(m => m.UniqueAddress) .ToImmutableHashSet(); @@ -407,7 +406,7 @@ public ImmutableHashSet NodesToDown(IDecision decision = null) // failure detection observations between the indirectly connected nodes. // Also include nodes that corresponds to the decision without the unreachability observations from // the indirectly connected nodes - return downable.Intersect(IndirectlyConnected.Union(AdditionalNodesToDownWhenIndirectlyConnected)); + return downable.Intersect(IndirectlyConnected.Union(AdditionalNodesToDownWhenIndirectlyConnected(downable))); case ReverseDownIndirectlyConnected _: // indirectly connected + all reachable