Skip to content

Commit

Permalink
[SBR] Ignore reachability records for invalid nodes (#6064)
Browse files Browse the repository at this point in the history
  • Loading branch information
ismaelhamed authored Aug 12, 2022
1 parent 9410a4c commit 207e7b8
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 35 deletions.
71 changes: 71 additions & 0 deletions src/core/Akka.Cluster.Tests/SBR/SplitBrainResolverSpec.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1231,6 +1231,25 @@ public void LeaseMajority_must_down_indirectly_connected_when_combined_with_clea
strategy3.NodesToDown(reverseDecision3).Should().BeEquivalentTo(new[] { MemberB, MemberC, MemberD, MemberE }.Select(m => m.UniqueAddress));
}

[Fact]
public void LeaseMajority_must_down_indirectly_connected_when_combined_with_clean_partition_A_B_C_D__E_F___A_B_C_D()
{
    // Scenario: {A, B, C, D} on one side, {E (Leaving), F (Down)} on the other,
    // with an indirect connection recorded between E and F on the smaller side.
    var scenario = new LeaseMajoritySetup(this);
    var leavingE = Leaving(MemberE);
    var downedF = Downed(MemberF);

    scenario.Side1 = ImmutableHashSet.Create(MemberA, MemberB, MemberC, MemberD);
    scenario.Side2 = ImmutableHashSet.Create(leavingE, downedF);

    // The indirect connection is registered before the clean partition is evaluated,
    // which is the ordering that used to cause trouble.
    scenario.IndirectlyConnected = ImmutableHashSet.Create((leavingE, downedF));

    // Seen from side 1 (the majority): only the leaving member E is expected to be downed.
    var expectedDownFromMajority = new[] { leavingE };
    scenario.AssertDowningSide(scenario.Side1, expectedDownFromMajority);

    // Seen from side 2 (the minority): A, B, C, D and the leaving member E are expected to be downed.
    var expectedDownFromMinority = new[] { MemberA, MemberB, MemberC, MemberD, leavingE };
    scenario.AssertDowningSide(scenario.Side2, expectedDownFromMinority);
}

[Fact]
public void Strategy_must_add_and_remove_members_with_default_Member_ordering()
{
Expand Down Expand Up @@ -1805,6 +1824,58 @@ public void Split_Brain_Resolver_must_down_indirectly_connected_when_combined_wi
setup.Stop();
}

[Fact]
public void Split_Brain_Resolver_must_down_indirectly_connected_when_combined_with_partition_and_exiting_A_B_C_D__E_Fexiting___A_B_C_D()
{
    // Keep-majority resolver with a zero stable-after interval, running as member A.
    var resolver = new SetupKeepMajority(this, TimeSpan.Zero, MemberA.UniqueAddress, null);
    resolver.MemberUp(MemberA, MemberB, MemberC, MemberD, MemberE, MemberF);

    // F moves to Exiting before any unreachability is reported.
    var exitingF = Exiting(MemberF);
    resolver.A.Tell(new ClusterEvent.MemberExited(exitingF));
    resolver.Leader(MemberA);

    // Partition: A, B, C, D | E, F.
    // A-D each report both E and F as unreachable; in addition E reports F
    // as unreachable, which is the indirect connection inside the far side.
    resolver.ReachabilityChanged(
        (MemberA, MemberE), (MemberA, exitingF),
        (MemberB, MemberE), (MemberB, exitingF),
        (MemberC, MemberE), (MemberC, exitingF),
        (MemberD, MemberE), (MemberD, exitingF),
        (MemberE, exitingF));
    resolver.Tick();

    // The fully connected members A-D are kept; of the far side, the test expects
    // Down to be requested for E only.
    resolver.ExpectDownCalled(MemberE);
    resolver.Stop();
}

[Fact]
public void Split_Brain_Resolver_must_down_indirectly_connected_when_combined_with_partition_and_exiting_A_B_C_D__Eexiting_F___A_B_C_D()
{
    // Keep-majority resolver with a zero stable-after interval, running as member A.
    var resolver = new SetupKeepMajority(this, TimeSpan.Zero, MemberA.UniqueAddress, null);
    resolver.MemberUp(MemberA, MemberB, MemberC, MemberD, MemberE, MemberF);

    // E moves to Exiting before any unreachability is reported.
    var exitingE = Exiting(MemberE);
    resolver.A.Tell(new ClusterEvent.MemberExited(exitingE));
    resolver.Leader(MemberA);

    // Partition: A, B, C, D | E, F.
    // A-D each report both E and F as unreachable; in addition E reports F
    // as unreachable, which is the indirect connection inside the far side.
    resolver.ReachabilityChanged(
        (MemberA, exitingE), (MemberA, MemberF),
        (MemberB, exitingE), (MemberB, MemberF),
        (MemberC, exitingE), (MemberC, MemberF),
        (MemberD, exitingE), (MemberD, MemberF),
        (MemberE, MemberF));
    resolver.Tick();

    // The fully connected members A-D are kept; of the far side, the test expects
    // Down to be requested for F only.
    resolver.ExpectDownCalled(MemberF);
    resolver.Stop();
}

[Fact]
public void Split_Brain_Resolver_must_down_all_in_self_data_centers()
{
Expand Down
69 changes: 34 additions & 35 deletions src/core/Akka.Cluster/SBR/DowningStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -219,41 +219,40 @@ private ImmutableHashSet<UniqueAddress> IndirectlyConnectedFromIntersectionOfObs
public ImmutableHashSet<UniqueAddress> UnreachableButNotIndirectlyConnected =>
Unreachable.Except(IndirectlyConnected);

private ImmutableHashSet<UniqueAddress> AdditionalNodesToDownWhenIndirectlyConnected
private ImmutableHashSet<UniqueAddress> AdditionalNodesToDownWhenIndirectlyConnected(ImmutableHashSet<UniqueAddress> downable)
{
get
if (UnreachableButNotIndirectlyConnected.IsEmpty) return ImmutableHashSet<UniqueAddress>.Empty;

var originalUnreachable = Unreachable;
var originalReachability = Reachability;
try
{
var intersectionOfObserversAndSubjects = IndirectlyConnectedFromIntersectionOfObserversAndSubjects;
var haveSeenCurrentGossip = IndirectlyConnectedFromSeenCurrentGossip;
Reachability = Reachability.FilterRecords(
r =>
// we only retain records for addresses that are still downable
downable.Contains(r.Observer) && downable.Contains(r.Subject) &&
// remove records between the indirectly connected
!(intersectionOfObserversAndSubjects.Contains(r.Observer) &&
intersectionOfObserversAndSubjects.Contains(r.Subject) ||
haveSeenCurrentGossip.Contains(r.Observer) && haveSeenCurrentGossip.Contains(r.Subject)));
Unreachable = Reachability.AllUnreachableOrTerminated;
var additionalDecision = Decide();

if (additionalDecision.IsIndirectlyConnected)
throw new InvalidOperationException(
$"SBR double {additionalDecision} decision, downing all instead. " +
$"originalReachability: [{originalReachability}], filtered reachability [{Reachability}], " +
$"still indirectlyConnected: [{string.Join(", ", IndirectlyConnected)}], seenBy: [{string.Join(", ", SeenBy)}]"
);

return NodesToDown(additionalDecision);
}
finally
{
if (UnreachableButNotIndirectlyConnected.IsEmpty) return ImmutableHashSet<UniqueAddress>.Empty;

var originalUnreachable = Unreachable;
var originalReachability = Reachability;
try
{
var intersectionOfObserversAndSubjects = IndirectlyConnectedFromIntersectionOfObserversAndSubjects;
var haveSeenCurrentGossip = IndirectlyConnectedFromSeenCurrentGossip;
// remove records between the indirectly connected
Reachability = Reachability.FilterRecords(
r =>
!(intersectionOfObserversAndSubjects.Contains(r.Observer) &&
intersectionOfObserversAndSubjects.Contains(r.Subject) ||
haveSeenCurrentGossip.Contains(r.Observer) && haveSeenCurrentGossip.Contains(r.Subject)));
Unreachable = Reachability.AllUnreachableOrTerminated;
var additionalDecision = Decide();

if (additionalDecision.IsIndirectlyConnected)
throw new InvalidOperationException(
$"SBR double {additionalDecision} decision, downing all instead. " +
$"originalReachability: [{originalReachability}], filtered reachability [{Reachability}], " +
$"still indirectlyConnected: [{string.Join(", ", IndirectlyConnected)}], seenBy: [{string.Join(", ", SeenBy)}]"
);

return NodesToDown(additionalDecision);
}
finally
{
Unreachable = originalUnreachable;
Reachability = originalReachability;
}
Unreachable = originalUnreachable;
Reachability = originalReachability;
}
}

Expand Down Expand Up @@ -384,8 +383,8 @@ public ImmutableHashSet<UniqueAddress> NodesToDown(IDecision decision = null)
decision = decision ?? Decide();

var downable = Members
.Union(Joining)
.Where(m => m.Status != MemberStatus.Down && m.Status != MemberStatus.Exiting)
.Union(Joining)
.Select(m => m.UniqueAddress)
.ToImmutableHashSet();

Expand All @@ -407,7 +406,7 @@ public ImmutableHashSet<UniqueAddress> NodesToDown(IDecision decision = null)
// failure detection observations between the indirectly connected nodes.
// Also include nodes that corresponds to the decision without the unreachability observations from
// the indirectly connected nodes
return downable.Intersect(IndirectlyConnected.Union(AdditionalNodesToDownWhenIndirectlyConnected));
return downable.Intersect(IndirectlyConnected.Union(AdditionalNodesToDownWhenIndirectlyConnected(downable)));

case ReverseDownIndirectlyConnected _:
// indirectly connected + all reachable
Expand Down

0 comments on commit 207e7b8

Please sign in to comment.