Skip to content

Commit

Permalink
Prevent picking a failpoint that waits till snapshot that doesn't s…
Browse files Browse the repository at this point in the history
…upport lower snapshot catchup entries but allow reproducing issue etcd-io#15271

Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
  • Loading branch information
serathius committed May 13, 2024
1 parent 333bd7b commit 573e38b
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 10 deletions.
14 changes: 11 additions & 3 deletions tests/robustness/failpoint/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ func (tb triggerBlackhole) Trigger(ctx context.Context, t *testing.T, member e2e
}

// Available reports whether the blackhole failpoint can run against this
// cluster: there must be more than one member and the target process must
// have a peer proxy to blackhole. When the failpoint additionally waits for
// a snapshot transfer, clusters configured so that a snapshot needs too many
// entries (the default snapshot-catchup-entries is 10,000) are rejected,
// because waiting for the member to fall that far behind takes too long.
func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, process e2e.EtcdProcess) bool {
	waitsTooLong := tb.waitTillSnapshot && entriesToGuaranteeSnapshot(config) > 200
	if waitsTooLong {
		return false
	}
	multiMember := config.ClusterSize > 1
	hasPeerProxy := process.PeerProxy() != nil
	return multiMember && hasPeerProxy
}

Expand Down Expand Up @@ -127,16 +131,20 @@ func waitTillSnapshot(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCl
}
t.Logf("clusterRevision: %d, blackholedMemberRevision: %d", clusterRevision, blackholedMemberRevision)
// Blackholed member has to be sufficiently behind to trigger snapshot transfer.
// Need to make sure leader compacted latest revBlackholedMem inside EtcdServer.snapshot.
// That's why we wait for clus.Cfg.SnapshotCount (to trigger snapshot) + clus.Cfg.SnapshotCatchUpEntries (EtcdServer.snapshot compaction offset)
if clusterRevision-blackholedMemberRevision > int64(clus.Cfg.ServerConfig.SnapshotCount+clus.Cfg.ServerConfig.SnapshotCatchUpEntries) {
if clusterRevision-blackholedMemberRevision > int64(entriesToGuaranteeSnapshot(*clus.Cfg)) {
break
}
time.Sleep(100 * time.Millisecond)
}
return nil
}

// entriesToGuaranteeSnapshot returns how many entries a member must lag
// behind the leader before a snapshot transfer is guaranteed. The leader
// needs to have compacted past the member's latest revision inside
// EtcdServer.snapshot, which happens after SnapshotCount entries (snapshot
// trigger) plus SnapshotCatchUpEntries entries (compaction offset kept for
// slow followers).
func entriesToGuaranteeSnapshot(config e2e.EtcdProcessClusterConfig) uint64 {
	snapshotTrigger := config.ServerConfig.SnapshotCount
	compactionOffset := config.ServerConfig.SnapshotCatchUpEntries
	return snapshotTrigger + compactionOffset
}

// latestRevisionForEndpoint gets latest revision of the first endpoint in Client.Endpoints list
func latestRevisionForEndpoint(ctx context.Context, c *clientv3.Client) (int64, error) {
resp, err := c.Status(ctx, c.Endpoints()[0])
Expand Down
4 changes: 0 additions & 4 deletions tests/robustness/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,6 @@ func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testSce
t.Fatal(err)
}
}
err = failpoint.Validate(r.Cluster, s.failpoint)
if err != nil {
t.Fatal(err)
}

// t.Failed() returns false during panicking. We need to forcibly
// save data on panicking.
Expand Down
8 changes: 5 additions & 3 deletions tests/robustness/scenarios.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ func exploratoryScenarios(t *testing.T) []testScenario {
e2e.WithCompactionBatchLimit(100),
e2e.WithWatchProcessNotifyInterval(100 * time.Millisecond),
}
// snapshot-catchup-entries flag was backported in https://github.com/etcd-io/etcd/pull/17808
v3_5_13 := semver.Version{Major: 3, Minor: 5, Patch: 13}
if v.Compare(v3_5_13) >= 0 {
baseOptions = append(baseOptions, e2e.WithSnapshotCatchUpEntries(100))
}
scenarios := []testScenario{}
for _, tp := range trafficProfiles {
name := filepath.Join(tp.Traffic.Name(), tp.Profile.Name, "ClusterOfSize1")
Expand All @@ -104,9 +109,6 @@ func exploratoryScenarios(t *testing.T) []testScenario {
clusterOfSize3Options := baseOptions
clusterOfSize3Options = append(clusterOfSize3Options, e2e.WithIsPeerTLS(true))
clusterOfSize3Options = append(clusterOfSize3Options, e2e.WithPeerProxy(true))
if !v.LessThan(version.V3_6) {
clusterOfSize3Options = append(clusterOfSize3Options, e2e.WithSnapshotCatchUpEntries(100))
}
scenarios = append(scenarios, testScenario{
name: name,
traffic: tp.Traffic,
Expand Down

0 comments on commit 573e38b

Please sign in to comment.