GH-13 Add disaster recovery scenario 3
Showing 1 changed file with 131 additions and 0 deletions.
@@ -0,0 +1,131 @@
#!/usr/bin/env python3
import os
import shutil
import signal
import time
from TestHarness import Cluster, TestHelper, Utils, WalletMgr
from TestHarness.Node import BlockType

###############################################################
# disaster_recovery - Scenario 3
#
# Create integration test with 4 nodes (A, B, C, and D) which each have their own producer and finalizer. The finalizer
# policy consists of the four finalizers with a threshold of 3. The proposer policy involves all four proposers.
#
# - At least two of the four nodes should have a LIB N and a finalizer safety information file that locks on a block
# after N. The other two nodes should have a LIB that is less than or equal to block N.
#
# All nodes are shut down. The reversible blocks on all nodes are deleted. Restart all nodes from an earlier snapshot.
#
# All nodes eventually sync up to block N. Some nodes will consider block N to be LIB but others may not.
#
# Not enough finalizers should be voting because of the lock in their finalizer safety information file. Verify that
# LIB does not advance on any node.
#
# Cleanly shut down all nodes and delete their finalizer safety information files. Then restart the nodes.
#
# Verify that LIB advances on all nodes and that they all agree on the LIB. In particular, verify that block N has the
# same ID on all nodes as it did before the nodes were first shut down.
#
###############################################################

Print=Utils.Print
errorExit=Utils.errorExit

args=TestHelper.parse_args({"-d","--keep-logs","--dump-error-details","-v","--leave-running","--unshared"})
pnodes=4
delay=args.d
debug=args.v
prod_count = 1 # per node prod count
total_nodes=pnodes
dumpErrorDetails=args.dump_error_details

Utils.Debug=debug
testSuccessful=False

cluster=Cluster(unshared=args.unshared, keepRunning=args.leave_running, keepLogs=args.keep_logs)
walletMgr=WalletMgr(True, keepRunning=args.leave_running, keepLogs=args.keep_logs)

try:
    TestHelper.printSystemInfo("BEGIN")

    cluster.setWalletMgr(walletMgr)

    Print(f'producing nodes: {pnodes}, delay between nodes launch: {delay} second{"s" if delay != 1 else ""}')

    Print("Stand up cluster")
    if cluster.launch(pnodes=pnodes, totalNodes=total_nodes, totalProducers=pnodes, delay=delay, loadSystemContract=False,
                      activateIF=True, biosFinalizer=False) is False:
        errorExit("Failed to stand up eos cluster.")

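    # launch() waits for production to move off the default "eosio" (bios) producer, so the bios node is no longer
    # needed; shut it down and make sure the remaining four nodes are producing and advancing together.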
    assert cluster.biosNode.getInfo(exitOnError=True)["head_block_producer"] != "eosio", "launch should have waited for production to change"
    cluster.biosNode.kill(signal.SIGTERM)
    cluster.waitOnClusterSync(blockAdvancing=5)

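    # The four nodes A, B, C, and D each run their own producer and finalizer (see the scenario description above).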
    node0 = cluster.getNode(0) # A
    node1 = cluster.getNode(1) # B
    node2 = cluster.getNode(2) # C
    node3 = cluster.getNode(3) # D

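    # Take a snapshot on node A; every node is later restarted from this earlier snapshot.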
Print("Create snapshot (node 0)") | ||
ret = node0.createSnapshot() | ||
assert ret is not None, "Snapshot creation failed" | ||
ret_head_block_num = ret["payload"]["head_block_num"] | ||
Print(f"Snapshot head block number {ret_head_block_num}") | ||
|
||
Print("Wait for snapshot node lib to advance") | ||
assert node0.waitForBlock(ret_head_block_num+1, blockType=BlockType.lib), "Node0 did not advance to make snapshot block LIB" | ||
assert node1.waitForLibToAdvance(), "Node1 did not advance LIB after snapshot of Node0" | ||
|
||
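    # Let LIB advance past the snapshot block and record the current LIB block. This is block N from the scenario
    # description; its ID must be the same on every node after recovery.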
    assert node0.waitForLibToAdvance(), "Node0 did not advance LIB after snapshot"
    currentLIB = node0.getIrreversibleBlockNum()
    libBlock = node0.getBlock(currentLIB)
    Print(f"Lib Block: {libBlock}")

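    # Cleanly shut down all four nodes and confirm each process has actually exited.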
Print("Shutdown all nodes") | ||
for node in [node0, node1, node2, node3]: | ||
node.kill(signal.SIGTERM) | ||
for node in [node0, node1, node2, node3]: | ||
assert not node.verifyAlive(), "Node did not shutdown" | ||
|
||
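    # Delete reversible blocks and state on every node, but keep each node's finalizer safety data so the finalizers
    # stay locked on the blocks they recorded before the shutdown.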
Print("Remove reversible blocks and state, but not finalizers safety data") | ||
for node in [node0, node1, node2, node3]: | ||
node.removeReversibleBlks() | ||
node.removeState() | ||
|
||
Print("Restart nodes from snapshot") | ||
for i in range(4): | ||
isRelaunchSuccess = cluster.getNode(i).relaunch(chainArg=" -e --snapshot {}".format(node0.getLatestSnapshot())) | ||
assert isRelaunchSuccess, f"node {i} relaunch from snapshot failed" | ||
|
||
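    # With reversible blocks gone but the finalizer safety files intact, the locked finalizers cannot vote, so the
    # threshold of 3 cannot be reached and LIB must not advance on any node.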
Print("Verify LIB does not advance on any node") | ||
for node in [node0, node1, node2, node3]: | ||
assert not node.waitForLibToAdvance(), "Node advanced LIB after relaunch when it should not" | ||
|
||
Print("Shutdown all nodes to remove finalizer safety data") | ||
for node in [node0, node1, node2, node3]: | ||
node.kill(signal.SIGTERM) | ||
for node in [node0, node1, node2, node3]: | ||
assert not node.verifyAlive(), "Node did not shutdown" | ||
|
||
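    # Removing the finalizer safety files releases the locks, allowing the finalizers to vote again once the nodes
    # are restarted.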
    for node in [node0, node1, node2, node3]:
        node.removeFinalizersSafetyFile()

Print("Restart nodes") | ||
for node in [node0, node1, node2, node3]: | ||
node.relaunch(rmArgs=" --snapshot {}".format(node0.getLatestSnapshot())) | ||
|
||
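    # With the safety files removed, finality should resume and LIB must advance on every node.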
Print("Verify LIB advances on all nodes") | ||
for node in [node0, node1, node2, node3]: | ||
assert node.waitForLibToAdvance(), "Node did not advance LIB after restart" | ||
|
||
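    # All nodes must agree on block N: its ID on every node must match the ID recorded before the first shutdown.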
    for node in [node0, node1, node2, node3]:
        nodeId = node.getBlock(currentLIB)["id"]
        assert nodeId == libBlock["id"], "Node lib block id does not match prior lib block id"

    testSuccessful=True
finally:
    TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, dumpErrorDetails=dumpErrorDetails)

exitCode = 0 if testSuccessful else 1
exit(exitCode)