
IF: Disaster_recovery scenario 2 test #72

Merged
merged 16 commits on May 1, 2024

Changes from 8 commits
4 changes: 4 additions & 0 deletions tests/CMakeLists.txt
@@ -68,6 +68,8 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/nodeos_high_transaction_test.py ${CMA
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/nodeos_retry_transaction_test.py ${CMAKE_CURRENT_BINARY_DIR}/nodeos_retry_transaction_test.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/transition_to_if.py ${CMAKE_CURRENT_BINARY_DIR}/transition_to_if.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/disaster_recovery.py ${CMAKE_CURRENT_BINARY_DIR}/disaster_recovery.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/disaster_recovery_2.py ${CMAKE_CURRENT_BINARY_DIR}/disaster_recovery_2.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/disaster_recovery_2_test_shape.json ${CMAKE_CURRENT_BINARY_DIR}/disaster_recovery_2_test_shape.json COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/trx_finality_status_test.py ${CMAKE_CURRENT_BINARY_DIR}/trx_finality_status_test.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/trx_finality_status_forked_test.py ${CMAKE_CURRENT_BINARY_DIR}/trx_finality_status_forked_test.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugin_http_api_test.py ${CMAKE_CURRENT_BINARY_DIR}/plugin_http_api_test.py COPYONLY)
@@ -149,6 +151,8 @@ set_property(TEST transition_to_if_lr PROPERTY LABELS long_running_tests)

add_test(NAME disaster_recovery COMMAND tests/disaster_recovery.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
set_property(TEST disaster_recovery PROPERTY LABELS nonparallelizable_tests)
add_test(NAME disaster_recovery_2 COMMAND tests/disaster_recovery_2.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
set_property(TEST disaster_recovery_2 PROPERTY LABELS nonparallelizable_tests)

add_test(NAME ship_test COMMAND tests/ship_test.py -v --num-clients 10 --num-requests 5000 ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
set_property(TEST ship_test PROPERTY LABELS nonparallelizable_tests)
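For convenience, a minimal sketch (not part of this PR) of how the newly registered test could be run on its own from a configured build tree; the build directory name and the use of ctest's -R filter are assumptions for illustration:

# Sketch only: run just the new test via ctest.
# Assumes a configured build directory named "build" and ctest on PATH.
import subprocess

subprocess.run(
    ["ctest", "-R", "^disaster_recovery_2$", "--output-on-failure"],
    cwd="build",   # assumed build directory
    check=True,    # raise if the test fails
)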
9 changes: 5 additions & 4 deletions tests/disaster_recovery.py
@@ -7,18 +7,19 @@
from TestHarness.Node import BlockType

###############################################################
# disaster_recovery
# disaster_recovery - Scenario 1
#
# Integration test with 4 finalizers (A, B, C, and D).
#
# The 4 nodes are cleanly shutdown in the following state:
# - A has LIB N. A has a finalizer safety information file that locks on a block after N.
# - B, C, and D have LIB less than N. They have finalizer safety information files that lock on N.
#
# All nodes lose their reversible blocks and restart from an earlier snapshot.
# Nodes B, C, and D lose their reversible blocks. All nodes restart from an earlier snapshot.
#
# A is restarted and replays up to block N after restarting from snapshot. Block N is sent to the other
# nodes B, C, and D after they are also started up again.
# A is restarted and replays up to its last reversible block (which is a block number greater than N) after
# restarting from snapshot. Blocks N and later are sent to the other nodes B, C, and D after they are also
# started up again.
#
# Verify that LIB advances and that A, B, C, and D are eventually voting strong on new blocks.
#
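To make the updated Scenario 1 description concrete, here is a hedged sketch of the recovery step it describes, reusing TestHarness helpers that appear in disaster_recovery_2.py below; the node handles (nodeA..nodeD) and the snapshot source are illustrative assumptions, not the actual Scenario 1 code:

# Sketch only: the Scenario 1 recovery step as described in the updated comment.
# nodeA..nodeD and the snapshot path are assumed; the helpers match those used
# in disaster_recovery_2.py further down in this PR.
for node in [nodeB, nodeC, nodeD]:          # only B, C, and D drop reversible blocks
    node.removeReversibleBlks()
for node in [nodeA, nodeB, nodeC, nodeD]:   # all four restart from the earlier snapshot
    assert node.relaunch(chainArg=f" --snapshot {nodeA.getLatestSnapshot()}")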
132 changes: 132 additions & 0 deletions tests/disaster_recovery_2.py
@@ -0,0 +1,132 @@
#!/usr/bin/env python3
import os
import shutil
import signal
import time
from TestHarness import Cluster, TestHelper, Utils, WalletMgr
from TestHarness.Node import BlockType

###############################################################
# disaster_recovery - Scenario 2
#
Review comment (Member):

I like the detailed description here. It would be even better for each scenario to have one sentence describing the purpose of the test.

I think our new tests should follow this test's pattern.

# Integration test with 5 nodes (A, B, C, D, and P). Nodes A, B, C, and D each have one finalizer but no proposers.
# Node P has a proposer but no finalizers. The finalizer policy consists of the four finalizers with a threshold of 3.
# The proposer policy involves just the single proposer P.
#
# A, B, C, and D can be connected to each other in any topology, as long as blocks sent to node A can propagate to the
# other nodes B, C, and D. However, node P should only be connected to node A.
#
# At some point after the IF transition has completed and LIB is advancing, block production on node P should be paused.
# Enough time should be given to allow any in-flight votes on the latest produced blocks to be delivered to node P.
# Then, the connection between node P and node A should be severed, and block production on node P resumed. The LIB on
# node P should advance to block N and then stall there. Shortly after that, node P should be cleanly shut down.
#
# Verify that the LIB on A, B, C, and D has stalled and is less than block N. Then, nodes A, B, C, and D can all be
# cleanly shut down.
#
# Then, reversible blocks from all nodes should be removed. All nodes are restarted from an earlier
# snapshot (prior to block N).
#
# P is restarted and replays up to block N after restarting from snapshot. Blocks up to and including block N are sent
# to the other nodes A, B, C, and D after they are also started up again.
#
# Verify that LIB advances and that A, B, C, and D are eventually voting strong on new blocks.
###############################################################

Print=Utils.Print
errorExit=Utils.errorExit

args=TestHelper.parse_args({"-d","--keep-logs","--dump-error-details","-v","--leave-running","--unshared"})
pnodes=1
delay=args.d
debug=args.v
prod_count = 1 # per node prod count
total_nodes=pnodes+4
dumpErrorDetails=args.dump_error_details

Utils.Debug=debug
testSuccessful=False

cluster=Cluster(unshared=args.unshared, keepRunning=args.leave_running, keepLogs=args.keep_logs)
walletMgr=WalletMgr(True, keepRunning=args.leave_running, keepLogs=args.keep_logs)

try:
TestHelper.printSystemInfo("BEGIN")

cluster.setWalletMgr(walletMgr)

Print(f'producing nodes: {pnodes}, delay between nodes launch: {delay} second{"s" if delay != 1 else ""}')

Print("Stand up cluster")
specificExtraNodeosArgs={}
specificExtraNodeosArgs[0]="--plugin eosio::net_api_plugin --plugin eosio::producer_api_plugin "

if cluster.launch(pnodes=pnodes, totalNodes=total_nodes, totalProducers=pnodes, specificExtraNodeosArgs=specificExtraNodeosArgs,
topo="./tests/disaster_recovery_2_test_shape.json", delay=delay, loadSystemContract=False,
activateIF=True, signatureProviderForNonProducer=True) is False:
errorExit("Failed to stand up eos cluster.")

assert cluster.biosNode.getInfo(exitOnError=True)["head_block_producer"] != "eosio", "launch should have waited for production to change"

cluster.biosNode.kill(signal.SIGTERM)
cluster.waitOnClusterSync(blockAdvancing=5)

node0 = cluster.getNode(0) # P
node1 = cluster.getNode(1) # A
node2 = cluster.getNode(2) # B
node3 = cluster.getNode(3) # C
node4 = cluster.getNode(4) # D

Print("Create snapshot (node 0)")
ret = node0.createSnapshot()
assert ret is not None, "Snapshot creation failed"
ret_head_block_num = ret["payload"]["head_block_num"]
Print(f"Snapshot head block number {ret_head_block_num}")

Print("Wait for snapshot node lib to advance")
node0.waitForBlock(ret_head_block_num+1, blockType=BlockType.lib)
assert node1.waitForLibToAdvance(), "Node1 did not advance LIB after snapshot of Node0"

assert node0.waitForLibToAdvance(), "Node0 did not advance LIB after snapshot"
currentLIB = node0.getIrreversibleBlockNum()

Print("Pause production on Node0")
lib = node0.getIrreversibleBlockNum()
ret_json = node0.processUrllibRequest("producer", "pause")
# wait for lib because waitForBlock uses > not >=
assert node0.waitForBlock(lib, blockType=BlockType.lib), "Node0 did not advance LIB after pause"
time.sleep(1)

Print("Disconnect the producing node (Node0) from peer Node1")
ret_json = node0.processUrllibRequest("net", "disconnect", "localhost:9877")
assert not node0.waitForLibToAdvance(timeout=10), "Node0 LIB still advancing after disconnect"

Print("Resume production on Node0")
ret_json = node0.processUrllibRequest("producer", "resume")
assert node0.waitForHeadToAdvance(blocksToAdvance=2)

assert not node1.waitForHeadToAdvance(timeout=5), "Node1 head still advancing after disconnect"

for node in [node0, node1, node2, node3, node4]:
node.kill(signal.SIGTERM)

for node in [node0, node1, node2, node3, node4]:
assert not node.verifyAlive(), "Node did not shutdown"

for node in [node0, node1, node2, node3, node4]:
node.removeReversibleBlks()
node.removeState()

for i in range(5):
isRelaunchSuccess = cluster.getNode(i).relaunch(chainArg=" -e --snapshot {}".format(node0.getLatestSnapshot()))
assert isRelaunchSuccess, f"node {i} relaunch from snapshot failed"

for node in [node0, node1, node2, node3, node4]:
assert node.waitForLibToAdvance(), "Node did not advance LIB after relaunch"

testSuccessful=True
finally:
TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, dumpErrorDetails=dumpErrorDetails)

exitCode = 0 if testSuccessful else 1
exit(exitCode)
124 changes: 124 additions & 0 deletions tests/disaster_recovery_2_test_shape.json
@@ -0,0 +1,124 @@
{
"name": "testnet_",
"ssh_helper": {
"ssh_cmd": "/usr/bin/ssh",
"scp_cmd": "/usr/bin/scp",
"ssh_identity": "",
"ssh_args": ""
},
"nodes": {
"bios":{
"name": "bios",
"keys": [
{
"privkey":"5KQwrPbwdL6PhXujxW37FSSQZ1JiwsST4cqQzDeyXtP79zkvFD3",
"pubkey":"EOS6MRyAjQq8ud7hVNYcfnVPJqcVpscN5So8BhtHuGYqET5GDW5CV"
}
],
"peers": [],
"producers": [
"eosio"
],
"dont_start": false
},
"testnet_00":{
"name": "testnet_00",
"keys": [
{
"privkey":"5Jf4sTk7vwX1MYpLJ2eQFanVvKYXFqGBrCyANPukuP2BJ5WAAKZ",
"pubkey":"EOS58B33q9S7oNkgeFfcoW3VJYu4obfDiqn5RHGE2ige6jVjUhymR"
}
],
"peers": [
"bios",
"testnet_01"
],
"producers": [
"defproducera"
],
"dont_start": false
},
"testnet_01":{
"name": "testnet_01",
"keys": [
{
"privkey":"5HviUPkTEtvF2B1nm8aZUnjma2TzgpKRjuXjwHyy3FME4xDbkZF",
"pubkey":"EOS5CbcTDgbks2ptTxvyCbT9HFbzX7PDHUY2wN4DDnVBhhQr2ZNDE",
"blspubkey":"PUB_BLS_Y8ndNvnrEpnzJcNUg49ncWDiDGRgR7WUmRRDR9yMURoS6zF14sPnbb-DsTGp0cEM628a4CmG6KXMhPJMqGZvb7RM_MGIwgbEhVaENL8rXeYLOuFDS375KHFgXxs2P5sZuaN7aA",
"blsprivkey":"PVT_BLS_A1Mifu5xyaxiveyjnZ-qN2zOt-5_KLMpjTrDI9udcQNV1NBR",
"blspop":"SIG_BLS_7D0OUU1h7E0AKkAmqV4v3Ot9oSPWJBOss4yDejr2x1g5G31cSSAYIAtqZOYC-ioNzddY7zkvTcbhKgBzv5a-G1HmV1pOCXXPJ5TL0iqU8Ks5abeEWCdhArGATmRQiSMYNcj9rMQcm3H6Z0pOlOdbDdt8Cg-SY_H4jEGmAY2ZqudAH_U8gS19aydJU-2uQq0SPIr2Okl-WNbc-q3NVQw6Y0sAHAwN4BOIHup2MJyDDDIbpSEkBchRp3zna1XJf6oBuUzpqQ"
}
],
"peers": [
"bios",
"testnet_02",
"testnet_04"
],
"producers": [
],
"dont_start": false
},
"testnet_02":{
"name": "testnet_02",
"keys": [
{
"privkey":"5KkQbdxFHr8Pg1N3DEMDdU7emFgUTwQvh99FDJrodFhUbbsAtQT",
"pubkey":"EOS6Tkpf8kcDfa32WA9B4nTcEJ64ZdDMSNioDcaL6rzdMwnpzaWJB",
"blspubkey":"PUB_BLS_Wf_O_QeyVhekDXS5q3qBxTyj_qxSrX_uiCY4z8ClpW0X2jrAVgAVHOQ9IR2H40QTWveD8QIGhhSbmSFPa0zFbs5k3yfnjfuuwpA7T1O13_LSdtxT19ehYiE4chZX6SUMJ09JFA",
"blsprivkey":"PVT_BLS_1ZLWim0k80ssXswSZp1T3ydHO9U3gLnKKlEBIDy8927XDLLj",
"blspop":"SIG_BLS_EL09aI3w-qCgarLM2Z5-T6sisSHBN0J4vMZxtGQklkOcAxgnCaPPXe0roxY4W0gVe2y6T01YrklmT_qZu2tAwqiNrVJcScY8QKvRSeczGBBab1MgnHvaAOuf6bA4JPAELIu2iPWfsS6-oLyLbNP5xtZpMXPHu3yaSJssXNOb5rcVs1KXaIUEagJeAlBBQEcKmFWfeAsJ_R8JDw4i9gSNmROzUjm6LVBpvB7vrnPDPFRA0BQ19H4FED6PtuFPShwJGVz4dg"
}
],
"peers": [
"bios",
"testnet_01",
"testnet_04"
],
"producers": [
],
"dont_start": false
},
"testnet_03":{
"name": "testnet_03",
"keys": [
{
"privkey":"5JxTJJegQBpEL1p77TzkN1ompMB9gDwAfjM9chPzFCB4chxmwrE",
"pubkey":"EOS52ntDHqA2qj4xVo7KmxdezMRhvvBqpZBuKYJCsgihisxmywpAx",
"blspubkey":"PUB_BLS_C-FprIiry6X-8dlLYH7xUAhIuKXBQv56zJPgtcdmKeHf8AAy750eRrOYBtKG0-QEIN5l_yl9dTLvAYmOios6Q5t3ybWBUVVQ2WWcbZLVxzwBftLwYvo1zPXH7LHEE_sAgP1i7g",
"blsprivkey":"PVT_BLS_ubElmjajfsYP_9HRSpmV-Fi_IPWKTyJS4XFSWrU8ezMZ_mL_",
"blspop":"SIG_BLS_k3wrhVl2GUG_lGsPr9io-zoamPw7eiaxMDExk-yOqcpXtu0zALHoUWJRh0WOerAS1-_RQNhbi4q-BWO9IbiNWRKP9CYIhNIL6ochGHHy4aBmZ-IzEjfBrDt7inDtFTYY0Gl372e5OqPXAwi6J3GeHipXuzAiw7SV8XdWFefthxId4meKX6vw5_RWx4XQ4ScRYoCG7UQtIZkQPEsu1SfJGL6z-cfTTSq-naKbzp0QQYfqtQkFfmL7qQUH1iohnb0HbTbRbQ"
}
],
"peers": [
"bios",
"testnet_01",
"testnet_02",
"testnet_04"
],
"producers": [
],
"dont_start": false
},
"testnet_04":{
"name": "testnet_04",
"keys": [
{
"privkey":"5K3h9XiAmrx9EuqD8CRxHgQwEVDaWpqrhrnpdvwHtVzwJFMhNmE",
"pubkey":"EOS7K5pQCk22ojetRdyumrqp6nJX6eiQiTWWcGkZAMGhoBxgcsxhK",
"blspubkey":"PUB_BLS_kGOCEX1MM5Xl928OOvGLyNo3_GpV8av1HnoaCEGOD8bAu3MDvazu0gCZGA1G7msTh1ZTPMEMVdXMuRVS0tv_9bW9Ohz9XvgtjgbPpxxc_NaeENkGg4uDBOro0Rk8DCEW4ToLKA",
"blsprivkey":"PVT_BLS_EnQXObGKvYqfubrKjxpCqNkHeLlkQg7LERjDGm1RKjgyFZnk",
"blspop":"SIG_BLS_bXrzPVc-ahxOCWrcl-iWIMuS8ego54iz7vi38A8h_ViqtxklH9O3A2z0eiw5j40M08ejiTm7JbCY_GOwulv1oXb9SaLYQkCTZjzCVssDkghLBRTVCZW2oJmU9WbZXikNw6nkygTs5sUTtCda2a_M5jqY_Rw92_NWmbolgBNkFvMcAgSHexdETA-b7QgJX_oYBWkyP0Pt8LzO6bJueZSjH8wZ8VuPc9o8taY85mt_qgdOTbXVBG2m5ud0eAUps2UHAHt-Ig"
}
],
"peers": [
"bios",
"testnet_01",
"testnet_02",
"testnet_03"
],
"producers": [
],
"dont_start": false
}
}
}
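As a cross-check on the topology above, here is a small, hedged sketch (not part of this PR) that loads the shape file and verifies the constraints stated in the test header: the producer node testnet_00 (P) peers only with node A (testnet_01), aside from bios which is shut down early in the test, and every finalizer node can reach A without going through P. The repo-relative file path and the reachability check are illustrative assumptions:

# Sketch only: sanity-check disaster_recovery_2_test_shape.json against the
# topology described in the Scenario 2 test header.
import json

with open("tests/disaster_recovery_2_test_shape.json") as f:   # assumed path
    nodes = json.load(f)["nodes"]

# P (testnet_00) should only peer with bios and A (testnet_01).
assert set(nodes["testnet_00"]["peers"]) == {"bios", "testnet_01"}

# Build an undirected peer graph and confirm finalizers B, C, D (testnet_02..04)
# can reach A (testnet_01) without traversing P (testnet_00) or bios.
graph = {name: set(info["peers"]) for name, info in nodes.items()}
for name, peers in list(graph.items()):
    for peer in peers:
        graph.setdefault(peer, set()).add(name)

def reachable(start, goal, blocked):
    seen, stack = set(blocked) | {start}, [start]
    while stack:
        current = stack.pop()
        if current == goal:
            return True
        for neighbor in graph.get(current, ()):
            if neighbor not in seen:
                seen.add(neighbor)
                stack.append(neighbor)
    return False

for finalizer in ("testnet_02", "testnet_03", "testnet_04"):
    assert reachable(finalizer, "testnet_01", blocked={"testnet_00", "bios"})
print("topology matches the scenario description")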