Skip to content
This repository has been archived by the owner on Jul 27, 2022. It is now read-only.

Commit

Permalink
Problem (Fix #2203): validator crash after re-joined with different validator key
Browse files Browse the repository at this point in the history

Solution:
- deduplicate staking addresses when doing cleanup
- index used_validator_addresses when initializing after loading from storage
  • Loading branch information
yihuang committed Sep 4, 2020
1 parent e245ac1 commit 06f717f
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 69 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions chain-abci/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ secp256k1 = { git = "https://github.com/crypto-com/rust-secp256k1-zkp.git", rev
parity-scale-codec = { features = ["derive"], version = "1.3" }
thiserror = "1.0"
kvdb = "0.7"
itertools = "0.9"

[target.'cfg(target_os = "linux")'.dependencies]
aesm-client = {version = "0.5", features = ["sgxs"], optional = true }
Expand Down
8 changes: 8 additions & 0 deletions chain-abci/src/staking/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use std::collections::{BTreeMap, BTreeSet, HashMap};

use core::cmp::Ordering;
use itertools::Itertools;
use parity_scale_codec::{Decode, Encode};
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -188,6 +189,12 @@ impl StakingTable {
.idx_validator_address
.insert(val.validator_address(), *addr)
.is_none());
for (val_addr, _) in val.used_validator_addresses.iter() {
assert!(self
.idx_validator_address
.insert(val_addr.clone(), *addr)
.is_none());
}
} else {
// no panic: Invariant 2.4
unreachable!("only council node addresses stored in internal indicies");
Expand Down Expand Up @@ -472,6 +479,7 @@ impl StakingTable {
let to_delete = self
.idx_validator_address
.values()
.unique()
.filter_map(|addr| {
let staking = heap.get(addr).unwrap();
if let Some(NodeState::CouncilNode(val)) = &staking.node_meta {
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/bot/chainbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ async def fix_genesis(genesis, cfg):
json.dump(cfg, fp_cfg)
fp_cfg.flush()
await run(
f'dev-utils genesis generate --in_place --no_backup --no_genesistime_overwrite --no_evidence_overwrite '
f'dev-utils genesis generate --in_place --no_backup --no_genesistime_overwrite '
f'--genesis_dev_config_path "{fp_cfg.name}" '
f'--tendermint_genesis_path "{fp_genesis.name}"'
)
Expand Down
16 changes: 5 additions & 11 deletions integration-tests/multinode/join_cluster.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,12 @@
"op": "replace",
"path": "/initial_fee_policy/per_byte_fee",
"value": "1.25"
}
],
"tendermint_config_patch": [
{
"op": "replace",
"path": "/consensus/create_empty_blocks",
"value": true
},
{
"op": "add",
"path": "/consensus/create_empty_blocks_interval",
"value": "0s"
"op": "replace",
"path": "/evidence/max_age_duration",
"value": "18000000000"
}
]
],
"tendermint_config_patch": []
}
159 changes: 102 additions & 57 deletions integration-tests/multinode/join_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
#!/usr/bin/env python3
import os
from chainbot import SigningKey
from common import get_rpc, UnixStreamXMLRPCClient, wait_for_validators, wait_for_port, wait_for_blocks, wait_for_tx, stop_node, wait_for_blocktime
from pathlib import Path
import json
from chainbot import SigningKey, node_key
from common import (
get_rpc, UnixStreamXMLRPCClient, wait_for_validators,
wait_for_port, wait_for_blocks, wait_for_tx,
stop_node, wait_for_blocktime, latest_block_time
)

'''
three nodes, 1/3 voting power each.
Expand All @@ -12,25 +18,36 @@
- jail_duration
procedure:
- restore node2 wallet on node0
- stop node2
- wait for missed_block_threshold blocks to make non-live fault
- check punishment state on the first staking address
- start node2
- re-join
- check validators 3
- unbond
- check validators 2
- withdraw and deposit
- re-join
- check validators 3
- unbond and re-join
- unbond
- check number of validators 2
- withdraw and deposit
- restart node2 with a new validator key
- re-join
- check number of validators 3
- non-live and re-join with new validator key
- restore node2 wallet on node0
- stop node2
- wait for missed_block_threshold blocks to make non-live fault
- check punishment state on the staking address
- start node2 with the new validator key
- re-join with the new validator key
- check number of validators: 3
- non-live and wait for cleanup
- stop node2
- wait for missed_block_threshold blocks to make non-live fault again
- check punishment state on the staking address
- wait for unbonding period
- check node2 validator record is cleaned up
'''

# keep these values same as join_cluster.json
TARGET_NODE = 'node2'
TARGET_NODE_MNEMONIC = 'symptom labor zone shrug chicken bargain hood define tornado mass inquiry rural step color guitar'
TARGET_NODE_VALIDATOR_SEED = '5c1b9c06ae7485cd0f9d75819f964db3b1306ebd397f5bbdc1dd386a32b7c1c0'
NEW_VALIDATOR_SEED = '5c1b9c06ae7485cd0f9d75819f964db3b1306ebd397f5bbdc1dd386a32b7c1c1'
MISSED_BLOCK_THRESHOLD = 5
JAIL_DURATION = 10
BASE_PORT = int(os.environ.get('BASE_PORT', 26650))
Expand All @@ -46,51 +63,10 @@
wait_for_validators(rpc, 3)

enckey = rpc.wallet.restore(TARGET_NODE_MNEMONIC, name='target')

print('Stop', TARGET_NODE)
stop_node(supervisor, TARGET_NODE)

print('Waiting for', MISSED_BLOCK_THRESHOLD + 3, 'blocks')
wait_for_blocks(rpc, MISSED_BLOCK_THRESHOLD + 3)

assert len(rpc.chain.validators()['validators']) == 2

addr = rpc.address.list(enckey=enckey, name='target')[0]
rpc.wallet.sync(enckey=enckey, name='target')
state = rpc.staking.state(addr, name='target')
print('joinstate', state)
punishment = state['last_slash']
print('punishment', punishment)
assert punishment['kind'] == 'NonLive'
print('slash amount', punishment['amount'])
if state['node_meta']:
jailed_until = state['node_meta']['CouncilNode']['jailed_until']
assert jailed_until is None, 'NonLive fault is not jailed'
else:
print('node is cleaned up')

print('Starting', TARGET_NODE)
supervisor.supervisor.startProcessGroup(TARGET_NODE)
wait_for_port(TARGET_PORT + 7)
print('Started', TARGET_NODE)

print('Join', TARGET_NODE)
txid = rpc.staking.join(
TARGET_NODE,
SigningKey(TARGET_NODE_VALIDATOR_SEED).pub_key_base64(),
addr,
rpc.staking.gen_keypackage(),
enckey=enckey,
name='target',
)

print('Wait for transaction', txid)
wait_for_tx(rpc, txid)

print('Wait 3 blocks for validators to take effect')
wait_for_blocks(rpc, 3)

assert len(rpc.chain.validators()['validators']) == 3
############## unbond and re-join ##################

rpc.wallet.sync(enckey=enckey, name='target')
txid = rpc.staking.unbond(addr, int(state['bonded']) - 100000000 + 1, enckey=enckey, name='target')
Expand Down Expand Up @@ -151,3 +127,72 @@
wait_for_blocks(rpc, 3)

assert len(rpc.chain.validators()['validators']) == 3

############## non-live and re-join with new validator key ##################

print('Stop', TARGET_NODE)
stop_node(supervisor, TARGET_NODE)

print('Waiting for', MISSED_BLOCK_THRESHOLD+1, 'blocks')
wait_for_blocks(rpc, MISSED_BLOCK_THRESHOLD+1)

# assert len(rpc.chain.validators()['validators']) == 2

rpc.wallet.sync(enckey=enckey, name='target')
state = rpc.staking.state(addr, name='target')
print('joinstate', state)
punishment = state['last_slash']
print('punishment', punishment)
assert punishment['kind'] == 'NonLive'
print('slash amount', punishment['amount'])
if state['node_meta']:
jailed_until = state['node_meta']['CouncilNode']['jailed_until']
assert jailed_until is None, 'NonLive fault is not jailed'
else:
print('node is cleaned up')

print('Start node2 with new validator key')
json.dump(node_key(NEW_VALIDATOR_SEED),
open('./data/%s/tendermint/priv_validator_key.json' % TARGET_NODE, 'w'),
indent=4)
print('Starting', TARGET_NODE)
supervisor.supervisor.startProcessGroup(TARGET_NODE)
wait_for_port(TARGET_PORT + 7)
print('Started', TARGET_NODE)

print('Join', TARGET_NODE)
txid = rpc.staking.join(
TARGET_NODE,
SigningKey(NEW_VALIDATOR_SEED).pub_key_base64(),
addr,
rpc.staking.gen_keypackage(),
enckey=enckey,
name='target',
)

print('Wait for transaction', txid)
wait_for_tx(rpc, txid)

print('Wait 3 blocks for validators to take effect')
wait_for_blocks(rpc, 3)

assert len(rpc.chain.validators()['validators']) == 3

############## non-live and wait for cleanup ##################

print('Stop', TARGET_NODE)
stop_node(supervisor, TARGET_NODE)

print('Waiting for', MISSED_BLOCK_THRESHOLD + 3, 'blocks')
wait_for_blocks(rpc, MISSED_BLOCK_THRESHOLD + 3)

assert len(rpc.chain.validators()['validators']) == 2

rpc.wallet.sync(enckey=enckey, name='target')
print('state before cleanup', rpc.staking.state(addr, name='target'))

wait_for_blocktime(rpc, latest_block_time(rpc) + 18)
rpc.wallet.sync(enckey=enckey, name='target')
state = rpc.staking.state(addr, name='target')
print('state', state)
assert state.get('node_meta') is None

0 comments on commit 06f717f

Please sign in to comment.