Skip to content

Commit

Permalink
Apply hotfix for inconsistent head (#1639)
Browse files Browse the repository at this point in the history
## Issue Addressed

- Resolves #1616

## Proposed Changes

If we look at the function which persists fork choice and the canonical head to disk:

https://github.com/sigp/lighthouse/blob/1db8daae0c7bb34bf2e05644fa6bf313c2bea98e/beacon_node/beacon_chain/src/beacon_chain.rs#L234-L280

There is a race-condition which might cause the canonical head and fork choice values to be out-of-sync.

I believe this is the cause of #1616. I managed to recreate the issue and produce a database that was unable to sync under the `master` branch but able to sync with this branch.

These new changes solve the issue by ignoring the persisted `canonical_head_block_root` value and instead getting fork choice to generate it. This ensures that the canonical head is in-sync with fork choice.

## Additional Info

This is hotfix method that leaves some crusty code hanging around. Once this PR is merged (to satisfy the v0.2.x users) we should later update and merge #1638 so we can have a clean fix for the v0.3.x versions.
  • Loading branch information
paulhauner committed Sep 22, 2020
1 parent 14ff385 commit bd39cc8
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 93 deletions.
194 changes: 109 additions & 85 deletions beacon_node/beacon_chain/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,12 @@ pub struct BeaconChainBuilder<T: BeaconChainTypes> {
#[allow(clippy::type_complexity)]
store: Option<Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>>,
store_migrator: Option<T::StoreMigrator>,
canonical_head: Option<BeaconSnapshot<T::EthSpec>>,
/// The finalized checkpoint to anchor the chain. May be genesis or a higher
/// checkpoint.
pub finalized_snapshot: Option<BeaconSnapshot<T::EthSpec>>,
pub genesis_time: Option<u64>,
genesis_block_root: Option<Hash256>,
#[allow(clippy::type_complexity)]
fork_choice: Option<
ForkChoice<BeaconForkChoiceStore<T::EthSpec, T::HotStore, T::ColdStore>, T::EthSpec>,
>,
op_pool: Option<OperationPool<T::EthSpec>>,
eth1_chain: Option<Eth1Chain<T::Eth1Chain, T::EthSpec>>,
event_handler: Option<T::EventHandler>,
Expand Down Expand Up @@ -146,9 +147,9 @@ where
Self {
store: None,
store_migrator: None,
canonical_head: None,
finalized_snapshot: None,
genesis_time: None,
genesis_block_root: None,
fork_choice: None,
op_pool: None,
eth1_chain: None,
event_handler: None,
Expand Down Expand Up @@ -278,22 +279,31 @@ where
.to_string()
})?;

self.genesis_block_root = Some(chain.genesis_block_root);
self.head_tracker = Some(
HeadTracker::from_ssz_container(&chain.ssz_head_tracker)
.map_err(|e| format!("Failed to decode head tracker for database: {:?}", e))?,
);
let persisted_fork_choice = store
.get_item::<PersistedForkChoice>(&Hash256::from_slice(&FORK_CHOICE_DB_KEY))
.map_err(|e| format!("DB error when reading persisted fork choice: {:?}", e))?
.ok_or_else(|| "No persisted fork choice present in database.".to_string())?;

let head_block_root = chain.canonical_head_block_root;
let head_block = store
.get_item::<SignedBeaconBlock<TEthSpec>>(&head_block_root)
.map_err(|e| format!("DB error when reading head block: {:?}", e))?
.ok_or_else(|| "Head block not found in store".to_string())?;
let head_state_root = head_block.state_root();
let head_state = store
.get_state(&head_state_root, Some(head_block.slot()))
.map_err(|e| format!("DB error when reading head state: {:?}", e))?
.ok_or_else(|| "Head state not found in store".to_string())?;
let fc_store = BeaconForkChoiceStore::from_persisted(
persisted_fork_choice.fork_choice_store,
store.clone(),
)
.map_err(|e| format!("Unable to load ForkChoiceStore: {:?}", e))?;

let fork_choice =
ForkChoice::from_persisted(persisted_fork_choice.fork_choice, fc_store)
.map_err(|e| format!("Unable to parse persisted fork choice from disk: {:?}", e))?;

let genesis_block = store
.get_item::<SignedBeaconBlock<TEthSpec>>(&chain.genesis_block_root)
.map_err(|e| format!("DB error when reading genesis block: {:?}", e))?
.ok_or_else(|| "Genesis block not found in store".to_string())?;
let genesis_state = store
.get_state(&genesis_block.state_root(), Some(genesis_block.slot()))
.map_err(|e| format!("DB error when reading genesis state: {:?}", e))?
.ok_or_else(|| "Genesis block not found in store".to_string())?;

self.genesis_time = Some(genesis_state.genesis_time);

self.op_pool = Some(
store
Expand All @@ -303,35 +313,16 @@ where
.unwrap_or_else(OperationPool::new),
);

let finalized_block_root = head_state.finalized_checkpoint.root;
let finalized_block = store
.get_item::<SignedBeaconBlock<TEthSpec>>(&finalized_block_root)
.map_err(|e| format!("DB error when reading finalized block: {:?}", e))?
.ok_or_else(|| "Finalized block not found in store".to_string())?;
let finalized_state_root = finalized_block.state_root();
let finalized_state = store
.get_state(&finalized_state_root, Some(finalized_block.slot()))
.map_err(|e| format!("DB error when reading finalized state: {:?}", e))?
.ok_or_else(|| "Finalized state not found in store".to_string())?;

self.finalized_snapshot = Some(BeaconSnapshot {
beacon_block_root: finalized_block_root,
beacon_block: finalized_block,
beacon_state_root: finalized_state_root,
beacon_state: finalized_state,
});

self.canonical_head = Some(BeaconSnapshot {
beacon_block_root: head_block_root,
beacon_block: head_block,
beacon_state_root: head_state_root,
beacon_state: head_state,
});

let pubkey_cache = ValidatorPubkeyCache::load_from_file(pubkey_cache_path)
.map_err(|e| format!("Unable to open persisted pubkey cache: {:?}", e))?;

self.genesis_block_root = Some(chain.genesis_block_root);
self.head_tracker = Some(
HeadTracker::from_ssz_container(&chain.ssz_head_tracker)
.map_err(|e| format!("Failed to decode head tracker for database: {:?}", e))?,
);
self.validator_pubkey_cache = Some(pubkey_cache);
self.fork_choice = Some(fork_choice);

Ok(self)
}
Expand Down Expand Up @@ -374,12 +365,20 @@ where
)
})?;

self.finalized_snapshot = Some(BeaconSnapshot {
let genesis = BeaconSnapshot {
beacon_block_root,
beacon_block,
beacon_state_root,
beacon_state,
});
};

let fc_store = BeaconForkChoiceStore::get_forkchoice_store(store, &genesis);

let fork_choice = ForkChoice::from_genesis(fc_store, &genesis.beacon_block.message)
.map_err(|e| format!("Unable to build initialize ForkChoice: {:?}", e))?;

self.fork_choice = Some(fork_choice);
self.genesis_time = Some(genesis.beacon_state.genesis_time);

Ok(self.empty_op_pool())
}
Expand Down Expand Up @@ -457,23 +456,73 @@ where
.store
.clone()
.ok_or_else(|| "Cannot build without a store.".to_string())?;

// If this beacon chain is being loaded from disk, use the stored head. Otherwise, just use
// the finalized checkpoint (which is probably genesis).
let mut canonical_head = if let Some(head) = self.canonical_head {
head
let mut fork_choice = self
.fork_choice
.ok_or_else(|| "Cannot build without fork choice.".to_string())?;
let genesis_block_root = self
.genesis_block_root
.ok_or_else(|| "Cannot build without a genesis block root".to_string())?;

let current_slot = if slot_clock
.is_prior_to_genesis()
.ok_or_else(|| "Unable to read slot clock".to_string())?
{
self.spec.genesis_slot
} else {
self.finalized_snapshot
.ok_or_else(|| "Cannot build without a state".to_string())?
slot_clock
.now()
.ok_or_else(|| "Unable to read slot".to_string())?
};

let head_block_root = fork_choice
.get_head(current_slot)
.map_err(|e| format!("Unable to get fork choice head: {:?}", e))?;

let head_block = store
.get_item::<SignedBeaconBlock<TEthSpec>>(&head_block_root)
.map_err(|e| format!("DB error when reading head block: {:?}", e))?
.ok_or_else(|| "Head block not found in store".to_string())?;
let head_state_root = head_block.state_root();
let head_state = store
.get_state(&head_state_root, Some(head_block.slot()))
.map_err(|e| format!("DB error when reading head state: {:?}", e))?
.ok_or_else(|| "Head state not found in store".to_string())?;

let mut canonical_head = BeaconSnapshot {
beacon_block_root: head_block_root,
beacon_block: head_block,
beacon_state_root: head_state_root,
beacon_state: head_state,
};

if canonical_head.beacon_block.state_root() != canonical_head.beacon_state_root {
return Err("beacon_block.state_root != beacon_state".to_string());
}

canonical_head
.beacon_state
.build_all_caches(&self.spec)
.map_err(|e| format!("Failed to build state caches: {:?}", e))?;

if canonical_head.beacon_block.state_root() != canonical_head.beacon_state_root {
return Err("beacon_block.state_root != beacon_state".to_string());
// Perform a check to ensure that the finalization points of the head and fork choice are
// consistent.
//
// This is a sanity check to detect database corruption.
let fc_finalized = fork_choice.finalized_checkpoint();
let head_finalized = canonical_head.beacon_state.finalized_checkpoint;
if fc_finalized != head_finalized {
if head_finalized.root == Hash256::zero()
&& head_finalized.epoch == fc_finalized.epoch
&& fc_finalized.root == genesis_block_root
{
// This is a legal edge-case encountered during genesis.
} else {
return Err(format!(
"Database corrupt: fork choice is finalized at {:?} whilst head is finalized at \
{:?}",
fc_finalized, head_finalized
));
}
}

let pubkey_cache_path = self
Expand All @@ -485,26 +534,6 @@ where
.map_err(|e| format!("Unable to init validator pubkey cache: {:?}", e))
})?;

let persisted_fork_choice = store
.get_item::<PersistedForkChoice>(&Hash256::from_slice(&FORK_CHOICE_DB_KEY))
.map_err(|e| format!("DB error when reading persisted fork choice: {:?}", e))?;

let fork_choice = if let Some(persisted) = persisted_fork_choice {
let fc_store =
BeaconForkChoiceStore::from_persisted(persisted.fork_choice_store, store.clone())
.map_err(|e| format!("Unable to load ForkChoiceStore: {:?}", e))?;

ForkChoice::from_persisted(persisted.fork_choice, fc_store)
.map_err(|e| format!("Unable to parse persisted fork choice from disk: {:?}", e))?
} else {
let genesis = &canonical_head;

let fc_store = BeaconForkChoiceStore::get_forkchoice_store(store.clone(), genesis);

ForkChoice::from_genesis(fc_store, &genesis.beacon_block.message)
.map_err(|e| format!("Unable to build initialize ForkChoice: {:?}", e))?
};

let beacon_chain = BeaconChain {
spec: self.spec,
config: self.chain_config,
Expand Down Expand Up @@ -533,9 +562,7 @@ where
eth1_chain: self.eth1_chain,
genesis_validators_root: canonical_head.beacon_state.genesis_validators_root,
canonical_head: TimeoutRwLock::new(canonical_head.clone()),
genesis_block_root: self
.genesis_block_root
.ok_or_else(|| "Cannot build without a genesis block root".to_string())?,
genesis_block_root,
fork_choice: RwLock::new(fork_choice),
event_handler: self
.event_handler
Expand Down Expand Up @@ -634,11 +661,8 @@ where
/// Requires the state to be initialized.
pub fn testing_slot_clock(self, slot_duration: Duration) -> Result<Self, String> {
let genesis_time = self
.finalized_snapshot
.as_ref()
.ok_or_else(|| "testing_slot_clock requires an initialized state")?
.beacon_state
.genesis_time;
.genesis_time
.ok_or_else(|| "testing_slot_clock requires an initialized state")?;

let slot_clock = TestingSlotClock::new(
Slot::new(0),
Expand Down
7 changes: 7 additions & 0 deletions beacon_node/beacon_chain/src/persisted_beacon_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ use types::Hash256;

#[derive(Clone, Encode, Decode)]
pub struct PersistedBeaconChain {
/// This value is ignored to resolve the issue described here:
///
/// https://github.com/sigp/lighthouse/pull/1639
///
/// The following PR will clean-up and remove this field:
///
/// https://github.com/sigp/lighthouse/pull/1638
pub canonical_head_block_root: Hash256,
pub genesis_block_root: Hash256,
pub ssz_head_tracker: SszHeadTracker,
Expand Down
7 changes: 5 additions & 2 deletions beacon_node/beacon_chain/tests/persistence_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,11 @@ fn assert_chains_pretty_much_the_same<T: BeaconChainTypes>(a: &BeaconChain<T>, b
a.genesis_block_root, b.genesis_block_root,
"genesis_block_root should be equal"
);

let slot = a.slot().unwrap();
assert!(
*a.fork_choice.read() == *b.fork_choice.read(),
"fork_choice should be equal"
a.fork_choice.write().get_head(slot).unwrap()
== b.fork_choice.write().get_head(slot).unwrap(),
"fork_choice heads should be equal"
);
}
7 changes: 2 additions & 5 deletions beacon_node/client/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -713,11 +713,8 @@ where
.ok_or_else(|| "system_time_slot_clock requires a beacon_chain_builder")?;

let genesis_time = beacon_chain_builder
.finalized_snapshot
.as_ref()
.ok_or_else(|| "system_time_slot_clock requires an initialized beacon state")?
.beacon_state
.genesis_time;
.genesis_time
.ok_or_else(|| "system_time_slot_clock requires an initialized beacon state")?;

let spec = self
.chain_spec
Expand Down
8 changes: 7 additions & 1 deletion consensus/fork_choice/src/fork_choice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use std::marker::PhantomData;
use proto_array::{Block as ProtoBlock, ProtoArrayForkChoice};
use ssz_derive::{Decode, Encode};
use types::{
BeaconBlock, BeaconState, BeaconStateError, Epoch, EthSpec, Hash256, IndexedAttestation, Slot,
BeaconBlock, BeaconState, BeaconStateError, Checkpoint, Epoch, EthSpec, Hash256,
IndexedAttestation, Slot,
};

use crate::ForkChoiceStore;
Expand Down Expand Up @@ -758,6 +759,11 @@ where
.is_descendant(self.fc_store.finalized_checkpoint().root, block_root)
}

/// Return the current finalized checkpoint.
pub fn finalized_checkpoint(&self) -> Checkpoint {
*self.fc_store.finalized_checkpoint()
}

/// Returns the latest message for a given validator, if any.
///
/// Returns `(block_root, block_slot)`.
Expand Down

0 comments on commit bd39cc8

Please sign in to comment.