From 30936c5abdabd1afa3fdb1c4e32299e4a2be753f Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Mon, 1 Jul 2024 18:38:28 +0300 Subject: [PATCH 1/3] Optimize allocations during finalization --- Cargo.lock | 1 + substrate/client/api/src/leaves.rs | 81 +++++++++---------- substrate/client/db/src/lib.rs | 50 +++++------- substrate/primitives/blockchain/Cargo.toml | 1 + .../primitives/blockchain/src/backend.rs | 41 ++++++---- .../blockchain/src/header_metadata.rs | 26 +++--- 6 files changed, 103 insertions(+), 97 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1785510714bc..4bd70905aee4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19673,6 +19673,7 @@ dependencies = [ "schnellru", "sp-api", "sp-consensus", + "sp-core", "sp-database", "sp-runtime", "sp-state-machine", diff --git a/substrate/client/api/src/leaves.rs b/substrate/client/api/src/leaves.rs index e129de8bf3fa..70efe8b19c62 100644 --- a/substrate/client/api/src/leaves.rs +++ b/substrate/client/api/src/leaves.rs @@ -45,33 +45,20 @@ pub struct RemoveOutcome { } /// Removed leaves after a finalization action. -pub struct FinalizationOutcome { - removed: BTreeMap, Vec>, +pub struct FinalizationOutcome +where + I: Iterator, +{ + removed: I, } -impl FinalizationOutcome { - /// Merge with another. This should only be used for displaced items that - /// are produced within one transaction of each other. - pub fn merge(&mut self, mut other: Self) { - // this will ignore keys that are in duplicate, however - // if these are actually produced correctly via the leaf-set within - // one transaction, then there will be no overlap in the keys. - self.removed.append(&mut other.removed); - } - - /// Iterate over all displaced leaves. - pub fn leaves(&self) -> impl Iterator { - self.removed.values().flatten() - } - +impl FinalizationOutcome +where + I: Iterator, +{ /// Constructor - pub fn new(new_displaced: impl Iterator) -> Self { - let mut removed = BTreeMap::, Vec>::new(); - for (hash, number) in new_displaced { - removed.entry(Reverse(number)).or_default().push(hash); - } - - FinalizationOutcome { removed } + pub fn new(new_displaced: I) -> Self { + FinalizationOutcome { removed: new_displaced } } } @@ -86,7 +73,7 @@ pub struct LeafSet { impl LeafSet where H: Clone + PartialEq + Decode + Encode, - N: std::fmt::Debug + Clone + AtLeast32Bit + Decode + Encode, + N: std::fmt::Debug + Copy + AtLeast32Bit + Decode + Encode, { /// Construct a new, blank leaf set. pub fn new() -> Self { @@ -117,13 +104,13 @@ where let number = Reverse(number); let removed = if number.0 != N::zero() { - let parent_number = Reverse(number.0.clone() - N::one()); + let parent_number = Reverse(number.0 - N::one()); self.remove_leaf(&parent_number, &parent_hash).then(|| parent_hash) } else { None }; - self.insert_leaf(number.clone(), hash.clone()); + self.insert_leaf(number, hash.clone()); ImportOutcome { inserted: LeafSetItem { hash, number }, removed } } @@ -150,7 +137,7 @@ where let inserted = parent_hash.and_then(|parent_hash| { if number.0 != N::zero() { - let parent_number = Reverse(number.0.clone() - N::one()); + let parent_number = Reverse(number.0 - N::one()); self.insert_leaf(parent_number, parent_hash.clone()); Some(parent_hash) } else { @@ -162,11 +149,12 @@ where } /// Remove all leaves displaced by the last block finalization. - pub fn remove_displaced_leaves(&mut self, displaced_leaves: &FinalizationOutcome) { - for (number, hashes) in &displaced_leaves.removed { - for hash in hashes.iter() { - self.remove_leaf(number, hash); - } + pub fn remove_displaced_leaves(&mut self, displaced_leaves: FinalizationOutcome) + where + I: Iterator, + { + for (number, hash) in displaced_leaves.removed { + self.remove_leaf(&Reverse(number), &hash); } } @@ -186,13 +174,13 @@ where let items = self .storage .iter() - .flat_map(|(number, hashes)| hashes.iter().map(move |h| (h.clone(), number.clone()))) + .flat_map(|(number, hashes)| hashes.iter().map(move |h| (h.clone(), *number))) .collect::>(); - for (hash, number) in &items { + for (hash, number) in items { if number.0 > best_number { assert!( - self.remove_leaf(number, hash), + self.remove_leaf(&number, &hash), "item comes from an iterator over storage; qed", ); } @@ -207,7 +195,7 @@ where // we need to make sure that the best block exists in the leaf set as // this is an invariant of regular block import. if !leaves_contains_best { - self.insert_leaf(best_number.clone(), best_hash.clone()); + self.insert_leaf(best_number, best_hash.clone()); } } @@ -229,7 +217,7 @@ where column: u32, prefix: &[u8], ) { - let leaves: Vec<_> = self.storage.iter().map(|(n, h)| (n.0.clone(), h.clone())).collect(); + let leaves: Vec<_> = self.storage.iter().map(|(n, h)| (n.0, h.clone())).collect(); tx.set_from_vec(column, prefix, leaves.encode()); } @@ -274,7 +262,7 @@ where /// Returns the highest leaf and all hashes associated to it. pub fn highest_leaf(&self) -> Option<(N, &[H])> { - self.storage.iter().next().map(|(k, v)| (k.0.clone(), &v[..])) + self.storage.iter().next().map(|(k, v)| (k.0, &v[..])) } } @@ -286,13 +274,13 @@ pub struct Undo<'a, H: 'a, N: 'a> { impl<'a, H: 'a, N: 'a> Undo<'a, H, N> where H: Clone + PartialEq + Decode + Encode, - N: std::fmt::Debug + Clone + AtLeast32Bit + Decode + Encode, + N: std::fmt::Debug + Copy + AtLeast32Bit + Decode + Encode, { /// Undo an imported block by providing the import operation outcome. /// No additional operations should be performed between import and undo. pub fn undo_import(&mut self, outcome: ImportOutcome) { if let Some(removed_hash) = outcome.removed { - let removed_number = Reverse(outcome.inserted.number.0.clone() - N::one()); + let removed_number = Reverse(outcome.inserted.number.0 - N::one()); self.inner.insert_leaf(removed_number, removed_hash); } self.inner.remove_leaf(&outcome.inserted.number, &outcome.inserted.hash); @@ -302,7 +290,7 @@ where /// No additional operations should be performed between remove and undo. pub fn undo_remove(&mut self, outcome: RemoveOutcome) { if let Some(inserted_hash) = outcome.inserted { - let inserted_number = Reverse(outcome.removed.number.0.clone() - N::one()); + let inserted_number = Reverse(outcome.removed.number.0 - N::one()); self.inner.remove_leaf(&inserted_number, &inserted_hash); } self.inner.insert_leaf(outcome.removed.number, outcome.removed.hash); @@ -310,8 +298,13 @@ where /// Undo a finalization operation by providing the displaced leaves. /// No additional operations should be performed between finalization and undo. - pub fn undo_finalization(&mut self, mut outcome: FinalizationOutcome) { - self.inner.storage.append(&mut outcome.removed); + pub fn undo_finalization(&mut self, outcome: FinalizationOutcome) + where + I: Iterator, + { + for (number, hash) in outcome.removed { + self.inner.storage.entry(Reverse(number)).or_default().push(hash); + } } } diff --git a/substrate/client/db/src/lib.rs b/substrate/client/db/src/lib.rs index afcd560dba85..c30be837c54a 100644 --- a/substrate/client/db/src/lib.rs +++ b/substrate/client/db/src/lib.rs @@ -1814,10 +1814,10 @@ impl Backend { } let new_displaced = self.blockchain.displaced_leaves_after_finalizing(f_hash, f_num)?; - let finalization_outcome = - FinalizationOutcome::new(new_displaced.displaced_leaves.clone().into_iter()); - self.blockchain.leaves.write().remove_displaced_leaves(&finalization_outcome); + self.blockchain.leaves.write().remove_displaced_leaves(FinalizationOutcome::new( + new_displaced.displaced_leaves.iter().copied(), + )); self.prune_blocks(transaction, f_num, &new_displaced, current_transaction_justifications)?; @@ -1871,11 +1871,9 @@ impl Backend { displaced: &DisplacedLeavesAfterFinalization, ) -> ClientResult<()> { // Discard all blocks from displaced branches - for (_, tree_route) in displaced.tree_routes.iter() { - for r in tree_route.retracted() { - self.blockchain.insert_persisted_body_if_pinned(r.hash)?; - self.prune_block(transaction, BlockId::::hash(r.hash))?; - } + for &hash in displaced.displaced_blocks.iter() { + self.blockchain.insert_persisted_body_if_pinned(hash)?; + self.prune_block(transaction, BlockId::::hash(hash))?; } Ok(()) } @@ -3149,91 +3147,87 @@ pub(crate) mod tests { let d1 = insert_header(&backend, 2, b1, None, H256::from([3; 32])); let d2 = insert_header(&backend, 3, d1, None, Default::default()); { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a3, b2]).unwrap().unwrap(); + let lca = lowest_common_ancestor_multiblock(blockchain, &[a3, b2]).unwrap().unwrap(); assert_eq!(lca.hash, block0); assert_eq!(lca.number, 0); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a1, a3]).unwrap().unwrap(); + let lca = lowest_common_ancestor_multiblock(blockchain, &[a1, a3]).unwrap().unwrap(); assert_eq!(lca.hash, a1); assert_eq!(lca.number, 1); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a3, a1]).unwrap().unwrap(); + let lca = lowest_common_ancestor_multiblock(blockchain, &[a3, a1]).unwrap().unwrap(); assert_eq!(lca.hash, a1); assert_eq!(lca.number, 1); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a2, a3]).unwrap().unwrap(); + let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a3]).unwrap().unwrap(); assert_eq!(lca.hash, a2); assert_eq!(lca.number, 2); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a2, a1]).unwrap().unwrap(); + let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a1]).unwrap().unwrap(); assert_eq!(lca.hash, a1); assert_eq!(lca.number, 1); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a2, a2]).unwrap().unwrap(); + let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a2]).unwrap().unwrap(); assert_eq!(lca.hash, a2); assert_eq!(lca.number, 2); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a3, d2, c2]) - .unwrap() - .unwrap(); + let lca = + lowest_common_ancestor_multiblock(blockchain, &[a3, d2, c2]).unwrap().unwrap(); assert_eq!(lca.hash, block0); assert_eq!(lca.number, 0); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![c2, d2, b2]) - .unwrap() - .unwrap(); + let lca = + lowest_common_ancestor_multiblock(blockchain, &[c2, d2, b2]).unwrap().unwrap(); assert_eq!(lca.hash, b1); assert_eq!(lca.number, 1); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a1, a2, a3]) - .unwrap() - .unwrap(); + let lca = + lowest_common_ancestor_multiblock(blockchain, &[a1, a2, a3]).unwrap().unwrap(); assert_eq!(lca.hash, a1); assert_eq!(lca.number, 1); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![b1, b2, d1]) - .unwrap() - .unwrap(); + let lca = + lowest_common_ancestor_multiblock(blockchain, &[b1, b2, d1]).unwrap().unwrap(); assert_eq!(lca.hash, b1); assert_eq!(lca.number, 1); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![]); + let lca = lowest_common_ancestor_multiblock(blockchain, &[]); assert_eq!(true, matches!(lca, Ok(None))); } { - let lca = lowest_common_ancestor_multiblock(blockchain, vec![a1]).unwrap().unwrap(); + let lca = lowest_common_ancestor_multiblock(blockchain, &[a1]).unwrap().unwrap(); assert_eq!(lca.hash, a1); assert_eq!(lca.number, 1); diff --git a/substrate/primitives/blockchain/Cargo.toml b/substrate/primitives/blockchain/Cargo.toml index 5e51a2d06ed7..4d33e452dd46 100644 --- a/substrate/primitives/blockchain/Cargo.toml +++ b/substrate/primitives/blockchain/Cargo.toml @@ -24,6 +24,7 @@ parking_lot = "0.12.1" schnellru = "0.2.1" thiserror = { workspace = true } sp-api = { path = "../api" } +sp-core = { path = "../core" } sp-consensus = { path = "../consensus/common" } sp-database = { path = "../database" } sp-runtime = { path = "../runtime" } diff --git a/substrate/primitives/blockchain/src/backend.rs b/substrate/primitives/blockchain/src/backend.rs index 123e0e6b4895..3b6a51df67f5 100644 --- a/substrate/primitives/blockchain/src/backend.rs +++ b/substrate/primitives/blockchain/src/backend.rs @@ -24,12 +24,12 @@ use sp_runtime::{ traits::{Block as BlockT, CheckedSub, Header as HeaderT, NumberFor, Zero}, Justifications, }; -use std::collections::{btree_map::BTreeMap, btree_set::BTreeSet}; +use std::collections::btree_set::BTreeSet; use crate::{ error::{Error, Result}, header_metadata::{self, HeaderMetadata}, - lowest_common_ancestor_multiblock, tree_route, TreeRoute, + lowest_common_ancestor_multiblock, tree_route, }; /// Blockchain database header backend. Does not perform any validation. @@ -240,7 +240,7 @@ pub trait Backend: ))?; let leaf_block_header = self.expect_header(*first_leaf)?; - // If the distance between the leafs and the finalized block is large, calculating + // If the distance between the leafs and the finalized block is large, calculating // tree routes can be very expensive. In that case, we will try to find the // lowest common ancestor between all the leaves. The assumption here is that the forks are // close to the tip and not long. So the LCA can be computed from the header cache. If the @@ -252,15 +252,21 @@ pub trait Backend: .unwrap_or(0u32.into()) > header_metadata::LRU_CACHE_SIZE.into() { - if let Some(lca) = lowest_common_ancestor_multiblock(self, leaves.clone())? { + if let Some(lca) = lowest_common_ancestor_multiblock(self, &leaves)? { if lca.number > finalized_block_number { return Ok(result) } else { - log::warn!("The distance between leafs and finalized block is large. Finalization can take a long time."); + warn!( + "The distance between leafs and finalized block is large. Finalization \ + can take a long time." + ); } }; } + result.displaced_leaves.reserve_exact(leaves.len()); + result.displaced_blocks.reserve_exact(leaves.len()); + // For each leaf determine whether it belongs to a non-canonical branch. for leaf_hash in leaves { let leaf_block_header = self.expect_header(leaf_hash)?; @@ -279,38 +285,43 @@ pub trait Backend: let needs_pruning = leaf_tree_route.common_block().hash != finalized_block_hash; if needs_pruning { - result.displaced_leaves.insert(leaf_hash, leaf_number); - result.tree_routes.insert(leaf_hash, leaf_tree_route); + result.displaced_leaves.push((leaf_number, leaf_hash)); + result.displaced_blocks.extend( + leaf_tree_route + .retracted() + .into_iter() + .map(|hash_and_number| hash_and_number.hash), + ); } } Ok(result) } - /// Clears the block gap from DB after the fast-sync. - fn clear_block_gap(&self) -> Result<()>; + /// Clears the block gap from DB after the fast-sync. + fn clear_block_gap(&self) -> Result<()>; } /// Result of [`Backend::displaced_leaves_after_finalizing`]. #[derive(Clone, Debug)] pub struct DisplacedLeavesAfterFinalization { - /// A collection of hashes and block numbers for displaced leaves. - pub displaced_leaves: BTreeMap>, + /// A list of hashes and block numbers of displaced leaves. + pub displaced_leaves: Vec<(NumberFor, Block::Hash)>, - /// A collection of tree routes from the leaves to finalized block. - pub tree_routes: BTreeMap>, + /// A list of hashes displaced blocks from all displaced leaves. + pub displaced_blocks: Vec, } impl Default for DisplacedLeavesAfterFinalization { fn default() -> Self { - Self { displaced_leaves: Default::default(), tree_routes: Default::default() } + Self { displaced_leaves: Vec::new(), displaced_blocks: Vec::new() } } } impl DisplacedLeavesAfterFinalization { /// Returns a collection of hashes for the displaced leaves. pub fn hashes(&self) -> impl Iterator + '_ { - self.displaced_leaves.keys().cloned() + self.displaced_leaves.iter().map(|(_, hash)| *hash) } } diff --git a/substrate/primitives/blockchain/src/header_metadata.rs b/substrate/primitives/blockchain/src/header_metadata.rs index c2054445b067..84955746f8a0 100644 --- a/substrate/primitives/blockchain/src/header_metadata.rs +++ b/substrate/primitives/blockchain/src/header_metadata.rs @@ -20,12 +20,14 @@ use parking_lot::RwLock; use schnellru::{ByLength, LruMap}; +use sp_core::U256; use sp_runtime::traits::{Block as BlockT, Header, NumberFor, One}; +use sp_runtime::Saturating; /// Set to the expected max difference between `best` and `finalized` blocks at sync. pub(crate) const LRU_CACHE_SIZE: u32 = 5_000; -/// Get lowest common ancestor between two blocks in the tree. +/// Get the lowest common ancestor between two blocks in the tree. /// /// This implementation is efficient because our trees have very few and /// small branches, and because of our current query pattern: @@ -96,16 +98,18 @@ pub fn lowest_common_ancestor + ?Sized>( Ok(HashAndNumber { hash: header_one.hash, number: header_one.number }) } -/// Get lowest common ancestor between multiple blocks. +/// Get the lowest common ancestor between multiple blocks. +/// +/// Returns `Ok(None)` only when input list is empty. pub fn lowest_common_ancestor_multiblock + ?Sized>( backend: &T, - hashes: Vec, + hashes: &[Block::Hash], ) -> Result>, T::Error> { // Ensure the list of hashes is not empty - let mut hashes_iter = hashes.into_iter(); + let mut hashes_iter = hashes.iter(); let first_hash = match hashes_iter.next() { - Some(hash) => hash, + Some(hash) => *hash, None => return Ok(None), }; @@ -114,7 +118,7 @@ pub fn lowest_common_ancestor_multiblock let mut lca = HashAndNumber { number: first_cached.number, hash: first_cached.hash }; for hash in hashes_iter { // Calculate the LCA of the current LCA and the next hash - lca = lowest_common_ancestor(backend, lca.hash, hash)?; + lca = lowest_common_ancestor(backend, lca.hash, *hash)?; } Ok(Some(lca)) @@ -129,15 +133,16 @@ pub fn tree_route + ?Sized>( let mut from = backend.header_metadata(from)?; let mut to = backend.header_metadata(to)?; - let mut from_branch = Vec::new(); - let mut to_branch = Vec::new(); - + let mut to_branch = + Vec::with_capacity(Into::::into(to.number.saturating_sub(from.number)).as_usize()); while to.number > from.number { to_branch.push(HashAndNumber { number: to.number, hash: to.hash }); to = backend.header_metadata(to.parent)?; } + let mut from_branch = + Vec::with_capacity(Into::::into(to.number.saturating_sub(from.number)).as_usize()); while from.number > to.number { from_branch.push(HashAndNumber { number: from.number, hash: from.hash }); from = backend.header_metadata(from.parent)?; @@ -156,6 +161,7 @@ pub fn tree_route + ?Sized>( // add the pivot block. and append the reversed to-branch // (note that it's reverse order originals) let pivot = from_branch.len(); + from_branch.reserve_exact(to_branch.len() + 1); from_branch.push(HashAndNumber { number: to.number, hash: to.hash }); from_branch.extend(to_branch.into_iter().rev()); @@ -173,7 +179,7 @@ pub struct HashAndNumber { /// A tree-route from one block to another in the chain. /// -/// All blocks prior to the pivot in the deque is the reverse-order unique ancestry +/// All blocks prior to the pivot in the vector is the reverse-order unique ancestry /// of the first block, the block at the pivot index is the common ancestor, /// and all blocks after the pivot is the ancestry of the second block, in /// order. From 160bdc0d504feab8ebd797ea577ac1c4e918654d Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Tue, 2 Jul 2024 05:17:27 +0300 Subject: [PATCH 2/3] Replace `displaced_leaves_after_finalizing` implementation with more efficient --- substrate/client/db/src/lib.rs | 183 +++++++++--------- .../primitives/blockchain/src/backend.rs | 145 ++++++++------ .../blockchain/src/header_metadata.rs | 26 --- 3 files changed, 177 insertions(+), 177 deletions(-) diff --git a/substrate/client/db/src/lib.rs b/substrate/client/db/src/lib.rs index c30be837c54a..b978ce261e09 100644 --- a/substrate/client/db/src/lib.rs +++ b/substrate/client/db/src/lib.rs @@ -2558,7 +2558,7 @@ pub(crate) mod tests { backend::{Backend as BTrait, BlockImportOperation as Op}, blockchain::Backend as BLBTrait, }; - use sp_blockchain::{lowest_common_ancestor, lowest_common_ancestor_multiblock, tree_route}; + use sp_blockchain::{lowest_common_ancestor, tree_route}; use sp_core::H256; use sp_runtime::{ testing::{Block as RawBlock, ExtrinsicWrapper, Header}, @@ -3120,117 +3120,118 @@ pub(crate) mod tests { } #[test] - fn lowest_common_ancestors_multiblock_works() { + fn displaced_leaves_after_finalizing_works() { let backend = Backend::::new_test(1000, 100); let blockchain = backend.blockchain(); - let block0 = insert_header(&backend, 0, Default::default(), None, Default::default()); + let genesis_number = 0; + let genesis_hash = + insert_header(&backend, genesis_number, Default::default(), None, Default::default()); // fork from genesis: 3 prong. // block 0 -> a1 -> a2 -> a3 - // | - // -> b1 -> b2 -> c1 -> c2 - // | - // -> d1 -> d2 - let a1 = insert_header(&backend, 1, block0, None, Default::default()); - let a2 = insert_header(&backend, 2, a1, None, Default::default()); - let a3 = insert_header(&backend, 3, a2, None, Default::default()); - - // fork from genesis: 2 prong. - let b1 = insert_header(&backend, 1, block0, None, H256::from([1; 32])); - let b2 = insert_header(&backend, 2, b1, None, Default::default()); - - // fork from b2. - let c1 = insert_header(&backend, 3, b2, None, H256::from([2; 32])); - let c2 = insert_header(&backend, 4, c1, None, Default::default()); - - // fork from b1. - let d1 = insert_header(&backend, 2, b1, None, H256::from([3; 32])); - let d2 = insert_header(&backend, 3, d1, None, Default::default()); - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a3, b2]).unwrap().unwrap(); - - assert_eq!(lca.hash, block0); - assert_eq!(lca.number, 0); - } - - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a1, a3]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); - } - - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a3, a1]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); - } + // \ + // -> b1 -> b2 -> c1 -> c2 + // \ + // -> d1 -> d2 + let a1_number = 1; + let a1_hash = insert_header(&backend, a1_number, genesis_hash, None, Default::default()); + let a2_number = 2; + let a2_hash = insert_header(&backend, a2_number, a1_hash, None, Default::default()); + let a3_number = 3; + let a3_hash = insert_header(&backend, a3_number, a2_hash, None, Default::default()); { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a3]).unwrap().unwrap(); - - assert_eq!(lca.hash, a2); - assert_eq!(lca.number, 2); - } - - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a1]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); + let displaced = blockchain + .displaced_leaves_after_finalizing(genesis_hash, genesis_number) + .unwrap(); + assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_blocks, vec![]); } - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a2]).unwrap().unwrap(); - - assert_eq!(lca.hash, a2); - assert_eq!(lca.number, 2); + let displaced_a1 = + blockchain.displaced_leaves_after_finalizing(a1_hash, a1_number).unwrap(); + assert_eq!(displaced_a1.displaced_leaves, vec![]); + assert_eq!(displaced_a1.displaced_blocks, vec![]); + + let displaced_a2 = + blockchain.displaced_leaves_after_finalizing(a2_hash, a3_number).unwrap(); + assert_eq!(displaced_a2.displaced_leaves, vec![]); + assert_eq!(displaced_a2.displaced_blocks, vec![]); + + let displaced_a3 = + blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); + assert_eq!(displaced_a3.displaced_leaves, vec![]); + assert_eq!(displaced_a3.displaced_blocks, vec![]); } - { - let lca = - lowest_common_ancestor_multiblock(blockchain, &[a3, d2, c2]).unwrap().unwrap(); - - assert_eq!(lca.hash, block0); - assert_eq!(lca.number, 0); - } + // fork from genesis: 2 prong. + let b1_number = 1; + let b1_hash = insert_header(&backend, b1_number, genesis_hash, None, H256::from([1; 32])); + let b2_number = 2; + let b2_hash = insert_header(&backend, b2_number, b1_hash, None, Default::default()); - { - let lca = - lowest_common_ancestor_multiblock(blockchain, &[c2, d2, b2]).unwrap().unwrap(); + // fork from b2. + let c1_number = 3; + let c1_hash = insert_header(&backend, c1_number, b2_hash, None, H256::from([2; 32])); + let c2_number = 4; + let c2_hash = insert_header(&backend, c2_number, c1_hash, None, Default::default()); - assert_eq!(lca.hash, b1); - assert_eq!(lca.number, 1); - } + // fork from b1. + let d1_number = 2; + let d1_hash = insert_header(&backend, d1_number, b1_hash, None, H256::from([3; 32])); + let d2_number = 3; + let d2_hash = insert_header(&backend, d2_number, d1_hash, None, Default::default()); { - let lca = - lowest_common_ancestor_multiblock(blockchain, &[a1, a2, a3]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); + let displaced_a1 = + blockchain.displaced_leaves_after_finalizing(a1_hash, a1_number).unwrap(); + assert_eq!( + displaced_a1.displaced_leaves, + vec![(c2_number, c2_hash), (d2_number, d2_hash)] + ); + let mut displaced_blocks = vec![b1_hash, b2_hash, c1_hash, c2_hash, d1_hash, d2_hash]; + displaced_blocks.sort(); + assert_eq!(displaced_a1.displaced_blocks, displaced_blocks); + + let displaced_a2 = + blockchain.displaced_leaves_after_finalizing(a2_hash, a2_number).unwrap(); + assert_eq!(displaced_a1.displaced_leaves, displaced_a2.displaced_leaves); + assert_eq!(displaced_a1.displaced_blocks, displaced_a2.displaced_blocks); + + let displaced_a3 = + blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); + assert_eq!(displaced_a1.displaced_leaves, displaced_a3.displaced_leaves); + assert_eq!(displaced_a1.displaced_blocks, displaced_a3.displaced_blocks); } - { - let lca = - lowest_common_ancestor_multiblock(blockchain, &[b1, b2, d1]).unwrap().unwrap(); - - assert_eq!(lca.hash, b1); - assert_eq!(lca.number, 1); + let displaced = + blockchain.displaced_leaves_after_finalizing(b1_hash, b1_number).unwrap(); + assert_eq!(displaced.displaced_leaves, vec![(a3_number, a3_hash)]); + let mut displaced_blocks = vec![a1_hash, a2_hash, a3_hash]; + displaced_blocks.sort(); + assert_eq!(displaced.displaced_blocks, displaced_blocks); } - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[]); - - assert_eq!(true, matches!(lca, Ok(None))); + let displaced = + blockchain.displaced_leaves_after_finalizing(b2_hash, b2_number).unwrap(); + assert_eq!( + displaced.displaced_leaves, + vec![(a3_number, a3_hash), (d2_number, d2_hash)] + ); + let mut displaced_blocks = vec![a1_hash, a2_hash, a3_hash, d1_hash, d2_hash]; + displaced_blocks.sort(); + assert_eq!(displaced.displaced_blocks, displaced_blocks); } - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a1]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); + let displaced = + blockchain.displaced_leaves_after_finalizing(c2_hash, c2_number).unwrap(); + assert_eq!( + displaced.displaced_leaves, + vec![(a3_number, a3_hash), (d2_number, d2_hash)] + ); + let mut displaced_blocks = vec![a1_hash, a2_hash, a3_hash, d1_hash, d2_hash]; + displaced_blocks.sort(); + assert_eq!(displaced.displaced_blocks, displaced_blocks); } } diff --git a/substrate/primitives/blockchain/src/backend.rs b/substrate/primitives/blockchain/src/backend.rs index 3b6a51df67f5..fc47413c24c2 100644 --- a/substrate/primitives/blockchain/src/backend.rs +++ b/substrate/primitives/blockchain/src/backend.rs @@ -21,15 +21,15 @@ use log::warn; use parking_lot::RwLock; use sp_runtime::{ generic::BlockId, - traits::{Block as BlockT, CheckedSub, Header as HeaderT, NumberFor, Zero}, + traits::{Block as BlockT, Header as HeaderT, NumberFor, One, Zero}, Justifications, }; -use std::collections::btree_set::BTreeSet; +use std::collections::{btree_set::BTreeSet, HashMap, VecDeque}; use crate::{ error::{Error, Result}, - header_metadata::{self, HeaderMetadata}, - lowest_common_ancestor_multiblock, tree_route, + header_metadata::HeaderMetadata, + tree_route, CachedHeaderMetadata, }; /// Blockchain database header backend. Does not perform any validation. @@ -226,76 +226,101 @@ pub trait Backend: finalized_block_hash: Block::Hash, finalized_block_number: NumberFor, ) -> std::result::Result, Error> { - let mut result = DisplacedLeavesAfterFinalization::default(); - let leaves = self.leaves()?; // If we have only one leaf there are no forks, and we can return early. if finalized_block_number == Zero::zero() || leaves.len() == 1 { - return Ok(result) + return Ok(DisplacedLeavesAfterFinalization::default()) } - let first_leaf = leaves.first().ok_or(Error::Backend( - "Unable to find any leaves. This should not happen.".to_string(), - ))?; - let leaf_block_header = self.expect_header(*first_leaf)?; - - // If the distance between the leafs and the finalized block is large, calculating - // tree routes can be very expensive. In that case, we will try to find the - // lowest common ancestor between all the leaves. The assumption here is that the forks are - // close to the tip and not long. So the LCA can be computed from the header cache. If the - // LCA is above the finalized block, we know that there are no displaced leaves by the - // finalization. - if leaf_block_header - .number() - .checked_sub(&finalized_block_number) - .unwrap_or(0u32.into()) > - header_metadata::LRU_CACHE_SIZE.into() - { - if let Some(lca) = lowest_common_ancestor_multiblock(self, &leaves)? { - if lca.number > finalized_block_number { - return Ok(result) - } else { - warn!( - "The distance between leafs and finalized block is large. Finalization \ - can take a long time." - ); - } - }; - } + // Store hashes of finalized blocks for quick checking later, the last block if the + // finalized one + let mut finalized_chain = VecDeque::new(); + finalized_chain.push_front(self.header_metadata(finalized_block_hash)?); + + // Local cache is a performance optimization in case of finalized block deep below the + // tip of the chain with a lot of leaves above finalized block + let mut local_cache = HashMap::>::new(); - result.displaced_leaves.reserve_exact(leaves.len()); - result.displaced_blocks.reserve_exact(leaves.len()); + let mut result = DisplacedLeavesAfterFinalization { + displaced_leaves: Vec::with_capacity(leaves.len()), + displaced_blocks: Vec::with_capacity(leaves.len()), + }; + let mut displaced_blocks_candidates = Vec::new(); - // For each leaf determine whether it belongs to a non-canonical branch. for leaf_hash in leaves { - let leaf_block_header = self.expect_header(leaf_hash)?; - let leaf_number = *leaf_block_header.number(); + let mut current_header_metadata = self.header_metadata(leaf_hash)?; + let leaf_number = current_header_metadata.number; + + // Collect all block hashes until the height of the finalized block + displaced_blocks_candidates.clear(); + while current_header_metadata.number > finalized_block_number { + displaced_blocks_candidates.push(current_header_metadata.hash); + + let parent_hash = current_header_metadata.parent; + match local_cache.get(&parent_hash) { + Some(metadata_header) => { + current_header_metadata = metadata_header.clone(); + }, + None => { + current_header_metadata = self.header_metadata(parent_hash)?; + // Cache locally in case more branches above finalized block reference + // the same block hash + local_cache.insert(parent_hash, current_header_metadata.clone()); + }, + } + } - let leaf_tree_route = match tree_route(self, leaf_hash, finalized_block_hash) { - Ok(tree_route) => tree_route, - Err(Error::UnknownBlock(_)) => { - // Sometimes routes can't be calculated. E.g. after warp sync. + // If points back to the finalized header then nothing left to do, this leaf will be + // checked again later + if current_header_metadata.hash == finalized_block_hash { + continue; + } + + // Otherwise the whole leaf branch needs to be pruned, track it all the way to the + // point of branching from the finalized chain + result.displaced_leaves.push((leaf_number, leaf_hash)); + result.displaced_blocks.extend(displaced_blocks_candidates.drain(..)); + result.displaced_blocks.push(current_header_metadata.hash); + // Collect the rest of the displaced blocks of leaf branch + for distance_from_finalized in 1_u32.. { + // Find block at `distance_from_finalized` from finalized block + let (finalized_chain_block_number, finalized_chain_block_hash) = + match finalized_chain.iter().rev().nth(distance_from_finalized as usize) { + Some(header) => (header.number, header.hash), + None => { + let header = self.header_metadata( + finalized_chain.front().expect("Not empty; qed").parent, + )?; + let result = (header.number, header.hash); + finalized_chain.push_front(header); + result + }, + }; + + if current_header_metadata.number < finalized_chain_block_number + One::one() { + // Skip more blocks until we get all blocks on finalized chain until the height + // of the parent block continue; - }, - Err(e) => Err(e)?, - }; - - // Is it a stale fork? - let needs_pruning = leaf_tree_route.common_block().hash != finalized_block_hash; - - if needs_pruning { - result.displaced_leaves.push((leaf_number, leaf_hash)); - result.displaced_blocks.extend( - leaf_tree_route - .retracted() - .into_iter() - .map(|hash_and_number| hash_and_number.hash), - ); + } + + let parent_hash = current_header_metadata.parent; + if finalized_chain_block_hash == parent_hash { + // Reached finalized chain, nothing left to do + break; + } + + // Store displaced block and look deeper for block on finalized chain + result.displaced_blocks.push(parent_hash); + current_header_metadata = self.header_metadata(parent_hash)?; } } - Ok(result) + // There could be duplicates shared by multiple branches, clean them up + result.displaced_blocks.sort_unstable(); + result.displaced_blocks.dedup_by_key(|hash| *hash); + + return Ok(result); } /// Clears the block gap from DB after the fast-sync. diff --git a/substrate/primitives/blockchain/src/header_metadata.rs b/substrate/primitives/blockchain/src/header_metadata.rs index 84955746f8a0..43f9ac84db0f 100644 --- a/substrate/primitives/blockchain/src/header_metadata.rs +++ b/substrate/primitives/blockchain/src/header_metadata.rs @@ -98,32 +98,6 @@ pub fn lowest_common_ancestor + ?Sized>( Ok(HashAndNumber { hash: header_one.hash, number: header_one.number }) } -/// Get the lowest common ancestor between multiple blocks. -/// -/// Returns `Ok(None)` only when input list is empty. -pub fn lowest_common_ancestor_multiblock + ?Sized>( - backend: &T, - hashes: &[Block::Hash], -) -> Result>, T::Error> { - // Ensure the list of hashes is not empty - let mut hashes_iter = hashes.iter(); - - let first_hash = match hashes_iter.next() { - Some(hash) => *hash, - None => return Ok(None), - }; - - // Start with the first hash as the initial LCA - let first_cached = backend.header_metadata(first_hash)?; - let mut lca = HashAndNumber { number: first_cached.number, hash: first_cached.hash }; - for hash in hashes_iter { - // Calculate the LCA of the current LCA and the next hash - lca = lowest_common_ancestor(backend, lca.hash, *hash)?; - } - - Ok(Some(lca)) -} - /// Compute a tree-route between two blocks. See tree-route docs for more details. pub fn tree_route + ?Sized>( backend: &T, From 9eae85c8574f16ba96ea988430f56b5f86fc48ba Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Tue, 2 Jul 2024 08:02:13 +0300 Subject: [PATCH 3/3] Optimize pruning during batch finalization of blocks --- substrate/client/api/src/backend.rs | 3 +- substrate/client/db/src/lib.rs | 84 +++++++++++++++-------------- 2 files changed, 47 insertions(+), 40 deletions(-) diff --git a/substrate/client/api/src/backend.rs b/substrate/client/api/src/backend.rs index 31b100433c70..0b2a34952401 100644 --- a/substrate/client/api/src/backend.rs +++ b/substrate/client/api/src/backend.rs @@ -217,7 +217,8 @@ pub trait BlockImportOperation { where I: IntoIterator, Option>)>; - /// Mark a block as finalized. + /// Mark a block as finalized, if multiple blocks are finalized in the same operation then they + /// must be marked in ascending order. fn mark_finalized( &mut self, hash: Block::Hash, diff --git a/substrate/client/db/src/lib.rs b/substrate/client/db/src/lib.rs index b978ce261e09..bd24fdde809d 100644 --- a/substrate/client/db/src/lib.rs +++ b/substrate/client/db/src/lib.rs @@ -1369,6 +1369,8 @@ impl Backend { Ok(()) } + /// `remove_displaced` can be set to `false` if this is not the last of many subsequent calls + /// for performance reasons. fn finalize_block_with_transaction( &self, transaction: &mut Transaction, @@ -1377,6 +1379,7 @@ impl Backend { last_finalized: Option, justification: Option, current_transaction_justifications: &mut HashMap, + remove_displaced: bool, ) -> ClientResult> { // TODO: ensure best chain contains this block. let number = *header.number(); @@ -1389,6 +1392,7 @@ impl Backend { hash, with_state, current_transaction_justifications, + remove_displaced, )?; if let Some(justification) = justification { @@ -1466,7 +1470,8 @@ impl Backend { let mut current_transaction_justifications: HashMap = HashMap::new(); - for (block_hash, justification) in operation.finalized_blocks { + let mut finalized_blocks = operation.finalized_blocks.into_iter().peekable(); + while let Some((block_hash, justification)) = finalized_blocks.next() { let block_header = self.blockchain.expect_header(block_hash)?; meta_updates.push(self.finalize_block_with_transaction( &mut transaction, @@ -1475,6 +1480,7 @@ impl Backend { Some(last_finalized_hash), justification, &mut current_transaction_justifications, + finalized_blocks.peek().is_none(), )?); last_finalized_hash = block_hash; last_finalized_num = *block_header.number(); @@ -1654,6 +1660,7 @@ impl Backend { hash, operation.commit_state, &mut current_transaction_justifications, + true, )?; } else { // canonicalize blocks which are old enough, regardless of finality. @@ -1779,9 +1786,10 @@ impl Backend { Ok(()) } - // write stuff to a transaction after a new block is finalized. - // this canonicalizes finalized blocks. Fails if called with a block which - // was not a child of the last finalized block. + // Write stuff to a transaction after a new block is finalized. This canonicalizes finalized + // blocks. Fails if called with a block which was not a child of the last finalized block. + /// `remove_displaced` can be set to `false` if this is not the last of many subsequent calls + /// for performance reasons. fn note_finalized( &self, transaction: &mut Transaction, @@ -1789,6 +1797,7 @@ impl Backend { f_hash: Block::Hash, with_state: bool, current_transaction_justifications: &mut HashMap, + remove_displaced: bool, ) -> ClientResult<()> { let f_num = *f_header.number(); @@ -1813,13 +1822,19 @@ impl Backend { apply_state_commit(transaction, commit); } - let new_displaced = self.blockchain.displaced_leaves_after_finalizing(f_hash, f_num)?; + if remove_displaced { + let new_displaced = self.blockchain.displaced_leaves_after_finalizing(f_hash, f_num)?; - self.blockchain.leaves.write().remove_displaced_leaves(FinalizationOutcome::new( - new_displaced.displaced_leaves.iter().copied(), - )); + self.blockchain.leaves.write().remove_displaced_leaves(FinalizationOutcome::new( + new_displaced.displaced_leaves.iter().copied(), + )); + + if !matches!(self.blocks_pruning, BlocksPruning::KeepAll) { + self.prune_displaced_branches(transaction, &new_displaced)?; + } + } - self.prune_blocks(transaction, f_num, &new_displaced, current_transaction_justifications)?; + self.prune_blocks(transaction, f_num, current_transaction_justifications)?; Ok(()) } @@ -1828,39 +1843,29 @@ impl Backend { &self, transaction: &mut Transaction, finalized_number: NumberFor, - displaced: &DisplacedLeavesAfterFinalization, current_transaction_justifications: &mut HashMap, ) -> ClientResult<()> { - match self.blocks_pruning { - BlocksPruning::KeepAll => {}, - BlocksPruning::Some(blocks_pruning) => { - // Always keep the last finalized block - let keep = std::cmp::max(blocks_pruning, 1); - if finalized_number >= keep.into() { - let number = finalized_number.saturating_sub(keep.into()); - - // Before we prune a block, check if it is pinned - if let Some(hash) = self.blockchain.hash(number)? { - self.blockchain.insert_persisted_body_if_pinned(hash)?; - - // If the block was finalized in this transaction, it will not be in the db - // yet. - if let Some(justification) = - current_transaction_justifications.remove(&hash) - { - self.blockchain.insert_justifications_if_pinned(hash, justification); - } else { - self.blockchain.insert_persisted_justifications_if_pinned(hash)?; - } - }; + if let BlocksPruning::Some(blocks_pruning) = self.blocks_pruning { + // Always keep the last finalized block + let keep = std::cmp::max(blocks_pruning, 1); + if finalized_number >= keep.into() { + let number = finalized_number.saturating_sub(keep.into()); + + // Before we prune a block, check if it is pinned + if let Some(hash) = self.blockchain.hash(number)? { + self.blockchain.insert_persisted_body_if_pinned(hash)?; + + // If the block was finalized in this transaction, it will not be in the db + // yet. + if let Some(justification) = current_transaction_justifications.remove(&hash) { + self.blockchain.insert_justifications_if_pinned(hash, justification); + } else { + self.blockchain.insert_persisted_justifications_if_pinned(hash)?; + } + }; - self.prune_block(transaction, BlockId::::number(number))?; - } - self.prune_displaced_branches(transaction, displaced)?; - }, - BlocksPruning::KeepFinalized => { - self.prune_displaced_branches(transaction, displaced)?; - }, + self.prune_block(transaction, BlockId::::number(number))?; + } } Ok(()) } @@ -2121,6 +2126,7 @@ impl sc_client_api::backend::Backend for Backend { None, justification, &mut current_transaction_justifications, + true, )?; self.storage.db.commit(transaction)?;