diff --git a/incrementalmerkletree/CHANGELOG.md b/incrementalmerkletree/CHANGELOG.md index efa8906..633e1ac 100644 --- a/incrementalmerkletree/CHANGELOG.md +++ b/incrementalmerkletree/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to Rust's notion of ### Added - `incrementalmerkletree::Marking` +- `incrementalmerkletree::Level::ZERO` ### Changed - `incrementalmerkletree::Retention` diff --git a/incrementalmerkletree/src/lib.rs b/incrementalmerkletree/src/lib.rs index ea4d009..6c74a7c 100644 --- a/incrementalmerkletree/src/lib.rs +++ b/incrementalmerkletree/src/lib.rs @@ -280,6 +280,9 @@ impl TryFrom for usize { pub struct Level(u8); impl Level { + /// Level 0 corresponds to a leaf of the tree. + pub const ZERO: Self = Level(0); + pub const fn new(value: u8) -> Self { Self(value) } diff --git a/shardtree/CHANGELOG.md b/shardtree/CHANGELOG.md index 5175e78..16d9e6e 100644 --- a/shardtree/CHANGELOG.md +++ b/shardtree/CHANGELOG.md @@ -10,20 +10,24 @@ and this project adheres to Rust's notion of ### Added - `shardtree::tree::Tree::{is_leaf, map, try_map, empty_pruned}` - `shardtree::tree::LocatedTree::{map, try_map}` -- `shardtree::prunable::PrunableTree::{has_computable_root}` - -### Changed -- `shardtree::tree::Node` has additional variant `Node::Pruned`. +- `shardtree::prunable::PrunableTree::{has_computable_root, is_full}` +- `shardtree::prunable::LocatedPrunableTree::{max_position}` ### Removed -- `shardtree::tree::Tree::is_complete` as it is no longer well-defined in the - presence of `Pruned` nodes. Use `PrunableTree::has_computable_root` to - determine whether it is possible to compute the root of a tree. +- `shardtree::tree::LocatedTree::max_position` did not behave correctly regarding + annotated parent nodes. Use `LocatedPrunableTree::max_position` instead. +- `shardtree::tree::Tree::is_complete` was somewhat misleadingly named, as `Nil` + nodes that were inserted as a consequence of insertion after pruning could be + interpreted as rendering the tree incomplete. Use `PrunableTree::has_computable_root` + instead to determine whether it is possible to compute the root of a tree. ### Fixed - Fixes an error that could occur if an inserted `Frontier` node was interpreted as a node that had actually had its value observed as though it had been inserted using the ordinary tree insertion methods. +- Fixes an error in an internal method that could result in subtree root + annotation data being discarded when pruning a `Parent` node having + `Nil` nodes for both its left and right children. ## [0.3.1] - 2024-04-03 diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 398b24b..48ee472 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -210,7 +210,8 @@ impl< Ok(()) } - /// Append a single value at the first available position in the tree. + /// Append a single value at the first unfilled position greater than the maximum position of + /// any previously inserted leaf. /// /// Prefer to use [`Self::batch_insert`] when appending multiple values, as these operations /// require fewer traversals of the tree than are necessary when performing multiple sequential @@ -234,17 +235,17 @@ impl< let (append_result, position, checkpoint_id) = if let Some(subtree) = self.store.last_shard().map_err(ShardTreeError::Storage)? { - match subtree.max_position() { - // If the subtree is full, then construct a successor tree. - Some(pos) if pos == subtree.root_addr.max_position() => { - let addr = subtree.root_addr; - if subtree.root_addr.index() < Self::max_subtree_index() { - LocatedTree::empty(addr.next_at_level()).append(value, retention)? - } else { - return Err(InsertionError::TreeFull.into()); - } + if subtree.root().is_full() { + // If the shard is full, then construct a successor tree. + let addr = subtree.root_addr; + if subtree.root_addr.index() < Self::max_subtree_index() { + LocatedTree::empty(addr.next_at_level()).append(value, retention)? + } else { + return Err(InsertionError::TreeFull.into()); } - _ => subtree.append(value, retention)?, + } else { + // Otherwise, just append to the shard. + subtree.append(value, retention)? } } else { let root_addr = Address::from_parts(Self::subtree_level(), 0); @@ -320,17 +321,8 @@ impl< .map_err(ShardTreeError::Storage)? .unwrap_or_else(|| LocatedTree::empty(subtree_root_addr)); - trace!( - max_position = ?current_shard.max_position(), - subtree = ?current_shard, - "Current shard"); let (updated_subtree, supertree) = current_shard.insert_frontier_nodes(frontier, &leaf_retention)?; - trace!( - max_position = ?updated_subtree.max_position(), - subtree = ?updated_subtree, - "Replacement shard" - ); self.store .put_shard(updated_subtree) .map_err(ShardTreeError::Storage)?; @@ -447,7 +439,7 @@ impl< Tree::leaf((h.clone(), *r | RetentionFlags::CHECKPOINT)), root_addr.max_position(), )), - Node::Nil | Node::Pruned => None, + Node::Nil => None, } } @@ -581,13 +573,7 @@ impl< .map_err(ShardTreeError::Storage)?; if let Some(to_clear) = to_clear { - let pre_clearing_max_position = to_clear.max_position(); let cleared = to_clear.clear_flags(positions); - - // Clearing flags should not modify the max position of leaves represented - // in the shard. - assert!(cleared.max_position() == pre_clearing_max_position); - self.store .put_shard(cleared) .map_err(ShardTreeError::Storage)?; @@ -867,7 +853,7 @@ impl< )) } } - Node::Nil | Node::Pruned => { + Node::Nil => { if cap.root_addr == target_addr || cap.root_addr.level() == ShardTree::::subtree_level() { @@ -1464,7 +1450,8 @@ mod tests { id: 1, marking: frontier_marking, }, - )?; + ) + .unwrap(); // Insert a few leaves beginning at the subsequent position, so as to cross the shard // boundary. @@ -1473,7 +1460,8 @@ mod tests { ('g'..='j') .into_iter() .map(|c| (c.to_string(), Retention::Ephemeral)), - )?; + ) + .unwrap(); // Trigger pruning by adding 5 more checkpoints for i in 2..7 { @@ -1486,7 +1474,8 @@ mod tests { ('e'..='f') .into_iter() .map(|c| (c.to_string(), Retention::Marked)), - )?; + ) + .unwrap(); // Compute the witness tree.witness_at_checkpoint_id(frontier_end, &6) diff --git a/shardtree/src/prunable.rs b/shardtree/src/prunable.rs index d230093..fed767f 100644 --- a/shardtree/src/prunable.rs +++ b/shardtree/src/prunable.rs @@ -109,7 +109,17 @@ impl PrunableTree { || (left.as_ref().has_computable_root() && right.as_ref().has_computable_root()) } Node::Leaf { .. } => true, - Node::Nil | Node::Pruned => false, + Node::Nil => false, + } + } + + /// Returns `true` if no additional leaves can be appended to the right hand side of this tree + /// without over-filling it given its current depth. + pub fn is_full(&self) -> bool { + match &self.0 { + Node::Nil => false, + Node::Leaf { .. } => true, + Node::Parent { ann, right, .. } => ann.is_some() || right.is_full(), } } @@ -118,7 +128,7 @@ impl PrunableTree { match &self.0 { Node::Parent { left, right, .. } => left.contains_marked() || right.contains_marked(), Node::Leaf { value: (_, r) } => r.is_marked(), - Node::Nil | Node::Pruned => false, + Node::Nil => false, } } @@ -170,7 +180,7 @@ impl PrunableTree { Err(vec![root_addr]) } } - Node::Nil | Node::Pruned => Err(vec![root_addr]), + Node::Nil => Err(vec![root_addr]), } } } @@ -205,7 +215,7 @@ impl PrunableTree { } result } - Node::Nil | Node::Pruned => BTreeSet::new(), + Node::Nil => BTreeSet::new(), } } @@ -242,7 +252,6 @@ impl PrunableTree { let no_default_fill = addr.position_range_end(); match (t0, t1) { (Tree(Node::Nil), other) | (other, Tree(Node::Nil)) => Ok(other), - (Tree(Node::Pruned), other) | (other, Tree(Node::Pruned)) => Ok(other), (Tree(Node::Leaf { value: vl }), Tree(Node::Leaf { value: vr })) => { if vl.0 == vr.0 { // Merge the flags together. @@ -312,7 +321,7 @@ impl PrunableTree { /// `level` must be the level of the two nodes that are being joined. pub(crate) fn unite(level: Level, ann: Option>, left: Self, right: Self) -> Self { match (left, right) { - (Tree(Node::Nil), Tree(Node::Nil)) => Tree::empty(), + (Tree(Node::Nil), Tree(Node::Nil)) if ann.is_none() => Tree::empty(), (Tree(Node::Leaf { value: lv }), Tree(Node::Leaf { value: rv })) // we can prune right-hand leaves that are not marked or reference leaves; if a // leaf is a checkpoint then that information will be propagated to the replacement @@ -322,11 +331,7 @@ impl PrunableTree { { Tree::leaf((H::combine(level, &lv.0, &rv.0), rv.1)) } - (left, right) => Tree::parent( - ann, - left, - right, - ), + (left, right) => Tree::parent(ann, left, right), } } } @@ -348,6 +353,32 @@ pub struct IncompleteAt { } impl LocatedPrunableTree { + /// Returns the maximum position at which a non-`Nil` leaf has been observed in the tree. + /// + /// Note that no actual leaf value may exist at this position, as it may have previously been + /// pruned. + pub fn max_position(&self) -> Option { + fn go( + addr: Address, + root: &Tree>, (H, RetentionFlags)>, + ) -> Option { + match &root.0 { + Node::Nil => None, + Node::Leaf { .. } => Some(addr.position_range_end() - 1), + Node::Parent { ann, left, right } => { + if ann.is_some() { + Some(addr.max_position()) + } else { + let (l_addr, r_addr) = addr.children().unwrap(); + go(r_addr, right.as_ref()).or_else(|| go(l_addr, left.as_ref())) + } + } + } + } + + go(self.root_addr, &self.root) + } + /// Computes the root hash of this tree, truncated to the given position. /// /// If the tree contains any [`Node::Nil`] nodes corresponding to positions less than @@ -366,10 +397,11 @@ impl LocatedPrunableTree { /// If the tree contains any [`Node::Nil`] nodes that are to the left of filled nodes in the /// tree, this will return an error containing the addresses of those nodes. pub fn right_filled_root(&self) -> Result> { - self.root_hash( - self.max_position() - .map_or_else(|| self.root_addr.position_range_start(), |pos| pos + 1), - ) + let truncate_at = self + .max_position() + .map_or_else(|| self.root_addr.position_range_start(), |pos| pos + 1); + + self.root_hash(truncate_at) } /// Returns the positions of marked leaves in the tree. @@ -524,7 +556,7 @@ impl LocatedPrunableTree { None } } - Node::Nil | Node::Pruned => None, + Node::Nil => None, } } @@ -563,12 +595,6 @@ impl LocatedPrunableTree { subtree: LocatedPrunableTree, contains_marked: bool, ) -> Result<(PrunableTree, Vec), InsertionError> { - trace!( - root_addr = ?root_addr, - max_position = ?LocatedTree::max_position_internal(root_addr, into), - to_insert = ?subtree.root_addr(), - "Subtree insert" - ); // An empty tree cannot replace any other type of tree. if subtree.root().is_nil() { Ok((into.clone(), vec![])) @@ -576,36 +602,29 @@ impl LocatedPrunableTree { // In the case that we are replacing a node entirely, we need to extend the // subtree up to the level of the node being replaced, adding Nil siblings // and recording the presence of those incomplete nodes when necessary - let replacement = - |ann: Option>, mut node: LocatedPrunableTree, pruned: bool| { - // construct the replacement node bottom-up - let mut incomplete = vec![]; - while node.root_addr.level() < root_addr.level() { - incomplete.push(IncompleteAt { - address: node.root_addr.sibling(), - required_for_witness: contains_marked, - }); - let empty = if pruned { - Tree::empty_pruned() + let replacement = |ann: Option>, mut node: LocatedPrunableTree| { + // construct the replacement node bottom-up + let mut incomplete = vec![]; + while node.root_addr.level() < root_addr.level() { + incomplete.push(IncompleteAt { + address: node.root_addr.sibling(), + required_for_witness: contains_marked, + }); + let full = node.root; + node = LocatedTree { + root_addr: node.root_addr.parent(), + root: if node.root_addr.is_right_child() { + Tree::parent(None, Tree::empty(), full) } else { - Tree::empty() - }; - let full = node.root; - node = LocatedTree { - root_addr: node.root_addr.parent(), - root: if node.root_addr.is_right_child() { - Tree::parent(None, empty, full) - } else { - Tree::parent(None, full, empty) - }, - }; - } - (node.root.reannotate_root(ann), incomplete) - }; + Tree::parent(None, full, Tree::empty()) + }, + }; + } + (node.root.reannotate_root(ann), incomplete) + }; match into { - Tree(Node::Nil) => Ok(replacement(None, subtree, false)), - Tree(Node::Pruned) => Ok(replacement(None, subtree, true)), + Tree(Node::Nil) => Ok(replacement(None, subtree)), Tree(Node::Leaf { value: (value, retention), }) => { @@ -660,22 +679,7 @@ impl LocatedPrunableTree { Err(InsertionError::Conflict(root_addr)) } } else { - Ok(replacement( - Some(Arc::new(value.clone())), - subtree, - // The subtree being inserted may have its root at some level lower - // than the next level down. The siblings of nodes that will be - // generated while descending to the subtree root level will be - // `Nil` nodes (indicating that the value of these nodes have never - // been observed) if the leaf being replaced has `REFERENCE` - // retention. Any other leaf without `REFERENCE` retention will - // have been produced by pruning of previously observed node - // values, so in those cases we use `Pruned` nodes for the absent - // siblings. This allows us to retain the distinction between what - // parts of the tree have been directly observed and what parts - // have not. - !retention.contains(RetentionFlags::REFERENCE), - )) + Ok(replacement(Some(Arc::new(value.clone())), subtree)) } } parent if root_addr == subtree.root_addr => { @@ -930,7 +934,6 @@ impl LocatedPrunableTree { } } Node::Nil => Tree::empty(), - Node::Pruned => Tree::empty_pruned(), } } } @@ -941,6 +944,33 @@ impl LocatedPrunableTree { root: go(&to_clear, self.root_addr, &self.root), } } + + #[cfg(test)] + pub(crate) fn flag_positions(&self) -> BTreeMap { + fn go( + root: &PrunableTree, + root_addr: Address, + acc: &mut BTreeMap, + ) { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = root_addr + .children() + .expect("A parent node cannot appear at level 0"); + go(left, l_addr, acc); + go(right, r_addr, acc); + } + Node::Leaf { value } if value.1 != RetentionFlags::EPHEMERAL => { + acc.insert(root_addr.max_position(), value.1); + } + _ => (), + } + } + + let mut result = BTreeMap::new(); + go(&self.root, self.root_addr, &mut result); + result + } } // We need an applicative functor for Result for this function so that we can correctly @@ -963,13 +993,15 @@ fn accumulate_result_with( #[cfg(test)] mod tests { - use std::collections::BTreeSet; + use std::collections::{BTreeMap, BTreeSet}; use incrementalmerkletree::{Address, Level, Position}; + use proptest::proptest; use super::{LocatedPrunableTree, PrunableTree, RetentionFlags}; use crate::{ error::{InsertionError, QueryError}, + testing::{arb_char_str, arb_prunable_tree}, tree::{ tests::{leaf, nil, parent}, LocatedTree, @@ -1189,4 +1221,34 @@ mod tests { )])) ); } + + proptest! { + #[test] + fn clear_flags( + root in arb_prunable_tree(arb_char_str(), 8, 2^6) + ) { + let root_addr = Address::from_parts(Level::from(7), 0); + let tree = LocatedTree::from_parts(root_addr, root); + + let (to_clear, to_retain) = tree.flag_positions().into_iter().enumerate().fold( + (BTreeMap::new(), BTreeMap::new()), + |(mut to_clear, mut to_retain), (i, (pos, flags))| { + if i % 2 == 0 { + to_clear.insert(pos, flags); + } else { + to_retain.insert(pos, flags); + } + (to_clear, to_retain) + } + ); + + let pre_clearing_max_position = tree.max_position(); + let cleared = tree.clear_flags(to_clear); + + // Clearing flags should not modify the max position of leaves represented + // in the shard. + assert!(cleared.max_position() == pre_clearing_max_position); + assert_eq!(to_retain, cleared.flag_positions()); + } + } } diff --git a/shardtree/src/tree.rs b/shardtree/src/tree.rs index 6682c7b..af33295 100644 --- a/shardtree/src/tree.rs +++ b/shardtree/src/tree.rs @@ -19,9 +19,6 @@ pub enum Node { Leaf { value: V }, /// The empty tree; a subtree or leaf for which no information is available. Nil, - /// An empty node in the tree created as a consequence of partial reinserion of data into a - /// subtree after the subtree was previously pruned. - Pruned, } impl Node { @@ -39,7 +36,6 @@ impl Node { Node::Parent { .. } => None, Node::Leaf { value } => Some(value), Node::Nil => None, - Node::Pruned => None, } } @@ -49,7 +45,6 @@ impl Node { Node::Parent { ann, .. } => Some(ann), Node::Leaf { .. } => None, Node::Nil => None, - Node::Pruned => None, } } @@ -76,7 +71,6 @@ impl<'a, C: Clone, A: Clone, V: Clone> Node { value: (*value).clone(), }, Node::Nil => Node::Nil, - Node::Pruned => Node::Pruned, } } } @@ -94,16 +88,16 @@ impl Deref for Tree { impl Tree { /// Constructs the empty tree. - pub fn empty() -> Self { + /// + /// This represents a tree for which we have no information. + pub const fn empty() -> Self { Tree(Node::Nil) } - /// Constructs the empty tree consisting of a single pruned node. - pub fn empty_pruned() -> Self { - Tree(Node::Pruned) - } - /// Constructs a tree containing a single leaf. + /// + /// This represents either leaf of the tree, or an internal parent node of the + /// tree whose children have all been pruned. pub fn leaf(value: V) -> Self { Tree(Node::Leaf { value }) } @@ -117,7 +111,7 @@ impl Tree { }) } - /// Returns `true` if the tree has no leaves. + /// Returns `true` if the tree is the [`Node::Nil`] node. pub fn is_empty(&self) -> bool { self.0.is_nil() } @@ -133,7 +127,7 @@ impl Tree { matches!(&self.0, Node::Leaf { .. }) } - /// Returns a vector of the addresses of [`Node::Nil`] and [`Node::Pruned`] subtree roots + /// Returns a vector of the addresses of [`Node::Nil`] subtree roots /// within this tree. /// /// The given address must correspond to the root of this tree, or this method will @@ -155,7 +149,7 @@ impl Tree { left_incomplete } Node::Leaf { .. } => vec![], - Node::Nil | Node::Pruned => vec![root_addr], + Node::Nil => vec![root_addr], } } @@ -173,7 +167,6 @@ impl Tree { }, Node::Leaf { value } => Node::Leaf { value: f(value) }, Node::Nil => Node::Nil, - Node::Pruned => Node::Pruned, }) } @@ -192,7 +185,6 @@ impl Tree { }, Node::Leaf { value } => Node::Leaf { value: f(value)? }, Node::Nil => Node::Nil, - Node::Pruned => Node::Pruned, })) } } @@ -240,26 +232,6 @@ impl LocatedTree { self.root.incomplete_nodes(self.root_addr) } - /// Returns the maximum position at which a non-`Nil` leaf has been observed in the tree. - /// - /// Note that no actual leaf value may exist at this position, as it may have previously been - /// pruned. - pub fn max_position(&self) -> Option { - Self::max_position_internal(self.root_addr, &self.root) - } - - pub(crate) fn max_position_internal(addr: Address, root: &Tree) -> Option { - match &root.0 { - Node::Nil => None, - Node::Leaf { .. } | Node::Pruned => Some(addr.position_range_end() - 1), - Node::Parent { left, right, .. } => { - let (l_addr, r_addr) = addr.children().unwrap(); - Self::max_position_internal(r_addr, right.as_ref()) - .or_else(|| Self::max_position_internal(l_addr, left.as_ref())) - } - } - } - /// Returns the value at the specified position, if any. pub fn value_at_position(&self, position: Position) -> Option<&V> { fn go(pos: Position, addr: Address, root: &Tree) -> Option<&V> { @@ -464,7 +436,6 @@ pub(crate) mod tests { root: parent(l.clone(), r.clone()), }; - assert_eq!(t.max_position(), Some(7.into())); assert_eq!(t.value_at_position(5.into()), Some(&"b".to_string())); assert_eq!(t.value_at_position(8.into()), None); assert_eq!(t.subtree(Address::from_parts(0.into(), 1)), None);