-
Notifications
You must be signed in to change notification settings - Fork 325
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Avoid inserting an empty leaf in indexed trees on update #10281
Changes from 2 commits
07c34b5
bac1a18
fa55e7f
aaea0f5
6d7355f
daa898a
3634530
4325962
9f33898
8895dfb
1748df6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -252,9 +252,11 @@ class ContentAddressedIndexedTree : public ContentAddressedAppendOnlyTree<Store, | |
const InsertionGenerationCallback& completion); | ||
|
||
struct InsertionUpdates { | ||
// On insertion, we always update a low leaf. If it's a new leaf, we update the pointer of the low leaf. | ||
// If it's an update, we update the 'low' leaf itself. | ||
LeafUpdate low_leaf_update; | ||
IndexedLeafValueType new_leaf; | ||
index_t new_leaf_index; | ||
// If it's not an update, we also create a new leaf. | ||
std::optional<std::pair<IndexedLeafValueType, index_t>> new_leaf; | ||
}; | ||
|
||
struct SequentialInsertionGenerationResponse { | ||
|
@@ -1422,6 +1424,14 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::add_or_update_values_seq | |
const AddSequentiallyCompletionCallbackWithWitness& completion, | ||
bool capture_witness) | ||
{ | ||
|
||
// This struct is used to collect some state from the asynchronous operations we are about to perform | ||
struct IntermediateResults { | ||
std::shared_ptr<std::vector<InsertionUpdates>> updates_to_perform; | ||
size_t appended_leaves = 0; | ||
}; | ||
std::shared_ptr<IntermediateResults> results = std::make_shared<IntermediateResults>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Usual idiom when creating smart pointers is
No need to repeat the type more than once. The rule in general is "use auto if and only if the type is clear from context". |
||
|
||
auto on_error = [=](const std::string& message) { | ||
try { | ||
TypedResponse<AddIndexedDataSequentiallyResponse<LeafValueType>> response; | ||
|
@@ -1443,7 +1453,7 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::add_or_update_values_seq | |
ReadTransactionPtr tx = store_->create_read_transaction(); | ||
store_->get_meta(meta, *tx, true); | ||
|
||
index_t new_total_size = values.size() + meta.size; | ||
index_t new_total_size = results->appended_leaves + meta.size; | ||
meta.size = new_total_size; | ||
meta.root = store_->get_current_root(*tx, true); | ||
|
||
|
@@ -1452,23 +1462,30 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::add_or_update_values_seq | |
|
||
if (capture_witness) { | ||
// Split results->update_witnesses between low_leaf_witness_data and insertion_witness_data | ||
// Currently we always insert an empty leaf, even if it's an update, so we can split based | ||
// on the index of the witness data | ||
response.inner.insertion_witness_data = | ||
std::make_shared<std::vector<LeafUpdateWitnessData<LeafValueType>>>(); | ||
; | ||
response.inner.insertion_witness_data->reserve(results->updates_to_perform->size()); | ||
|
||
response.inner.low_leaf_witness_data = | ||
std::make_shared<std::vector<LeafUpdateWitnessData<LeafValueType>>>(); | ||
; | ||
|
||
for (size_t i = 0; i < updates_completion_response.inner.update_witnesses->size(); ++i) { | ||
LeafUpdateWitnessData<LeafValueType>& witness_data = | ||
updates_completion_response.inner.update_witnesses->at(i); | ||
// If even, it's a low leaf, if odd, it's an insertion witness | ||
if (i % 2 == 0) { | ||
response.inner.low_leaf_witness_data->push_back(witness_data); | ||
response.inner.low_leaf_witness_data->reserve(results->updates_to_perform->size()); | ||
|
||
size_t current_witness_index = 0; | ||
for (size_t i = 0; i < results->updates_to_perform->size(); ++i) { | ||
LeafUpdateWitnessData<LeafValueType> low_leaf_witness = | ||
updates_completion_response.inner.update_witnesses->at(current_witness_index++); | ||
response.inner.low_leaf_witness_data->push_back(low_leaf_witness); | ||
|
||
InsertionUpdates& insertion_update = results->updates_to_perform->at(i); | ||
|
||
if (insertion_update.new_leaf.has_value()) { | ||
LeafUpdateWitnessData<LeafValueType> insertion_witness = | ||
updates_completion_response.inner.update_witnesses->at(current_witness_index++); | ||
response.inner.insertion_witness_data->push_back(insertion_witness); | ||
} else { | ||
response.inner.insertion_witness_data->push_back(witness_data); | ||
// Append an empty insertion witness | ||
response.inner.insertion_witness_data->push_back(LeafUpdateWitnessData<LeafValueType>{ | ||
.leaf = IndexedLeafValueType::empty(), .index = 0, .path = std::vector<fr>(depth_) }); | ||
} | ||
} | ||
} | ||
|
@@ -1487,15 +1504,24 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::add_or_update_values_seq | |
} | ||
|
||
std::shared_ptr<std::vector<LeafUpdate>> flat_updates = std::make_shared<std::vector<LeafUpdate>>(); | ||
flat_updates->reserve(insertion_response.inner.updates_to_perform->size() * 2); | ||
|
||
for (size_t i = 0; i < insertion_response.inner.updates_to_perform->size(); ++i) { | ||
InsertionUpdates& insertion_update = insertion_response.inner.updates_to_perform->at(i); | ||
flat_updates->push_back(insertion_update.low_leaf_update); | ||
flat_updates->push_back(LeafUpdate{ | ||
.leaf_index = insertion_update.new_leaf_index, | ||
.updated_leaf = insertion_update.new_leaf, | ||
.original_leaf = IndexedLeafValueType::empty(), | ||
}); | ||
if (insertion_update.new_leaf.has_value()) { | ||
results->appended_leaves++; | ||
IndexedLeafValueType new_leaf; | ||
index_t new_leaf_index = 0; | ||
std::tie(new_leaf, new_leaf_index) = insertion_update.new_leaf.value(); | ||
flat_updates->push_back(LeafUpdate{ | ||
.leaf_index = new_leaf_index, | ||
.updated_leaf = new_leaf, | ||
.original_leaf = IndexedLeafValueType::empty(), | ||
}); | ||
} | ||
} | ||
results->updates_to_perform = std::move(insertion_response.inner.updates_to_perform); | ||
|
||
if (capture_witness) { | ||
perform_updates(flat_updates->size(), flat_updates, final_completion); | ||
|
@@ -1514,7 +1540,6 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse | |
{ | ||
execute_and_report<SequentialInsertionGenerationResponse>( | ||
[=, this](TypedResponse<SequentialInsertionGenerationResponse>& response) { | ||
response.inner.highest_index = 0; | ||
response.inner.updates_to_perform = std::make_shared<std::vector<InsertionUpdates>>(); | ||
|
||
TreeMeta meta; | ||
|
@@ -1523,18 +1548,6 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse | |
|
||
RequestContext requestContext; | ||
requestContext.includeUncommitted = true; | ||
// Ensure that the tree is not going to be overfilled | ||
index_t new_total_size = values.size() + meta.size; | ||
if (new_total_size > max_size_) { | ||
throw std::runtime_error(format("Unable to insert values into tree ", | ||
meta.name, | ||
" new size: ", | ||
new_total_size, | ||
" max size: ", | ||
max_size_)); | ||
} | ||
// The highest index touched will be the last leaf index, since we append a zero for updates | ||
response.inner.highest_index = new_total_size - 1; | ||
requestContext.root = store_->get_current_root(*tx, true); | ||
// Fetch the frontier (non empty nodes to the right) of the tree. This will ensure that perform_updates or | ||
// perform_updates_without_witness has all the cached nodes it needs to perform the insertions. See comment | ||
|
@@ -1543,12 +1556,13 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse | |
find_leaf_hash(meta.size - 1, requestContext, *tx, true); | ||
} | ||
|
||
index_t current_size = meta.size; | ||
|
||
for (size_t i = 0; i < values.size(); ++i) { | ||
const LeafValueType& new_payload = values[i]; | ||
if (new_payload.is_empty()) { | ||
continue; | ||
} | ||
index_t index_of_new_leaf = i + meta.size; | ||
fr value = new_payload.get_key(); | ||
|
||
// This gives us the leaf that need updating | ||
|
@@ -1595,25 +1609,25 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse | |
.updated_leaf = IndexedLeafValueType::empty(), | ||
.original_leaf = low_leaf, | ||
}, | ||
.new_leaf = IndexedLeafValueType::empty(), | ||
.new_leaf_index = index_of_new_leaf, | ||
.new_leaf = std::nullopt, | ||
}; | ||
|
||
if (!is_already_present) { | ||
// Update the current leaf to point it to the new leaf | ||
IndexedLeafValueType new_leaf = | ||
IndexedLeafValueType(new_payload, low_leaf.nextIndex, low_leaf.nextValue); | ||
|
||
index_t index_of_new_leaf = current_size; | ||
low_leaf.nextIndex = index_of_new_leaf; | ||
low_leaf.nextValue = value; | ||
current_size++; | ||
// Cache the new leaf | ||
store_->set_leaf_key_at_index(index_of_new_leaf, new_leaf); | ||
store_->put_cached_leaf_by_index(index_of_new_leaf, new_leaf); | ||
// Update cached low leaf | ||
store_->put_cached_leaf_by_index(low_leaf_index, low_leaf); | ||
|
||
insertion_update.low_leaf_update.updated_leaf = low_leaf; | ||
insertion_update.new_leaf = new_leaf; | ||
insertion_update.new_leaf = std::pair(new_leaf, index_of_new_leaf); | ||
} else if (IndexedLeafValueType::is_updateable()) { | ||
// Update the current leaf's value, don't change it's link | ||
IndexedLeafValueType replacement_leaf = | ||
|
@@ -1633,6 +1647,18 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse | |
|
||
response.inner.updates_to_perform->push_back(insertion_update); | ||
} | ||
|
||
// Ensure that the tree is not going to be overfilled | ||
if (current_size > max_size_) { | ||
throw std::runtime_error(format("Unable to insert values into tree ", | ||
meta.name, | ||
" new size: ", | ||
current_size, | ||
" max size: ", | ||
max_size_)); | ||
} | ||
// The highest index touched will be current_size - 1 | ||
response.inner.highest_index = current_size - 1; | ||
}, | ||
completion); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ pub mod check_valid_low_leaf; | |
use crate::{ | ||
abis::append_only_tree_snapshot::AppendOnlyTreeSnapshot, | ||
merkle_tree::{ | ||
leaf_preimage::{IndexedTreeLeafPreimage, IndexedTreeLeafValue}, | ||
membership::{assert_check_membership, MembershipWitness}, | ||
root::{calculate_empty_tree_root, calculate_subtree_root, root_from_sibling_path}, | ||
}, | ||
|
@@ -112,14 +113,14 @@ pub fn insert<Value, Leaf, let TreeHeight: u32>( | |
build_insertion_leaf: fn(Value, Leaf) -> Leaf, | ||
) -> AppendOnlyTreeSnapshot | ||
where | ||
Value: Eq + Empty, | ||
Leaf: Hash + Empty, | ||
Value: IndexedTreeLeafValue, | ||
Leaf: IndexedTreeLeafPreimage, | ||
{ | ||
Comment on lines
+116
to
118
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm going to refactor in a followup the indexed tree insertions code using the leaf and preimage traits. the original code was done when traits weren't a thing so we were passing callbacks to the functions, which is uglier! |
||
assert(is_valid_low_leaf(low_leaf_preimage, value), "Invalid low leaf"); | ||
|
||
// perform membership check for the low leaf against the original root | ||
assert_check_membership( | ||
low_leaf_preimage.hash(), | ||
low_leaf_preimage.as_leaf(), | ||
low_leaf_membership_witness.leaf_index, | ||
low_leaf_membership_witness.sibling_path, | ||
snapshot.root, | ||
|
@@ -129,29 +130,34 @@ where | |
let updated_low_leaf = | ||
update_low_leaf(low_leaf_preimage, value, snapshot.next_available_leaf_index); | ||
|
||
// Update low leaf | ||
snapshot.root = root_from_sibling_path( | ||
updated_low_leaf.hash(), | ||
updated_low_leaf.as_leaf(), | ||
low_leaf_membership_witness.leaf_index, | ||
low_leaf_membership_witness.sibling_path, | ||
); | ||
|
||
let insertion_leaf = build_insertion_leaf(value, low_leaf_preimage); | ||
|
||
assert_check_membership( | ||
0, | ||
snapshot.next_available_leaf_index as Field, | ||
insertion_sibling_path, | ||
snapshot.root, | ||
); | ||
|
||
// Calculate the new root | ||
snapshot.root = root_from_sibling_path( | ||
insertion_leaf.hash(), | ||
snapshot.next_available_leaf_index as Field, | ||
insertion_sibling_path, | ||
); | ||
|
||
snapshot.next_available_leaf_index += 1; | ||
|
||
snapshot | ||
if low_leaf_preimage.get_key() == value.get_key() { | ||
// If it's an update, we don't need to insert the new leaf and advance the tree | ||
snapshot | ||
} else { | ||
let insertion_leaf = build_insertion_leaf(value, low_leaf_preimage); | ||
assert_check_membership( | ||
0, | ||
snapshot.next_available_leaf_index as Field, | ||
insertion_sibling_path, | ||
snapshot.root, | ||
); | ||
|
||
// Calculate the new root | ||
snapshot.root = root_from_sibling_path( | ||
insertion_leaf.as_leaf(), | ||
snapshot.next_available_leaf_index as Field, | ||
insertion_sibling_path, | ||
); | ||
|
||
snapshot.next_available_leaf_index += 1; | ||
|
||
snapshot | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,16 @@ | ||
use crate::traits::Empty; | ||
|
||
pub trait LeafPreimage { | ||
fn get_key(self) -> Field; | ||
fn as_leaf(self) -> Field; | ||
} | ||
|
||
pub trait IndexedTreeLeafPreimage { | ||
pub trait IndexedTreeLeafPreimage: Empty { | ||
fn get_key(self) -> Field; | ||
fn get_next_key(self) -> Field; | ||
fn as_leaf(self) -> Field; | ||
} | ||
|
||
pub trait IndexedTreeLeafValue: Eq + Empty { | ||
fn get_key(self) -> Field; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ask me for review later, it's rare to need so many smart ptrs :)