Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize ReadCacheLookup, 6.7 speed-up #517

Merged
merged 4 commits into from
Jan 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ serde = "1.0.214"
serde_json = "1.0.133"
clap = "4.5.20"
rand_chacha = "0.3.1"
bitvec = "1.0.1"

[dependencies]
lazy_static = { workspace = true }
Expand All @@ -70,11 +71,13 @@ num-integer = { workspace = true }
chia-bls = { workspace = true }
chia-sha2 = { workspace = true }
hex-literal = { workspace = true }
bitvec = { workspace = true }
# for secp sigs
k256 = { version = "0.13.4", features = ["ecdsa"] }
p256 = { version = "0.13.2", features = ["ecdsa"] }
# for keccak256
sha3 = "0.10.8"
rand = { workspace = true }

[dev-dependencies]
rstest = { workspace = true }
Expand Down
1 change: 0 additions & 1 deletion benches/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ fn serialize_benchmark(c: &mut Criterion) {
let block1: &[u8] = include_bytes!("1.generator");

let mut group = c.benchmark_group("serialize");
group.sample_size(10);

for (block, name) in [(&block0, "0"), (&block1, "1")] {
let mut a = Allocator::new();
Expand Down
43 changes: 43 additions & 0 deletions src/serde/identity_hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use rand::Rng;
use std::hash::{BuildHasher, Hasher};

/// A pass-through hasher for keys that are already uniformly distributed
/// (here: sha256 tree hashes). Instead of re-hashing, it takes the first
/// 8 bytes of the key, XORed with a per-map salt, as the hash value.
#[derive(Default, Clone, Copy)]
pub struct IdentityHash(u64, u64);

impl IdentityHash {
    /// Build a hasher whose output is mixed with `salt`.
    fn new(salt: u64) -> Self {
        Self(0, salt)
    }
}

impl Hasher for IdentityHash {
    fn finish(&self) -> u64 {
        self.0
    }

    fn write(&mut self, bytes: &[u8]) {
        // Panics if fewer than 8 bytes are supplied; keys are expected to
        // be full 32-byte hashes, of which only the first 8 bytes matter.
        let head: [u8; 8] = bytes[0..8].try_into().expect("expected 32 byte hashes");
        self.0 = u64::from_le_bytes(head) ^ self.1;
    }

    fn write_u64(&mut self, _i: u64) {
        panic!("This hasher only takes bytes");
    }
}

/// A `BuildHasher` that stamps every `IdentityHash` it creates with the
/// same random salt, drawn once when the `RandomState` is constructed.
pub struct RandomState(u64);

impl Default for RandomState {
    fn default() -> Self {
        // One fresh random salt per map — presumably to keep hash values
        // unpredictable across runs despite the identity hashing; confirm
        // with the PR discussion if hardening matters here.
        Self(rand::thread_rng().gen())
    }
}

impl BuildHasher for RandomState {
    type Hasher = IdentityHash;

    fn build_hasher(&self) -> Self::Hasher {
        IdentityHash::new(self.0)
    }
}
2 changes: 2 additions & 0 deletions src/serde/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod de;
mod de_br;
mod de_tree;
mod errors;
mod identity_hash;
mod incremental;
mod object_cache;
mod parse_atom;
Expand All @@ -19,6 +20,7 @@ mod test;
pub use de::node_from_bytes;
pub use de_br::{node_from_bytes_backrefs, node_from_bytes_backrefs_record};
pub use de_tree::{parse_triples, ParsedTriple};
pub use identity_hash::RandomState;
pub use incremental::{Serializer, UndoState};
pub use object_cache::{serialized_length, treehash, ObjectCache};
pub use ser::{node_to_bytes, node_to_bytes_limit};
Expand Down
86 changes: 52 additions & 34 deletions src/serde/read_cache_lookup.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use crate::serde::RandomState;
use bitvec::prelude::*;
use bitvec::vec::BitVec;
/// When deserializing a clvm object, a stack of deserialized child objects
/// is created, which can be used with back-references. A `ReadCacheLookup` keeps
/// track of the state of this stack and all child objects under each root
Expand Down Expand Up @@ -28,10 +31,10 @@ pub struct ReadCacheLookup {
/// the tree hashes of the contents on the left and right
read_stack: Vec<(Bytes32, Bytes32)>,

count: HashMap<Bytes32, u32>,
count: HashMap<Bytes32, u32, RandomState>,

/// a mapping of tree hashes to `(parent, is_right)` tuples
parent_lookup: HashMap<Bytes32, Vec<(Bytes32, u8)>>,
parent_lookup: HashMap<Bytes32, Vec<(Bytes32, bool)>, RandomState>,
}

impl Default for ReadCacheLookup {
Expand All @@ -43,10 +46,12 @@ impl Default for ReadCacheLookup {
impl ReadCacheLookup {
pub fn new() -> Self {
let root_hash = hash_blob(&[1]);
let read_stack = vec![];
let mut count = HashMap::default();
let read_stack = Vec::with_capacity(1000);
// all keys in count and parent_lookup are tree-hashes. There's no need
// to hash them again for the hash map
let mut count = HashMap::with_hasher(RandomState::default());
count.insert(root_hash, 1);
let parent_lookup = HashMap::default();
let parent_lookup = HashMap::with_hasher(RandomState::default());
Self {
root_hash,
read_stack,
Expand All @@ -67,13 +72,13 @@ impl ReadCacheLookup {
*self.count.entry(id).or_insert(0) += 1;
*self.count.entry(new_root_hash).or_insert(0) += 1;

let new_parent_to_old_root = (new_root_hash, 0);
let new_parent_to_old_root = (new_root_hash, false);
self.parent_lookup
.entry(id)
.or_default()
.push(new_parent_to_old_root);

let new_parent_to_id = (new_root_hash, 1);
let new_parent_to_id = (new_root_hash, true);
self.parent_lookup
.entry(self.root_hash)
.or_default()
Expand Down Expand Up @@ -108,31 +113,41 @@ impl ReadCacheLookup {
self.parent_lookup
.entry(left.0)
.or_default()
.push((new_root_hash, 0));
.push((new_root_hash, false));

self.parent_lookup
.entry(right.0)
.or_default()
.push((new_root_hash, 1));
.push((new_root_hash, true));

self.push(new_root_hash);
}

/// return the list of minimal-length paths to the given hash which will serialize to no larger
/// than the given size (or an empty list if no such path exists)
pub fn find_paths(&self, id: &Bytes32, serialized_length: u64) -> Vec<Vec<u8>> {
let mut seen_ids = HashSet::<&Bytes32>::default();
let mut possible_responses = vec![];
if serialized_length < 3 {
return possible_responses;
// this function is not cheap. only keep going if there's potential to
// save enough bytes
if serialized_length < 4 {
return vec![];
}
assert!(serialized_length > 2);

let mut possible_responses = Vec::with_capacity(50);

// all the values we put in this hash set are themselves sha256 hashes.
// There's no point in hashing the hashes
let mut seen_ids = HashSet::<&Bytes32, RandomState>::with_capacity_and_hasher(
1000,
RandomState::default(),
);

let max_bytes_for_path_encoding = serialized_length - 2; // 1 byte for 0xfe, 1 min byte for savings
let max_path_length: usize = (max_bytes_for_path_encoding.saturating_mul(8) - 1)
.try_into()
.unwrap_or(usize::MAX);
seen_ids.insert(id);
let mut partial_paths = vec![(*id, vec![])];
let mut partial_paths = Vec::with_capacity(500);
partial_paths.push((*id, BitVec::with_capacity(100)));

while !partial_paths.is_empty() {
let mut new_partial_paths = vec![];
Expand All @@ -147,11 +162,11 @@ impl ReadCacheLookup {
for (parent, direction) in items.iter() {
if *(self.count.get(parent).unwrap_or(&0)) > 0 && !seen_ids.contains(parent)
{
let mut new_path = path.clone();
new_path.push(*direction);
if new_path.len() > max_path_length {
if path.len() + 1 > max_path_length {
richardkiss marked this conversation as resolved.
Show resolved Hide resolved
return possible_responses;
}
let mut new_path = path.clone();
new_path.push(*direction);
new_partial_paths.push((*parent, new_path));
}
seen_ids.insert(parent);
Expand Down Expand Up @@ -185,13 +200,13 @@ impl ReadCacheLookup {
/// If `A` => `v` then `[A] + [0]` => `v * 2` and `[A] + [1]` => `v * 2 + 1`
/// Then the integer is turned into the minimal-length array of `u8` representing
/// that value as an unsigned integer.
fn reversed_path_to_vec_u8(path: &[u8]) -> Vec<u8> {
fn reversed_path_to_vec_u8(path: &BitSlice) -> Vec<u8> {
richardkiss marked this conversation as resolved.
Show resolved Hide resolved
let byte_count = (path.len() + 1 + 7) >> 3;
let mut v = vec![0; byte_count];
let mut index = byte_count - 1;
let mut mask: u8 = 1;
for p in path.iter().rev() {
if *p != 0 {
if p != false {
v[index] |= mask;
}
mask = {
Expand All @@ -213,30 +228,33 @@ mod tests {

#[test]
fn test_path_to_vec_u8() {
assert_eq!(reversed_path_to_vec_u8(&[]), vec!(0b1));
assert_eq!(reversed_path_to_vec_u8(&[0]), vec!(0b10));
assert_eq!(reversed_path_to_vec_u8(&[1]), vec!(0b11));
assert_eq!(reversed_path_to_vec_u8(&[0, 0]), vec!(0b100));
assert_eq!(reversed_path_to_vec_u8(&[0, 1]), vec!(0b101));
assert_eq!(reversed_path_to_vec_u8(&[1, 0]), vec!(0b110));
assert_eq!(reversed_path_to_vec_u8(&[1, 1]), vec!(0b111));
assert_eq!(reversed_path_to_vec_u8(&[1, 1, 1]), vec!(0b1111));
assert_eq!(reversed_path_to_vec_u8(&[0, 1, 1, 1]), vec!(0b10111));
assert_eq!(reversed_path_to_vec_u8(&[1, 0, 1, 1, 1]), vec!(0b110111));
assert_eq!(reversed_path_to_vec_u8(bits![]), vec!(0b1));
assert_eq!(reversed_path_to_vec_u8(bits![0]), vec!(0b10));
assert_eq!(reversed_path_to_vec_u8(bits![1]), vec!(0b11));
assert_eq!(reversed_path_to_vec_u8(bits![0, 0]), vec!(0b100));
assert_eq!(reversed_path_to_vec_u8(bits![0, 1]), vec!(0b101));
assert_eq!(reversed_path_to_vec_u8(bits![1, 0]), vec!(0b110));
assert_eq!(reversed_path_to_vec_u8(bits![1, 1]), vec!(0b111));
assert_eq!(reversed_path_to_vec_u8(bits![1, 1, 1]), vec!(0b1111));
assert_eq!(reversed_path_to_vec_u8(bits![0, 1, 1, 1]), vec!(0b10111));
assert_eq!(
reversed_path_to_vec_u8(bits![1, 0, 1, 1, 1]),
vec!(0b110111)
);
assert_eq!(
reversed_path_to_vec_u8(&[1, 1, 0, 1, 1, 1]),
reversed_path_to_vec_u8(bits![1, 1, 0, 1, 1, 1]),
vec!(0b1110111)
);
assert_eq!(
reversed_path_to_vec_u8(&[0, 1, 1, 0, 1, 1, 1]),
reversed_path_to_vec_u8(bits![0, 1, 1, 0, 1, 1, 1]),
vec!(0b10110111)
);
assert_eq!(
reversed_path_to_vec_u8(&[0, 0, 1, 1, 0, 1, 1, 1]),
reversed_path_to_vec_u8(bits![0, 0, 1, 1, 0, 1, 1, 1]),
vec!(0b1, 0b00110111)
);
assert_eq!(
reversed_path_to_vec_u8(&[1, 0, 0, 1, 1, 0, 1, 1, 1]),
reversed_path_to_vec_u8(bits![1, 0, 0, 1, 1, 0, 1, 1, 1]),
vec!(0b11, 0b00110111)
);
}
Expand Down
Loading