From 0127ceb1dde4c313f5953dabedc2bc974df98113 Mon Sep 17 00:00:00 2001
From: Piotr Czarnecki
Date: Tue, 7 Feb 2017 15:23:35 +0100
Subject: [PATCH] Code for gathering stats about extremely slow HashMap
 operations

---
 Cargo.toml          |  2 +-
 examples/stats.rs   | 26 ++++++++++++++++++++++++++
 src/adaptive_map.rs |  2 +-
 src/lib.rs          |  4 ++++
 src/table.rs        | 33 +++++++++++++++++++++++++++++++++
 5 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 examples/stats.rs

diff --git a/Cargo.toml b/Cargo.toml
index 942d483..926927b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -36,4 +36,4 @@ repository = "https://github.com/carllerche/hashmap2"
 homepage = "https://github.com/carllerche/hashmap2"
 
 [dependencies]
-rand = "0.3.12"
+rand = "0.3.15"
diff --git a/examples/stats.rs b/examples/stats.rs
new file mode 100644
index 0000000..d1f5655
--- /dev/null
+++ b/examples/stats.rs
@@ -0,0 +1,26 @@
+extern crate hashmap2;
+extern crate rand;
+
+use hashmap2::HashMap;
+use rand::Rng;
+
+fn main() {
+    let mut map: HashMap<u64, ()> = HashMap::new();
+    assert_eq!(map.len(), 0);
+    let mut rng = rand::weak_rng();
+    let mut iter = rng.gen_iter();
+    let len = 2 << 20;
+    let usable_cap = (len as f32 * 0.833) as usize;
+    let mut stats = vec![];
+    for _ in 0..10000 {
+        while map.len() < usable_cap {
+            map.insert(iter.next().unwrap(), ());
+        }
+        map.stats(&mut stats);
+        map.clear();
+    }
+    for (i, (displacement, forward_shift)) in stats.into_iter().enumerate() {
+        println!("{}: {}\t{}", i, displacement, forward_shift);
+    }
+    println!("map len={:?} capacity={:?}", map.len(), map.capacity());
+}
diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs
index bb6b1ca..decbdd6 100644
--- a/src/adaptive_map.rs
+++ b/src/adaptive_map.rs
@@ -22,7 +22,7 @@ use HashMap;
 
 // Beyond this displacement, we switch to safe hashing or grow the table.
 const DISPLACEMENT_THRESHOLD: usize = 128;
-const FORWARD_SHIFT_THRESHOLD: usize = 512;
+const FORWARD_SHIFT_THRESHOLD: usize = 1024;
 // When the map's load factor is below this threshold, we switch to safe hashing.
 // Otherwise, we grow the table.
 // const LOAD_FACTOR_THRESHOLD: f32 = 0.625;
diff --git a/src/lib.rs b/src/lib.rs
index 2cfe896..fa91a20 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1239,6 +1239,10 @@ impl<K, V, S> HashMap<K, V, S>
 
         self.search_mut(k).into_occupied_bucket().map(|bucket| pop_internal(bucket))
     }
+
+    pub fn stats(&self, stats: &mut Vec<(u64, u64)>) {
+        self.table.stats(stats);
+    }
 }
 
 // Not copying this requires specialization
diff --git a/src/table.rs b/src/table.rs
index ae06d5a..9a292aa 100644
--- a/src/table.rs
+++ b/src/table.rs
@@ -590,6 +590,39 @@ fn test_offset_calculation() {
 }
 
 impl<K, V> RawTable<K, V> {
+    pub fn stats(&self, stats: &mut Vec<(u64, u64)>) {
+        if stats.is_empty() {
+            *stats = vec![(0, 0); 2000];
+        }
+        // stats.clear();
+        // stats.extend(iter::repeat((0, 0)).take(1000));
+        let mut iter = self.raw_buckets();
+        let first_hash = iter.raw.hash;
+        let mut latest_hash = iter.raw.hash;
+        let mut chunk_info = vec![];
+        while let Some(raw) = iter.next() {
+            let num_empty = (raw.hash as usize - latest_hash as usize) / size_of::<u64>();
+            let idx = (raw.hash as usize - first_hash as usize) / size_of::<u64>();
+            stats[0].0 += num_empty as u64;
+            stats[0].1 += num_empty as u64;
+            if num_empty > 0 {
+                for n in chunk_info.drain(..) {
+                    stats[n as usize].1 += 1;
+                }
+            } else {
+                for n in chunk_info.iter_mut() {
+                    *n += 1;
+                }
+            }
+            chunk_info.push(0);
+            let ib = unsafe {
+                (*raw.hash) as usize & (self.capacity - 1)
+            };
+            let displacement = (idx as isize - ib as isize) as usize & (self.capacity - 1);
+            stats[displacement].0 += 1;
+            latest_hash = iter.raw.hash;
+        }
+    }
     /// Does not initialize the buckets. The caller should ensure they,
     /// at the very least, set every hash to EMPTY_BUCKET.
     unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {
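
Note (not part of the patch): the `displacement` tallied in `stats()` above is
the distance between the index a full bucket occupies (`idx`) and the ideal
bucket its stored hash maps to (`ib`), wrapped around the power-of-two table.
A minimal standalone sketch of that computation follows; the helper name and
the sample values are illustrative only:

    // How many probes past its ideal bucket an entry has been pushed.
    // Assumes `capacity` is a power of two, as in RawTable above.
    fn displacement(hash: u64, idx: usize, capacity: usize) -> usize {
        let ib = hash as usize & (capacity - 1); // ideal bucket
        idx.wrapping_sub(ib) & (capacity - 1)    // wraps past the table end
    }

    fn main() {
        // Stored three slots past its ideal bucket: displacement 3.
        assert_eq!(displacement(6, 9, 16), 3);
        // Wrap-around case: ideal bucket 14, stored at index 1.
        assert_eq!(displacement(14, 1, 16), 3);
    }

The example can be run with `cargo run --release --example stats`; each printed
line pairs an index with the two counters accumulated for it in the loop above.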