Skip to content

Commit

Permalink
Code for gathering stats about extremely slow HashMap operations
Browse files Browse the repository at this point in the history
  • Loading branch information
pczarn committed Feb 7, 2017
1 parent 76c3875 commit 0127ceb
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ repository = "https://github.com/carllerche/hashmap2"
homepage = "https://github.com/carllerche/hashmap2"

[dependencies]
rand = "0.3.12"
rand = "0.3.15"
26 changes: 26 additions & 0 deletions examples/stats.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
extern crate hashmap2;
extern crate rand;

use hashmap2::HashMap;
use rand::Rng;

/// Repeatedly fills a `HashMap<i32, ()>` with random keys up to a fixed
/// target size, folds the table statistics of every round into a single
/// histogram, and prints that histogram once all rounds are done.
fn main() {
    let mut table: HashMap<i32, ()> = HashMap::new();
    assert_eq!(table.len(), 0);

    // Endless stream of random keys; the key type is inferred from `insert`.
    let mut rng = rand::weak_rng();
    let mut keys = rng.gen_iter();

    // Entries inserted per round: 83.3% of `2 << 20`.
    let slots = 2 << 20;
    let target_len = (slots as f32 * 0.833) as usize;

    // Shared accumulator; `stats` adds to it on every round.
    let mut histogram = vec![];
    for _round in 0..10000 {
        loop {
            if table.len() >= target_len {
                break;
            }
            table.insert(keys.next().unwrap(), ());
        }
        table.stats(&mut histogram);
        table.clear();
    }

    // One output row per histogram slot: index, then both counters.
    for (row, &(displacement, forward_shift)) in histogram.iter().enumerate() {
        println!("{}: {}\t{}", row, displacement, forward_shift);
    }
    println!("map len={:?} capacity={:?}", table.len(), table.capacity());
}
2 changes: 1 addition & 1 deletion src/adaptive_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use HashMap;

// Beyond this displacement, we switch to safe hashing or grow the table.
const DISPLACEMENT_THRESHOLD: usize = 128;
const FORWARD_SHIFT_THRESHOLD: usize = 512;
const FORWARD_SHIFT_THRESHOLD: usize = 1024;
// When the map's load factor is below this threshold, we switch to safe hashing.
// Otherwise, we grow the table.
// const LOAD_FACTOR_THRESHOLD: f32 = 0.625;
Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1239,6 +1239,10 @@ impl<K, V, S> HashMap<K, V, S>

self.search_mut(k).into_occupied_bucket().map(|bucket| pop_internal(bucket))
}

/// Accumulates statistics about the map's underlying table into `stats`.
///
/// This is a thin forwarder: it passes `stats` unchanged to the `stats`
/// method of `self.table`, which owns the actual bookkeeping. The method
/// takes `&self`, so the map itself is not modified; only the caller's
/// vector is written to.
pub fn stats(&self, stats: &mut Vec<(u64, u64)>) {
self.table.stats(stats);
}
}

// Not copying this requires specialization
Expand Down
33 changes: 33 additions & 0 deletions src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,39 @@ fn test_offset_calculation() {
}

impl<K, V> RawTable<K, V> {
/// Accumulates occupancy statistics for this table into `stats`.
///
/// Every element of `stats` is a pair of counters:
///
/// * `stats[d].0` is incremented once for every full bucket whose
///   displacement is `d`, i.e. the distance (modulo the capacity) between
///   the bucket's index and the index derived from its stored hash
///   (`hash & (capacity - 1)`).
/// * `stats[n].1` is incremented once for every full bucket that is
///   followed by exactly `n` more full buckets before the next gap.
/// * Both counters of `stats[0]` additionally accumulate the number of empty
///   buckets found in front of each full bucket.
///
/// An empty `stats` is first replaced by 2000 zeroed pairs. Counters are only
/// ever added to, so calling this repeatedly with the same vector aggregates
/// the results. If an index falls outside the vector (for example because
/// the caller passed a short, non-empty vector), the vector is grown with
/// zeroed pairs instead of panicking on an out-of-bounds access.
///
/// Note: the run of full buckets that is still open when iteration ends is
/// never recorded in the `.1` counters, and empty buckets after the last full
/// bucket are not counted.
pub fn stats(&self, stats: &mut Vec<(u64, u64)>) {
    // Number of zeroed pairs an empty `stats` vector is seeded with.
    const INITIAL_SLOTS: usize = 2000;

    if stats.is_empty() {
        *stats = vec![(0, 0); INITIAL_SLOTS];
    }

    let mut iter = self.raw_buckets();
    let first_hash = iter.raw.hash;
    // Position of the iterator's cursor after the previously yielded bucket.
    let mut latest_hash = iter.raw.hash;
    // Number of full buckets seen in the current gap-free run.
    let mut run_len = 0usize;

    // `while let` instead of `for`: the body reads `iter.raw` after each step.
    while let Some(raw) = iter.next() {
        // Empty buckets skipped since the previous full bucket, and the
        // index of this bucket, both derived from hash-pointer distances.
        let num_empty = (raw.hash as usize - latest_hash as usize) / size_of::<u64>();
        let idx = (raw.hash as usize - first_hash as usize) / size_of::<u64>();
        stats[0].0 += num_empty as u64;
        stats[0].1 += num_empty as u64;

        if num_empty > 0 {
            // A gap closes the previous run. Its `run_len` members were
            // followed by `run_len - 1`, ..., `1`, `0` full buckets, so each
            // of the slots `0..run_len` gains exactly one hit.
            if stats.len() < run_len {
                stats.resize(run_len, (0, 0));
            }
            for slot in stats.iter_mut().take(run_len) {
                slot.1 += 1;
            }
            run_len = 0;
        }
        run_len += 1;

        // Computed inside the loop so an empty table never evaluates
        // `capacity - 1`.
        let mask = self.capacity - 1;
        // SAFETY: `raw` was just yielded by `raw_buckets()`; this relies on
        // that iterator only handing out buckets whose hash pointer is valid
        // to read (same assumption as the original code).
        let ib = unsafe { (*raw.hash) as usize & mask };
        let displacement = idx.wrapping_sub(ib) & mask;
        if displacement >= stats.len() {
            stats.resize(displacement + 1, (0, 0));
        }
        stats[displacement].0 += 1;

        latest_hash = iter.raw.hash;
    }
}
/// Does not initialize the buckets. The caller should ensure they,
/// at the very least, set every hash to EMPTY_BUCKET.
unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {
Expand Down

0 comments on commit 0127ceb

Please sign in to comment.