Skip to content

Commit

Permalink
implement hashing for term
Browse files Browse the repository at this point in the history
  • Loading branch information
hansihe committed Nov 29, 2021
1 parent eb0aafa commit 46494d2
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 1 deletion.
35 changes: 35 additions & 0 deletions rustler/src/term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::wrapper::NIF_TERM;
use crate::{Binary, Decoder, Env, NifResult};
use std::cmp::Ordering;
use std::fmt::{self, Debug};
use std::hash::{Hash, Hasher};

/// Term is used to represent all Erlang terms. Terms are always lifetime-limited by an Env.
///
Expand Down Expand Up @@ -95,6 +96,31 @@ impl<'a> Term<'a> {
let raw_binary = unsafe { term_to_binary(self.env.as_c_arg(), self.as_c_arg()) }.unwrap();
unsafe { OwnedBinary::from_raw(raw_binary) }
}

/// Hashes this term with the VM's internal, non-portable hash function.
///
/// The same term is only guaranteed to produce the same hash within one Erlang VM
/// instance, so the value is unsuitable wherever hashes must stay stable across instances.
///
/// `salt` is a 32-bit salt value; the returned hash lies within 0..2^32-1.
pub fn hash_internal(&self, salt: u32) -> u32 {
    // SAFETY: NOTE(review) — assumes `self` still refers to a live term of its `Env`,
    // which the `'a` lifetime on `Term` is meant to enforce — confirm.
    let digest = unsafe {
        rustler_sys::enif_hash(
            rustler_sys::ErlNifHash::ERL_NIF_INTERNAL_HASH,
            self.as_c_arg(),
            // Widening a `u32` salt to the 64-bit parameter is lossless.
            u64::from(salt),
        )
    };
    // The call returns a wider integer; narrow it to the documented 32-bit range.
    digest as u32
}

/// Hashes this term with the portable `phash2` function.
///
/// The same Erlang term yields the same hash regardless of machine architecture and
/// ERTS version.
///
/// The returned hash lies within 0..2^27-1.
pub fn hash_phash2(&self) -> u32 {
    // SAFETY: NOTE(review) — assumes `self` still refers to a live term of its `Env`,
    // which the `'a` lifetime on `Term` is meant to enforce — confirm.
    let digest = unsafe {
        rustler_sys::enif_hash(
            rustler_sys::ErlNifHash::ERL_NIF_PHASH2,
            self.as_c_arg(),
            // No salt is exposed by this method; 0 is always passed.
            0,
        )
    };
    // The call returns a wider integer; narrow it to the documented range.
    digest as u32
}
}

impl<'a> PartialEq for Term<'a> {
Expand Down Expand Up @@ -124,5 +150,14 @@ impl<'a> PartialOrd for Term<'a> {
}
}

impl Hash for Term<'_> {
    /// Feeds the term's internal VM hash into `state` as a single `u32`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // `Hasher` gives no way to read a seed back out, so the internal hash is always
        // computed with a fixed salt of 0. Not optimal, but the best available for now.
        // NOTE(review): `Hash` must agree with the `PartialEq` impl for `Term`; verify that
        // terms comparing equal there also share the same internal hash.
        let digest = self.hash_internal(0);
        state.write_u32(digest);
    }
}

unsafe impl<'a> Sync for Term<'a> {}
unsafe impl<'a> Send for Term<'a> {}
2 changes: 1 addition & 1 deletion rustler/src/types/local_pid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::wrapper::{pid, ErlNifPid};
use crate::{Decoder, Encoder, Env, Error, NifResult, Term};
use std::mem::MaybeUninit;

#[derive(Clone)]
/// Wrapper around a raw `ErlNifPid`, held in the single private field `c`.
///
/// `Copy` can only be derived because every field of the struct is itself `Copy`.
#[derive(Copy, Clone)]
pub struct LocalPid {
// Raw pid value wrapped by this type.
c: ErlNifPid,
}
Expand Down
2 changes: 2 additions & 0 deletions rustler_tests/lib/rustler_test.ex
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ defmodule RustlerTest do
# Elixir-side stubs for the `term_*` functions; each clause ignores its arguments and
# calls err().
# NOTE(review): presumably these are replaced by the native functions of the same names
# once the NIF library is loaded — confirm against the module's load hook.
def term_debug(_), do: err()
def term_eq(_, _), do: err()
def term_cmp(_, _), do: err()
def term_internal_hash(_, _), do: err()
def term_phash2_hash(_), do: err()

# Stubs for the map-related functions, following the same err() pattern.
def sum_map_values(_), do: err()
def map_entries_sorted(_), do: err()
Expand Down
2 changes: 2 additions & 0 deletions rustler_tests/native/rustler_test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ rustler::init!(
test_term::term_debug,
test_term::term_eq,
test_term::term_cmp,
test_term::term_internal_hash,
test_term::term_phash2_hash,
test_map::sum_map_values,
test_map::map_entries_sorted,
test_map::map_from_arrays,
Expand Down
10 changes: 10 additions & 0 deletions rustler_tests/native/rustler_test/src/test_term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,13 @@ pub fn term_cmp<'a>(a: Term<'a>, b: Term<'a>) -> Atom {
Ordering::Greater => atoms::greater(),
}
}

#[rustler::nif]
pub fn term_internal_hash(term: Term, salt: u32) -> u32 {
term.hash_internal(salt)
}

#[rustler::nif]
pub fn term_phash2_hash(term: Term) -> u32 {
term.hash_phash2()
}
23 changes: 23 additions & 0 deletions rustler_tests/test/term_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,27 @@ defmodule RustlerTest.TermTest do
# Other term types
assert RustlerTest.term_cmp(5, :test) == :less
end

test "term hash" do
  # The NIF's phash2 must agree with the BEAM's own :erlang.phash2/1.
  assert RustlerTest.term_phash2_hash(:foobar) == :erlang.phash2(:foobar)
  assert RustlerTest.term_phash2_hash("testing") == :erlang.phash2("testing")
  assert RustlerTest.term_phash2_hash(42) == :erlang.phash2(42)

  # Counts how many distinct hashes the integers 0..100 produce under `hash_fun`.
  distinct_hashes = fn hash_fun ->
    0..100
    |> Enum.map(hash_fun)
    |> Enum.uniq()
    |> length()
  end

  # Assume a certain distribution: more than 50 distinct values out of 101 inputs.
  assert distinct_hashes.(&RustlerTest.term_phash2_hash(&1)) > 50
  assert distinct_hashes.(&RustlerTest.term_internal_hash(&1, 0)) > 50
end
end

0 comments on commit 46494d2

Please sign in to comment.