From 46494d261cbedd3c798f584459e42ab7ee6ea1f4 Mon Sep 17 00:00:00 2001 From: "Hans Elias B. Josephsen" Date: Wed, 27 Feb 2019 13:14:57 +0100 Subject: [PATCH] implement hashing for term --- rustler/src/term.rs | 35 +++++++++++++++++++ rustler/src/types/local_pid.rs | 2 +- rustler_tests/lib/rustler_test.ex | 2 ++ rustler_tests/native/rustler_test/src/lib.rs | 2 ++ .../native/rustler_test/src/test_term.rs | 10 ++++++ rustler_tests/test/term_test.exs | 23 ++++++++++++ 6 files changed, 73 insertions(+), 1 deletion(-) diff --git a/rustler/src/term.rs b/rustler/src/term.rs index 3158978c..d9a3e149 100644 --- a/rustler/src/term.rs +++ b/rustler/src/term.rs @@ -4,6 +4,7 @@ use crate::wrapper::NIF_TERM; use crate::{Binary, Decoder, Env, NifResult}; use std::cmp::Ordering; use std::fmt::{self, Debug}; +use std::hash::{Hash, Hasher}; /// Term is used to represent all erlang terms. Terms are always lifetime limited by a Env. /// @@ -95,6 +96,31 @@ impl<'a> Term<'a> { let raw_binary = unsafe { term_to_binary(self.env.as_c_arg(), self.as_c_arg()) }.unwrap(); unsafe { OwnedBinary::from_raw(raw_binary) } } + + /// Non-portable hash function that only guarantees the same hash for the same term within + /// one Erlang VM instance. + /// + /// It takes 32-bit salt values and generates hashes within 0..2^32-1. + pub fn hash_internal(&self, salt: u32) -> u32 { + unsafe { + rustler_sys::enif_hash( + rustler_sys::ErlNifHash::ERL_NIF_INTERNAL_HASH, + self.as_c_arg(), + salt as u64, + ) as u32 + } + } + + /// Portable hash function that gives the same hash for the same Erlang term regardless of + /// machine architecture and ERTS version. + /// + /// It generates hashes within 0..2^27-1. 
+ pub fn hash_phash2(&self) -> u32 { + unsafe { + rustler_sys::enif_hash(rustler_sys::ErlNifHash::ERL_NIF_PHASH2, self.as_c_arg(), 0) + as u32 + } + } } impl<'a> PartialEq for Term<'a> { @@ -124,5 +150,14 @@ impl<'a> PartialOrd for Term<'a> { } } +impl<'a> Hash for Term<'a> { + fn hash<H: Hasher>(&self, state: &mut H) { + // As far as I can see, there is really no way + // to get a seed from the hasher. This is definitely + // not optimal, but it's the best we can do for now. + state.write_u32(self.hash_internal(0)); + } +} + unsafe impl<'a> Sync for Term<'a> {} unsafe impl<'a> Send for Term<'a> {} diff --git a/rustler/src/types/local_pid.rs b/rustler/src/types/local_pid.rs index c1b50779..d5ee1980 100644 --- a/rustler/src/types/local_pid.rs +++ b/rustler/src/types/local_pid.rs @@ -2,7 +2,7 @@ use crate::wrapper::{pid, ErlNifPid}; use crate::{Decoder, Encoder, Env, Error, NifResult, Term}; use std::mem::MaybeUninit; -#[derive(Clone)] +#[derive(Copy, Clone)] pub struct LocalPid { c: ErlNifPid, } diff --git a/rustler_tests/lib/rustler_test.ex b/rustler_tests/lib/rustler_test.ex index d3b16a1b..ec351e21 100644 --- a/rustler_tests/lib/rustler_test.ex +++ b/rustler_tests/lib/rustler_test.ex @@ -23,6 +23,8 @@ defmodule RustlerTest do def term_debug(_), do: err() def term_eq(_, _), do: err() def term_cmp(_, _), do: err() + def term_internal_hash(_, _), do: err() + def term_phash2_hash(_), do: err() def sum_map_values(_), do: err() def map_entries_sorted(_), do: err() diff --git a/rustler_tests/native/rustler_test/src/lib.rs b/rustler_tests/native/rustler_test/src/lib.rs index 8dadead8..402637f5 100644 --- a/rustler_tests/native/rustler_test/src/lib.rs +++ b/rustler_tests/native/rustler_test/src/lib.rs @@ -26,6 +26,8 @@ rustler::init!( test_term::term_debug, test_term::term_eq, test_term::term_cmp, + test_term::term_internal_hash, + test_term::term_phash2_hash, test_map::sum_map_values, test_map::map_entries_sorted, test_map::map_from_arrays, diff --git 
a/rustler_tests/native/rustler_test/src/test_term.rs b/rustler_tests/native/rustler_test/src/test_term.rs index e23eb4db..a499d80f 100644 --- a/rustler_tests/native/rustler_test/src/test_term.rs +++ b/rustler_tests/native/rustler_test/src/test_term.rs @@ -30,3 +30,13 @@ pub fn term_cmp<'a>(a: Term<'a>, b: Term<'a>) -> Atom { Ordering::Greater => atoms::greater(), } } + +#[rustler::nif] +pub fn term_internal_hash(term: Term, salt: u32) -> u32 { + term.hash_internal(salt) +} + +#[rustler::nif] +pub fn term_phash2_hash(term: Term) -> u32 { + term.hash_phash2() +} diff --git a/rustler_tests/test/term_test.exs b/rustler_tests/test/term_test.exs index 5c54c1d8..4a68f75e 100644 --- a/rustler_tests/test/term_test.exs +++ b/rustler_tests/test/term_test.exs @@ -48,4 +48,27 @@ defmodule RustlerTest.TermTest do # Other term types assert RustlerTest.term_cmp(5, :test) == :less end + + test "term hash" do + assert RustlerTest.term_phash2_hash(:foobar) == :erlang.phash2(:foobar) + assert RustlerTest.term_phash2_hash("testing") == :erlang.phash2("testing") + assert RustlerTest.term_phash2_hash(42) == :erlang.phash2(42) + + # Assume a certain distribution + unique = + 0..100 + |> Enum.map(&RustlerTest.term_phash2_hash(&1)) + |> Enum.group_by(fn n -> n end, fn n -> n end) + |> map_size + + assert unique > 50 + + unique = + 0..100 + |> Enum.map(&RustlerTest.term_internal_hash(&1, 0)) + |> Enum.group_by(fn n -> n end, fn n -> n end) + |> map_size + + assert unique > 50 + end end