From a27699393bfeb34b582d6f79c3d99cf5a9773f1d Mon Sep 17 00:00:00 2001
From: Akzhan Abdulin <akzhan.abdulin@gmail.com>
Date: Sat, 11 Nov 2017 21:07:46 +0300
Subject: [PATCH 1/6] Introduces real Number normalization for Crystal::Hasher.

As declared by the Crystal language reference, 1i32.hash should equal 1f64.hash.

Extracted from #4675, also replaces #4581.
---
 spec/std/crystal/hasher_spec.cr |  24 +++++-
 src/crystal/hasher.cr           | 145 +++++++++++++++++++++++++++++++-
 2 files changed, 164 insertions(+), 5 deletions(-)

diff --git a/spec/std/crystal/hasher_spec.cr b/spec/std/crystal/hasher_spec.cr
index 123909b69dcd..ebb67ef94968 100644
--- a/spec/std/crystal/hasher_spec.cr
+++ b/spec/std/crystal/hasher_spec.cr
@@ -1,5 +1,6 @@
 require "spec"
 require "bit_array"
+require "big"
 require "random/secure"
 
 struct Crystal::Hasher
@@ -51,6 +52,15 @@ describe "Crystal::Hasher" do
       2.hash.should eq(2_u64.hash)
     end
 
+    it "Big i64 numbers should be hashed ok" do
+      Int64::MAX.hash.should eq (Int64::MAX.hash)
+    end
+
+    pending "128bit types should be hashed ok" do
+      1.to_i128.hash.should eq (1_i8.hash)
+      1.to_u128.hash.should eq (1_u8.hash)
+    end
+
     it "#float should change state and differ" do
       hasher = TestHasher.for_test
       hasher1 = 1.0.hash(hasher)
@@ -191,8 +201,8 @@ describe "Crystal::Hasher" do
       hasher = TestHasher.for_test
       hasher1 = 1.0.hash(hasher)
       hasher2 = 2.0.hash(hasher)
-      hasher1.result.should eq(0xecfbe7798e8f67f2_u64)
-      hasher2.result.should eq(0x72847386c9572c30_u64)
+      hasher1.result.should eq(10728791798497425537_u64)
+      hasher2.result.should eq(12628815283865879015_u64)
     end
 
     it "#string should match test vectors" do
@@ -229,4 +239,14 @@ describe "Crystal::Hasher" do
       hasher.inspect.should_not contain(hasher.@b.to_s(16))
     end
   end
+
+  describe "normalization of numbers" do
+    it "should 1_i32 and 1_f64 hashes equal" do
+      1_i32.hash.should eq(1_f64.hash)
+    end
+
+    it "should 1_f32 and 1.to_big_f hashes equal" do
+      1_f32.hash.should eq(1.to_big_f.hash)
+    end
+  end
 end
diff --git a/src/crystal/hasher.cr b/src/crystal/hasher.cr
index 91b6feb6e4e4..f404c31cb136 100644
--- a/src/crystal/hasher.cr
+++ b/src/crystal/hasher.cr
@@ -35,6 +35,52 @@ struct Crystal::Hasher
   # Do not output calculated hash value to user's console/form/
   # html/api response, etc. Use some from digest package instead.
 
+  # Based on https://github.com/python/cpython/blob/f051e43/Python/pyhash.c#L34
+  #
+  # For numeric types, the hash of a number x is based on the reduction
+  # of x modulo the Mersenne prime P = 2**HASH_BITS - 1.  It's designed
+  # so that hash(x) == hash(y) whenever x and y are numerically equal,
+  # even if x and y have different types.
+  # A quick summary of the hashing strategy:
+  # (1) First define the 'reduction of x modulo P' for any rational
+  # number x; this is a standard extension of the usual notion of
+  # reduction modulo P for integers.  If x == p/q (written in lowest
+  # terms), the reduction is interpreted as the reduction of p times
+  # the inverse of the reduction of q, all modulo P; if q is exactly
+  # divisible by P then define the reduction to be infinity.  So we've
+  # got a well-defined map
+  #   reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
+  # (2) Now for a rational number x, define hash(x) by:
+  #   reduce(x)   if x >= 0
+  #   -reduce(-x) if x < 0
+  # If the result of the reduction is infinity (this is impossible for
+  # integers, floats and Decimals) then use the predefined hash value
+  # HASH_INF_PLUS for x >= 0, or HASH_INF_MINUS for x < 0, instead.
+  # HASH_INF_PLUS, HASH_INF_MINUS and HASH_NAN are also used for the
+  # hashes of float and Decimal infinities and nans.
+  # A selling point for the above strategy is that it makes it possible
+  # to compute hashes of decimal and binary floating-point numbers
+  # efficiently, even if the exponent of the binary or decimal number
+  # is large.  The key point is that
+  #   reduce(x * y) == reduce(x) * reduce(y) (modulo HASH_MODULUS)
+  # provided that {reduce(x), reduce(y)} != {0, infinity}.  The reduction of a
+  # binary or decimal float is never infinity, since the denominator is a power
+  # of 2 (for binary) or a divisor of a power of 10 (for decimal).  So we have,
+  # for nonnegative x,
+  #   reduce(x * 2**e) == reduce(x) * reduce(2**e) % HASH_MODULUS
+  #   reduce(x * 10**e) == reduce(x) * reduce(10**e) % HASH_MODULUS
+  # and reduce(10**e) can be computed efficiently by the usual modular
+  # exponentiation algorithm.  For reduce(2**e) it's even better: since
+  # P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
+  # by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
+
+  private HASH_BITS    = 61
+  private HASH_MODULUS = (1_i64 << HASH_BITS) - 1
+
+  private HASH_NAN       =      0_u64
+  private HASH_INF_PLUS  = 314159_u64
+  private HASH_INF_MINUS = (-314159_i64).unsafe_as(UInt64)
+
   @@seed = uninitialized UInt64[2]
   Random::Secure.random_bytes(Slice.new(pointerof(@@seed).as(UInt8*), sizeof(typeof(@@seed))))
 
@@ -75,12 +121,105 @@ struct Crystal::Hasher
     (value ? 1 : 0).hash(self)
   end
 
-  def int(value)
+  def int(value : Int8 | Int16 | Int32)
+    permute(value.to_i64.unsafe_as(UInt64))
+  end
+
+  def int(value : UInt8 | UInt16 | UInt32)
     permute(value.to_u64)
   end
 
-  def float(value)
-    permute(value.to_f64.unsafe_as(UInt64))
+  def int(value : Int::Unsigned)
+    permute(value.remainder(HASH_MODULUS).to_u64)
+  end
+
+  def int(value : Int)
+    permute(value.remainder(HASH_MODULUS).to_i64.unsafe_as(UInt64))
+  end
+
+  # This function is the reference implementation, and it is used for `BigFloat`.
+  # For `Float64` and `Float32`, all supported architectures allow a more
+  # efficient bitwise calculation.
+  # Arguments `frac` and `exp` are the result of the equivalent of `Math.frexp`,
+  # though for `BigFloat` a custom calculation is used for more precision.
+  private def float_normalize_reference(value, frac, exp)
+    if value < 0
+      frac = -frac
+    end
+    # process 28 bits at a time;  this should work well both for binary
+    # and hexadecimal floating point.
+    x = 0_i64
+    while frac > 0
+      x = ((x << 28) & HASH_MODULUS) | x >> (HASH_BITS - 28)
+      frac *= 268435456.0 # 2**28
+      exp -= 28
+      y = frac.to_u32 # pull out integer part
+      frac -= y
+      x += y
+      x -= HASH_MODULUS if x >= HASH_MODULUS
+    end
+    {x, exp}
+  end
+
+  private def float_normalize_wrap(value)
+    return HASH_NAN if value.nan?
+    if value.infinite?
+      return value > 0 ? HASH_INF_PLUS : HASH_INF_MINUS
+    end
+
+    x, exp = yield value
+
+    # adjust for the exponent;  first reduce it modulo HASH_BITS
+    exp = exp >= 0 ? exp % HASH_BITS : HASH_BITS - 1 - ((-1 - exp) % HASH_BITS)
+    x = ((x << exp) & HASH_MODULUS) | x >> (HASH_BITS - exp)
+
+    (x * (value < 0 ? -1 : 1)).to_i64.unsafe_as(UInt64)
+  end
+
+  def float(value : Float32)
+    permute(float_normalize_wrap(value) do |value|
+      # This optimized version works on every architecture where endianess
+      # of Float32 and Int32 matches and float is IEEE754. All supported
+      # architectures fall into this category.
+      unsafe_int = value.unsafe_as(Int32)
+      exp = (((unsafe_int >> 23) & 0xff) - 127)
+      mantissa = unsafe_int & ((1 << 23) - 1)
+      if exp > -127
+        exp -= 23
+        mantissa |= 1 << 23
+      else
+        # subnormals
+        exp -= 22
+      end
+      {mantissa.to_i64, exp}
+    end)
+  end
+
+  def float(value : Float64)
+    permute(float_normalize_wrap(value) do |value|
+      # This optimized version works on every architecture where endianess
+      # of Float64 and Int64 matches and float is IEEE754. All supported
+      # architectures fall into this category.
+      unsafe_int = value.unsafe_as(Int64)
+      exp = (((unsafe_int >> 52) & 0x7ff) - 1023)
+      mantissa = unsafe_int & ((1_u64 << 52) - 1)
+      if exp > -1023
+        exp -= 52
+        mantissa |= 1_u64 << 52
+      else
+        # subnormals
+        exp -= 51
+      end
+
+      {mantissa.to_i64, exp}
+    end)
+  end
+
+  def float(value : Float)
+    frac, exp = Math.frexp value
+    permute(float_normalize_wrap(value) do |value|
+      float_normalize_reference(value, frac, exp)
+    end)
   end
 
   def char(value)

From c46503e0fd30ddfeb17546719e6c83541988f48f Mon Sep 17 00:00:00 2001
From: Akzhan Abdulin <akzhan.abdulin@gmail.com>
Date: Sun, 12 Nov 2017 05:07:30 +0300
Subject: [PATCH 2/6] hash specializations for BigInt, BigFloat, BigRational.

---
 spec/std/crystal/hasher_spec.cr |  8 ++++++++
 src/big/big_float.cr            | 18 ++++++++++++++++++
 src/big/big_int.cr              | 18 ++++++++++++++++++
 src/big/big_rational.cr         | 18 ++++++++++++++++++
 src/crystal/hasher.cr           |  2 +-
 5 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/spec/std/crystal/hasher_spec.cr b/spec/std/crystal/hasher_spec.cr
index ebb67ef94968..c8f1b25be257 100644
--- a/spec/std/crystal/hasher_spec.cr
+++ b/spec/std/crystal/hasher_spec.cr
@@ -248,5 +248,13 @@ describe "Crystal::Hasher" do
     it "should 1_f32 and 1.to_big_f hashes equal" do
       1_f32.hash.should eq(1.to_big_f.hash)
     end
+
+    it "should 1_f32 and 1.to_big_r hashes equal" do
+      1_f32.hash.should eq(1.to_big_r.hash)
+    end
+
+    it "should 1_f32 and 1.to_big_i hashes equal" do
+      1_f32.hash.should eq(1.to_big_i.hash)
+    end
   end
 end
diff --git a/src/big/big_float.cr b/src/big/big_float.cr
index 9efdecaa20a6..a16017a05831 100644
--- a/src/big/big_float.cr
+++ b/src/big/big_float.cr
@@ -293,3 +293,21 @@ module Math
     BigFloat.new { |mpf| LibGMP.mpf_sqrt(mpf, value) }
   end
 end
+
+# :nodoc:
+struct Crystal::Hasher
+  def float(value : BigFloat)
+    permute(float_normalize_wrap(value) do |value|
+      # more exact version of `Math.frexp`
+      LibGMP.mpf_get_d_2exp(out exp, value)
+      frac = BigFloat.new do |mpf|
+        if exp >= 0
+          LibGMP.mpf_div_2exp(mpf, value, exp)
+        else
+          LibGMP.mpf_mul_2exp(mpf, value, -exp)
+        end
+      end
+      float_normalize_reference(value, frac, exp)
+    end)
+  end
+end
diff --git a/src/big/big_int.cr b/src/big/big_int.cr
index d73f97b5928b..1513028c719d 100644
--- a/src/big/big_int.cr
+++ b/src/big/big_int.cr
@@ -551,3 +551,21 @@ module Math
     sqrt(value.to_big_f)
   end
 end
+
+# :nodoc:
+struct Crystal::Hasher
+  private HASH_MODULUS_INT_P = BigInt.new((1_u64 << HASH_BITS) - 1)
+  private HASH_MODULUS_INT_N = -BigInt.new((1_u64 << HASH_BITS) - 1)
+
+  def int(value : BigInt)
+    # it should calculate `remainder(HASH_MODULUS)`
+    if LibGMP::ULong == UInt64
+      v = LibGMP.tdiv_ui(value, HASH_MODULUS).to_i64
+      value < 0 ? -v : v
+    elsif value >= HASH_MODULUS_INT_P || value <= HASH_MODULUS_INT_N
+      value.unsafe_truncated_mod(HASH_MODULUS_INT_P).to_i64
+    else
+      value.to_i64
+    end
+  end
+end
diff --git a/src/big/big_rational.cr b/src/big/big_rational.cr
index e6cd39640a40..8949c8077f2c 100644
--- a/src/big/big_rational.cr
+++ b/src/big/big_rational.cr
@@ -275,3 +275,21 @@ module Math
     sqrt(value.to_big_f)
   end
 end
+
+# :nodoc:
+struct Crystal::Hasher
+  private HASH_MODULUS_RAT_P = BigRational.new((1_u64 << HASH_BITS) - 1)
+  private HASH_MODULUS_RAT_N = -BigRational.new((1_u64 << HASH_BITS) - 1)
+
+  def float(value : BigRational)
+    rem = value
+    if value >= HASH_MODULUS_RAT_P || value <= HASH_MODULUS_RAT_N
+      num = value.numerator
+      denom = value.denominator
+      div = num.tdiv(denom)
+      floor = div.tdiv(HASH_MODULUS)
+      rem -= floor * HASH_MODULUS
+    end
+    rem.to_big_f.hash
+  end
+end
diff --git a/src/crystal/hasher.cr b/src/crystal/hasher.cr
index f404c31cb136..3e9808970aa9 100644
--- a/src/crystal/hasher.cr
+++ b/src/crystal/hasher.cr
@@ -216,8 +216,8 @@ struct Crystal::Hasher
   end
 
   def float(value : Float)
-    frac, exp = Math.frexp value
     permute(float_normalize_wrap(value) do |value|
+      frac, exp = Math.frexp value
       float_normalize_reference(value, frac, exp)
     end)
   end

From 46b2eef06f132220452994991a4589e3ba5a33bc Mon Sep 17 00:00:00 2001
From: Akzhan Abdulin <akzhan.abdulin@gmail.com>
Date: Mon, 13 Nov 2017 20:02:10 +0300
Subject: [PATCH 3/6] follow @luislavena

---
 spec/std/crystal/hasher_spec.cr | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spec/std/crystal/hasher_spec.cr b/spec/std/crystal/hasher_spec.cr
index c8f1b25be257..fb4b5e568e7a 100644
--- a/spec/std/crystal/hasher_spec.cr
+++ b/spec/std/crystal/hasher_spec.cr
@@ -53,12 +53,12 @@ describe "Crystal::Hasher" do
     end
 
     it "Big i64 numbers should be hashed ok" do
-      Int64::MAX.hash.should eq (Int64::MAX.hash)
+      Int64::MAX.hash.should eq(Int64::MAX.hash)
     end
 
     pending "128bit types should be hashed ok" do
-      1.to_i128.hash.should eq (1_i8.hash)
-      1.to_u128.hash.should eq (1_u8.hash)
+      1.to_i128.hash.should eq(1_i8.hash)
+      1.to_u128.hash.should eq(1_u8.hash)
     end
 
     it "#float should change state and differ" do

From 252539c89fd7175f7085e6dcd86d4caf1335d4a5 Mon Sep 17 00:00:00 2001
From: Akzhan Abdulin <akzhan.abdulin@gmail.com>
Date: Sat, 18 Nov 2017 19:55:34 +0300
Subject: [PATCH 4/6] Followed @RX14

---
 src/crystal/hasher.cr | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/crystal/hasher.cr b/src/crystal/hasher.cr
index 3e9808970aa9..333b3bca9f93 100644
--- a/src/crystal/hasher.cr
+++ b/src/crystal/hasher.cr
@@ -177,7 +177,7 @@ struct Crystal::Hasher
   end
 
   def float(value : Float32)
-    permute(float_normalize_wrap(value) do |value|
+    norm_hash = float_normalize_wrap(value) do |value|
       # This optimized version works on every architecture where endianess
       # of Float32 and Int32 matches and float is IEEE754. All supported
       # architectures fall into this category.
@@ -192,11 +192,12 @@ struct Crystal::Hasher
         exp -= 22
       end
       {mantissa.to_i64, exp}
-    end)
+    end
+    permute(norm_hash)
   end
 
   def float(value : Float64)
-    permute(float_normalize_wrap(value) do |value|
+    norm_hash = float_normalize_wrap(value) do |value|
       # This optimized version works on every architecture where endianess
       # of Float64 and Int64 matches and float is IEEE754. All supported
       # architectures fall into this category.
@@ -212,14 +213,16 @@ struct Crystal::Hasher
       end
 
       {mantissa.to_i64, exp}
-    end)
+    end
+    permute(norm_hash)
   end
 
   def float(value : Float)
-    permute(float_normalize_wrap(value) do |value|
+    norm_hash = float_normalize_wrap(value) do |value|
       frac, exp = Math.frexp value
       float_normalize_reference(value, frac, exp)
-    end)
+    end
+    permute(norm_hash)
   end
 
   def char(value)

From c92aa1784a4224f8d5e1ac6f71cb1d3e25fdc52d Mon Sep 17 00:00:00 2001
From: Akzhan Abdulin <akzhan.abdulin@gmail.com>
Date: Sat, 18 Nov 2017 20:47:28 +0300
Subject: [PATCH 5/6] normalized_hash

---
 src/crystal/hasher.cr | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/crystal/hasher.cr b/src/crystal/hasher.cr
index 333b3bca9f93..92de7044b1ff 100644
--- a/src/crystal/hasher.cr
+++ b/src/crystal/hasher.cr
@@ -177,7 +177,7 @@ struct Crystal::Hasher
   end
 
   def float(value : Float32)
-    norm_hash = float_normalize_wrap(value) do |value|
+    normalized_hash = float_normalize_wrap(value) do |value|
       # This optimized version works on every architecture where endianess
       # of Float32 and Int32 matches and float is IEEE754. All supported
       # architectures fall into this category.
@@ -193,11 +193,11 @@ struct Crystal::Hasher
       end
       {mantissa.to_i64, exp}
     end
-    permute(norm_hash)
+    permute(normalized_hash)
   end
 
   def float(value : Float64)
-    norm_hash = float_normalize_wrap(value) do |value|
+    normalized_hash = float_normalize_wrap(value) do |value|
       # This optimized version works on every architecture where endianess
       # of Float64 and Int64 matches and float is IEEE754. All supported
       # architectures fall into this category.
@@ -214,15 +214,15 @@ struct Crystal::Hasher
 
       {mantissa.to_i64, exp}
     end
-    permute(norm_hash)
+    permute(normalized_hash)
   end
 
   def float(value : Float)
-    norm_hash = float_normalize_wrap(value) do |value|
+    normalized_hash = float_normalize_wrap(value) do |value|
       frac, exp = Math.frexp value
       float_normalize_reference(value, frac, exp)
     end
-    permute(norm_hash)
+    permute(normalized_hash)
   end
 
   def char(value)

From d6d295272ef6638ef4c865df5446282f053fafd9 Mon Sep 17 00:00:00 2001
From: Akzhan Abdulin <akzhan.abdulin@gmail.com>
Date: Sat, 18 Nov 2017 23:54:51 +0300
Subject: [PATCH 6/6] oops @Sija :)

---
 src/big/big_float.cr | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/big/big_float.cr b/src/big/big_float.cr
index a16017a05831..a428ff04e04a 100644
--- a/src/big/big_float.cr
+++ b/src/big/big_float.cr
@@ -297,7 +297,7 @@ end
 # :nodoc:
 struct Crystal::Hasher
   def float(value : BigFloat)
-    permute(float_normalize_wrap(value) do |value|
+    normalized_hash = float_normalize_wrap(value) do |value|
       # more exact version of `Math.frexp`
       LibGMP.mpf_get_d_2exp(out exp, value)
       frac = BigFloat.new do |mpf|
@@ -308,6 +308,7 @@ struct Crystal::Hasher
         end
       end
       float_normalize_reference(value, frac, exp)
-    end)
+    end
+    permute(normalized_hash)
   end
 end