change computation of hash value.

To protect against Hash DoS, change the way the hash value is computed. A Class|Struct should define the method `def hash(hasher)` and call `hasher << @ivar` inside it. As an option, for speed and for backward compatibility, `def hash` can still be implemented; it will be used for a Hash of the matching type. `Thread#hash` and `Signal#hash` are implemented as unseeded because they are used before `StdHasher @@seed` is initialized. In general, though, it is better to implement `def hash(hasher)`. StdHasher is the default hasher; it uses `hash(hasher)` and serves as the default seeded hasher. It also implements `unseeded` for `Enums`. Also, number normalization for hashing is introduced, i.e. the rule 'equality forces hash equality' is enforced (`a == b` => `a.hash == b.hash`). The normalization idea is borrowed from the Python implementation. (idea by Akzhan Abdulin @akzhan) Fixes crystal-lang#4578 Prerequisite for crystal-lang#4557 Replaces crystal-lang#4581
funny-falcon · Jul 5, 2017 · 3856136 · 3856136
1 parent d24f79c
commit 3856136
Show file tree

Hide file tree

Showing 43 changed files with 617 additions and 138 deletions.
diff --git a/spec/std/big/big_int_spec.cr b/spec/std/big/big_int_spec.cr
@@ -287,8 +287,7 @@ describe "BigInt" do
 
   it "#hash" do
     hash = 5.to_big_i.hash
-    hash.should eq(5)
-    typeof(hash).should eq(UInt64)
+    hash.should eq(5.hash)
   end
 
   it "clones" do

diff --git a/spec/std/bool_spec.cr b/spec/std/bool_spec.cr
@@ -28,8 +28,9 @@ describe "Bool" do
   end
 
   describe "hash" do
-    it { true.hash.should eq(1) }
-    it { false.hash.should eq(0) }
+    it { true.hash.should eq(true.hash) }
+    it { false.hash.should eq(false.hash) }
+    it { true.hash.should_not eq(false.hash) }
   end
 
   describe "to_s" do

diff --git a/spec/std/enum_spec.cr b/spec/std/enum_spec.cr
@@ -142,7 +142,7 @@ describe Enum do
   end
 
   it "has hash" do
-    SpecEnum::Two.hash.should eq(1.hash)
+    SpecEnum::Two.hash.should_not eq(SpecEnum::One.hash)
   end
 
   it "parses" do

diff --git a/spec/std/hash_spec.cr b/spec/std/hash_spec.cr
@@ -145,8 +145,8 @@ describe "Hash" do
       end
     end
 
-    it "works with mixed types" do
-      {1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0})
+    it "works with mixed types and normalized numbers" do
+      {1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, 2, "a", 1.0, 2.0, :a).should eq({:a, "a", 1, :a, "a", 1.0})
     end
   end
 

diff --git a/spec/std/struct_spec.cr b/spec/std/struct_spec.cr
@@ -42,11 +42,14 @@ describe "Struct" do
 
   it "does hash" do
     s = StructSpec::TestClass.new(1, "hello")
-    s.hash.should eq(31 + "hello".hash)
+    hasher = StdHasher.new
+    hasher << 1
+    hasher << "hello"
+    s.hash.should eq(hasher.digest)
   end
 
   it "does hash for struct wrapper (#1940)" do
-    StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0)
+    StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(BigInt.new(0).hash)
   end
 
   it "does dup" do

diff --git a/spec/std/time/span_spec.cr b/spec/std/time/span_spec.cr
@@ -176,7 +176,7 @@ describe Time::Span do
   end
 
   it "test hash code" do
-    Time::Span.new(77).hash.should eq(77)
+    Time::Span.new(77).hash.should eq(77.hash)
   end
 
   it "test subtract" do

diff --git a/src/big/big_float.cr b/src/big/big_float.cr
@@ -17,6 +17,18 @@ struct BigFloat < Float
     LibGMP.mpf_init_set_str(out @mpf, str, 10)
   end
 
+  # Creates a `BigFloat` holding the value of the `BigInt` *num*.
+  #
+  # The float is first initialized with `mpf_init` and then assigned from
+  # the integer with `mpf_set_z`.
+  def initialize(num : BigInt)
+    # Probably should detect precision and use mpf_init2
+    LibGMP.mpf_init(out @mpf)
+    LibGMP.mpf_set_z(self, num)
+  end
+
+  # Creates a `BigFloat` holding the value of the `BigRational` *num*.
+  #
+  # The float is first initialized with `mpf_init` and then assigned from
+  # the rational with `mpf_set_q`.
+  def initialize(num : BigRational)
+    # Probably should detect precision and use mpf_init2
+    LibGMP.mpf_init(out @mpf)
+    LibGMP.mpf_set_q(self, num)
+  end
+
   def initialize(num : Number)
     LibGMP.mpf_init_set_d(out @mpf, num.to_f64)
   end
@@ -35,8 +47,8 @@ struct BigFloat < Float
     new(mpf)
   end
 
-  def hash
-    to_f64.hash
+  def hash_normalize
+    remainder(HASH_MODULUS).to_f64.hash_normalize
   end
 
   def self.default_precision

diff --git a/src/big/big_int.cr b/src/big/big_int.cr
@@ -267,8 +267,16 @@ struct BigInt < Int
     to_s io
   end
 
-  def hash
-    to_u64
+  # Reduces this integer for number-hash normalization.
+  #
+  # Returns the remainder of `self` divided by `HASH_MODULUS`, carrying the
+  # sign of `self`, as `Int32` when `HASH_BITS == 31` and `Int64` otherwise.
+  def hash_normalize
+    # remainder(HASH_MODULUS)
+    # GMP's mpz_tdiv_ui returns the absolute value of the remainder.
+    uv = LibGMP.tdiv_ui(self, HASH_MODULUS)
+    v =
+      {% if HASH_BITS == 31 %}
+        uv.to_i32
+      {% else %}
+        uv.to_i64
+      {% end %}
+    # Re-apply the sign that tdiv_ui dropped.
+    self < 0 ? -v : v
   end
 
   # Returns a string representation of self.

diff --git a/src/big/big_rational.cr b/src/big/big_rational.cr
@@ -41,6 +41,22 @@ struct BigRational < Number
     initialize(num, 1)
   end
 
+  # Creates an exact representation of a float as a rational.
+  #
+  # It ensures that `BigRational.new(f) == f`.
+  # It relies on the fact that the mantissa is at most 53 bits.
+  def initialize(num : Float32 | Float64)
+    frac, exp = Math.frexp num
+    ifrac = (frac.to_f64 * (1.to_i64 << 53).to_f64).to_i64
+    exp -= 53
+    initialize ifrac, 1
+    if exp > 0
+      LibGMP.mpq_mul_2exp(out @mpq, self, exp)
+    elsif exp < 0
+      LibGMP.mpq_div_2exp(out @mpq, self, -exp)
+    end
+  end
+
   # :nodoc:
   def initialize(@mpq : LibGMP::MPQ)
   end
@@ -64,8 +80,12 @@ struct BigRational < Number
     LibGMP.mpq_cmp(mpq, other)
   end
 
+  def <=>(other : Float32 | Float64)
+    self <=> BigRational.new(other)
+  end
+
   def <=>(other : Float)
-    self.to_f <=> other
+    BigFloat.new(self) <=> BigFloat.new(other)
   end
 
   def <=>(other : Int)
@@ -139,8 +159,14 @@ struct BigRational < Number
     BigRational.new { |mpq| LibGMP.mpq_abs(mpq, self) }
   end
 
-  def hash
-    to_f64.hash
+  # Reduces this rational for number-hash normalization: removes the whole
+  # multiples of `HASH_MODULUS` contained in its integer part, converts the
+  # rest with `to_f` and delegates to that float's `hash_normalize`.
+  def hash_normalize
+    # self.remainder(HASH_MODULUS).to_f.hash_normalize
+    num = numerator
+    denom = denominator
+    # Integer part of self (numerator divided by denominator via tdiv).
+    div = num.tdiv(denom)
+    # How many whole HASH_MODULUS multiples fit in that integer part.
+    floor = div.tdiv(HASH_MODULUS)
+    rem = self - floor * HASH_MODULUS
+    rem.to_f.hash_normalize
   end
 
   # Returns the `Float64` representing this rational.

diff --git a/src/big/lib_gmp.cr b/src/big/lib_gmp.cr
@@ -63,6 +63,7 @@ lib LibGMP
 
   fun tdiv_r = __gmpz_tdiv_r(rop : MPZ*, op1 : MPZ*, op2 : MPZ*)
   fun tdiv_r_ui = __gmpz_tdiv_r_ui(rop : MPZ*, op1 : MPZ*, op2 : ULong)
+  fun tdiv_ui = __gmpz_tdiv_ui(op1 : MPZ*, op2 : ULong) : ULong
 
   fun neg = __gmpz_neg(rop : MPZ*, op : MPZ*)
   fun abs = __gmpz_abs(rop : MPZ*, op : MPZ*)
@@ -152,6 +153,8 @@ lib LibGMP
   fun mpf_get_str = __gmpf_get_str(str : UInt8*, expptr : MpExp*, base : Int, n_digits : LibC::SizeT, op : MPF*) : UInt8*
   fun mpf_get_d = __gmpf_get_d(op : MPF*) : Double
   fun mpf_set_d = __gmpf_set_d(op : MPF*, op : Double)
+  fun mpf_set_z = __gmpf_set_z(op : MPF*, op : MPZ*)
+  fun mpf_set_q = __gmpf_set_q(op : MPF*, op : MPQ*)
   fun mpf_get_si = __gmpf_get_si(op : MPF*) : Long
   fun mpf_get_ui = __gmpf_get_ui(op : MPF*) : ULong
   fun mpf_ceil = __gmpf_ceil(rop : MPF*, op : MPF*)

diff --git a/src/bool.cr b/src/bool.cr
@@ -41,9 +41,10 @@ struct Bool
     self != other
   end
 
-  # Returns a hash value for this boolean: 0 for `false`, 1 for `true`.
-  def hash
-    self ? 1 : 0
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher << (self ? 1 : 0)
+    hasher
   end
 
   # Returns `"true"` for `true` and `"false"` for `false`.

diff --git a/src/char.cr b/src/char.cr
@@ -419,6 +419,12 @@ struct Char
     ord
   end
 
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw ord
+    hasher
+  end
+
   # Returns a Char that is one codepoint bigger than this char's codepoint.
   #
   # ```

diff --git a/src/class.cr b/src/class.cr
@@ -3,8 +3,9 @@ class Class
     to_s(io)
   end
 
-  def hash
-    crystal_type_id
+  def hash(hasher)
+    hasher.raw(crystal_type_id)
+    hasher
   end
 
   def ==(other : Class)

diff --git a/src/compiler/crystal/syntax/ast.cr b/src/compiler/crystal/syntax/ast.cr
@@ -1175,8 +1175,9 @@ module Crystal
       self
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 
@@ -1545,8 +1546,9 @@ module Crystal
       Self.new
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 
@@ -2025,8 +2027,9 @@ module Crystal
       Underscore.new
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 

diff --git a/src/enum.cr b/src/enum.cr
@@ -274,9 +274,10 @@ struct Enum
     value == other.value
   end
 
-  # Returns a hash value. This is the hash of the underlying value.
-  def hash
-    value.hash
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw(value)
+    hasher
   end
 
   # Iterates each values in a Flags Enum.

diff --git a/src/event/signal_handler.cr b/src/event/signal_handler.cr
@@ -1,5 +1,6 @@
 require "c/signal"
 require "c/unistd"
+require "signal"
 
 # :nodoc:
 # Singleton that runs Signal events (libevent2) in it's own Fiber.

diff --git a/src/float.cr b/src/float.cr
@@ -1,6 +1,7 @@
 require "c/stdio"
 require "c/string"
 require "./float/printer"
+require "./number/hash_normalize"
 
 # Float is the base type of all floating point numbers.
 #
@@ -148,13 +149,33 @@ struct Float32
     Printer.print(self, io)
   end
 
-  def hash
-    unsafe_as(Int32)
-  end
-
   def clone
     self
   end
+
+  include Number::HashNormalize
+
+  # Decomposes this float into an integer `{mantissa, exponent}` pair for
+  # number-hash normalization. The pair is the result of the block given to
+  # `float_normalize_wrap` (presumably provided by `Number::HashNormalize`,
+  # which is not shown here).
+  def hash_normalize
+    float_normalize_wrap do
+      # NOTE: the compile-time flag check is the macro method `flag?`; a bare
+      # `flag(...)` is not a macro method and only went unnoticed because
+      # `||` short-circuits on x86/x86_64.
+      {% if flag?(:x86) || flag?(:x86_64) || flag?(:arm) || flag?(:aarch64) %}
+        # it should work on every architecture where endianness of Float32 and Int32
+        # matches and float is IEEE754.
+        unsafe_int = unsafe_as(Int32)
+        # 8-bit biased exponent field, unbiased by 127.
+        exp = (((unsafe_int >> 23) & 0xff) - 127)
+        # 23-bit fraction field.
+        mantissa = unsafe_int & ((1 << 23) - 1)
+        if exp > -127
+          # normal number: scale by the fraction width and restore the
+          # implicit leading bit.
+          exp -= 23
+          mantissa |= 1 << 23
+        else
+          # subnormals
+          exp -= 22
+        end
+        {mantissa, exp}
+      {% else %}
+        float_normalize_reference
+      {% end %}
+    end
+  end
 end
 
 struct Float64
@@ -206,11 +227,36 @@ struct Float64
     Printer.print(self, io)
   end
 
-  def hash
-    unsafe_as(Int64)
-  end
-
   def clone
     self
   end
+
+  include Number::HashNormalize
+
+  # Decomposes this float into an integer `{mantissa, exponent}` pair for
+  # number-hash normalization. The pair is the result of the block given to
+  # `float_normalize_wrap` (presumably provided by `Number::HashNormalize`,
+  # which is not shown here).
+  def hash_normalize
+    float_normalize_wrap do
+      # NOTE: the compile-time flag check is the macro method `flag?`; a bare
+      # `flag(...)` is not a macro method and only went unnoticed because
+      # `||` short-circuits on x86/x86_64.
+      {% if flag?(:x86) || flag?(:x86_64) || flag?(:arm) || flag?(:aarch64) %}
+        # it should work on every architecture where endianness of Float64 and Int64
+        # matches and float is IEEE754.
+        unsafe_int = unsafe_as(Int64)
+        # 11-bit biased exponent field, unbiased by 1023.
+        exp = (((unsafe_int >> 52) & 0x7ff) - 1023)
+        # 52-bit fraction field.
+        mantissa = unsafe_int & ((1_u64 << 52) - 1)
+        if exp > -1023
+          # normal number: scale by the fraction width and restore the
+          # implicit leading bit.
+          exp -= 52
+          mantissa |= 1_u64 << 52
+        else
+          # subnormals
+          exp -= 51
+        end
+
+        # With a 31-bit hash the mantissa is reduced up front.
+        {% if HASH_BITS == 31 %}
+          mantissa %= HASH_MODULUS
+        {% end %}
+
+        {mantissa, exp}
+      {% else %}
+        float_normalize_reference
+      {% end %}
+    end
+  end
 end