From 38561367fec8f068ab8ad4271e7bc8011575e0e7 Mon Sep 17 00:00:00 2001
From: Sokolov Yura aka funny_falcon <funny.falcon@gmail.com>
Date: Sun, 25 Jun 2017 22:59:52 +0300
Subject: [PATCH] change computation of hash value.

To protect against Hash DoS, change the way hash value is computed.
Class|Struct should define method `def hash(hasher)` and call
`hasher << @ivar` inside.

As an option, for speed, and for backward compatibility, `def hash`
still could be implemented. It will be used for Hash of matched type.
`Thread#hash` and `Signal#hash` is implemented as unseeded cause they are
 used before `StdHasher @@seed` is initialized.

But it is better to implement `def hash(hasher)`.

StdHasher is default hasher that uses `hash(hasher)` and it is used as default
seeded hasher. It also implements `unseeded` for `Enums`.

Also, number normalization for hashing introduced, ie rule 'equality
forces hash equality' is forced (`a == b` => `a.hash == b.hash`).
Normalization idea is borrowed from Python implementation.
(idea by Akzhan Abdulin @akzhan)

Fixes #4578
Prerequisite for #4557
Replaces #4581
---
 spec/std/big/big_int_spec.cr       |   3 +-
 spec/std/bool_spec.cr              |   5 +-
 spec/std/enum_spec.cr              |   2 +-
 spec/std/hash_spec.cr              |   4 +-
 spec/std/struct_spec.cr            |   7 +-
 spec/std/time/span_spec.cr         |   2 +-
 src/big/big_float.cr               |  16 ++-
 src/big/big_int.cr                 |  12 +-
 src/big/big_rational.cr            |  32 ++++-
 src/big/lib_gmp.cr                 |   3 +
 src/bool.cr                        |   7 +-
 src/char.cr                        |   6 +
 src/class.cr                       |   5 +-
 src/compiler/crystal/syntax/ast.cr |  15 ++-
 src/enum.cr                        |   7 +-
 src/event/signal_handler.cr        |   1 +
 src/float.cr                       |  62 +++++++--
 src/hash.cr                        |  21 ++-
 src/http/headers.cr                |  13 +-
 src/indexable.cr                   |  12 +-
 src/int.cr                         |  44 ++++++-
 src/json/any.cr                    |   5 -
 src/named_tuple.cr                 |  14 +-
 src/nil.cr                         |   7 +-
 src/number.cr                      |  21 +++
 src/number/hash_normalize.cr       |  95 ++++++++++++++
 src/object.cr                      |  39 ++++--
 src/prelude.cr                     |   1 +
 src/proc.cr                        |   5 +-
 src/reference.cr                   |   7 +-
 src/set.cr                         |   4 -
 src/signal.cr                      |   7 +
 src/stdhasher.cr                   | 199 +++++++++++++++++++++++++++++
 src/string.cr                      |  13 +-
 src/struct.cr                      |   9 +-
 src/symbol.cr                      |   9 +-
 src/thread.cr                      |   6 +
 src/time.cr                        |   4 -
 src/tuple.cr                       |  11 +-
 src/xml/namespace.cr               |   5 +-
 src/xml/node.cr                    |   5 +-
 src/xml/node_set.cr                |   5 +-
 src/yaml/any.cr                    |   5 -
 43 files changed, 617 insertions(+), 138 deletions(-)
 create mode 100644 src/number/hash_normalize.cr
 create mode 100644 src/stdhasher.cr

diff --git a/spec/std/big/big_int_spec.cr b/spec/std/big/big_int_spec.cr
index b9ecfa13aa6f..7de5a4d8c8de 100644
--- a/spec/std/big/big_int_spec.cr
+++ b/spec/std/big/big_int_spec.cr
@@ -287,8 +287,7 @@ describe "BigInt" do
 
   it "#hash" do
     hash = 5.to_big_i.hash
-    hash.should eq(5)
-    typeof(hash).should eq(UInt64)
+    hash.should eq(5.hash)
   end
 
   it "clones" do
diff --git a/spec/std/bool_spec.cr b/spec/std/bool_spec.cr
index 960ed8653f33..fc5eb6e548c6 100644
--- a/spec/std/bool_spec.cr
+++ b/spec/std/bool_spec.cr
@@ -28,8 +28,9 @@ describe "Bool" do
   end
 
   describe "hash" do
-    it { true.hash.should eq(1) }
-    it { false.hash.should eq(0) }
+    it { true.hash.should eq(true.hash) }
+    it { false.hash.should eq(false.hash) }
+    it { true.hash.should_not eq(false.hash) }
   end
 
   describe "to_s" do
diff --git a/spec/std/enum_spec.cr b/spec/std/enum_spec.cr
index ff55cdd6e014..a3c17aaf6575 100644
--- a/spec/std/enum_spec.cr
+++ b/spec/std/enum_spec.cr
@@ -142,7 +142,7 @@ describe Enum do
   end
 
   it "has hash" do
-    SpecEnum::Two.hash.should eq(1.hash)
+    SpecEnum::Two.hash.should_not eq(SpecEnum::One.hash)
   end
 
   it "parses" do
diff --git a/spec/std/hash_spec.cr b/spec/std/hash_spec.cr
index 4097a4aefecb..328df1564b38 100644
--- a/spec/std/hash_spec.cr
+++ b/spec/std/hash_spec.cr
@@ -145,8 +145,8 @@ describe "Hash" do
       end
     end
 
-    it "works with mixed types" do
-      {1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0})
+    it "works with mixed types and normalized numbers" do
+      {1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, 2, "a", 1.0, 2.0, :a).should eq({:a, "a", 1, :a, "a", 1.0})
     end
   end
 
diff --git a/spec/std/struct_spec.cr b/spec/std/struct_spec.cr
index f266068d92e6..79a40c1de934 100644
--- a/spec/std/struct_spec.cr
+++ b/spec/std/struct_spec.cr
@@ -42,11 +42,14 @@ describe "Struct" do
 
   it "does hash" do
     s = StructSpec::TestClass.new(1, "hello")
-    s.hash.should eq(31 + "hello".hash)
+    hasher = StdHasher.new
+    hasher << 1
+    hasher << "hello"
+    s.hash.should eq(hasher.digest)
   end
 
   it "does hash for struct wrapper (#1940)" do
-    StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0)
+    StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(BigInt.new(0).hash)
   end
 
   it "does dup" do
diff --git a/spec/std/time/span_spec.cr b/spec/std/time/span_spec.cr
index 472472fa664f..1405cb63ffb5 100644
--- a/spec/std/time/span_spec.cr
+++ b/spec/std/time/span_spec.cr
@@ -176,7 +176,7 @@ describe Time::Span do
   end
 
   it "test hash code" do
-    Time::Span.new(77).hash.should eq(77)
+    Time::Span.new(77).hash.should eq(77.hash)
   end
 
   it "test subtract" do
diff --git a/src/big/big_float.cr b/src/big/big_float.cr
index e4d8f0e946de..d12d7bd4ca2d 100644
--- a/src/big/big_float.cr
+++ b/src/big/big_float.cr
@@ -17,6 +17,18 @@ struct BigFloat < Float
     LibGMP.mpf_init_set_str(out @mpf, str, 10)
   end
 
+  def initialize(num : BigInt)
+    # Probably should detect precision and use mpf_init2
+    LibGMP.mpf_init(out @mpf)
+    LibGMP.mpf_set_z(self, num)
+  end
+
+  def initialize(num : BigRational)
+    # Probably should detect precision and use mpf_init2
+    LibGMP.mpf_init(out @mpf)
+    LibGMP.mpf_set_q(self, num)
+  end
+
   def initialize(num : Number)
     LibGMP.mpf_init_set_d(out @mpf, num.to_f64)
   end
@@ -35,8 +47,8 @@ struct BigFloat < Float
     new(mpf)
   end
 
-  def hash
-    to_f64.hash
+  def hash_normalize
+    remainder(HASH_MODULUS).to_f64.hash_normalize
   end
 
   def self.default_precision
diff --git a/src/big/big_int.cr b/src/big/big_int.cr
index 6f843c3be285..e3468d5e695b 100644
--- a/src/big/big_int.cr
+++ b/src/big/big_int.cr
@@ -267,8 +267,16 @@ struct BigInt < Int
     to_s io
   end
 
-  def hash
-    to_u64
+  def hash_normalize
+    # remainder(HASH_MODULUS)
+    uv = LibGMP.tdiv_ui(self, HASH_MODULUS)
+    v =
+      {% if HASH_BITS == 31 %}
+        uv.to_i32
+      {% else %}
+        uv.to_i64
+      {% end %}
+    self < 0 ? -v : v
   end
 
   # Returns a string representation of self.
diff --git a/src/big/big_rational.cr b/src/big/big_rational.cr
index 0ded929ea985..61072bbf1158 100644
--- a/src/big/big_rational.cr
+++ b/src/big/big_rational.cr
@@ -41,6 +41,22 @@ struct BigRational < Number
     initialize(num, 1)
   end
 
+  # Creates a exact representation of float as rational.
+  #
+  # It sures that `BigRational.new(f) == f`
+  # It relies on fact, that mantisa is at most 53 bits
+  def initialize(num : Float32 | Float64)
+    frac, exp = Math.frexp num
+    ifrac = (frac.to_f64 * (1.to_i64 << 53).to_f64).to_i64
+    exp -= 53
+    initialize ifrac, 1
+    if exp > 0
+      LibGMP.mpq_mul_2exp(out @mpq, self, exp)
+    elsif exp < 0
+      LibGMP.mpq_div_2exp(out @mpq, self, -exp)
+    end
+  end
+
   # :nodoc:
   def initialize(@mpq : LibGMP::MPQ)
   end
@@ -64,8 +80,12 @@ struct BigRational < Number
     LibGMP.mpq_cmp(mpq, other)
   end
 
+  def <=>(other : Float32 | Float64)
+    self <=> BigRational.new(other)
+  end
+
   def <=>(other : Float)
-    self.to_f <=> other
+    BigFloat.new(self) <=> BigFloat.new(other)
   end
 
   def <=>(other : Int)
@@ -139,8 +159,14 @@ struct BigRational < Number
     BigRational.new { |mpq| LibGMP.mpq_abs(mpq, self) }
   end
 
-  def hash
-    to_f64.hash
+  def hash_normalize
+    # self.remainder(HASH_MODULUS).to_f.hash_normalize
+    num = numerator
+    denom = denominator
+    div = num.tdiv(denom)
+    floor = div.tdiv(HASH_MODULUS)
+    rem = self - floor * HASH_MODULUS
+    rem.to_f.hash_normalize
   end
 
   # Returns the `Float64` representing this rational.
diff --git a/src/big/lib_gmp.cr b/src/big/lib_gmp.cr
index 012eeee9750c..66645657d070 100644
--- a/src/big/lib_gmp.cr
+++ b/src/big/lib_gmp.cr
@@ -63,6 +63,7 @@ lib LibGMP
 
   fun tdiv_r = __gmpz_tdiv_r(rop : MPZ*, op1 : MPZ*, op2 : MPZ*)
   fun tdiv_r_ui = __gmpz_tdiv_r_ui(rop : MPZ*, op1 : MPZ*, op2 : ULong)
+  fun tdiv_ui = __gmpz_tdiv_ui(op1 : MPZ*, op2 : ULong) : ULong
 
   fun neg = __gmpz_neg(rop : MPZ*, op : MPZ*)
   fun abs = __gmpz_abs(rop : MPZ*, op : MPZ*)
@@ -152,6 +153,8 @@ lib LibGMP
   fun mpf_get_str = __gmpf_get_str(str : UInt8*, expptr : MpExp*, base : Int, n_digits : LibC::SizeT, op : MPF*) : UInt8*
   fun mpf_get_d = __gmpf_get_d(op : MPF*) : Double
   fun mpf_set_d = __gmpf_set_d(op : MPF*, op : Double)
+  fun mpf_set_z = __gmpf_set_z(op : MPF*, op : MPZ*)
+  fun mpf_set_q = __gmpf_set_q(op : MPF*, op : MPQ*)
   fun mpf_get_si = __gmpf_get_si(op : MPF*) : Long
   fun mpf_get_ui = __gmpf_get_ui(op : MPF*) : ULong
   fun mpf_ceil = __gmpf_ceil(rop : MPF*, op : MPF*)
diff --git a/src/bool.cr b/src/bool.cr
index 5e7f5f81ae56..898b79ac81d3 100644
--- a/src/bool.cr
+++ b/src/bool.cr
@@ -41,9 +41,10 @@ struct Bool
     self != other
   end
 
-  # Returns a hash value for this boolean: 0 for `false`, 1 for `true`.
-  def hash
-    self ? 1 : 0
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher << (self ? 1 : 0)
+    hasher
   end
 
   # Returns `"true"` for `true` and `"false"` for `false`.
diff --git a/src/char.cr b/src/char.cr
index 0fd31d1c97c2..3e5619a9abdd 100644
--- a/src/char.cr
+++ b/src/char.cr
@@ -419,6 +419,12 @@ struct Char
     ord
   end
 
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw ord
+    hasher
+  end
+
   # Returns a Char that is one codepoint bigger than this char's codepoint.
   #
   # ```
diff --git a/src/class.cr b/src/class.cr
index cfe17b1e900c..5634b64a0b3f 100644
--- a/src/class.cr
+++ b/src/class.cr
@@ -3,8 +3,9 @@ class Class
     to_s(io)
   end
 
-  def hash
-    crystal_type_id
+  def hash(hasher)
+    hasher.raw(crystal_type_id)
+    hasher
   end
 
   def ==(other : Class)
diff --git a/src/compiler/crystal/syntax/ast.cr b/src/compiler/crystal/syntax/ast.cr
index 76a5fce49342..911ea3120b39 100644
--- a/src/compiler/crystal/syntax/ast.cr
+++ b/src/compiler/crystal/syntax/ast.cr
@@ -1175,8 +1175,9 @@ module Crystal
       self
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 
@@ -1545,8 +1546,9 @@ module Crystal
       Self.new
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 
@@ -2025,8 +2027,9 @@ module Crystal
       Underscore.new
     end
 
-    def hash
-      0
+    def hash(hasher)
+      hasher << 0
+      hasher
     end
   end
 
diff --git a/src/enum.cr b/src/enum.cr
index e9758ba05d13..76deb2dcfafb 100644
--- a/src/enum.cr
+++ b/src/enum.cr
@@ -274,9 +274,10 @@ struct Enum
     value == other.value
   end
 
-  # Returns a hash value. This is the hash of the underlying value.
-  def hash
-    value.hash
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw(value)
+    hasher
   end
 
   # Iterates each values in a Flags Enum.
diff --git a/src/event/signal_handler.cr b/src/event/signal_handler.cr
index 857a2bea9701..de3caf5a2a0e 100644
--- a/src/event/signal_handler.cr
+++ b/src/event/signal_handler.cr
@@ -1,5 +1,6 @@
 require "c/signal"
 require "c/unistd"
+require "signal"
 
 # :nodoc:
 # Singleton that runs Signal events (libevent2) in it's own Fiber.
diff --git a/src/float.cr b/src/float.cr
index bf60ffc5b9e1..48af60a4cc49 100644
--- a/src/float.cr
+++ b/src/float.cr
@@ -1,6 +1,7 @@
 require "c/stdio"
 require "c/string"
 require "./float/printer"
+require "./number/hash_normalize"
 
 # Float is the base type of all floating point numbers.
 #
@@ -148,13 +149,33 @@ struct Float32
     Printer.print(self, io)
   end
 
-  def hash
-    unsafe_as(Int32)
-  end
-
   def clone
     self
   end
+
+  include Number::HashNormalize
+
+  def hash_normalize
+    float_normalize_wrap do
+      {% if flag?(:x86) || flag?(:x86_64) || flag(:arm) || flag(:aarch64) %}
+	# it should work on every architecture where endianess of Float32 and Int32
+	# matches and float is IEEE754.
+	unsafe_int = unsafe_as(Int32)
+	exp = (((unsafe_int >> 23) & 0xff) - 127)
+	mantisa = unsafe_int & ((1 << 23) - 1)
+	if exp > -127
+	  exp -= 23
+	  mantisa |= 1 << 23
+	else
+	  # subnormals
+	  exp -= 22
+	end
+	{mantisa, exp}
+      {% else %}
+	float_normalize_reference
+      {% end %}
+    end
+  end
 end
 
 struct Float64
@@ -206,11 +227,36 @@ struct Float64
     Printer.print(self, io)
   end
 
-  def hash
-    unsafe_as(Int64)
-  end
-
   def clone
     self
   end
+
+  include Number::HashNormalize
+
+  def hash_normalize
+    float_normalize_wrap do
+      {% if flag?(:x86) || flag?(:x86_64) || flag(:arm) || flag(:aarch64) %}
+	# it should work on every architecture where endianess of Float64 and Int64
+	# matches and float is IEEE754.
+	unsafe_int = unsafe_as(Int64)
+	exp = (((unsafe_int >> 52) & 0x7ff) - 1023)
+	mantisa = unsafe_int & ((1_u64 << 52) - 1)
+	if exp > -1023
+	  exp -= 52
+	  mantisa |= 1_u64 << 52
+	else
+	  # subnormals
+	  exp -= 51
+	end
+
+	{% if HASH_BITS == 31 %}
+	  mantisa %= HASH_MODULUS
+	{% end %}
+
+	{mantisa, exp}
+      {% else %}
+	float_normalize_reference
+      {% end %}
+    end
+  end
 end
diff --git a/src/hash.cr b/src/hash.cr
index 6d7fe7567b52..05899fec5c6c 100644
--- a/src/hash.cr
+++ b/src/hash.cr
@@ -710,14 +710,19 @@ class Hash(K, V)
   #
   # ```
   # foo = {"foo" => "bar"}
-  # foo.hash # => 3247054
+  # foo.hash # => 3247054 (not exactly)
   # ```
-  def hash
-    hash = size
+  def hash(hasher)
+    hasher.raw(size)
+    digest = hasher.digest
     each do |key, value|
-      hash += key.hash ^ value.hash
+      copy = hasher.clone
+      copy << key
+      copy << value
+      digest += copy.digest
     end
-    hash
+    hasher.raw(digest)
+    hasher
   end
 
   # Duplicates a `Hash`.
@@ -864,7 +869,11 @@ class Hash(K, V)
   end
 
   private def bucket_index(key)
-    key.hash.to_u32.remainder(@buckets_size).to_i
+    hash_key(key).to_u32.remainder(@buckets_size).to_i
+  end
+
+  protected def hash_key(key)
+    key.hash
   end
 
   private def calculate_new_size(size)
diff --git a/src/http/headers.cr b/src/http/headers.cr
index e47ff17765b0..f25a2371e2b4 100644
--- a/src/http/headers.cr
+++ b/src/http/headers.cr
@@ -9,13 +9,12 @@ struct HTTP::Headers
   record Key, name : String do
     forward_missing_to @name
 
-    def hash
-      h = 0
-      name.each_byte do |c|
-        c = normalize_byte(c)
-        h = 31 * h + c
+    def hash(hasher)
+      hasher.raw(bytesize.to_u32)
+      name.each_byte do |b|
+        hasher.raw normalize_byte(b)
       end
-      h
+      hasher
     end
 
     def ==(key2)
@@ -44,7 +43,7 @@ struct HTTP::Headers
 
       return byte if char.ascii_lowercase? || char == '-' # Optimize the common case
       return byte + 32 if char.ascii_uppercase?
-      return '-'.ord if char == '_'
+      return '-'.ord.to_u8 if char == '_'
 
       byte
     end
diff --git a/src/indexable.cr b/src/indexable.cr
index fa78b16f7b21..bc873c98497f 100644
--- a/src/indexable.cr
+++ b/src/indexable.cr
@@ -271,13 +271,13 @@ module Indexable(T)
     first { nil }
   end
 
-  # Returns a hash code based on `self`'s size and elements.
-  #
-  # See also: `Object#hash`.
-  def hash
-    reduce(31 * size) do |memo, elem|
-      31 * memo + elem.hash
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw(size.to_u32)
+    each do |elem|
+      hasher << elem
     end
+    hasher
   end
 
   # Returns the index of the first appearance of *value* in `self`
diff --git a/src/int.cr b/src/int.cr
index 3dfcdb3acd9a..a85baeb55f7c 100644
--- a/src/int.cr
+++ b/src/int.cr
@@ -316,10 +316,6 @@ struct Int
     !even?
   end
 
-  def hash
-    self
-  end
-
   def succ
     self + 1
   end
@@ -575,6 +571,10 @@ struct Int8
   def clone
     self
   end
+
+  def hash_normalize
+    self
+  end
 end
 
 struct Int16
@@ -597,6 +597,10 @@ struct Int16
   def clone
     self
   end
+
+  def hash_normalize
+    self
+  end
 end
 
 struct Int32
@@ -619,6 +623,14 @@ struct Int32
   def clone
     self
   end
+
+  def hash_normalize
+    {% if HASH_BITS == 31 %}
+      unsafe_mod(HASH_MODULUS)
+    {% else %}
+      self
+    {% end %}
+  end
 end
 
 struct Int64
@@ -641,6 +653,10 @@ struct Int64
   def clone
     self
   end
+
+  def hash_normalize
+    unsafe_mod(HASH_MODULUS)
+  end
 end
 
 struct UInt8
@@ -663,6 +679,10 @@ struct UInt8
   def clone
     self
   end
+
+  def hash_normalize
+    self
+  end
 end
 
 struct UInt16
@@ -685,6 +705,10 @@ struct UInt16
   def clone
     self
   end
+
+  def hash_normalize
+    self
+  end
 end
 
 struct UInt32
@@ -707,6 +731,14 @@ struct UInt32
   def clone
     self
   end
+
+  def hash_normalize
+    {% if HASH_BITS == 31 %}
+      unsafe_mod(HASH_MODULUS)
+    {% else %}
+      self
+    {% end %}
+  end
 end
 
 struct UInt64
@@ -729,4 +761,8 @@ struct UInt64
   def clone
     self
   end
+
+  def hash_normalize
+    unsafe_mod(HASH_MODULUS)
+  end
 end
diff --git a/src/json/any.cr b/src/json/any.cr
index fecfa508bd6d..21c7dbca2143 100644
--- a/src/json/any.cr
+++ b/src/json/any.cr
@@ -261,11 +261,6 @@ struct JSON::Any
     raw == other
   end
 
-  # :nodoc:
-  def hash
-    raw.hash
-  end
-
   # :nodoc:
   def to_json(json : JSON::Builder)
     raw.to_json(json)
diff --git a/src/named_tuple.cr b/src/named_tuple.cr
index cd7468b0a418..8f8d182df6ca 100644
--- a/src/named_tuple.cr
+++ b/src/named_tuple.cr
@@ -159,16 +159,14 @@ struct NamedTuple
     yield
   end
 
-  # Returns a hash value based on this name tuple's size, keys and values.
-  #
-  # See also: `Object#hash`.
-  def hash
-    hash = 31 * size
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw(size)
     {% for key in T.keys.sort %}
-      hash = 31 * hash + {{key.symbolize}}.hash
-      hash = 31 * hash + self[{{key.symbolize}}].hash
+      hasher << {{key.symbolize}}
+      hasher << self[{{key.symbolize}}]
     {% end %}
-    hash
+    hasher
   end
 
   # Same as `to_s`.
diff --git a/src/nil.cr b/src/nil.cr
index 644fcc98cb71..5948772b3a46 100644
--- a/src/nil.cr
+++ b/src/nil.cr
@@ -67,9 +67,10 @@ struct Nil
     false
   end
 
-  # Returns `0`.
-  def hash
-    0
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher << nil
+    hasher
   end
 
   # Returns an empty string.
diff --git a/src/number.cr b/src/number.cr
index cf32b3ae6b96..96605d494a8e 100644
--- a/src/number.cr
+++ b/src/number.cr
@@ -1,3 +1,5 @@
+require "./number/hash_normalize"
+
 # The top-level number type.
 struct Number
   include Comparable(Number)
@@ -255,6 +257,25 @@ struct Number
     self == 0
   end
 
+  include Number::HashNormalize
+
+  # Protocol method for generic hashing
+  # All number types should define `hash_normalize`, so equal number will
+  # produce equal normalized value.
+  # Integer numbers should calculate `self.remainder(HASH_MODULUS)`
+  # Float64 and Float32 version generalize it for numbers with fractional part.
+  # BigFloat and BigRational should calculate it as
+  # `(v.remainder HASH_MODULUS).to_f64.hash_normalize`
+  # See comments in "number/hash_normalize.cr"
+  def hash(hasher)
+    {% if HASH_BITS == 31 %}
+      hasher.raw hash_normalize.to_i32
+    {% else %}
+      hasher.raw hash_normalize.to_i64
+    {% end %}
+    hasher
+  end
+
   private class StepIterator(T, L, B)
     include Iterator(T)
 
diff --git a/src/number/hash_normalize.cr b/src/number/hash_normalize.cr
new file mode 100644
index 000000000000..8c5736736590
--- /dev/null
+++ b/src/number/hash_normalize.cr
@@ -0,0 +1,95 @@
+module Number::HashNormalize
+  # Idea by Akzhan Abdulin @akzhan
+  # Based on https://github.com/python/cpython/blob/f051e43/Python/pyhash.c#L34
+
+  private HASH_BITS = sizeof(LibC::ULong) == 32 ? 31 : 61
+  {% if HASH_BITS == 31 %}
+    private HASH_MODULUS  = (1 << HASH_BITS) - 1
+  {% else %}
+    private HASH_MODULUS  = (1_i64 << HASH_BITS) - 1
+  {% end %}
+
+  private HASH_NAN      =      0
+  private HASH_INFINITY = 314159
+
+  # Following is a copy from python's comment:
+  #
+  # For numeric types, the hash of a number x is based on the reduction
+  # of x modulo the Mersen Prime P = 2**HASH_BITS - 1.  It's designed
+  # so that hash(x) == hash(y) whenever x and y are numerically equal,
+  # even if x and y have different types.
+  # A quick summary of the hashing strategy:
+  # (1) First define the 'reduction of x modulo P' for any rational
+  # number x; this is a standard extension of the usual notion of
+  # reduction modulo P for integers.  If x == p/q (written in lowest
+  # terms), the reduction is interpreted as the reduction of p times
+  # the inverse of the reduction of q, all modulo P; if q is exactly
+  # divisible by P then define the reduction to be infinity.  So we've
+  # got a well-defined map
+  #   reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
+  # (2) Now for a rational number x, define hash(x) by:
+  #   reduce(x)   if x >= 0
+  #   -reduce(-x) if x < 0
+  # If the result of the reduction is infinity (this is impossible for
+  # integers, floats and Decimals) then use the predefined hash value
+  # HASH_INF for x >= 0, or -HASH_INF for x < 0, instead.
+  # HASH_INF, -HASH_INF and HASH_NAN are also used for the
+  # hashes of float and Decimal infinities and nans.
+  # A selling point for the above strategy is that it makes it possible
+  # to compute hashes of decimal and binary floating-point numbers
+  # efficiently, even if the exponent of the binary or decimal number
+  # is large.  The key point is that
+  #   reduce(x * y) == reduce(x) * reduce(y) (modulo HASH_MODULUS)
+  # provided that {reduce(x), reduce(y)} != {0, infinity}.  The reduction of a
+  # binary or decimal float is never infinity, since the denominator is a power
+  # of 2 (for binary) or a divisor of a power of 10 (for decimal).  So we have,
+  # for nonnegative x,
+  #   reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
+  #   reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
+  # and reduce(10**e) can be computed efficiently by the usual modular
+  # exponentiation algorithm.  For reduce(2**e) it's even better: since
+  # P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
+  # by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
+  #
+  private def float_normalize_wrap
+    return HASH_NAN if nan?
+    if infinite?
+      return self > 0 ? +HASH_INFINITY : -HASH_INFINITY
+    end
+
+    x, exp = yield
+
+    # adjust for the exponent;  first reduce it modulo HASH_BITS
+    exp = exp >= 0 ? exp % HASH_BITS : HASH_BITS - 1 - ((-1 - exp) % HASH_BITS)
+    x = ((x << exp) & HASH_MODULUS) | x >> (HASH_BITS - exp)
+
+    x * (self < 0 ? -1 : 1)
+  end
+
+  # This function if for reference implementation.
+  # Many architectures allows more effective bitwise calculation.
+  private def float_normalize_reference
+    frac, exp = Math.frexp self
+    if self < 0
+      frac = -frac
+    end
+    # process 28 bits at a time;  this should work well both for binary
+    # and hexadecimal floating point.
+    x =
+      {% if HASH_BITS == 31 %}
+	0u32
+      {% else %}
+	0u64
+      {% end %}
+    while frac > 0
+      x = ((x << 28) & HASH_MODULUS) | x >> (HASH_BITS - 28)
+      frac *= 268435456.0 # 2**28
+      exp -= 28
+      y = frac.to_u32 # pull out integer part
+      frac -= y
+      x += y
+      x -= HASH_MODULUS if x >= HASH_MODULUS
+    end
+    {x, exp}
+  end
+end
diff --git a/src/object.cr b/src/object.cr
index 1f66f1837494..2f59f38fa236 100644
--- a/src/object.cr
+++ b/src/object.cr
@@ -64,7 +64,29 @@ class Object
   #
   # The hash value is used along with `==` by the `Hash` class to determine if two objects
   # reference the same hash key.
-  abstract def hash
+  def hash
+    StdHasher.hashit self
+  end
+
+  # Protocol method for generic hashing.
+  #
+  # You should use `hasher << @v` for mixing values. It will recursively call
+  # `hash(hasher)` on values. `hash(hasher)` on numbers defined to generate
+  # same hash value for equal number of different types. For performance sake
+  # use `hasher.raw @v` if you want mix integer as abstract value and not as a
+  # number.
+  #
+  # Cause hasher could be a struct, `hash(hasher)` have to return hasher.
+  # Also, `hasher.<<` method is not chainable, unlike other `<<` methods.
+  #
+  #    def hash(hasher)
+  #      hasher.raw @size
+  #      each do |elem|
+  #        hasher << elem
+  #      end
+  #      hasher
+  #    end
+  abstract def hash(hasher)
 
   # Returns a string representation of this object.
   #
@@ -1078,7 +1100,7 @@ class Object
     {% end %}
   end
 
-  # Defines a `hash` method computed from the given fields.
+  # Defines a `hash(hasher)` method computed from the given fields.
   #
   # ```
   # class Person
@@ -1090,16 +1112,11 @@ class Object
   # end
   # ```
   macro def_hash(*fields)
-    def hash
-      {% if fields.size == 1 %}
-        {{fields[0]}}.hash
-      {% else %}
-        hash = 0
-        {% for field in fields %}
-          hash = 31 * hash + {{field}}.hash
-        {% end %}
-        hash
+    def hash(hasher)
+      {% for field in fields %}
+        hasher << {{field}}
       {% end %}
+      hasher
     end
   end
 
diff --git a/src/prelude.cr b/src/prelude.cr
index 4aa5e5954b57..a22293b5adc1 100644
--- a/src/prelude.cr
+++ b/src/prelude.cr
@@ -17,6 +17,7 @@ require "iterable"
 require "iterator"
 require "indexable"
 require "string"
+require "stdhasher"
 
 # Alpha-sorted list
 require "array"
diff --git a/src/proc.cr b/src/proc.cr
index df48e26f4429..1bf225b6bdec 100644
--- a/src/proc.cr
+++ b/src/proc.cr
@@ -181,8 +181,9 @@ struct Proc
     call(other)
   end
 
-  def hash
-    internal_representation.hash
+  def hash(hasher)
+    hasher << internal_representation
+    hasher
   end
 
   def clone
diff --git a/src/reference.cr b/src/reference.cr
index 94034e03c3bf..e360bf8179fb 100644
--- a/src/reference.cr
+++ b/src/reference.cr
@@ -50,9 +50,10 @@ class Reference
     {% end %}
   end
 
-  # Returns this reference's `object_id` as the hash value.
-  def hash
-    object_id
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw object_id
+    hasher
   end
 
   def inspect(io : IO) : Nil
diff --git a/src/set.cr b/src/set.cr
index 991196e0867e..48750009fc76 100644
--- a/src/set.cr
+++ b/src/set.cr
@@ -308,10 +308,6 @@ struct Set(T)
     pp.list("Set{", self, "}")
   end
 
-  def hash
-    @hash.hash
-  end
-
   # Returns `true` if the set and the given set have at least one element in
   # common.
   #
diff --git a/src/signal.cr b/src/signal.cr
index 4fd0c51a0473..7bcb845b14f6 100644
--- a/src/signal.cr
+++ b/src/signal.cr
@@ -116,6 +116,13 @@ enum Signal
     Signal::PIPE.ignore
     Signal::CHLD.reset
   end
+
+  # There is no much of signals, so don't bother with hashing.
+  # And we couldn't use seeded hash, because seed is not filled yet.
+  # :nodoc:
+  def hash
+    value
+  end
 end
 
 # :nodoc:
diff --git a/src/stdhasher.cr b/src/stdhasher.cr
new file mode 100644
index 000000000000..419602028ec5
--- /dev/null
+++ b/src/stdhasher.cr
@@ -0,0 +1,199 @@
+require "crystal/system/random"
+
+# Hasher usable for `def hash(hasher)` should satisfy protocol:
+#   class MyHasher
+#     # Value should implement commutative `+` for `Hash#hash(hasher)`
+#     alias Value
+#
+#     # must be implemented to mix sizes of collections, and pointers (object_id)
+#     def raw(v : Int::Primitive)
+#       # mutate
+#       nil
+#     end
+#
+#     # must be implemented for Hash#hash
+#     def raw(v : Value)
+#       # mutate
+#       nil
+#     end
+#
+#     def <<(b : Bytes)
+#       # mutate
+#       nil
+#     end
+#
+#     def <<(n : Nil)
+#       # mutate
+#       nil
+#     end
+#
+#     def <<(v)
+#       # v.hash will return hasher
+#       # if hasher is a struct, then it will be copy
+#       copy_from v.hash(self)
+#       nil
+#     end
+#
+#     # digest returns hashsum for current state without state mutation
+#     def digest : Value
+#     end
+#
+#     # should be implemented for `Hash#hash(hasher)`
+#     def clone
+#       copy_of_current_state
+#     end
+#   end
+
+# StdHasher used as standard hasher in `Object#hash`
+# It have to provide defenense against HashDos, and be reasonably fast.
+# To protect against HashDos, it is seeded with secure random, and have
+# permutation that hard to forge without knowing seed and seeing hash digest.
+#
+# Also it has specialized methods for primitive keys with different seeds.
+struct StdHasher
+  alias Value = UInt32
+
+  @@seed = StaticArray(UInt32, 4).new { |i| 0_u32 }
+  buf = pointerof(@@seed).as(Pointer(UInt8))
+  Crystal::System::Random.random_bytes(buf.to_slice(sizeof(typeof(@@seed))))
+
+  protected getter a : UInt32 = 0_u32
+  protected getter b : UInt32 = 0_u32
+
+  def initialize
+    @a, @b = @@seed[0], @@seed[1]
+  end
+
+  def initialize(@a : UInt32, @b : UInt32)
+  end
+
+  def self.hashit(v)
+    s = new(@@seed[0], @@seed[1])
+    s << v
+    s.digest
+  end
+
+  def clone
+    self.class.new(@a, @b)
+  end
+
+  def <<(v : Nil)
+    permute_nil(@@seed[2])
+    nil
+  end
+
+  # mix raw value without number normalizing
+  def raw(v : Int8 | UInt8)
+    permute(v.to_u8, @@seed[2])
+    nil
+  end
+
+  # mix raw value without number normalizing
+  def raw(v : Int16 | Int32 | UInt16 | UInt32)
+    permute(v.to_u32, @@seed[2])
+    nil
+  end
+
+  # mix raw value without number normalizing
+  def raw(v : Int64 | UInt64)
+    high = (v >> 32).to_u32
+    # This condition here cause of some 32bit issue in LLVM binding,
+    # so compiler_spec doesn't pass without it.
+    # Fill free to comment and debug.
+    if high != 0_u32
+      permute(high, @@seed[2])
+    end
+    permute(v.to_u32, @@seed[2])
+    nil
+  end
+
+  def <<(b : Bytes)
+    permute(b, @@seed[2])
+    nil
+  end
+
+  def <<(v)
+    cp = v.hash(self)
+    @a, @b = cp.a, cp.b
+    nil
+  end
+
+  def digest
+    a, b = @a, @b
+    b += @@seed[3]
+    a ^= a >> 15
+    b ^= b >> 16
+    a *= 0xb8b34b2d_u32
+    b *= 0x52c6a2d9_u32
+    a ^= a >> 17
+    b ^= b >> 16
+    b + a
+  end
+
+  protected def permute_nil(s : UInt32)
+    @a += s | 1
+    # LFSR
+    mx = (@b.to_i32 >> 31).to_u32 & 0xa8888eef_u32
+    @b = (@b << 1) ^ mx
+  end
+
+  protected def permute(v : UInt8, s : UInt32)
+    @a += v.to_u32 ^ s
+    @a *= 9
+    @b += @a
+    @b = (@b << 7) | (@b >> 25)
+    @b *= 5
+  end
+
+  protected def permute(v : UInt32, s : UInt32)
+    permute_u32(v, s, @a, @b)
+  end
+
+  @[NoInline]
+  protected def permute(buf : Bytes, s : UInt32)
+    bsz = buf.size
+    v = bsz.to_u32 << 24
+    u = buf.to_unsafe
+    a, b = @a, @b
+    bsz.unsafe_div(4).downto(1) do
+      cv = u.as(Pointer(UInt32)).value
+      permute_u32(cv, s, a, b)
+      u += 4
+    end
+    r = (bsz & 3).to_u32
+    if r != 0
+      v |= u[0].to_u32 | (u[r/2].to_u32 << 8) | (u[r - 1].to_u32 << 16)
+    end
+    permute_u32(v, s, a, b)
+    @a, @b = a, b
+    self
+  end
+
+  private macro permute_u32(v, s, a, b)
+    {{v}} ^= {{s}}
+    {{v}} *= 0xb8b34b2d_u32
+    {{a}} += {{v}}
+    {{a}} = ({{a}}.unsafe_shl(13)) | ({{a}}.unsafe_shr(19))
+    {{b}} ^= {{a}} + {{s}}
+    {{b}} *= {{9}}
+  end
+
+  # unseeded is used for types that are used in early startup
+  def self.unseeded(v : Int8 | Int16 | UInt8 | UInt16 | Int32 | UInt32)
+    h = v.to_u32
+    h ^= h >> 16
+    h *= 0x52c6a2d9_u32
+    h ^ (h >> 16)
+  end
+
+  # unseeded is used for types that are used in early startup
+  def self.unseeded(v : Int64 | UInt64)
+    h = (v >> 32).to_u32
+    h ^= h >> 16
+    h *= 0xb8b34b2d_u32
+    h += v.to_u32
+    h ^= h >> 16
+    h *= 0x52c6a2d9_u32
+    h ^ (h >> 16)
+  end
+end
diff --git a/src/string.cr b/src/string.cr
index 43a0449fec6d..6d5c3329f487 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -3899,15 +3899,10 @@ class String
     sprintf self, other
   end
 
-  # Returns a hash based on this string’s size and content.
-  #
-  # See also: `Object#hash`.
-  def hash
-    h = 0
-    each_byte do |c|
-      h = 31 * h + c
-    end
-    h
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher << to_slice
+    hasher
   end
 
   # Returns the number of unicode codepoints in this string.
diff --git a/src/struct.cr b/src/struct.cr
index 57bda79ad9eb..c730332df280 100644
--- a/src/struct.cr
+++ b/src/struct.cr
@@ -73,12 +73,13 @@ struct Struct
   # Returns a hash value based on this struct's instance variables hash values.
   #
   # See also: `Object#hash`
-  def hash : Int32
-    hash = 0
+
+  # Protocol method for generic hashing.
+  def hash(hasher)
     {% for ivar in @type.instance_vars %}
-      hash = 31 * hash + @{{ivar.id}}.hash.to_i32
+      hasher << @{{ivar.id}}
     {% end %}
-    hash
+    hasher
   end
 
   # Appends this struct's name and instance variables names and values
diff --git a/src/symbol.cr b/src/symbol.cr
index 8e90a7207c68..5e46c3eaf05c 100644
--- a/src/symbol.cr
+++ b/src/symbol.cr
@@ -15,11 +15,10 @@
 struct Symbol
   include Comparable(Symbol)
 
-  # Generates an `Int32` hash value for this symbol.
-  #
-  # See also: `Object#hash`.
-  def hash : Int32
-    to_i
+  # Protocol method for generic hashing.
+  def hash(hasher)
+    hasher.raw to_i
+    hasher
   end
 
   # Compares symbol with other based on `String#<=>` method. Returns `-1`, `0`
diff --git a/src/thread.cr b/src/thread.cr
index 8102a033dbe0..223a096099a2 100644
--- a/src/thread.cr
+++ b/src/thread.cr
@@ -48,6 +48,12 @@ class Thread
     end
   end
 
+  # override, cause StdHasher's seed is not initialized yet
+  # :nodoc:
+  def hash
+    StdHasher.unseeded object_id
+  end
+
   # All threads, so the GC can see them (GC doesn't scan thread locals)
   # and we can find the current thread on platforms that don't support
   # thread local storage (eg: OpenBSD)
diff --git a/src/time.cr b/src/time.cr
index 9d2c270f1148..9cc76aad0577 100644
--- a/src/time.cr
+++ b/src/time.cr
@@ -309,10 +309,6 @@ struct Time
     end
   end
 
-  def hash
-    @encoded
-  end
-
   def self.days_in_month(year, month) : Int32
     unless 1 <= month <= 12
       raise ArgumentError.new "Invalid month"
diff --git a/src/tuple.cr b/src/tuple.cr
index f0f4149fc5af..9bdc250630b9 100644
--- a/src/tuple.cr
+++ b/src/tuple.cr
@@ -306,15 +306,12 @@ struct Tuple
     size <=> other.size
   end
 
-  # Returns a hash value based on this tuple's length and contents.
-  #
-  # See also: `Object#hash`.
-  def hash
-    hash = 31 * size
+  # Protocol method for generic hashing.
+  def hash(hasher)
     {% for i in 0...T.size %}
-      hash = 31 * hash + self[{{i}}].hash
+      hasher << self[{{i}}]
     {% end %}
-    hash
+    hasher
   end
 
   # Returns a tuple containing cloned elements of this tuple using the `clone` method.
diff --git a/src/xml/namespace.cr b/src/xml/namespace.cr
index 43fa3d2bad69..9336930bfb58 100644
--- a/src/xml/namespace.cr
+++ b/src/xml/namespace.cr
@@ -4,8 +4,9 @@ struct XML::Namespace
   def initialize(@document : Node, @ns : LibXML::NS*)
   end
 
-  def hash
-    object_id
+  def hash(hasher)
+    hasher.raw object_id
+    hasher
   end
 
   def href
diff --git a/src/xml/node.cr b/src/xml/node.cr
index 3c23a238036f..f71064d4205e 100644
--- a/src/xml/node.cr
+++ b/src/xml/node.cr
@@ -160,8 +160,9 @@ struct XML::Node
   end
 
   # Returns this node's `#object_id` as the hash value.
-  def hash
-    object_id
+  def hash(hasher)
+    hasher.raw object_id
+    hasher
   end
 
   # Returns the content for this Node.
diff --git a/src/xml/node_set.cr b/src/xml/node_set.cr
index 00c8c759f039..215684278065 100644
--- a/src/xml/node_set.cr
+++ b/src/xml/node_set.cr
@@ -28,8 +28,9 @@ struct XML::NodeSet
     size == 0
   end
 
-  def hash
-    object_id
+  def hash(hasher)
+    hasher.raw object_id
+    hasher
   end
 
   def inspect(io)
diff --git a/src/yaml/any.cr b/src/yaml/any.cr
index 1d2956c41261..efb5160a76f4 100644
--- a/src/yaml/any.cr
+++ b/src/yaml/any.cr
@@ -194,11 +194,6 @@ struct YAML::Any
     raw == other
   end
 
-  # :nodoc:
-  def hash
-    raw.hash
-  end
-
   # :nodoc:
   def to_yaml(io)
     raw.to_yaml(io)