Skip to content

Commit

Permalink
change computation of hash value.
Browse files Browse the repository at this point in the history
To protect against Hash DoS, change the way hash values are computed.
A class or struct should define the method `def hash(hasher)` and call
`hasher << @ivar` inside it.

As an option, for speed and for backward compatibility, `def hash`
can still be implemented. It will be used for a Hash of the matching type.
`Thread#hash` and `Signal#hash` are implemented as unseeded because they are
used before `StdHasher @@seed` is initialized.

But it is better to implement `def hash(hasher)`.

StdHasher is the default hasher: it uses `hash(hasher)` and serves as the default
seeded hasher. It also implements `unseeded` for `Enums`.

Also, number normalization for hashing is introduced, i.e. the rule 'equality
forces hash equality' is enforced (`a == b` => `a.hash == b.hash`).
The normalization idea is borrowed from the Python implementation.
(idea by Akzhan Abdulin @akzhan)

Fixes crystal-lang#4578
Prerequisite for crystal-lang#4557
Replaces crystal-lang#4581
  • Loading branch information
funny-falcon committed Jul 5, 2017
1 parent d24f79c commit 3856136
Show file tree
Hide file tree
Showing 43 changed files with 617 additions and 138 deletions.
3 changes: 1 addition & 2 deletions spec/std/big/big_int_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,7 @@ describe "BigInt" do

it "#hash" do
hash = 5.to_big_i.hash
hash.should eq(5)
typeof(hash).should eq(UInt64)
hash.should eq(5.hash)
end

it "clones" do
Expand Down
5 changes: 3 additions & 2 deletions spec/std/bool_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ describe "Bool" do
end

describe "hash" do
it { true.hash.should eq(1) }
it { false.hash.should eq(0) }
it { true.hash.should eq(true.hash) }
it { false.hash.should eq(false.hash) }
it { true.hash.should_not eq(false.hash) }
end

describe "to_s" do
Expand Down
2 changes: 1 addition & 1 deletion spec/std/enum_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ describe Enum do
end

it "has hash" do
SpecEnum::Two.hash.should eq(1.hash)
SpecEnum::Two.hash.should_not eq(SpecEnum::One.hash)
end

it "parses" do
Expand Down
4 changes: 2 additions & 2 deletions spec/std/hash_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ describe "Hash" do
end
end

it "works with mixed types" do
{1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0})
it "works with mixed types and normalized numbers" do
{1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, 2, "a", 1.0, 2.0, :a).should eq({:a, "a", 1, :a, "a", 1.0})
end
end

Expand Down
7 changes: 5 additions & 2 deletions spec/std/struct_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ describe "Struct" do

it "does hash" do
s = StructSpec::TestClass.new(1, "hello")
s.hash.should eq(31 + "hello".hash)
hasher = StdHasher.new
hasher << 1
hasher << "hello"
s.hash.should eq(hasher.digest)
end

it "does hash for struct wrapper (#1940)" do
StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0)
StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(BigInt.new(0).hash)
end

it "does dup" do
Expand Down
2 changes: 1 addition & 1 deletion spec/std/time/span_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ describe Time::Span do
end

it "test hash code" do
Time::Span.new(77).hash.should eq(77)
Time::Span.new(77).hash.should eq(77.hash)
end

it "test subtract" do
Expand Down
16 changes: 14 additions & 2 deletions src/big/big_float.cr
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ struct BigFloat < Float
LibGMP.mpf_init_set_str(out @mpf, str, 10)
end

# Creates a `BigFloat` from a `BigInt`: `@mpf` is first initialized with
# `mpf_init` and then assigned the value of *num* with `mpf_set_z`.
def initialize(num : BigInt)
# TODO: probably should detect the needed precision and use `mpf_init2`
LibGMP.mpf_init(out @mpf)
LibGMP.mpf_set_z(self, num)
end

# Creates a `BigFloat` from a `BigRational`: `@mpf` is first initialized with
# `mpf_init` and then assigned the value of *num* with `mpf_set_q`.
def initialize(num : BigRational)
# TODO: probably should detect the needed precision and use `mpf_init2`
LibGMP.mpf_init(out @mpf)
LibGMP.mpf_set_q(self, num)
end

# Creates a `BigFloat` from any other `Number` by converting it with
# `num.to_f64` and handing the result to `mpf_init_set_d`.
def initialize(num : Number)
LibGMP.mpf_init_set_d(out @mpf, num.to_f64)
end
Expand All @@ -35,8 +47,8 @@ struct BigFloat < Float
new(mpf)
end

def hash
to_f64.hash
# Normalizes this number for hashing: takes `remainder(HASH_MODULUS)`,
# converts the result with `to_f64` and delegates to its `hash_normalize`.
def hash_normalize
remainder(HASH_MODULUS).to_f64.hash_normalize
end

def self.default_precision
Expand Down
12 changes: 10 additions & 2 deletions src/big/big_int.cr
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,16 @@ struct BigInt < Int
to_s io
end

def hash
to_u64
# Normalizes this integer for hashing.
#
# Intended to be equivalent to `remainder(HASH_MODULUS)`: `tdiv_ui` yields an
# unsigned result, which is narrowed to the signed width selected by
# `HASH_BITS` and then given the sign of `self`.
def hash_normalize
  magnitude = LibGMP.tdiv_ui(self, HASH_MODULUS)
  {% if HASH_BITS == 31 %}
    signed = magnitude.to_i32
  {% else %}
    signed = magnitude.to_i64
  {% end %}
  if self < 0
    -signed
  else
    signed
  end
end

# Returns a string representation of self.
Expand Down
32 changes: 29 additions & 3 deletions src/big/big_rational.cr
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,22 @@ struct BigRational < Number
initialize(num, 1)
end

# Creates an exact representation of a float as a rational.
#
# This ensures that `BigRational.new(f) == f`.
# It relies on the fact that the mantissa is at most 53 bits wide.
def initialize(num : Float32 | Float64)
# num == frac * 2**exp
frac, exp = Math.frexp num
# Scale the fraction by 2**53 to get an integer, and compensate in the exponent.
ifrac = (frac.to_f64 * (1.to_i64 << 53).to_f64).to_i64
exp -= 53
initialize ifrac, 1
# Apply the remaining power of two to the rational ifrac/1.
if exp > 0
LibGMP.mpq_mul_2exp(out @mpq, self, exp)
elsif exp < 0
LibGMP.mpq_div_2exp(out @mpq, self, -exp)
end
end

# :nodoc:
def initialize(@mpq : LibGMP::MPQ)
end
Expand All @@ -64,8 +80,12 @@ struct BigRational < Number
LibGMP.mpq_cmp(mpq, other)
end

# Compares with a native float by first building a `BigRational` from
# *other* and then comparing the two rationals.
def <=>(other : Float32 | Float64)
  as_rational = BigRational.new(other)
  self <=> as_rational
end

def <=>(other : Float)
self.to_f <=> other
BigFloat.new(self) <=> BigFloat.new(other)
end

def <=>(other : Int)
Expand Down Expand Up @@ -139,8 +159,14 @@ struct BigRational < Number
BigRational.new { |mpq| LibGMP.mpq_abs(mpq, self) }
end

def hash
to_f64.hash
# Normalizes this rational for hashing.
#
# Intended result: `self.remainder(HASH_MODULUS).to_f.hash_normalize`.
# The whole multiples of `HASH_MODULUS` contained in the truncated quotient
# are subtracted from `self`, and the reduced value is converted with `to_f`
# and handed to its `hash_normalize`.
def hash_normalize
  whole_part = numerator.tdiv(denominator)
  multiples = whole_part.tdiv(HASH_MODULUS)
  reduced = self - multiples * HASH_MODULUS
  reduced.to_f.hash_normalize
end

# Returns the `Float64` representing this rational.
Expand Down
3 changes: 3 additions & 0 deletions src/big/lib_gmp.cr
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ lib LibGMP

fun tdiv_r = __gmpz_tdiv_r(rop : MPZ*, op1 : MPZ*, op2 : MPZ*)
fun tdiv_r_ui = __gmpz_tdiv_r_ui(rop : MPZ*, op1 : MPZ*, op2 : ULong)
fun tdiv_ui = __gmpz_tdiv_ui(op1 : MPZ*, op2 : ULong) : ULong

fun neg = __gmpz_neg(rop : MPZ*, op : MPZ*)
fun abs = __gmpz_abs(rop : MPZ*, op : MPZ*)
Expand Down Expand Up @@ -152,6 +153,8 @@ lib LibGMP
fun mpf_get_str = __gmpf_get_str(str : UInt8*, expptr : MpExp*, base : Int, n_digits : LibC::SizeT, op : MPF*) : UInt8*
fun mpf_get_d = __gmpf_get_d(op : MPF*) : Double
fun mpf_set_d = __gmpf_set_d(op : MPF*, op : Double)
fun mpf_set_z = __gmpf_set_z(op : MPF*, op : MPZ*)
fun mpf_set_q = __gmpf_set_q(op : MPF*, op : MPQ*)
fun mpf_get_si = __gmpf_get_si(op : MPF*) : Long
fun mpf_get_ui = __gmpf_get_ui(op : MPF*) : ULong
fun mpf_ceil = __gmpf_ceil(rop : MPF*, op : MPF*)
Expand Down
7 changes: 4 additions & 3 deletions src/bool.cr
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ struct Bool
self != other
end

# Returns a hash value for this boolean: 0 for `false`, 1 for `true`.
def hash
self ? 1 : 0
# Protocol method for generic hashing.
#
# Appends `1` for `true` and `0` for `false` to *hasher*, then returns the hasher.
def hash(hasher)
  bit = self ? 1 : 0
  hasher << bit
  hasher
end

# Returns `"true"` for `true` and `"false"` for `false`.
Expand Down
6 changes: 6 additions & 0 deletions src/char.cr
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ struct Char
ord
end

# Protocol method for generic hashing.
#
# Passes this char's codepoint (`ord`) to `hasher.raw` and returns *hasher*.
def hash(hasher)
hasher.raw ord
hasher
end

# Returns a Char that is one codepoint bigger than this char's codepoint.
#
# ```
Expand Down
5 changes: 3 additions & 2 deletions src/class.cr
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ class Class
to_s(io)
end

def hash
crystal_type_id
# Protocol method for generic hashing.
#
# Passes `crystal_type_id` to `hasher.raw` and returns *hasher*.
def hash(hasher)
hasher.raw(crystal_type_id)
hasher
end

def ==(other : Class)
Expand Down
15 changes: 9 additions & 6 deletions src/compiler/crystal/syntax/ast.cr
Original file line number Diff line number Diff line change
Expand Up @@ -1175,8 +1175,9 @@ module Crystal
self
end

def hash
0
# Protocol method for generic hashing.
#
# Appends the constant `0` to *hasher* and returns the hasher, so every
# instance contributes the same value.
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down Expand Up @@ -1545,8 +1546,9 @@ module Crystal
Self.new
end

def hash
0
# Protocol method for generic hashing.
#
# Appends the constant `0` to *hasher* and returns the hasher, so every
# instance contributes the same value.
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down Expand Up @@ -2025,8 +2027,9 @@ module Crystal
Underscore.new
end

def hash
0
# Protocol method for generic hashing.
#
# Appends the constant `0` to *hasher* and returns the hasher, so every
# instance contributes the same value.
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down
7 changes: 4 additions & 3 deletions src/enum.cr
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,10 @@ struct Enum
value == other.value
end

# Returns a hash value. This is the hash of the underlying value.
def hash
value.hash
# Protocol method for generic hashing.
#
# Passes the enum's underlying `value` to `hasher.raw` and returns *hasher*.
def hash(hasher)
hasher.raw(value)
hasher
end

# Iterates each values in a Flags Enum.
Expand Down
1 change: 1 addition & 0 deletions src/event/signal_handler.cr
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
require "c/signal"
require "c/unistd"
require "signal"

# :nodoc:
# Singleton that runs Signal events (libevent2) in it's own Fiber.
Expand Down
62 changes: 54 additions & 8 deletions src/float.cr
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require "c/stdio"
require "c/string"
require "./float/printer"
require "./number/hash_normalize"

# Float is the base type of all floating point numbers.
#
Expand Down Expand Up @@ -148,13 +149,33 @@ struct Float32
Printer.print(self, io)
end

def hash
unsafe_as(Int32)
end

def clone
self
end

include Number::HashNormalize

# Splits this float into an integer mantissa and a power-of-two exponent and
# yields the `{mantissa, exponent}` pair to `float_normalize_wrap`.
#
# NOTE(review): `float_normalize_wrap` and `float_normalize_reference` are not
# visible here; presumably they come from `Number::HashNormalize` — confirm.
def hash_normalize
  float_normalize_wrap do
    # Compile-time flags are queried with the `flag?` macro method; every
    # architecture in the list must use it (there is no `flag` macro method).
    {% if flag?(:x86) || flag?(:x86_64) || flag?(:arm) || flag?(:aarch64) %}
      # Fast path: it should work on every architecture where the endianness
      # of Float32 and Int32 matches and floats are IEEE 754.
      bits = unsafe_as(Int32)
      exp = ((bits >> 23) & 0xff) - 127
      mantissa = bits & ((1 << 23) - 1)
      if exp > -127
        # Normal number: set the implicit leading bit and shift the exponent
        # so that the mantissa can be treated as an integer.
        exp -= 23
        mantissa |= 1 << 23
      else
        # Subnormal: there is no implicit leading bit.
        exp -= 22
      end
      {mantissa, exp}
    {% else %}
      float_normalize_reference
    {% end %}
  end
end
end

struct Float64
Expand Down Expand Up @@ -206,11 +227,36 @@ struct Float64
Printer.print(self, io)
end

def hash
unsafe_as(Int64)
end

def clone
self
end

include Number::HashNormalize

# Splits this float into an integer mantissa and a power-of-two exponent and
# yields the `{mantissa, exponent}` pair to `float_normalize_wrap`.
#
# NOTE(review): `float_normalize_wrap` and `float_normalize_reference` are not
# visible here; presumably they come from `Number::HashNormalize` — confirm.
def hash_normalize
  float_normalize_wrap do
    # Compile-time flags are queried with the `flag?` macro method; every
    # architecture in the list must use it (there is no `flag` macro method).
    {% if flag?(:x86) || flag?(:x86_64) || flag?(:arm) || flag?(:aarch64) %}
      # Fast path: it should work on every architecture where the endianness
      # of Float64 and Int64 matches and floats are IEEE 754.
      bits = unsafe_as(Int64)
      exp = ((bits >> 52) & 0x7ff) - 1023
      mantissa = bits & ((1_u64 << 52) - 1)
      if exp > -1023
        # Normal number: set the implicit leading bit and shift the exponent
        # so that the mantissa can be treated as an integer.
        exp -= 52
        mantissa |= 1_u64 << 52
      else
        # Subnormal: there is no implicit leading bit.
        exp -= 51
      end

      # With 31-bit hashes the mantissa is reduced modulo HASH_MODULUS here.
      {% if HASH_BITS == 31 %}
        mantissa %= HASH_MODULUS
      {% end %}

      {mantissa, exp}
    {% else %}
      float_normalize_reference
    {% end %}
  end
end
end
Loading

0 comments on commit 3856136

Please sign in to comment.