diff --git a/src/Distances.jl b/src/Distances.jl index b447ee5..c8d5268 100644 --- a/src/Distances.jl +++ b/src/Distances.jl @@ -97,6 +97,15 @@ export rmsd, nrmsd +if VERSION < v"1.2-" + import Base: has_offset_axes + require_one_based_indexing(A...) = + !has_offset_axes(A...) || + throw(ArgumentError("offset arrays are not supported but got an array with index other than 1")) +else + import Base: require_one_based_indexing +end + include("common.jl") include("generic.jl") include("metrics.jl") diff --git a/src/bhattacharyya.jl b/src/bhattacharyya.jl index 60a7ae1..f46e1aa 100644 --- a/src/bhattacharyya.jl +++ b/src/bhattacharyya.jl @@ -9,39 +9,54 @@ struct HellingerDist <: Metric end # Bhattacharyya coefficient -function bhattacharyya_coeff(a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} - if length(a) != length(b) - throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) +function bhattacharyya_coeff(a, b) + n = length(a) + if n != length(b) + throw(DimensionMismatch("first argument has length $n which does not match the length of the second, $(length(b)).")) end + sqab, asum, bsum = _bhattacharyya_coeff(a, b) + # We must normalize since we cannot assume that the vectors are normalized to probability vectors. + return sqab / sqrt(asum * bsum) +end - n = length(a) +@inline function _bhattacharyya_coeff(a, b) + Ta = _eltype(a) + Tb = _eltype(b) + T = typeof(sqrt(zero(promote_type(Ta, Tb)))) sqab = zero(T) - # We must normalize since we cannot assume that the vectors are normalized to probability vectors. - asum = zero(T) - bsum = zero(T) + asum = zero(Ta) + bsum = zero(Tb) + + for (ai, bi) in zip(a, b) + sqab += sqrt(ai * bi) + asum += ai + bsum += bi + end + return sqab, asum, bsum +end +@inline function _bhattacharyya_coeff(a::AbstractVector{Ta}, b::AbstractVector{Tb}) where {Ta<:Number,Tb<:Number} + T = typeof(sqrt(oneunit(Ta)*oneunit(Tb))) + sqab = zero(T) + asum = zero(Ta) + bsum = zero(Tb) - @simd for i = 1:n + @simd for i in eachindex(a, b) @inbounds ai = a[i] @inbounds bi = b[i] sqab += sqrt(ai * bi) asum += ai bsum += bi end - - sqab / sqrt(asum * bsum) + return sqab, asum, bsum end -bhattacharyya_coeff(a::T, b::T) where {T <: Number} = throw("Bhattacharyya coefficient cannot be calculated for scalars") - # Faster pair- and column-wise versions TBD... # Bhattacharyya distance -(::BhattacharyyaDist)(a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} = -log(bhattacharyya_coeff(a, b)) -(::BhattacharyyaDist)(a::T, b::T) where {T <: Number} = throw("Bhattacharyya distance cannot be calculated for scalars") +(::BhattacharyyaDist)(a, b) = -log(bhattacharyya_coeff(a, b)) bhattacharyya(a, b) = BhattacharyyaDist()(a, b) # Hellinger distance -(::HellingerDist)(a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} = sqrt(1 - bhattacharyya_coeff(a, b)) -(::HellingerDist)(a::T, b::T) where {T <: Number} = throw("Hellinger distance cannot be calculated for scalars") +(::HellingerDist)(a, b) = sqrt(1 - bhattacharyya_coeff(a, b)) hellinger(a, b) = HellingerDist()(a, b) diff --git a/src/bregman.jl b/src/bregman.jl index afaa2ef..483db08 100644 --- a/src/bregman.jl +++ b/src/bregman.jl @@ -22,26 +22,26 @@ end Bregman(F, ∇) = Bregman(F, ∇, LinearAlgebra.dot) # Evaluation fuction -function (dist::Bregman)(p::AbstractVector, q::AbstractVector) +function (dist::Bregman)(p, q) # Create cache vals. 
-    FP_val = dist.F(p);
-    FQ_val = dist.F(q);
-    DQ_val = dist.∇(q);
-    p_size = size(p);
+    FP_val = dist.F(p)
+    FQ_val = dist.F(q)
+    DQ_val = dist.∇(q)
+    p_size = length(p)
     # Check F codomain.
     if !(isa(FP_val, Real) && isa(FQ_val, Real))
         throw(ArgumentError("F Codomain Error: F doesn't map the vectors to real numbers"))
     end
     # Check vector size.
-    if !(p_size == size(q))
+    if p_size != length(q)
         throw(DimensionMismatch("The vector p ($(size(p))) and q ($(size(q))) are different sizes."))
     end
     # Check gradient size.
-    if !(size(DQ_val) == p_size)
+    if length(DQ_val) != p_size
         throw(DimensionMismatch("The gradient result is not the same size as p and q"))
     end
     # Return the Bregman divergence.
-    return FP_val - FQ_val - dist.inner(DQ_val, p-q);
+    return FP_val - FQ_val - dist.inner(DQ_val, p .- q)
 end

 # Convenience function.
diff --git a/src/common.jl b/src/common.jl
index 2e89b3f..675239c 100644
--- a/src/common.jl
+++ b/src/common.jl
@@ -12,6 +12,12 @@ function get_common_ncols(a::AbstractMatrix, b::AbstractMatrix)
     return na
 end

+function get_common_length(a, b)
+    n = length(a)
+    length(b) == n || throw(DimensionMismatch("The lengths of a and b must match."))
+    return n
+end
+
 function get_pairwise_dims(r::AbstractMatrix, a::AbstractMatrix, b::AbstractMatrix)
     ma, na = size(a)
     mb, nb = size(b)
diff --git a/src/generic.jl b/src/generic.jl
index 1af118e..11a51a1 100644
--- a/src/generic.jl
+++ b/src/generic.jl
@@ -26,49 +26,124 @@ evaluate(dist::PreMetric, a, b) = dist(a, b)

 # Generic functions

 """
-    result_type(dist::PreMetric, Ta::Type, Tb::Type) -> T
-    result_type(dist::PreMetric, a::AbstractArray, b::AbstractArray) -> T
+    result_type(dist, Ta::Type, Tb::Type) -> T
+    result_type(dist, a, b) -> T

-Infer the result type of metric `dist` with input type `Ta` and `Tb`, or input
-data `a` and `b`.
+Infer the result type of metric `dist` with input types `Ta` and `Tb`, or the
+element types of iterators `a` and `b`.
 """
-result_type(::PreMetric, ::Type, ::Type) = Float64 # fallback
-result_type(dist::PreMetric, a::AbstractArray, b::AbstractArray) = result_type(dist, eltype(a), eltype(b))
+result_type(dist, a, b) = result_type(dist, _eltype(a), _eltype(b))
+result_type(f, a::Type, b::Type) = typeof(f(oneunit(a), oneunit(b))) # don't require `PreMetric` subtyping
+
+_eltype(a) = __eltype(Base.IteratorEltype(a), a)
+_eltype(::Type{T}) where {T} = eltype(T) === T ? T : _eltype(eltype(T))
+
+__eltype(::Base.HasEltype, a) = _eltype(eltype(a))
+__eltype(::Base.EltypeUnknown, a) = _eltype(typeof(first(a)))
+
 # Generic column-wise evaluation

+"""
+    colwise!(r::AbstractArray, metric::PreMetric, a, b)
+
+Compute distances between corresponding elements of the iterable collections
+`a` and `b` according to distance `metric`, and store the result in `r`.
+
+`a` and `b` must have the same number of elements; `r` must be an array of length
+`length(a) == length(b)`.
+"""
+function colwise!(r::AbstractArray, metric::PreMetric, a, b)
+    require_one_based_indexing(r)
+    n = length(a)
+    length(b) == n || throw(DimensionMismatch("iterators have different lengths"))
+    length(r) == n || throw(DimensionMismatch("Incorrect size of r."))
+    @inbounds for (j, ab) in enumerate(zip(a, b))
+        r[j] = metric(ab...)
+    end
+    r
+end
+
 function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractVector, b::AbstractMatrix)
+    require_one_based_indexing(r)
     n = size(b, 2)
     length(r) == n || throw(DimensionMismatch("Incorrect size of r."))
-    @inbounds for j = 1:n
-        r[j] = metric(a, view(b, :, j))
+    @inbounds for (rj, bj) in enumerate(axes(b, 2))
+        r[rj] = metric(a, view(b, :, bj))
     end
     r
 end

 function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractMatrix, b::AbstractVector)
+    require_one_based_indexing(r)
     n = size(a, 2)
     length(r) == n || throw(DimensionMismatch("Incorrect size of r."))
-    @inbounds for j = 1:n
-        r[j] = metric(view(a, :, j), b)
+    @inbounds for (rj, aj) in enumerate(axes(a, 2))
+        r[rj] = metric(view(a, :, aj), b)
     end
     r
 end

+"""
+    colwise!(r::AbstractArray, metric::PreMetric,
+             a::AbstractMatrix, b::AbstractMatrix)
+    colwise!(r::AbstractArray, metric::PreMetric,
+             a::AbstractVector, b::AbstractMatrix)
+    colwise!(r::AbstractArray, metric::PreMetric,
+             a::AbstractMatrix, b::AbstractVector)
+
+Compute distances between corresponding columns of `a` and `b` according
+to distance `metric`, and store the result in `r`. Exactly one of `a` or `b`
+can be a vector, in which case the distances between that vector and all columns
+of the other matrix are computed.
+
+`a` and `b` must have the same number of columns if neither of the two is a
+vector. `r` must be an array of length `max(size(a, 2), size(b, 2))`.
+
+!!! note
+    If both `a` and `b` are vectors, the generic, iterator-based method of
+    `colwise` applies.
+"""
 function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix)
+    require_one_based_indexing(r, a, b)
     n = get_common_ncols(a, b)
     length(r) == n || throw(DimensionMismatch("Incorrect size of r."))
-    @inbounds for j = 1:n
+    @inbounds for j in 1:n
         r[j] = metric(view(a, :, j), view(b, :, j))
     end
     r
 end

-function colwise!(r::AbstractArray, metric::SemiMetric, a::AbstractMatrix, b::AbstractVector)
-    colwise!(r, metric, b, a)
+"""
+    colwise(metric::PreMetric, a, b)
+
+Compute distances between corresponding elements of the iterable collections
+`a` and `b` according to distance `metric`.
+
+`a` and `b` must have the same number of elements (`length(a) == length(b)`).
+"""
+function colwise(metric::PreMetric, a, b)
+    n = get_common_length(a, b)
+    r = Vector{result_type(metric, a, b)}(undef, n)
+    colwise!(r, metric, a, b)
 end

+"""
+    colwise(metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix)
+    colwise(metric::PreMetric, a::AbstractVector, b::AbstractMatrix)
+    colwise(metric::PreMetric, a::AbstractMatrix, b::AbstractVector)
+
+Compute distances between corresponding columns of `a` and `b` according to
+distance `metric`. Exactly one of `a` or `b` can be a vector, in which case the
+distances between that vector and all columns of the other matrix are computed.
+
+`a` and `b` must have the same number of columns if neither of the two is a
+vector.
+
+!!! note
+    If both `a` and `b` are vectors, the generic, iterator-based method of
+    `colwise` applies.
+""" function colwise(metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix) n = get_common_ncols(a, b) r = Vector{result_type(metric, a, b)}(undef, n) @@ -90,8 +165,20 @@ end # Generic pairwise evaluation +function _pairwise!(r::AbstractMatrix, metric::PreMetric, a, b=a) + require_one_based_indexing(r) + na = length(a) + nb = length(b) + size(r) == (na, nb) || throw(DimensionMismatch("Incorrect size of r.")) + @inbounds for (j, bj) in enumerate(b), (i, ai) in enumerate(a) + r[i, j] = metric(ai, bj) + end + r +end + function _pairwise!(r::AbstractMatrix, metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix=a) + require_one_based_indexing(r, a, b) na = size(a, 2) nb = size(b, 2) size(r) == (na, nb) || throw(DimensionMismatch("Incorrect size of r.")) @@ -104,18 +191,36 @@ function _pairwise!(r::AbstractMatrix, metric::PreMetric, r end +function _pairwise!(r::AbstractMatrix, metric::SemiMetric, a) + require_one_based_indexing(r) + n = length(a) + size(r) == (n, n) || throw(DimensionMismatch("Incorrect size of r.")) + itr = Iterators.product(enumerate(a), enumerate(a)) + @inbounds for ((i, ai), (j, aj)) in itr + r[i, j] = if i > j + metric(ai, aj) + elseif i == j + 0 + else + r[j, i] + end + end + r +end + function _pairwise!(r::AbstractMatrix, metric::SemiMetric, a::AbstractMatrix) + require_one_based_indexing(r) n = size(a, 2) size(r) == (n, n) || throw(DimensionMismatch("Incorrect size of r.")) @inbounds for j = 1:n + for i = 1:(j - 1) + r[i, j] = r[j, i] # leveraging the symmetry of SemiMetric + end + r[j, j] = 0 aj = view(a, :, j) for i = (j + 1):n r[i, j] = metric(view(a, :, i), aj) end - r[j, j] = 0 - for i = 1:(j - 1) - r[i, j] = r[j, i] # leveraging the symmetry of SemiMetric - end end r end @@ -140,7 +245,7 @@ in `a` and `b` according to distance `metric`, and store the result in `r`. If a single matrix `a` is provided, compute distances between its rows or columns. `a` and `b` must have the same numbers of columns if `dims=1`, or of rows if `dims=2`. -`r` must be a square matrix with size `size(a, dims) == size(b, dims)`. +`r` must be a matrix with size `size(a, dims) × size(b, dims)`. """ function pairwise!(r::AbstractMatrix, metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix; @@ -161,7 +266,7 @@ function pairwise!(r::AbstractMatrix, metric::PreMetric, size(r) == (na, nb) || throw(DimensionMismatch("Incorrect size of r (got $(size(r)), expected $((na, nb))).")) if dims == 1 - _pairwise!(r, metric, transpose(a), transpose(b)) + _pairwise!(r, metric, permutedims(a), permutedims(b)) else _pairwise!(r, metric, a, b) end @@ -179,12 +284,24 @@ function pairwise!(r::AbstractMatrix, metric::PreMetric, a::AbstractMatrix; size(r) == (n, n) || throw(DimensionMismatch("Incorrect size of r (got $(size(r)), expected $((n, n))).")) if dims == 1 - _pairwise!(r, metric, transpose(a)) + _pairwise!(r, metric, permutedims(a)) else _pairwise!(r, metric, a) end end +""" + pairwise!(r::AbstractMatrix, metric::PreMetric, a, b=a) + +Compute distances between each element of collection `a` and each element of +collection `b` according to distance `metric`, and store the result in `r`. +If a single iterable `a` is provided, compute distances between its elements. + +`r` must be a matrix with size `length(a) × length(b)`. 
+""" +pairwise!(r::AbstractMatrix, metric::PreMetric, a, b) = _pairwise!(r, metric, a, b) +pairwise!(r::AbstractMatrix, metric::PreMetric, a) = _pairwise!(r, metric, a) + """ pairwise(metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix=a; dims) @@ -212,3 +329,23 @@ function pairwise(metric::PreMetric, a::AbstractMatrix; r = Matrix{result_type(metric, a, a)}(undef, n, n) pairwise!(r, metric, a, dims=dims) end + +""" + pairwise(metric::PreMetric, a, b=a) + +Compute distances between each element of collection `a` and each element of +collection `b` according to distance `metric`. If a single iterable `a` is +provided, compute distances between its elements. +""" +function pairwise(metric::PreMetric, a, b) + m = length(a) + n = length(b) + r = Matrix{result_type(metric, a, b)}(undef, m, n) + _pairwise!(r, metric, a, b) +end + +function pairwise(metric::PreMetric, a) + n = length(a) + r = Matrix{result_type(metric, a, a)}(undef, n, n) + _pairwise!(r, metric, a) +end diff --git a/src/haversine.jl b/src/haversine.jl index 8a08cff..f73e8d6 100644 --- a/src/haversine.jl +++ b/src/haversine.jl @@ -10,20 +10,17 @@ struct Haversine{T<:Real} <: Metric radius::T end -const VecOrLengthTwoTuple{T} = Union{AbstractVector{T}, NTuple{2, T}} - -function (dist::Haversine)(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple) +function (dist::Haversine)(x, y) length(x) == length(y) == 2 || haversine_error() - @inbounds begin - # longitudes - Δλ = deg2rad(y[1] - x[1]) - - # latitudes - φ₁ = deg2rad(x[2]) - φ₂ = deg2rad(y[2]) - end + @inbounds x1, x2 = x + @inbounds y1, y2 = y + # longitudes + Δλ = deg2rad(y1 - x1) + # latitudes + φ₁ = deg2rad(x2) + φ₂ = deg2rad(y2) Δφ = φ₂ - φ₁ # haversine formula @@ -33,6 +30,6 @@ function (dist::Haversine)(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple) 2 * dist.radius * asin( min(√a, one(a)) ) # take care of floating point errors end -haversine(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple, radius::Real) = Haversine(radius)(x, y) +haversine(x, y, radius::Real) = Haversine(radius)(x, y) @noinline haversine_error() = throw(ArgumentError("expected both inputs to have length 2 in Haversine distance")) diff --git a/src/metrics.jl b/src/metrics.jl index 3ddd284..43532fb 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -52,7 +52,7 @@ julia> pairwise(Euclidean(1e-12), x, x) """ Euclidean() = Euclidean(0) -struct WeightedEuclidean{W <: AbstractArray{<:Real}} <: UnionMetric +struct WeightedEuclidean{W} <: UnionMetric weights::W end @@ -74,7 +74,7 @@ julia> evaluate(PeriodicEuclidean(L), x, y) 0.25 ``` """ -struct PeriodicEuclidean{W <: AbstractArray{<: Real}} <: UnionMetric +struct PeriodicEuclidean{W} <: UnionMetric periods::W end @@ -90,14 +90,14 @@ see [`Euclidean`](@ref). 
""" SqEuclidean() = SqEuclidean(0) -struct WeightedSqEuclidean{W <: AbstractArray{<:Real}} <: UnionSemiMetric +struct WeightedSqEuclidean{W} <: UnionSemiMetric weights::W end struct Chebyshev <: UnionMetric end struct Cityblock <: UnionMetric end -struct WeightedCityblock{W <: AbstractArray{<:Real}} <: UnionMetric +struct WeightedCityblock{W} <: UnionMetric weights::W end @@ -108,13 +108,13 @@ struct RogersTanimoto <: UnionMetric end struct Minkowski{T <: Real} <: UnionMetric p::T end -struct WeightedMinkowski{W <: AbstractArray{<:Real},T <: Real} <: UnionMetric +struct WeightedMinkowski{W,T <: Real} <: UnionMetric weights::W p::T end struct Hamming <: UnionMetric end -struct WeightedHamming{W <: AbstractArray{<:Real}} <: UnionMetric +struct WeightedHamming{W} <: UnionMetric weights::W end @@ -214,19 +214,31 @@ for dist in weightedmetrics end result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}) where {Ta,Tb} = - result_type(dist, Ta, Tb, parameters(dist)) + result_type(dist, _eltype(Ta), _eltype(Tb), parameters(dist)) result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}, ::Nothing) where {Ta,Tb} = typeof(_evaluate(dist, oneunit(Ta), oneunit(Tb))) result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}, p) where {Ta,Tb} = - typeof(_evaluate(dist, oneunit(Ta), oneunit(Tb), oneunit(eltype(p)))) + typeof(_evaluate(dist, oneunit(Ta), oneunit(Tb), oneunit(_eltype(p)))) -Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray) +Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b) _evaluate(d, a, b, parameters(d)) end -_evaluate(dist::UnionMetrics, a::Number, b::Number) = _evaluate(dist, a, b, parameters(dist)) # breaks the implementation into eval_start, eval_op, eval_reduce and eval_end +Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, ::Nothing) + @boundscheck if length(a) != length(b) + throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b)).")) + end + if length(a) == 0 + return zero(result_type(d, a, b)) + end + s = eval_start(d, a, b) + @inbounds for (ai, bi) in zip(a, b) + s = eval_reduce(d, s, eval_op(d, ai, bi)) + end + return eval_end(d, s) +end Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) @@ -243,9 +255,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b s = eval_reduce(d, s, eval_op(d, ai, bi)) end else - for (Ia, Ib) in zip(eachindex(a), eachindex(b)) - ai = a[Ia] - bi = b[Ib] + for (ai, bi) in zip(a, b) s = eval_reduce(d, s, eval_op(d, ai, bi)) end end @@ -253,6 +263,22 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b end end +Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p) + @boundscheck if length(a) != length(b) + throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b)).")) + end + @boundscheck if length(a) != length(p) + throw(DimensionMismatch("data collections have length $(length(a)) but parameters have length $(length(p)).")) + end + if length(a) == 0 + return zero(result_type(d, a, b)) + end + s = eval_start(d, a, b) + @inbounds for (ai, bi, pi) in zip(a, b, p) + s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) + end + return eval_end(d, s) +end 
Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) @@ -274,10 +300,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) end else - for (Ia, Ib, Ip) in zip(eachindex(a), eachindex(b), eachindex(p)) - ai = a[Ia] - bi = b[Ib] - pi = p[Ip] + for (ai, bi, pi) in zip(a, b, p) s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) end end @@ -291,28 +314,23 @@ function _evaluate(dist::UnionMetrics, a::Number, b::Number, p) eval_end(dist, eval_op(dist, a, b, first(p))) end -eval_start(d::UnionMetrics, a::AbstractArray, b::AbstractArray) = zero(result_type(d, a, b)) +eval_start(d::UnionMetrics, a, b) = zero(result_type(d, a, b)) eval_reduce(::UnionMetrics, s1, s2) = s1 + s2 -eval_end(d::UnionMetrics, s) = s +eval_end(::UnionMetrics, s) = s for M in (metrics..., weightedmetrics...) - @eval @inline (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b, parameters(dist)) - if M != SpanNormDist - @eval @inline (dist::$M)(a::Number, b::Number) = _evaluate(dist, a, b, parameters(dist)) - end + @eval @inline (dist::$M)(a, b) = _evaluate(dist, a, b, parameters(dist)) end # Euclidean @inline eval_op(::Euclidean, ai, bi) = abs2(ai - bi) eval_end(::Euclidean, s) = sqrt(s) -euclidean(a::AbstractArray, b::AbstractArray) = Euclidean()(a, b) -euclidean(a::Number, b::Number) = Euclidean()(a, b) +euclidean(a, b) = Euclidean()(a, b) # Weighted Euclidean @inline eval_op(::WeightedEuclidean, ai, bi, wi) = abs2(ai - bi) * wi eval_end(::WeightedEuclidean, s) = sqrt(s) -weuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedEuclidean(w)(a, b) -weuclidean(a::Number, b::Number, w::Real) = WeightedEuclidean([w])(a, b) +weuclidean(a, b, w) = WeightedEuclidean(w)(a, b) # PeriodicEuclidean @inline function eval_op(d::PeriodicEuclidean, ai, bi, p) @@ -322,68 +340,57 @@ weuclidean(a::Number, b::Number, w::Real) = WeightedEuclidean([w])(a, b) abs2(s3) end eval_end(::PeriodicEuclidean, s) = sqrt(s) -peuclidean(a::AbstractArray, b::AbstractArray, p::AbstractArray{<: Real}) = - PeriodicEuclidean(p)(a, b) -peuclidean(a::Number, b::Number, p::Real) = PeriodicEuclidean([p])(a, b) +peuclidean(a, b, p) = PeriodicEuclidean(p)(a, b) # SqEuclidean @inline eval_op(::SqEuclidean, ai, bi) = abs2(ai - bi) -sqeuclidean(a::AbstractArray, b::AbstractArray) = SqEuclidean()(a, b) -sqeuclidean(a::Number, b::Number) = SqEuclidean()(a, b) +sqeuclidean(a, b) = SqEuclidean()(a, b) # Weighted Squared Euclidean @inline eval_op(::WeightedSqEuclidean, ai, bi, wi) = abs2(ai - bi) * wi -wsqeuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedSqEuclidean(w)(a, b) -wsqeuclidean(a::Number, b::Number, w::Real) = WeightedSqEuclidean([w])(a, b) +wsqeuclidean(a, b, w) = WeightedSqEuclidean(w)(a, b) # Cityblock @inline eval_op(::Cityblock, ai, bi) = abs(ai - bi) -cityblock(a::AbstractArray, b::AbstractArray) = Cityblock()(a, b) -cityblock(a::Number, b::Number) = Cityblock()(a, b) +cityblock(a, b) = Cityblock()(a, b) # Weighted City Block @inline eval_op(::WeightedCityblock, ai, bi, wi) = abs((ai - bi) * wi) -wcityblock(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedCityblock(w)(a, b) -wcityblock(a::Number, b::Number, w::Real) = WeightedCityblock([w])(a, b) +wcityblock(a, b, w) = WeightedCityblock(w)(a, 
b) # Total variation @inline eval_op(::TotalVariation, ai, bi) = abs(ai - bi) eval_end(::TotalVariation, s) = s / 2 -totalvariation(a::AbstractArray, b::AbstractArray) = TotalVariation()(a, b) -totalvariation(a::Number, b::Number) = TotalVariation()(a, b) +totalvariation(a, b) = TotalVariation()(a, b) # Chebyshev @inline eval_op(::Chebyshev, ai, bi) = abs(ai - bi) @inline eval_reduce(::Chebyshev, s1, s2) = max(s1, s2) # if only NaN, will output NaN -Base.@propagate_inbounds eval_start(::Chebyshev, a::AbstractArray, b::AbstractArray) = abs(a[1] - b[1]) -chebyshev(a::AbstractArray, b::AbstractArray) = Chebyshev()(a, b) -chebyshev(a::Number, b::Number) = Chebyshev()(a, b) +Base.@propagate_inbounds eval_start(::Chebyshev, a, b) = abs(first(a) - first(b)) +chebyshev(a, b) = Chebyshev()(a, b) # Minkowski @inline eval_op(dist::Minkowski, ai, bi) = abs(ai - bi)^dist.p @inline eval_end(dist::Minkowski, s) = s^(1 / dist.p) -minkowski(a::AbstractArray, b::AbstractArray, p::Real) = Minkowski(p)(a, b) -minkowski(a::Number, b::Number, p::Real) = Minkowski(p)(a, b) +minkowski(a, b, p::Real) = Minkowski(p)(a, b) # Weighted Minkowski @inline eval_op(dist::WeightedMinkowski, ai, bi, wi) = abs(ai - bi)^dist.p * wi @inline eval_end(dist::WeightedMinkowski, s) = s^(1 / dist.p) -wminkowski(a::AbstractArray, b::AbstractArray, w::AbstractArray, p::Real) = WeightedMinkowski(w, p)(a, b) -wminkowski(a::Number, b::Number, w::Real, p::Real) = WeightedMinkowski([w], p)(a, b) +wminkowski(a, b, w, p::Real) = WeightedMinkowski(w, p)(a, b) # Hamming +result_type(::Hamming, ::Type, ::Type) = Int # fallback for Hamming @inline eval_op(::Hamming, ai, bi) = ai != bi ? 1 : 0 -hamming(a::AbstractArray, b::AbstractArray) = Hamming()(a, b) -hamming(a::Number, b::Number) = Hamming()(a, b) +hamming(a, b) = Hamming()(a, b) # WeightedHamming @inline eval_op(::WeightedHamming, ai, bi, wi) = ai != bi ? wi : zero(eltype(wi)) -whamming(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedHamming(w)(a, b) -whamming(a::Number, b::Number, w::Real) = WeightedHamming([w])(a, b) +whamming(a, b, w) = WeightedHamming(w)(a, b) # Cosine dist -@inline function eval_start(dist::CosineDist, a::AbstractArray, b::AbstractArray) +@inline function eval_start(dist::CosineDist, a, b) T = result_type(dist, a, b) zero(T), zero(T), zero(T) end @@ -397,32 +404,31 @@ function eval_end(::CosineDist, s) ab, a2, b2 = s max(1 - ab / (sqrt(a2) * sqrt(b2)), 0) end -cosine_dist(a::AbstractArray, b::AbstractArray) = CosineDist()(a, b) -cosine_dist(a::Number, b::Number) = CosineDist()(a, b) +cosine_dist(a, b) = CosineDist()(a, b) # CorrDist -_centralize(x::AbstractArray) = x .- mean(x) -(dist::CorrDist)(a::AbstractArray, b::AbstractArray) = CosineDist()(_centralize(a), _centralize(b)) +_centralize(x) = x .- mean(x) +(dist::CorrDist)(a, b) = CosineDist()(_centralize(a), _centralize(b)) (dist::CorrDist)(a::Number, b::Number) = CosineDist()(zero(mean(a)), zero(mean(b))) -corr_dist(a::AbstractArray, b::AbstractArray) = CorrDist()(a, b) -corr_dist(a::Number, b::Number) = CorrDist()(a, b) +corr_dist(a, b) = CorrDist()(a, b) # ChiSqDist @inline eval_op(::ChiSqDist, ai, bi) = (d = abs2(ai - bi) / (ai + bi); ifelse(ai != bi, d, zero(d))) -chisq_dist(a::AbstractArray, b::AbstractArray) = ChiSqDist()(a, b) +chisq_dist(a, b) = ChiSqDist()(a, b) # KLDivergence @inline eval_op(dist::KLDivergence, ai, bi) = ai > 0 ? 
ai * log(ai / bi) : zero(eval_op(dist, oneunit(ai), bi)) -kl_divergence(a::AbstractArray, b::AbstractArray) = KLDivergence()(a, b) +kl_divergence(a, b) = KLDivergence()(a, b) # GenKLDivergence @inline eval_op(dist::GenKLDivergence, ai, bi) = ai > 0 ? ai * log(ai / bi) - ai + bi : oftype(eval_op(dist, oneunit(ai), bi), bi) -gkl_divergence(a::AbstractArray, b::AbstractArray) = GenKLDivergence()(a, b) +gkl_divergence(a, b) = GenKLDivergence()(a, b) # RenyiDivergence -Base.@propagate_inbounds function eval_start(::RenyiDivergence, a::AbstractArray{T}, b::AbstractArray{T}) where {T <: Real} +Base.@propagate_inbounds function eval_start(::RenyiDivergence, a, b) + T = promote_type(_eltype(a), _eltype(b)) zero(T), zero(T), T(sum(a)), T(sum(b)) end @@ -466,7 +472,7 @@ function eval_end(dist::RenyiDivergence, s::Tuple{T,T,T,T}) where {T <: Real} end end -renyi_divergence(a::AbstractArray, b::AbstractArray, q::Real) = RenyiDivergence(q)(a, b) +renyi_divergence(a, b, q::Real) = RenyiDivergence(q)(a, b) # Combine docs with RenyiDivergence. Fetching the docstring with @doc causes # problems during package compilation; see # https://github.com/JuliaLang/julia/issues/31640 @@ -483,14 +489,15 @@ end tu = u > 0 ? u * log(u) : zero(log(one(T))) ta + tb - tu end -js_divergence(a::AbstractArray, b::AbstractArray) = JSDivergence()(a, b) +js_divergence(a, b) = JSDivergence()(a, b) # SpanNormDist -result_type(dist::SpanNormDist, a::AbstractArray, b::AbstractArray) = - typeof(eval_op(dist, oneunit(eltype(a)), oneunit(eltype(b)))) -Base.@propagate_inbounds function eval_start(::SpanNormDist, a::AbstractArray, b::AbstractArray) - a[1] - b[1], a[1] - b[1] +result_type(dist::SpanNormDist, ::Type{Ta}, ::Type{Tb}) where {Ta,Tb} = + typeof(eval_op(dist, oneunit(Ta), oneunit(Tb))) +Base.@propagate_inbounds function eval_start(::SpanNormDist, a, b) + d = first(a) - first(b) + return d, d end eval_op(::SpanNormDist, ai, bi) = ai - bi @inline function eval_reduce(::SpanNormDist, s1, s2) @@ -505,13 +512,12 @@ end eval_end(::SpanNormDist, s) = s[2] - s[1] (::SpanNormDist)(a::Number, b::Number) = zero(promote_type(typeof(a), typeof(b))) -spannorm_dist(a::AbstractArray, b::AbstractArray) = SpanNormDist()(a, b) -spannorm_dist(a::Number, b::Number) = SpanNormDist()(a, b) +spannorm_dist(a, b) = SpanNormDist()(a, b) # Jaccard -@inline eval_start(::Jaccard, a::AbstractArray{Bool}, b::AbstractArray{Bool}) = 0, 0 -@inline function eval_start(dist::Jaccard, a::AbstractArray, b::AbstractArray) +@inline eval_start(::Jaccard, ::AbstractArray{Bool}, ::AbstractArray{Bool}) = 0, 0 +@inline function eval_start(dist::Jaccard, a, b) T = result_type(dist, a, b) zero(T), zero(T) end @@ -529,12 +535,11 @@ end @inbounds v = 1 - (a[1] / a[2]) return v end -jaccard(a::AbstractArray, b::AbstractArray) = Jaccard()(a, b) -jaccard(a::Number, b::Number) = Jaccard()(a, b) +jaccard(a, b) = Jaccard()(a, b) # BrayCurtis -@inline function eval_start(dist::BrayCurtis, a::AbstractArray, b::AbstractArray) +@inline function eval_start(dist::BrayCurtis, a, b) T = result_type(dist, a, b) zero(T), zero(T) end @@ -552,12 +557,11 @@ end @inbounds v = a[1] / a[2] return v end -braycurtis(a::AbstractArray, b::AbstractArray) = BrayCurtis()(a, b) -braycurtis(a::Number, b::Number) = BrayCurtis()(a, b) +braycurtis(a, b) = BrayCurtis()(a, b) # Tanimoto -@inline eval_start(::RogersTanimoto, a::AbstractArray, b::AbstractArray) = 0, 0, 0, 0 +@inline eval_start(::RogersTanimoto, _, _) = 0, 0, 0, 0 @inline function eval_op(::RogersTanimoto, s1, s2) tt = s1 && s2 tf = s1 && 
!s2 @@ -579,7 +583,7 @@ end @inbounds denominator = a[1] + a[4] + 2(a[2] + a[3]) numerator / denominator end -rogerstanimoto(a::AbstractArray{T}, b::AbstractArray{T}) where {T <: Bool} = RogersTanimoto()(a, b) +rogerstanimoto(a, b) = RogersTanimoto()(a, b) # Deviations diff --git a/test/test_dists.jl b/test/test_dists.jl index 53c4b9a..9dbda1e 100644 --- a/test/test_dists.jl +++ b/test/test_dists.jl @@ -1,5 +1,26 @@ # Unit tests for Distances +struct FooDist <: PreMetric end # Julia 1.0 Compat: struct definition must be put in global scope + +@testset "result_type" begin + foodist(a, b) = a + b + (::FooDist)(a, b) = foodist(a, b) + for (Ta, Tb) in [ + (Int, Int), + (Int, Float64), + (Float32, Float32), + (Float32, Float64), + ] + A, B = rand(Ta, 2, 3), rand(Tb, 2, 3) + @test result_type(FooDist(), A, B) == result_type(FooDist(), Ta, Tb) + @test result_type(foodist, A, B) == result_type(foodist, Ta, Tb) == typeof(foodist(oneunit(Ta), oneunit(Tb))) + + a, b = rand(Ta), rand(Tb) + @test result_type(FooDist(), a, b) == result_type(FooDist(), Ta, Tb) + @test result_type(foodist, a, b) == result_type(foodist, Ta, Tb) == typeof(foodist(oneunit(Ta), oneunit(Tb))) + end +end + function test_metricity(dist, x, y, z) @testset "Test metricity of $(typeof(dist))" begin @test dist(x, y) == evaluate(dist, x, y) @@ -86,14 +107,14 @@ end test_metricity(BrayCurtis(), a, b, c) test_metricity(Jaccard(), a, b, c) - w = rand(T, n) - - test_metricity(PeriodicEuclidean(w), x, y, z) - test_metricity(WeightedSqEuclidean(w), x, y, z) - test_metricity(WeightedEuclidean(w), x, y, z) - test_metricity(WeightedCityblock(w), x, y, z) - test_metricity(WeightedMinkowski(w, 2.5), x, y, z) - test_metricity(WeightedHamming(w), a, b, c) + for w in (rand(T, n), (rand(T, n)...,)) + test_metricity(PeriodicEuclidean(w), x, y, z) + test_metricity(WeightedSqEuclidean(w), x, y, z) + test_metricity(WeightedEuclidean(w), x, y, z) + test_metricity(WeightedCityblock(w), x, y, z) + test_metricity(WeightedMinkowski(w, 2.5), x, y, z) + test_metricity(WeightedHamming(w), a, b, c) + end Q = rand(T, n, n) Q = Q * Q' # make sure Q is positive-definite @@ -130,10 +151,15 @@ end @test chebyshev(a, b) == 1.0 @test braycurtis(a, b) === 1/3 @test minkowski(a, b, 2) == 1.0 - @test hamming(a, b) == 1 + @test hamming(a, b) === 1 + @test hamming("martha", "marhta") === 2 + @test hamming("es an ", " vs an") === 6 + @test hamming("", "") === 0 @test peuclidean(a, b, 0.5) === 0.0 @test peuclidean(a, b, 2) === 1.0 @test cosine_dist(a, b) === 0.0 + @test bhattacharyya(a, b) === bhattacharyya([a], [b]) === -0.0 + @test bhattacharyya(a, b) === bhattacharyya((a,), (b,)) @test isnan(corr_dist(a, b)) @test spannorm_dist(a, b) === 0 @@ -142,53 +168,59 @@ end @test rogerstanimoto(bt, bf) == 4.0 / 5.0 @test braycurtis(bt, bf) == 0.5 - w = 2 - @test wsqeuclidean(a, b, w) === 2 - @test weuclidean(a, b, w) === sqrt(2) - @test wcityblock(a, b, w) === 2 - @test wminkowski(a, b, w, 2) === sqrt(2) - @test whamming(a, b, w) === 2 + for w in (2, (2,)) + @test wsqeuclidean(a, b, w) === 2 + @test weuclidean(a, b, w) === sqrt(2) + @test wcityblock(a, b, w) === 2 + @test wminkowski(a, b, w, 2) === sqrt(2) + @test whamming(a, b, w) === 2 + end for T in (Float64, F64) - for (_x, _y) in (([4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0]), ([4.0, 5.0, 6.0, 7.0], [3. 8.; 9. 
1.0])) x, y = T.(_x), T.(_y) - @test sqeuclidean(x, y) == 57.0 - @test euclidean(x, y) == sqrt(57.0) - @test jaccard(x, y) == 13.0 / 28 - @test cityblock(x, y) == 13.0 - @test totalvariation(x, y) == 6.5 - @test chebyshev(x, y) == 6.0 - @test braycurtis(x, y) == 1.0 - (30.0 / 43.0) - @test minkowski(x, y, 2) == sqrt(57.0) - @test peuclidean(x, y, fill(10.0, 4)) == sqrt(37) - @test peuclidean(x - vec(y), zero(y), fill(10.0, 4)) == peuclidean(x, y, fill(10.0, 4)) - @test peuclidean(x, y, [10.0, 10.0, 10.0, Inf]) == sqrt(57) - @test_throws DimensionMismatch cosine_dist(1.0:2, 1.0:3) - @test cosine_dist(x, y) ≈ (1.0 - 112. / sqrt(19530.0)) - x_int, y_int = Int64.(x), Int64.(y) - @test cosine_dist(x_int, y_int) == (1.0 - 112.0 / sqrt(19530.0)) - @test corr_dist(x, y) ≈ cosine_dist(x .- mean(x), vec(y) .- mean(y)) - @test corr_dist(OffsetVector(x, -1:length(x)-2), y) == corr_dist(x, y) - @test chisq_dist(x, y) == sum((x - vec(y)).^2 ./ (x + vec(y))) - @test spannorm_dist(x, y) == maximum(x - vec(y)) - minimum(x - vec(y)) - - @test gkl_divergence(x, y) ≈ sum(i -> x[i] * log(x[i] / y[i]) - x[i] + y[i], 1:length(x)) - - @test meanad(x, y) ≈ mean(Float64[abs(x[i] - y[i]) for i in 1:length(x)]) - @test msd(x, y) ≈ mean(Float64[abs2(x[i] - y[i]) for i in 1:length(x)]) - @test rmsd(x, y) ≈ sqrt(msd(x, y)) - @test nrmsd(x, y) ≈ sqrt(msd(x, y)) / (maximum(x) - minimum(x)) - - w = ones(4) - @test sqeuclidean(x, y) ≈ wsqeuclidean(x, y, w) - - w = rand(Float64, size(x)) - @test wsqeuclidean(x, y, w) ≈ dot((x - vec(y)).^2, w) - @test weuclidean(x, y, w) == sqrt(wsqeuclidean(x, y, w)) - @test wcityblock(x, y, w) ≈ dot(abs.(x - vec(y)), w) - @test wminkowski(x, y, w, 2) ≈ weuclidean(x, y, w) + for (x, y) in ((x, y), + ((Iterators.take(x, 4), Iterators.take(y, 4))), # iterator + (((x[i] for i in 1:length(x)), (y[i] for i in 1:length(y)))), # generator + ) + xc, yc = collect(x), collect(y) + @test sqeuclidean(x, y) == 57.0 + @test euclidean(x, y) == sqrt(57.0) + @test jaccard(x, y) == 13.0 / 28 + @test cityblock(x, y) == 13.0 + @test totalvariation(x, y) == 6.5 + @test chebyshev(x, y) == 6.0 + @test braycurtis(x, y) == 1.0 - (30.0 / 43.0) + @test minkowski(x, y, 2) == sqrt(57.0) + @test peuclidean(x, y, fill(10.0, 4)) == sqrt(37) + @test peuclidean(xc - vec(yc), zero(yc), fill(10.0, 4)) == peuclidean(x, y, fill(10.0, 4)) + @test peuclidean(x, y, [10.0, 10.0, 10.0, Inf]) == sqrt(57) + @test_throws DimensionMismatch cosine_dist(1.0:2, 1.0:3) + @test cosine_dist(x, y) ≈ (1.0 - 112. 
/ sqrt(19530.0)) + x_int, y_int = Int64.(x), Int64.(y) + @test cosine_dist(x_int, y_int) == (1.0 - 112.0 / sqrt(19530.0)) + @test corr_dist(x, y) ≈ cosine_dist(x .- mean(x), vec(yc) .- mean(y)) + @test corr_dist(OffsetVector(xc, -1:length(xc)-2), yc) == corr_dist(x, y) + @test chisq_dist(x, y) == sum((xc - vec(yc)).^2 ./ (xc + vec(yc))) + @test spannorm_dist(x, y) == maximum(xc - vec(yc)) - minimum(xc - vec(yc)) + + @test gkl_divergence(x, y) ≈ sum(i -> xc[i] * log(xc[i] / yc[i]) - xc[i] + yc[i], 1:length(x)) + + @test meanad(x, y) ≈ mean(Float64[abs(xc[i] - yc[i]) for i in 1:length(x)]) + @test msd(x, y) ≈ mean(Float64[abs2(xc[i] - yc[i]) for i in 1:length(x)]) + @test rmsd(x, y) ≈ sqrt(msd(x, y)) + @test nrmsd(x, y) ≈ sqrt(msd(x, y)) / (maximum(x) - minimum(x)) + + w = ones(4) + @test sqeuclidean(x, y) ≈ wsqeuclidean(x, y, w) + + w = rand(Float64, length(x)) + @test wsqeuclidean(x, y, w) ≈ dot((xc - vec(yc)).^2, w) + @test weuclidean(x, y, w) == sqrt(wsqeuclidean(x, y, w)) + @test wcityblock(x, y, w) ≈ dot(abs.(xc - vec(yc)), w) + @test wminkowski(x, y, w, 2) ≈ weuclidean(x, y, w) + end end # Test ChiSq doesn't give NaN at zero @@ -228,22 +260,27 @@ end klv += p[i] * log(p[i] / q[i]) end end - @test kl_divergence(p, q) ≈ klv - @test typeof(kl_divergence(p, q)) == T - - - @test renyi_divergence(p, r, 0) ≈ -log(scale) - @test renyi_divergence(p, r, 1) ≈ -log(scale) - @test renyi_divergence(p, r, 10) ≈ -log(scale) - @test renyi_divergence(p, r, rand()) ≈ -log(scale) - @test renyi_divergence(p, r, Inf) ≈ -log(scale) - @test isinf(renyi_divergence([0.0, 0.5, 0.5], [0.0, 1.0, 0.0], Inf)) - @test renyi_divergence([0.0, 1.0, 0.0], [0.0, 0.5, 0.5], Inf) ≈ log(2.0) - @test renyi_divergence(p, q, 1) ≈ kl_divergence(p, q) pm = (p + q) / 2 - jsv = kl_divergence(p, pm) / 2 + kl_divergence(q, pm) / 2 - @test js_divergence(p, q) ≈ jsv + for (r, p, pm) in ((r, p, pm), + (Iterators.take(r, length(r)), Iterators.take(p, length(p)), Iterators.take(pm, length(pm))), + ((r[i] for i in 1:length(r)), (p[i] for i in 1:length(p)), (pm[i] for i in 1:length(pm))), + ) + @test kl_divergence(p, q) ≈ klv + @test typeof(kl_divergence(p, q)) == T + + @test renyi_divergence(p, r, 0) ≈ -log(scale) + @test renyi_divergence(p, r, 1) ≈ -log(scale) + @test renyi_divergence(p, r, 10) ≈ -log(scale) + @test renyi_divergence(p, r, rand()) ≈ -log(scale) + @test renyi_divergence(p, r, Inf) ≈ -log(scale) + @test isinf(renyi_divergence([0.0, 0.5, 0.5], [0.0, 1.0, 0.0], Inf)) + @test renyi_divergence([0.0, 1.0, 0.0], [0.0, 0.5, 0.5], Inf) ≈ log(2.0) + @test renyi_divergence(p, q, 1) ≈ kl_divergence(p, q) + + jsv = kl_divergence(p, pm) / 2 + kl_divergence(q, pm) / 2 + @test js_divergence(p, q) ≈ jsv + end end end # testset @@ -256,9 +293,7 @@ end # testset end #testset @testset "empty vector" begin - for T in (Float64, F64) - a = T[] - b = T[] + for T in (Float64, F64), (a, b) in ((T[], T[]), (Iterators.take(T[], 0), Iterators.take(T[], 0))) @test sqeuclidean(a, b) == 0.0 @test isa(sqeuclidean(a, b), T) @test euclidean(a, b) == 0.0 @@ -290,12 +325,17 @@ end # testset @testset "DimensionMismatch throwing" begin a = [1, 0]; b = [2] @test_throws DimensionMismatch sqeuclidean(a, b) - a = [1, 0]; b = [2.0] ; w = [3.0] + a = (1, 0); b = (2,) + @test_throws DimensionMismatch sqeuclidean(a, b) + a = (1, 0); b = (2.0,); w = (3.0,) @test_throws DimensionMismatch wsqeuclidean(a, b, w) @test_throws DimensionMismatch peuclidean(a, b, w) a = [1, 0]; b = [2.0, 4.0] ; w = [3.0] @test_throws DimensionMismatch wsqeuclidean(a, b, w) @test_throws 
DimensionMismatch peuclidean(a, b, w) + a = (1, 0); b = (2.0, 4.0) ; w = (3.0,) + @test_throws DimensionMismatch wsqeuclidean(a, b, w) + @test_throws DimensionMismatch peuclidean(a, b, w) p = [0.5, 0.5]; q = [0.3, 0.3, 0.4] @test_throws DimensionMismatch bhattacharyya(p, q) @test_throws DimensionMismatch hellinger(q, p) @@ -372,19 +412,21 @@ end #testset @test haversine([0.,-90.], [0.,90.], 1.) ≈ π atol=1e-10 @test haversine((-180.,0.), (180.,0.), 1.) ≈ 0 atol=1e-10 @test haversine((0.,-90.), (0.,90.), 1.) ≈ π atol=1e-10 - @test haversine((1.,-15.625), (-179.,15.625), 6371.) ≈ 20015. atol=1e0 + x, y = (1.,-15.625), (-179.,15.625) + @test haversine(x, y, 6371.) ≈ 20015. atol=1e0 + @test haversine(Iterators.take(x, 2), Iterators.take(y, 2), 6371.) ≈ 20015. atol=1e0 @test_throws ArgumentError haversine([0.,-90., 0.25], [0.,90.], 1.) end end @testset "bhattacharyya / hellinger" begin - for T in (Float64, F64) - x, y = T.([4.0, 5.0, 6.0, 7.0]), T.([3.0, 9.0, 8.0, 1.0]) - a = T.([1.0, 2.0, 1.0, 3.0, 2.0, 1.0]) - b = T.([1.0, 3.0, 0.0, 2.0, 2.0, 0.0]) - p = rand(T, 12) - p[p .< median(p)] .= 0.0 - q = rand(T, 12) + for T in (Int, Float64, F64) + x, y = T.([4, 5, 6, 7]), T.([3, 9, 8, 1]) + a = T.([1, 2, 1, 3, 2, 1]) + b = T.([1, 3, 0, 2, 2, 0]) + p = T == Int ? rand(0:10, 12) : rand(T, 12) + p[p .< median(p)] .= 0 + q = T == Int ? rand(0:10, 12) : rand(T, 12) # Bhattacharyya and Hellinger distances are defined for discrete # probability distributions so to calculate the expected values @@ -392,9 +434,11 @@ end px = x ./ sum(x) py = y ./ sum(y) expected_bc_x_y = sum(sqrt.(px .* py)) - @test Distances.bhattacharyya_coeff(x, y) ≈ expected_bc_x_y - @test bhattacharyya(x, y) ≈ (-log(expected_bc_x_y)) - @test hellinger(x, y) ≈ sqrt(1 - expected_bc_x_y) + for (x, y) in ((x, y), (Iterators.take(x, 12), Iterators.take(y, 12))) + @test Distances.bhattacharyya_coeff(x, y) ≈ expected_bc_x_y + @test bhattacharyya(x, y) ≈ (-log(expected_bc_x_y)) + @test hellinger(x, y) ≈ sqrt(1 - expected_bc_x_y) + end pa = a ./ sum(a) pb = b ./ sum(b) @@ -429,6 +473,7 @@ function test_colwise(dist, x, y, T) end # ≈ and all( .≈ ) seem to behave slightly differently for F64 @test all(colwise(dist, x, y) .≈ r1) + @test all(colwise(dist, (x[:,i] for i in axes(x, 2)), (y[:,i] for i in axes(y, 2))) .≈ r1) @test all(colwise(dist, x[:, 1], y) .≈ r2) @test all(colwise(dist, x, y[:, 1]) .≈ r3) end @@ -507,10 +552,16 @@ function test_pairwise(dist, x, y, T) # As earlier, we have small rounding errors in accumulations @test pairwise(dist, x, y, dims=2) ≈ rxy @test pairwise(dist, x, dims=2) ≈ rxx - @test pairwise(dist, x, y, dims=2) ≈ rxy - @test pairwise(dist, x, dims=2) ≈ rxx @test pairwise(dist, permutedims(x), permutedims(y), dims=1) ≈ rxy @test pairwise(dist, permutedims(x), dims=1) ≈ rxx + vecx = (x[:, i] for i in 1:nx) + vecy = (y[:, i] for i in 1:ny) + for (vecx, vecy) in ((vecx, vecy), (collect(vecx), collect(vecy))) + @test pairwise(dist, vecx, vecy) ≈ rxy + @test pairwise(dist, vecx) ≈ rxx + @test pairwise!(similar(rxy), dist, vecx, vecy) ≈ rxy + @test pairwise!(similar(rxx), dist, vecx) ≈ rxx + end end end @@ -570,6 +621,50 @@ end test_pairwise(Mahalanobis(Q), X, Y, T) end +function test_scalar_pairwise(dist, x, y, T) + @testset "Scalar pairwise test for $(typeof(dist))" begin + rxy = dist.(x, permutedims(y)) + rxx = dist.(x, permutedims(x)) + # As earlier, we have small rounding errors in accumulations + @test pairwise(dist, x, y) ≈ rxy + @test pairwise(dist, x) ≈ rxx + @test pairwise(dist, permutedims(x), permutedims(y), 
dims=2) ≈ rxy + @test pairwise(dist, permutedims(x), dims=2) ≈ rxx + @test_throws DimensionMismatch pairwise(dist, permutedims(x), permutedims(y), dims=1) + end +end + +@testset "scalar pairwise metrics on $T" for T in (Float64, F64) + m = 5 + n = 8 + nx = 6 + ny = 8 + x = rand(T, nx) + y = rand(T, ny) + a = rand(1:3, nx) + b = rand(1:3, ny) + test_scalar_pairwise(SqEuclidean(), x, y, T) + test_scalar_pairwise(Euclidean(), x, y, T) + test_scalar_pairwise(Cityblock(), x, y, T) + test_scalar_pairwise(TotalVariation(), x, y, T) + test_scalar_pairwise(Chebyshev(), x, y, T) + test_scalar_pairwise(Minkowski(2.5), x, y, T) + test_scalar_pairwise(Hamming(), a, b, T) + test_scalar_pairwise(CosineDist(), x, y, T) + test_scalar_pairwise(CosineDist(), a, b, T) + test_scalar_pairwise(ChiSqDist(), x, y, T) + test_scalar_pairwise(KLDivergence(), x, y, T) + test_scalar_pairwise(JSDivergence(), x, y, T) + test_scalar_pairwise(BrayCurtis(), x, y, T) + w = rand(1, 1) + test_scalar_pairwise(WeightedSqEuclidean(w), x, y, T) + test_scalar_pairwise(WeightedEuclidean(w), x, y, T) + test_scalar_pairwise(WeightedCityblock(w), x, y, T) + test_scalar_pairwise(WeightedMinkowski(w, 2.5), x, y, T) + test_scalar_pairwise(WeightedHamming(w), a, b, T) + test_scalar_pairwise(PeriodicEuclidean(w), x, y, T) +end + @testset "Euclidean precision" begin X = [0.1 0.2; 0.3 0.4; -0.1 -0.1] pd = pairwise(Euclidean(1e-12), X, X, dims=2) @@ -608,11 +703,16 @@ end testDist = Bregman(F, ∇) p = rand(4) q = rand(4) - p = p/sum(p); - q = q/sum(q); - @test testDist(p, q) ≈ gkl_divergence(p, q) - # Test if Bregman() correctly implements the squared euclidean dist. between them. - @test bregman(x -> norm(x)^2, x -> 2*x, p, q) ≈ sqeuclidean(p, q) + p = p/sum(p) + q = q/sum(q) + for (p, q) in ((p, q), + (Iterators.take(p, 4), Iterators.take(q, 4)), + ((p[i] for i in 1:4), (q[i] for i in 1:4)), + ) + @test testDist(p, q) ≈ gkl_divergence(p, q) + # Test if Bregman() correctly implements the squared euclidean dist. between them. + @test bregman(x -> norm(x)^2, x -> 2 .* x, p, q) ≈ sqeuclidean(p, q) + end # Test if Bregman() correctly implements the IS distance. F(p) = -1 * sum(log.(p)) ∇(p) = map(x -> -1 * x^(-1), p)
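
---
Reviewer notes, not part of the patch. A few sketches of the relaxed API follow; they assume the patched Distances.jl is in scope, and any name not taken from the diff (`mydist`, `points`, `others`) is hypothetical.

With the `AbstractVector`/`Number` restrictions dropped, a metric now accepts any pair of equal-length iterables, routed through the new generic `_evaluate` path:

    using Distances

    x = (1.0, 2.0, 3.0)             # a tuple instead of a vector
    y = (2.0, 4.0, 6.0)
    euclidean(x, y)                 # sqrt(1 + 4 + 9) == sqrt(14)
    cityblock((xi for xi in x), y)  # generators work too: 6.0
    hamming("martha", "marhta")     # strings iterate as characters: 2, as in the new tests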
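
`result_type` no longer falls back to `Float64`: the new method evaluates the callable on `oneunit` values of the element types, which `_eltype` can recover even from iterators. A sketch (`mydist` is a made-up callable, not a `PreMetric`):

    using Distances

    mydist(a, b) = abs(a - b) / 2                  # any callable works now
    result_type(mydist, Int, Float32)              # Float32
    result_type(Euclidean(), (1, 2), (0.5, 1.5))   # Float64, inferred from the tuples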
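
The added `pairwise`/`colwise` methods treat plain collections as collections of observations, and the `SemiMetric` specialization of `_pairwise!` fills the upper triangle from the lower one and zeroes the diagonal instead of re-evaluating the metric. A sketch with hypothetical `points` and `others`:

    using Distances

    points = [rand(3) for _ in 1:4]
    others = [rand(3) for _ in 1:6]
    D = pairwise(Euclidean(), points, others)      # 4×6; D[i, j] == euclidean(points[i], others[j])
    S = pairwise(SqEuclidean(), points)            # 4×4, symmetric with zero diagonal
    r = colwise(Cityblock(), points, others[1:4])  # length-4: distances of corresponding elements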
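
Likewise, the Bhattacharyya/Hellinger code now splits into a generic zip-based kernel and a `@simd` array kernel; both return the unnormalized pieces, and the coefficient is normalized at the end so the inputs need not be probability vectors. A small sketch (note `bhattacharyya_coeff` is internal, hence the `Distances.` qualifier):

    using Distances

    a = [1.0, 2.0, 1.0]
    b = [2.0, 1.0, 1.0]
    bhattacharyya(a, b) ≈ -log(Distances.bhattacharyya_coeff(a, b))  # true by definition
    hellinger((ai for ai in a), b)   # the generic kernel handles iterators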
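
The relaxed `Bregman` evaluation keeps the same checks (real-valued `F`, matching lengths) but compares `length` instead of `size`, so tuples and generators pass as well. As the updated test exercises, the divergence generated by F(x) = ‖x‖² reduces to the squared Euclidean distance:

    using Distances, LinearAlgebra

    p = [0.2, 0.3, 0.5]
    q = [0.4, 0.4, 0.2]
    bregman(x -> norm(x)^2, x -> 2 .* x, p, q) ≈ sqeuclidean(p, q)  # true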
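
Finally, the `require_one_based_indexing` shim (Base's own function on Julia ≥ 1.2) guards the mutating APIs, whose destination buffers are filled starting at index 1. A sketch of the failure mode, assuming OffsetArrays is available:

    using Distances, OffsetArrays

    x = rand(2, 3)
    r = OffsetVector(zeros(3), 0:2)  # zero-based destination buffer
    colwise!(r, Euclidean(), x, x)   # throws ArgumentError: offset arrays are not supported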