From 6c4cbf9f81b927ab388272e39d3276d7a6369e84 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Sun, 7 Apr 2019 19:01:34 +0100 Subject: [PATCH 1/7] Add optimised methods for reducing(hcat/vcat on any iterators --- base/abstractarray.jl | 4 +- base/reduce.jl | 92 ++++++++++++++++++++++++++++++++++++++++++- test/reduce.jl | 21 ++++++++++ 3 files changed, 114 insertions(+), 3 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index f361632f8946c..bd19dfd1fca0d 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1372,10 +1372,10 @@ end typed_vcat(::Type{T}, A::AbstractVecOrMat...) where {T} = _typed_vcat(T, A) -reduce(::typeof(vcat), A::AbstractVector{<:AbstractVecOrMat}) = +reduce(::typeof(vcat), A::AbstractVector{<:AbstractVecOrMat}, et, isize) = _typed_vcat(mapreduce(eltype, promote_type, A), A) -reduce(::typeof(hcat), A::AbstractVector{<:AbstractVecOrMat}) = +reduce(::typeof(hcat), A::AbstractVector{<:AbstractVecOrMat}, et, isize) = _typed_hcat(mapreduce(eltype, promote_type, A), A) ## cat: general case diff --git a/base/reduce.jl b/base/reduce.jl index 425d8d6f28f2f..5c7fb2b51be1b 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -1,5 +1,10 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +## Helpers +eltype_or_default_eltype(itr::T) where T = eltype_or_default_eltype(itr, IteratorEltype(T)) +eltype_or_default_eltype(itr::T, ::HasEltype) where T = eltype(T) +eltype_or_default_eltype(itr, ::EltypeUnknown) = @default_eltype(itr) + ## reductions ## ###### Generic (map)reduce functions ###### @@ -354,10 +359,95 @@ julia> reduce(*, [2; 3; 4]; init=-1) -24 ``` """ -reduce(op, itr; kw...) = mapreduce(identity, op, itr; kw...) +function reduce(op, itr::T; kw...) where T + # Redispatch, adding traits + reduce(op, itr, eltype_or_default_eltype(itr), IteratorSize(T); kw...) +end + +function reduce(op, itr, et, isize; kw...) + # Fallback: if nothing interesting is being done with the traits + # or the operation + return mapreduce(identity, op, itr; kw...) +end reduce(op, a::Number) = a # Do we want this? +##### Operation specific reduce optimisations + +## vcat + +function reduce(::typeof(vcat), xs, T::Type{<:AbstractVector}, isize) + x_state = iterate(xs) + x_state === nothing && return T() # New empty instance + x1, state = x_state + + ret = copy(x1) # this is **Not** going to work for StaticArrays + + if !(isize isa SizeUnknown) + # Assume first element has representitive size, unless that would make this too large + SIZEHINT_CAP = 10^6 + sizehint!(ret, min(SIZEHINT_CAP, length(xs)*length(x1))) + end + + x_state = iterate(xs, state) + while(x_state !== nothing) + x, state = x_state + append!(ret, x) + x_state = iterate(xs, state) + end + return ret +end + +## hcat + +function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector}, isize::SizeUnknown) + x_state = iterate(xs) + x_state === nothing && return et() # New empty instance + x1, state = x_state + + dim1_size = length(x1) + dim2_size = 1 + ret_vec = copy(x1) # this is **Not** going to work for StaticArrays + + x_state = iterate(xs, state) + + while(x_state !== nothing) + x, state = x_state + append!(ret_vec, x) + dim2_size += 1 + x_state = iterate(xs, state) + end + + # Reshape will throw errors if anything was the wrong size + return reshape(ret_vec, (dim1_size, dim2_size)) +end + +function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector}, isize) + # Size is known + x_state = iterate(xs) + x_state === nothing && return T() # New empty instance + x1, state = x_state + + dim1_size = size(x1,1) + dim2_size = length(xs) + + ret = similar(x1, eltype(x1), (dim1_size, dim2_size)) + copyto!(ret, 1, x1, 1) + + x_state = iterate(xs, state) + offset = length(x1)+1 + while(x_state !== nothing) + x, state = x_state + length(x)==dim1_size || throw(DimensionMismatch("hcat")) + copyto!(ret, offset, x, 1) + offset += length(x) + x_state = iterate(xs, state) + end + + return ret +end + + ###### Specific reduction functions ###### ## sum diff --git a/test/reduce.jl b/test/reduce.jl index eb585e8a630f1..ac07af54fc3b4 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -526,6 +526,27 @@ test18695(r) = sum( t^2 for t in r ) end end +@testset "optimized reduce(vcat/hcat, A) for iterators" begin + v_v_same = [rand(128) for ii in 1:100] + # the following 2 are not optimized, but we want to make sure + # that they still hit the normal reduce methods + v_v_diff = [rand(128), rand(Float32,128), rand(Int, 128)] + v_v_diff_typed = Union{Vector{Float64},Vector{Float32},Vector{Int}}[rand(128), rand(Float32,128), rand(Int, 128)] + + for v_v in (v_v_same, v_v_diff_typed, v_v_diff_typed) + # Cover all combinations of iterator traits. + g_v = (x for x in v_v) + f_g_v = Iterators.filter(x->true, g_v) + f_v_v = Iterators.filter(x->true, v_v); + hcat_expected = hcat(v_v...) + vcat_expected = vcat(v_v...) + @testset "$(typeof(data))" for data in (v_v, g_v, f_g_v, f_g_v) + @test reduce(hcat, data) == hcat_expected + @test reduce(vcat, data) == vcat_expected + end + end +end + # offset axes i = Base.Slice(-3:3) x = [j^2 for j in i] From 32da054430e08676713ec34acb84f63a4e51ced1 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Sun, 7 Apr 2019 19:58:30 +0100 Subject: [PATCH 2/7] correct ommissions from test loops --- test/reduce.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/reduce.jl b/test/reduce.jl index ac07af54fc3b4..b6d216fccc883 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -533,14 +533,14 @@ end v_v_diff = [rand(128), rand(Float32,128), rand(Int, 128)] v_v_diff_typed = Union{Vector{Float64},Vector{Float32},Vector{Int}}[rand(128), rand(Float32,128), rand(Int, 128)] - for v_v in (v_v_same, v_v_diff_typed, v_v_diff_typed) + for v_v in (v_v_same, v_v_diff, v_v_diff_typed) # Cover all combinations of iterator traits. g_v = (x for x in v_v) f_g_v = Iterators.filter(x->true, g_v) f_v_v = Iterators.filter(x->true, v_v); hcat_expected = hcat(v_v...) vcat_expected = vcat(v_v...) - @testset "$(typeof(data))" for data in (v_v, g_v, f_g_v, f_g_v) + @testset "$(typeof(data))" for data in (v_v, g_v, f_g_v, f_v_v) @test reduce(hcat, data) == hcat_expected @test reduce(vcat, data) == vcat_expected end From bee2ed10eed63951c5ae7749837acf1266614e82 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Sun, 7 Apr 2019 22:21:55 +0100 Subject: [PATCH 3/7] remove old, and previously masked redundant reduce method --- base/reducedim.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 996729be8bc4c..6e11e41fb1290 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -349,7 +349,7 @@ julia> reduce(max, a, dims=1) 4 8 12 16 ``` """ -reduce(op, A::AbstractArray; kw...) = mapreduce(identity, op, A; kw...) +reduce(op, A::AbstractArray; kw...) ##### Specific reduction functions ##### """ From 62d13841c2424e17d7508b8eaee302f99e1023b1 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Mon, 8 Apr 2019 09:35:21 +0100 Subject: [PATCH 4/7] undo extra specialisation of abstract array methods --- base/abstractarray.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index bd19dfd1fca0d..f361632f8946c 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1372,10 +1372,10 @@ end typed_vcat(::Type{T}, A::AbstractVecOrMat...) where {T} = _typed_vcat(T, A) -reduce(::typeof(vcat), A::AbstractVector{<:AbstractVecOrMat}, et, isize) = +reduce(::typeof(vcat), A::AbstractVector{<:AbstractVecOrMat}) = _typed_vcat(mapreduce(eltype, promote_type, A), A) -reduce(::typeof(hcat), A::AbstractVector{<:AbstractVecOrMat}, et, isize) = +reduce(::typeof(hcat), A::AbstractVector{<:AbstractVecOrMat}) = _typed_hcat(mapreduce(eltype, promote_type, A), A) ## cat: general case From 75d22ca124cd3deff19da66cda56354eeb6bb5e7 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Mon, 8 Apr 2019 09:37:10 +0100 Subject: [PATCH 5/7] fix whitespace --- base/reduce.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index 5c7fb2b51be1b..cb4c1e5a497cd 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -380,15 +380,15 @@ function reduce(::typeof(vcat), xs, T::Type{<:AbstractVector}, isize) x_state = iterate(xs) x_state === nothing && return T() # New empty instance x1, state = x_state - + ret = copy(x1) # this is **Not** going to work for StaticArrays - + if !(isize isa SizeUnknown) # Assume first element has representitive size, unless that would make this too large SIZEHINT_CAP = 10^6 sizehint!(ret, min(SIZEHINT_CAP, length(xs)*length(x1))) end - + x_state = iterate(xs, state) while(x_state !== nothing) x, state = x_state @@ -404,20 +404,20 @@ function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector}, isize::SizeUnknow x_state = iterate(xs) x_state === nothing && return et() # New empty instance x1, state = x_state - + dim1_size = length(x1) dim2_size = 1 ret_vec = copy(x1) # this is **Not** going to work for StaticArrays - + x_state = iterate(xs, state) - + while(x_state !== nothing) x, state = x_state append!(ret_vec, x) dim2_size += 1 x_state = iterate(xs, state) end - + # Reshape will throw errors if anything was the wrong size return reshape(ret_vec, (dim1_size, dim2_size)) end @@ -427,13 +427,13 @@ function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector}, isize) x_state = iterate(xs) x_state === nothing && return T() # New empty instance x1, state = x_state - + dim1_size = size(x1,1) dim2_size = length(xs) ret = similar(x1, eltype(x1), (dim1_size, dim2_size)) copyto!(ret, 1, x1, 1) - + x_state = iterate(xs, state) offset = length(x1)+1 while(x_state !== nothing) From 0a193a656a124bc0901c5cfae647b12774924567 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Sat, 13 Apr 2019 14:11:08 +0100 Subject: [PATCH 6/7] sizehint back down --- base/reduce.jl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index cb4c1e5a497cd..0295968ce58e6 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -383,10 +383,15 @@ function reduce(::typeof(vcat), xs, T::Type{<:AbstractVector}, isize) ret = copy(x1) # this is **Not** going to work for StaticArrays + hinted_size = 0 if !(isize isa SizeUnknown) # Assume first element has representitive size, unless that would make this too large - SIZEHINT_CAP = 10^6 - sizehint!(ret, min(SIZEHINT_CAP, length(xs)*length(x1))) + SIZEHINT_CAP = 10^5 + expected_size = length(xs)*length(x1) + if expected_size < SIZEHINT_CAP + hinted_size = expected_size + sizehint!(ret, hinted_size) + end end x_state = iterate(xs, state) @@ -395,6 +400,11 @@ function reduce(::typeof(vcat), xs, T::Type{<:AbstractVector}, isize) append!(ret, x) x_state = iterate(xs, state) end + + if length(ret) < hinted_size/2 # it is only allowable to be at most 2x to much memory + sizehint!(ret, length(ret)) + end + return ret end From 889a5541908fea1b24e3a1678c710b5511ac1f9f Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Mon, 12 Aug 2019 16:20:09 +0100 Subject: [PATCH 7/7] make less things error --- base/reduce.jl | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/base/reduce.jl b/base/reduce.jl index 0295968ce58e6..00088027eacba 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -376,22 +376,19 @@ reduce(op, a::Number) = a # Do we want this? ## vcat -function reduce(::typeof(vcat), xs, T::Type{<:AbstractVector}, isize) +function reduce(::typeof(vcat), xs, T::Type{<:AbstractVector{V}}, isize) where V x_state = iterate(xs) - x_state === nothing && return T() # New empty instance + x_state === nothing && return reduce_empty(vcat, T) x1, state = x_state - ret = copy(x1) # this is **Not** going to work for StaticArrays + ret = Vector(x1) hinted_size = 0 if !(isize isa SizeUnknown) # Assume first element has representitive size, unless that would make this too large SIZEHINT_CAP = 10^5 - expected_size = length(xs)*length(x1) - if expected_size < SIZEHINT_CAP - hinted_size = expected_size - sizehint!(ret, hinted_size) - end + hinted_size = min(SIZEHINT_CAP, length(xs)*length(x1)) + sizehint!(ret, hinted_size) end x_state = iterate(xs, state) @@ -401,7 +398,7 @@ function reduce(::typeof(vcat), xs, T::Type{<:AbstractVector}, isize) x_state = iterate(xs, state) end - if length(ret) < hinted_size/2 # it is only allowable to be at most 2x to much memory + if length(ret) < hinted_size/2 # it is only allowable to be at most 2x too much memory sizehint!(ret, length(ret)) end @@ -410,14 +407,14 @@ end ## hcat -function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector}, isize::SizeUnknown) +function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector{V}}, isize::SizeUnknown) where V x_state = iterate(xs) - x_state === nothing && return et() # New empty instance + x_state === nothing && return reduce_empty(hcat, T) x1, state = x_state dim1_size = length(x1) dim2_size = 1 - ret_vec = copy(x1) # this is **Not** going to work for StaticArrays + ret_vec = Vector(x1) x_state = iterate(xs, state) @@ -432,10 +429,10 @@ function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector}, isize::SizeUnknow return reshape(ret_vec, (dim1_size, dim2_size)) end -function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector}, isize) +function reduce(::typeof(hcat), xs, T::Type{<:AbstractVector{V}}, isize) where V # Size is known x_state = iterate(xs) - x_state === nothing && return T() # New empty instance + x_state === nothing && return reduce_empty(hcat, T) x1, state = x_state dim1_size = size(x1,1)