diff --git a/src/ThreadsX.jl b/src/ThreadsX.jl index 3ca438d7..ded807ad 100644 --- a/src/ThreadsX.jl +++ b/src/ThreadsX.jl @@ -31,6 +31,8 @@ function Set end function foreach end function map! end +function copy! end +function copyto! end function sort end function sort! end @@ -85,6 +87,7 @@ include("basesizes.jl") include("reduce.jl") include("foreach.jl") include("map.jl") +include("copy.jl") include("mergesort.jl") include("quicksort.jl") include("countingsort.jl") diff --git a/src/copy.jl b/src/copy.jl new file mode 100644 index 00000000..7150986b --- /dev/null +++ b/src/copy.jl @@ -0,0 +1,42 @@ +ThreadsX.copy!(dest::AbstractVector, src::AbstractVector; kwargs...) = + ThreadsX.copyto!(resize!(dest, length(src)), src; kwargs...) + +function ThreadsX.copyto!( + dest::AbstractArray, + src::AbstractArray; + basesize::Integer = default_copyto_basesize(dest, src), +) + if length(dest) <= basesize + copyto!(dest, src) + elseif IndexStyle(dest) isa IndexLinear && IndexStyle(src) isa IndexLinear + linear_copyto!(dest, src, basesize) + else + cartesian_copyto!(dest, src, basesize) + end + return dest +end + +function linear_copyto!(dest, src, basesize) + # TODO: support size-compatible but index-incompatible arrays + @sync for p in _partition(eachindex(dest, src), basesize) + @spawn if p isa AbstractUnitRange + copyto!(dest, first(p), src, first(p), length(p)) + else + copyto!(view(dest, p), view(src, p)) + end + end +end + +function cartesian_copyto!(dest, src, basesize) + ThreadsX.foreach(eachindex(dest, src); basesize = basesize) do i + @inbounds dest[i] = src[i] + end +end + +# TODO: Take into account more properties like: sizeof(T), boxed?, union?, +# need conversion? 
+function default_copyto_basesize(dest::AbstractArray{T}, ::AbstractArray) where {T} + # 2^19 for 64 bit T + basesize = 4194304 ÷ elsizeof(T) + return max(cld(length(dest), Threads.nthreads()), basesize) +end diff --git a/src/utils.jl b/src/utils.jl index 311e721e..a3ff7412 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -22,6 +22,15 @@ __verify_simd_flag(_, simd) = verify_simd_flag(simd) = __verify_simd_flag(_asval(simd), simd) +# TODO: handle Union +function elsizeof(::Type{T}) where {T} + if Base.allocatedinline(Some{T}) + return sizeof(Some{T}) + else + return sizeof(Ptr{Any}) + end +end + function _median(order, (a, b, c)::NTuple{3,Any}) # Sort `(a, b, c)`: if Base.lt(order, b, a) diff --git a/test/test_copy.jl b/test/test_copy.jl new file mode 100644 index 00000000..233e6979 --- /dev/null +++ b/test/test_copy.jl @@ -0,0 +1,38 @@ +module TestCopy + +using Test +using ThreadsX + +@testset "copy vector" begin + @testset for basesize in [[nothing]; 1:8], n in 0:10, T in [Int, Any] + ys = Vector{T}(undef, n) + if basesize === nothing + @test ThreadsX.copyto!(ys, 1:n) == 1:n + @test ThreadsX.copy!(ys, 1:n) == 1:n + else + @test ThreadsX.copyto!(ys, 1:n; basesize = basesize) == 1:n + @test ThreadsX.copy!(ys, 1:n; basesize = basesize) == 1:n + end + end +end + +@testset "copy matrix" begin + @testset for basesize in [[nothing]; 1:8], n in 0:10, m in 1:5, T in [Int, Any] + A = Matrix{T}(undef, n, m) + B = reshape(1:n*m, n, m) + if basesize === nothing + @test ThreadsX.copyto!(A, B) == B + else + @test ThreadsX.copyto!(A, B; basesize = basesize) == B + end + + Bt = transpose(reshape(1:n*m, m, n)) + if basesize === nothing + @test ThreadsX.copyto!(A, Bt) == Bt + else + @test ThreadsX.copyto!(A, Bt; basesize = basesize) == Bt + end + end +end + +end # module