From cb9220684904d417a3f159f3193ca78134128512 Mon Sep 17 00:00:00 2001 From: chethega Date: Wed, 12 Aug 2020 18:20:00 +0200 Subject: [PATCH] This hoists some work-arounds for computation of eltypes for zero-size vectors out of the loops in order to get a speedup in some cases where type inference otherwise takes tuple types. (#36975) (cherry picked from commit 54d73a83040ec93efd084efbc97976662fd1cf53) --- stdlib/LinearAlgebra/src/matmul.jl | 40 ++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl index 66242177f2ea6b..812d114a992b1d 100644 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ b/stdlib/LinearAlgebra/src/matmul.jl @@ -646,30 +646,34 @@ function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::Ab @inbounds begin if tA == 'T' # fastest case - for k = 1:mA - aoffs = (k-1)*Astride - if mB == 0 - s = false - else - s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1]) + if nA == 0 + for k = 1:mA + _modify!(_add, false, C, k) end - for i = 1:nA - s += transpose(A[aoffs+i]) * B[i] + else + for k = 1:mA + aoffs = (k-1)*Astride + s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1]) + for i = 1:nA + s += transpose(A[aoffs+i]) * B[i] + end + _modify!(_add, s, C, k) end - _modify!(_add, s, C, k) end elseif tA == 'C' - for k = 1:mA - aoffs = (k-1)*Astride - if mB == 0 - s = false - else - s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1]) + if nA == 0 + for k = 1:mA + _modify!(_add, false, C, k) end - for i = 1:nA - s += A[aoffs + i]'B[i] + else + for k = 1:mA + aoffs = (k-1)*Astride + s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1]) + for i = 1:nA + s += A[aoffs + i]'B[i] + end + _modify!(_add, s, C, k) end - _modify!(_add, s, C, k) end else # tA == 'N' for i = 1:mA