Skip to content

Commit

Permalink
This hoists some work-arounds for computation of eltypes for zero-size vectors out of the loops in order to get a speedup in some cases where type inference otherwise takes tuple types. (#36975)
Browse files (browse the repository at this point in the history)

(cherry picked from commit 54d73a8)
  • Loading branch information
chethega authored and KristofferC committed Aug 18, 2020
1 parent 265ef29 commit cb92206
Showing 1 changed file with 22 additions and 18 deletions.
40 changes: 22 additions & 18 deletions stdlib/LinearAlgebra/src/matmul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -646,30 +646,34 @@ function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::Ab

@inbounds begin
if tA == 'T' # fastest case
for k = 1:mA
aoffs = (k-1)*Astride
if mB == 0
s = false
else
s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
if nA == 0
for k = 1:mA
_modify!(_add, false, C, k)
end
for i = 1:nA
s += transpose(A[aoffs+i]) * B[i]
else
for k = 1:mA
aoffs = (k-1)*Astride
s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
for i = 1:nA
s += transpose(A[aoffs+i]) * B[i]
end
_modify!(_add, s, C, k)
end
_modify!(_add, s, C, k)
end
elseif tA == 'C'
for k = 1:mA
aoffs = (k-1)*Astride
if mB == 0
s = false
else
s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
if nA == 0
for k = 1:mA
_modify!(_add, false, C, k)
end
for i = 1:nA
s += A[aoffs + i]'B[i]
else
for k = 1:mA
aoffs = (k-1)*Astride
s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
for i = 1:nA
s += A[aoffs + i]'B[i]
end
_modify!(_add, s, C, k)
end
_modify!(_add, s, C, k)
end
else # tA == 'N'
for i = 1:mA
Expand Down

0 comments on commit cb92206

Please sign in to comment.