From 67105aecf114785efad7ec00a0ed1cf2c49e0edc Mon Sep 17 00:00:00 2001
From: Tim Holy
Date: Thu, 24 Dec 2020 07:29:40 -0600
Subject: [PATCH] Add some precompiles

Together with a couple of changes to LoopVectorization,
this shaves about one second off the initial `mygemmavx!`
demo. There may be more methods that could be added,
but this is a start. Overall, VectorizationBase is the only
substantive source of inference time in that demo.
---
 src/precompile.jl | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/precompile.jl b/src/precompile.jl
index 22c11ca0..30cb2d74 100644
--- a/src/precompile.jl
+++ b/src/precompile.jl
@@ -1,3 +1,22 @@
 function _precompile_()
     ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
+    for T in (Bool, Int, Float32, Float64)  # element types crossed with the container types below
+        for A in (Vector, Matrix)
+            precompile(stridedpointer, (A{T},))  # request compilation of stridedpointer(::A{T})
+        end
+    end
+    function precompile_nt(@nospecialize(T))  # local helper; @nospecialize asks the compiler not to specialize it on T
+        for I ∈ (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64)  # every signed/unsigned integer width from 8 to 64 bits
+            precompile(vload_quote, (Type{T}, Type{I}, Symbol, Int, Int, Int, Int, Bool, Bool))  # one precompile request per (T, I) pair
+        end
+        # precompile(vfmadd, (Vec{4, T}, Vec{4, T}, Vec{4, T})) # doesn't "take" (too bad, this is expensive)
+    end
+    U = NativeTypes  # NOTE(review): presumably a Union of element types defined elsewhere in the package; confirm
+    while isa(U, Union)  # peel one member off the Union per iteration through its .a/.b fields
+        T, U = U.a, U.b
+        precompile_nt(T)
+    end
+    precompile_nt(U)  # U is no longer a Union here, so this handles the last remaining entry
+    precompile(_pick_vector_width, (Type, Vararg{Type,100}))  # signature: one Type followed by exactly 100 more Type arguments
+    precompile(>=, (Int, MM{4, 1, Int}))  # request compilation of >= for an Int compared against an MM{4, 1, Int}
 end