From 195bae7071dfd1cb7463d6efc7d9bacbf3709f3b Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Sat, 29 Apr 2023 14:32:05 +0200 Subject: [PATCH] Base: twiceprecision: optimize mul12 It is well known and obvious that the algorithm behind `Base.mul12` (sometimes known as "Fast2Mult") doesn't require a "Fast2Sum" (known in the Julia codebase as `Base.canonicalize2`) call at the end, so remove it. This follows from the fact that IEEE-754 floating-point multiplication is required to be well-rounded, so Fast2Sum can't change the result. Reference, for example, the beginning of https://doi.org/10.1145/3121432 by Joldes, Muller, Popescu. Furthermore, `Base.Math.two_mul` already exists, so use it as a kernel function. This required adding a general fallback method for `Base.Math.two_mul`, required, for example, for `BigFloat`. Also removed the `iszero` check that is now obviously not necessary. --- base/math.jl | 5 +++++ base/twiceprecision.jl | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/base/math.jl b/base/math.jl index f42ecf3d0ee7e..71bd4949498b5 100644 --- a/base/math.jl +++ b/base/math.jl @@ -46,6 +46,11 @@ end # non-type specific math functions +function two_mul(x::T, y::T) where {T<:Number} + xy = x*y + xy, fma(x, y, -xy) +end + @assume_effects :consistent @inline function two_mul(x::Float64, y::Float64) if Core.Intrinsics.have_fma(Float64) xy = x*y diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl index b6d702d9242b8..4e54daabf6f77 100644 --- a/base/twiceprecision.jl +++ b/base/twiceprecision.jl @@ -112,8 +112,8 @@ julia> Float64(hi) + Float64(lo) ``` """ function mul12(x::T, y::T) where {T<:AbstractFloat} - h = x * y - ifelse(iszero(h) | !isfinite(h), (h, h), canonicalize2(h, fma(x, y, -h))) + (h, l) = Base.Math.two_mul(x, y) + ifelse(!isfinite(h), (h, h), (h, l)) end mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p))) mul12(x, y) = mul12(promote(x, y)...)