From 195bae7071dfd1cb7463d6efc7d9bacbf3709f3b Mon Sep 17 00:00:00 2001
From: Neven Sajko <s@purelymail.com>
Date: Sat, 29 Apr 2023 14:32:05 +0200
Subject: [PATCH] Base: twiceprecision: optimize mul12

It is well known and obvious that the algorithm behind `Base.mul12`
(sometimes known as "Fast2Mult") doesn't require a "Fast2Sum" (known in
the Julia codebase as `Base.canonicalize2`) call at the end, so remove
it. This follows from the fact that IEEE-754 floating-point
multiplication is required to be well-rounded, so Fast2Sum can't change
the result.

Reference, for example, the beginning of
https://doi.org/10.1145/3121432 by Joldes, Muller, Popescu.

Furthermore, `Base.Math.two_mul` already exists, so use it as a kernel
function. This required adding a general fallback method for
`Base.Math.two_mul`, required, for example, for `BigFloat`.

Also removed the `iszero` check that is now obviously not necessary.
---
 base/math.jl           | 5 +++++
 base/twiceprecision.jl | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/base/math.jl b/base/math.jl
index f42ecf3d0ee7e..71bd4949498b5 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -46,6 +46,11 @@ end
 
 # non-type specific math functions
 
+function two_mul(x::T, y::T) where {T<:Number}
+    xy = x*y
+    xy, fma(x, y, -xy)
+end
+
 @assume_effects :consistent @inline function two_mul(x::Float64, y::Float64)
     if Core.Intrinsics.have_fma(Float64)
         xy = x*y
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
index b6d702d9242b8..4e54daabf6f77 100644
--- a/base/twiceprecision.jl
+++ b/base/twiceprecision.jl
@@ -112,8 +112,8 @@ julia> Float64(hi) + Float64(lo)
 ```
 """
 function mul12(x::T, y::T) where {T<:AbstractFloat}
-    h = x * y
-    ifelse(iszero(h) | !isfinite(h), (h, h), canonicalize2(h, fma(x, y, -h)))
+    (h, l) = Base.Math.two_mul(x, y)
+    ifelse(!isfinite(h), (h, h), (h, l))
 end
 mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p)))
 mul12(x, y) = mul12(promote(x, y)...)