From 0097bddf900c16a7c7591671a6a3a0e2bd8acb4d Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Fri, 30 Oct 2020 03:20:07 -0500 Subject: [PATCH] A faster version of exp, exp2, exp10 for Floating point numbers (#36761) * A faster version of exp for Float64 This is based on the Glibc algorithm which at-chriselrod (Elrond on discourse) described the algorithm of for me. It appears to be about 2x faster than the current algorithm, and equally accurate over the range for which I have tried it. It also theoretically should be easier to vectorize as branches are only used for checking for over/underflow. --- base/math.jl | 11 +- base/special/exp.jl | 358 ++++++++++++++++++++++++++++-------------- base/special/exp10.jl | 139 ---------------- 3 files changed, 240 insertions(+), 268 deletions(-) delete mode 100644 base/special/exp10.jl diff --git a/base/math.jl b/base/math.jl index 94d001c0f31dc..c78816545a2eb 100644 --- a/base/math.jl +++ b/base/math.jl @@ -362,13 +362,9 @@ asinh(x::Number) Accurately compute ``e^x-1``. """ expm1(x) -for f in (:exp2, :expm1) - @eval begin - ($f)(x::Float64) = ccall(($(string(f)),libm), Float64, (Float64,), x) - ($f)(x::Float32) = ccall(($(string(f,"f")),libm), Float32, (Float32,), x) - ($f)(x::Real) = ($f)(float(x)) - end -end +expm1(x::Float64) = ccall((:expm1,libm), Float64, (Float64,), x) +expm1(x::Float32) = ccall((:expm1f,libm), Float32, (Float32,), x) +expm1(x::Real) = expm1(float(x)) """ exp2(x) @@ -1186,7 +1182,6 @@ Return positive part of the high word of `x` as a `UInt32`. # More special functions include("special/cbrt.jl") include("special/exp.jl") -include("special/exp10.jl") include("special/ldexp_exp.jl") include("special/hyperbolic.jl") include("special/trig.jl") diff --git a/base/special/exp.jl b/base/special/exp.jl index 493e5167f1e66..8ff8e3dd5418c 100644 --- a/base/special/exp.jl +++ b/base/special/exp.jl @@ -1,65 +1,248 @@ -# Based on FDLIBM http://www.netlib.org/fdlibm/e_exp.c -# which is made available under the following licence - -## Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. Permission -## to use, copy, modify, and distribute this software is freely granted, -## provided that this notice is preserved. - -# Method -# 1. Argument reduction: Reduce x to an r so that |r| <= 0.5*ln(2). Given x, -# find r and integer k such that -# x = k*ln(2) + r, |r| <= 0.5*ln(2). -# Here r is represented as r = hi - lo for better accuracy. -# -# 2. Approximate exp(r) by a special rational function on [0, 0.5*ln(2)]: -# R(r^2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r^4/360 + ... -# -# A special Remez algorithm on [0, 0.5*ln(2)] is used to generate a -# polynomial to approximate R. -# -# The computation of exp(r) thus becomes -# 2*r -# exp(r) = 1 + ---------- -# R(r) - r -# r*c(r) -# = 1 + r + ----------- (for better accuracy) -# 2 - c(r) -# where -# c(r) = r - (P1*r^2 + P2*r^4 + ... + P5*r^10 + ...). -# -# 3. Scale back: exp(x) = 2^k * exp(r) +# magic rounding constant: 1.5*2^52 Adding, then subtracting it from a float rounds it to an Int. +# This works because eps(MAGIC_ROUND_CONST(T)) == one(T), so adding it to a smaller number aligns the lsb to the 1s place. +# Values for which this trick doesn't work are going to have outputs of 0 or Inf. +MAGIC_ROUND_CONST(::Type{Float64}) = 6.755399441055744e15 +MAGIC_ROUND_CONST(::Type{Float32}) = 1.048576f7 + +# max, min, and subnormal arguments +# max_exp = T(exponent_bias(T)*log(base, big(2)) + log(base, 2 - big(2.0)^-significand_bits(T))) +MAX_EXP(n::Val{2}, ::Type{Float32}) = 128.0f0 +MAX_EXP(n::Val{2}, ::Type{Float64}) = 1024.0 +MAX_EXP(n::Val{:ℯ}, ::Type{Float32}) = 88.72284f0 +MAX_EXP(n::Val{:ℯ}, ::Type{Float64}) = 709.782712893384 +MAX_EXP(n::Val{10}, ::Type{Float32}) = 38.53184f0 +MAX_EXP(n::Val{10}, ::Type{Float64}) = 308.25471555991675 + +# min_exp = T(-(exponent_bias(T)+significand_bits(T)) * log(base, big(2))) +MIN_EXP(n::Val{2}, ::Type{Float32}) = -150.0f0 +MIN_EXP(n::Val{2}, ::Type{Float64}) = -1075.0 +MIN_EXP(n::Val{:ℯ}, ::Type{Float32}) = -103.97208f0 +MIN_EXP(n::Val{:ℯ}, ::Type{Float64}) = -745.1332191019412 +MIN_EXP(n::Val{10}, ::Type{Float32}) = -45.1545f0 +MIN_EXP(n::Val{10}, ::Type{Float64}) = -323.60724533877976 + +# subnorm_exp = abs(log(base, floatmin(T))) +# these vals are positive since it's easier to take abs(x) than -abs(x) +SUBNORM_EXP(n::Val{2}, ::Type{Float32}) = 126.00001f0 +SUBNORM_EXP(n::Val{2}, ::Type{Float64}) = 1022.0 +SUBNORM_EXP(n::Val{:ℯ}, ::Type{Float32}) = 87.33655f0 +SUBNORM_EXP(n::Val{:ℯ}, ::Type{Float64}) = 708.3964185322641 +SUBNORM_EXP(n::Val{10}, ::Type{Float32}) = 37.92978f0 +SUBNORM_EXP(n::Val{10}, ::Type{Float64}) = 307.6526555685887 + +# 256/log(base, 2) (For Float64 reductions) +LogBo256INV(::Val{2}, ::Type{Float64}) = 256.0 +LogBo256INV(::Val{:ℯ}, ::Type{Float64}) = 369.3299304675746 +LogBo256INV(::Val{10}, ::Type{Float64}) = 850.4135922911647 + +# -log(base, 2)/256 in upper and lower bits +# Upper is truncated to only have 34 bits of significand since N has at most +# ceil(log2(-MIN_EXP(base, Float64)*LogBo256INV(Val(2), Float64))) = 19 bits. +# This ensures no rounding when multiplying LogBo256U*N for FMAless hardware +LogBo256U(::Val{2}, ::Type{Float64}) = -0.00390625 +LogBo256U(::Val{:ℯ}, ::Type{Float64}) = -0.002707606173999011 +LogBo256U(::Val{10}, ::Type{Float64}) = -0.0011758984204561784 +LogBo256L(::Val{2}, ::Type{Float64}) = 0.0 +LogBo256L(::Val{:ℯ}, ::Type{Float64}) = -6.327543041662719e-14 +LogBo256L(::Val{10}, ::Type{Float64}) = -1.0624811566412999e-13 + +# 1/log(base, 2) (For Float32 reductions) +LogBINV(::Val{2}, ::Type{Float32}) = 1.0f0 +LogBINV(::Val{:ℯ}, ::Type{Float32}) = 1.442695f0 +LogBINV(::Val{10}, ::Type{Float32}) = 3.321928f0 + +# -log(base, 2) in upper and lower bits +# Upper is truncated to only have 16 bits of significand since N has at most +# ceil(log2(-MIN_EXP(n, Float32)*LogBINV(Val(2), Float32))) = 8 bits. +# This ensures no rounding when multiplying LogBU*N for FMAless hardware +LogBU(::Val{2}, ::Type{Float32}) = -1.0f0 +LogBU(::Val{:ℯ}, ::Type{Float32}) = -0.69314575f0 +LogBU(::Val{10}, ::Type{Float32}) = -0.3010254f0 +LogBL(::Val{2}, ::Type{Float32}) = 0.0f0 +LogBL(::Val{:ℯ}, ::Type{Float32}) = -1.4286068f-6 +LogBL(::Val{10}, ::Type{Float32}) = -4.605039f-6 + +# Range reduced kernels +@inline function expm1b_kernel(::Val{2}, x::Float64) + return x * evalpoly(x, (0.6931471805599393, 0.24022650695910058, + 0.05550411502333161, 0.009618129548366803)) +end +@inline function expm1b_kernel(::Val{:ℯ}, x::Float64) + return x * evalpoly(x, (0.9999999999999912, 0.4999999999999997, + 0.1666666857598779, 0.04166666857598777)) +end + +@inline function expm1b_kernel(::Val{10}, x::Float64) + return x * evalpoly(x, (2.3025850929940255, 2.6509490552391974, + 2.034678825384765, 1.1712552025835192)) +end + +@inline function expb_kernel(::Val{2}, x::Float32) + return evalpoly(x, (1.0f0, 0.6931472f0, 0.2402265f0, + 0.05550411f0, 0.009618025f0, + 0.0013333423f0, 0.00015469732f0, 1.5316464f-5)) +end +@inline function expb_kernel(::Val{:ℯ}, x::Float32) + return evalpoly(x, (1.0f0, 1.0f0, 0.5f0, 0.16666667f0, + 0.041666217f0, 0.008333249f0, + 0.001394858f0, 0.00019924171f0)) +end +@inline function expb_kernel(::Val{10}, x::Float32) + return evalpoly(x, (1.0f0, 2.3025851f0, 2.650949f0, + 2.0346787f0, 1.1712426f0, 0.53937745f0, + 0.20788547f0, 0.06837386f0)) +end -# log(2) -const LN2 = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01 -# log2(e) -const LOG2_E = 1.442695040888963407359924681001892137426646 +# Table stores data with 60 sig figs by using the fact that the first 12 bits of all the +# values would be the same if stored as regular Float64. +# This only gains 8 bits since the least significant 4 bits of the exponent +# of the small part are not the same for all table entries +const JU_MASK = typemax(UInt64)>>12 +const JL_MASK = typemax(UInt64)>>8 +const JU_CONST = 0x3FF0000000000000 +const JL_CONST = 0x3C00000000000000 -# log(2) into upper and lower bits -LN2U(::Type{Float64}) = 6.93147180369123816490e-1 -LN2U(::Type{Float32}) = 6.9313812256f-1 -LN2L(::Type{Float64}) = 1.90821492927058770002e-10 -LN2L(::Type{Float32}) = 9.0580006145f-6 +#function make_table(size) +# t_array = zeros(UInt64, size); +# for j in 1:size +# val = 2.0^(BigFloat(j-1)/size) +# valU = Float64(val, RoundDown) +# valL = Float64(val-valU) +# valU = reinterpret(UInt64, valU) & JU_MASK +# valL = ((reinterpret(UInt64, valL) & JL_MASK)>>44)<<52 +# t_array[j] = valU | valL +# end +# return Tuple(t_array) +#end +#const J_TABLE = make_table(256); +const J_TABLE = (0x0000000000000000, 0xaac00b1afa5abcbe, 0x9b60163da9fb3335, 0xab502168143b0280, 0xadc02c9a3e778060, + 0x656037d42e11bbcc, 0xa7a04315e86e7f84, 0x84c04e5f72f654b1, 0x8d7059b0d3158574, 0xa510650a0e3c1f88, + 0xa8d0706b29ddf6dd, 0x83207bd42b72a836, 0x6180874518759bc8, 0xa4b092bdf66607df, 0x91409e3ecac6f383, + 0x85d0a9c79b1f3919, 0x98a0b5586cf9890f, 0x94f0c0f145e46c85, 0x9010cc922b7247f7, 0xa210d83b23395deb, + 0x4030e3ec32d3d1a2, 0xa5b0efa55fdfa9c4, 0xae40fb66affed31a, 0x8d41073028d7233e, 0xa4911301d0125b50, + 0xa1a11edbab5e2ab5, 0xaf712abdc06c31cb, 0xae8136a814f204aa, 0xa661429aaea92ddf, 0xa9114e95934f312d, + 0x82415a98c8a58e51, 0x58f166a45471c3c2, 0xab9172b83c7d517a, 0x70917ed48695bbc0, 0xa7718af9388c8de9, + 0x94a1972658375d2f, 0x8e51a35beb6fcb75, 0x97b1af99f8138a1c, 0xa351bbe084045cd3, 0x9001c82f95281c6b, + 0x9e01d4873168b9aa, 0xa481e0e75eb44026, 0xa711ed5022fcd91c, 0xa201f9c18438ce4c, 0x8dc2063b88628cd6, + 0x935212be3578a819, 0x82a21f49917ddc96, 0x8d322bdda27912d1, 0x99b2387a6e756238, 0x8ac2451ffb82140a, + 0x8ac251ce4fb2a63f, 0x93e25e85711ece75, 0x82b26b4565e27cdd, 0x9e02780e341ddf29, 0xa2d284dfe1f56380, + 0xab4291ba7591bb6f, 0x86129e9df51fdee1, 0xa352ab8a66d10f12, 0xafb2b87fd0dad98f, 0xa572c57e39771b2e, + 0x9002d285a6e4030b, 0x9d12df961f641589, 0x71c2ecafa93e2f56, 0xaea2f9d24abd886a, 0x86f306fe0a31b715, + 0x89531432edeeb2fd, 0x8a932170fc4cd831, 0xa1d32eb83ba8ea31, 0x93233c08b26416ff, 0xab23496266e3fa2c, + 0xa92356c55f929ff0, 0xa8f36431a2de883a, 0xa4e371a7373aa9ca, 0xa3037f26231e7549, 0xa0b38cae6d05d865, + 0xa3239a401b7140ee, 0xad43a7db34e59ff6, 0x9543b57fbfec6cf4, 0xa083c32dc313a8e4, 0x7fe3d0e544ede173, + 0x8ad3dea64c123422, 0xa943ec70df1c5174, 0xa413fa4504ac801b, 0x8bd40822c367a024, 0xaf04160a21f72e29, + 0xa3d423fb27094689, 0xab8431f5d950a896, 0x88843ffa3f84b9d4, 0x48944e086061892d, 0xae745c2042a7d231, + 0x9c946a41ed1d0057, 0xa1e4786d668b3236, 0x73c486a2b5c13cd0, 0xab1494e1e192aed1, 0x99c4a32af0d7d3de, + 0xabb4b17dea6db7d6, 0x7d44bfdad5362a27, 0x9054ce41b817c114, 0x98e4dcb299fddd0d, 0xa564eb2d81d8abfe, + 0xa5a4f9b2769d2ca6, 0x7a2508417f4531ee, 0xa82516daa2cf6641, 0xac65257de83f4eee, 0xabe5342b569d4f81, + 0x879542e2f4f6ad27, 0xa8a551a4ca5d920e, 0xa7856070dde910d1, 0x99b56f4736b527da, 0xa7a57e27dbe2c4ce, + 0x82958d12d497c7fd, 0xa4059c0827ff07cb, 0x9635ab07dd485429, 0xa245ba11fba87a02, 0x3c45c9268a5946b7, + 0xa195d84590998b92, 0x9ba5e76f15ad2148, 0xa985f6a320dceb70, 0xa60605e1b976dc08, 0x9e46152ae6cdf6f4, + 0xa636247eb03a5584, 0x984633dd1d1929fd, 0xa8e6434634ccc31f, 0xa28652b9febc8fb6, 0xa226623882552224, + 0xa85671c1c70833f5, 0x60368155d44ca973, 0x880690f4b19e9538, 0xa216a09e667f3bcc, 0x7a36b052fa75173e, + 0xada6c012750bdabe, 0x9c76cfdcddd47645, 0xae46dfb23c651a2e, 0xa7a6ef9298593ae4, 0xa9f6ff7df9519483, + 0x59d70f7466f42e87, 0xaba71f75e8ec5f73, 0xa6f72f8286ead089, 0xa7a73f9a48a58173, 0x90474fbd35d7cbfd, + 0xa7e75feb564267c8, 0x9b777024b1ab6e09, 0x986780694fde5d3f, 0x934790b938ac1cf6, 0xaaf7a11473eb0186, + 0xa207b17b0976cfda, 0x9f17c1ed0130c132, 0x91b7d26a62ff86f0, 0x7057e2f336cf4e62, 0xabe7f3878491c490, + 0xa6c80427543e1a11, 0x946814d2add106d9, 0xa1582589994cce12, 0x9998364c1eb941f7, 0xa9c8471a4623c7ac, + 0xaf2857f4179f5b20, 0xa01868d99b4492ec, 0x85d879cad931a436, 0x99988ac7d98a6699, 0x9d589bd0a478580f, + 0x96e8ace5422aa0db, 0x9ec8be05bad61778, 0xade8cf3216b5448b, 0xa478e06a5e0866d8, 0x85c8f1ae99157736, + 0x959902fed0282c8a, 0xa119145b0b91ffc5, 0xab2925c353aa2fe1, 0xae893737b0cdc5e4, 0xa88948b82b5f98e4, + 0xad395a44cbc8520e, 0xaf296bdd9a7670b2, 0xa1797d829fde4e4f, 0x7ca98f33e47a22a2, 0xa749a0f170ca07b9, + 0xa119b2bb4d53fe0c, 0x7c79c49182a3f090, 0xa579d674194bb8d4, 0x7829e86319e32323, 0xaad9fa5e8d07f29d, + 0xa65a0c667b5de564, 0x9c6a1e7aed8eb8bb, 0x963a309bec4a2d33, 0xa2aa42c980460ad7, 0xa16a5503b23e255c, + 0x650a674a8af46052, 0x9bca799e1330b358, 0xa58a8bfe53c12e58, 0x90fa9e6b5579fdbf, 0x889ab0e521356eba, + 0xa81ac36bbfd3f379, 0x97ead5ff3a3c2774, 0x97aae89f995ad3ad, 0xa5aafb4ce622f2fe, 0xa21b0e07298db665, + 0x94db20ce6c9a8952, 0xaedb33a2b84f15fa, 0xac1b468415b749b0, 0xa1cb59728de55939, 0x92ab6c6e29f1c52a, + 0xad5b7f76f2fb5e46, 0xa24b928cf22749e3, 0xa08ba5b030a10649, 0xafcbb8e0b79a6f1e, 0x823bcc1e904bc1d2, + 0xafcbdf69c3f3a206, 0xa08bf2c25bd71e08, 0xa89c06286141b33c, 0x811c199bdd85529c, 0xa48c2d1cd9fa652b, + 0x9b4c40ab5fffd07a, 0x912c544778fafb22, 0x928c67f12e57d14b, 0xa86c7ba88988c932, 0x71ac8f6d9406e7b5, + 0xaa0ca3405751c4da, 0x750cb720dcef9069, 0xac5ccb0f2e6d1674, 0xa88cdf0b555dc3f9, 0xa2fcf3155b5bab73, + 0xa1ad072d4a07897b, 0x955d1b532b08c968, 0xa15d2f87080d89f1, 0x93dd43c8eacaa1d6, 0x82ed5818dcfba487, + 0x5fed6c76e862e6d3, 0xa77d80e316c98397, 0x9a0d955d71ff6075, 0x9c2da9e603db3285, 0xa24dbe7cd63a8314, + 0x92ddd321f301b460, 0xa1ade7d5641c0657, 0xa72dfc97337b9b5e, 0xadae11676b197d16, 0xa42e264614f5a128, + 0xa30e3b333b16ee11, 0x839e502ee78b3ff6, 0xaa7e653924676d75, 0x92de7a51fbc74c83, 0xa77e8f7977cdb73f, + 0xa0bea4afa2a490d9, 0x948eb9f4867cca6e, 0xa1becf482d8e67f0, 0x91cee4aaa2188510, 0x9dcefa1bee615a27, + 0xa66f0f9c1cb64129, 0x93af252b376bba97, 0xacdf3ac948dd7273, 0x99df50765b6e4540, 0x9faf6632798844f8, + 0xa12f7bfdad9cbe13, 0xaeef91d802243c88, 0x874fa7c1819e90d8, 0xacdfbdba3692d513, 0x62efd3c22b8f71f1, 0x74afe9d96b2a23d9) -# max and min arguments -MAX_EXP(::Type{Float64}) = 7.09782712893383996732e2 # log 2^1023*(2-2^-52) -MAX_EXP(::Type{Float32}) = 88.72283905206835f0 # log 2^127 *(2-2^-23) +@inline function table_unpack(ind) + j = @inbounds J_TABLE[ind] + jU = reinterpret(Float64, JU_CONST | (j&JU_MASK)) + jL = reinterpret(Float64, JL_CONST | (j>>8)) + return jU, jL +end -# one less than the min exponent since we can sqeeze a bit more from the exp function -MIN_EXP(::Type{Float64}) = -7.451332191019412076235e2 # log 2^-1075 -MIN_EXP(::Type{Float32}) = -103.97207708f0 # log 2^-150 +# Method for Float64 +# 1. Argument reduction: Reduce x to an r so that |r| <= log(b, 2)/512. Given x, base b, +# find r and integers k, j such that +# x = (k + j/256)*log(b, 2) + r, 0 <= j < 256, |r| <= log(b,2)/512. +# +# 2. Approximate b^r-1 by 3rd-degree minimax polynomial p_b(r) on the interval [-log(b,2)/512, log(b,2)/512]. +# Since the bounds on r are very tight, this is sufficient to be accurate to floating point epsilon. +# +# 3. Scale back: b^x = 2^k * 2^(j/256) * (1 + p_b(r)) +# Since the range of possible j is small, 2^(j/256) is stored for all possible values in slightly extended precision. -@inline exp_kernel(x::Float64) = @horner(x, 1.66666666666666019037e-1, - -2.77777777770155933842e-3, 6.61375632143793436117e-5, - -1.65339022054652515390e-6, 4.13813679705723846039e-8) +# Method for Float32 +# 1. Argument reduction: Reduce x to an r so that |r| <= log(b, 2)/2. Given x, base b, +# find r and integer N such that +# x = N*log(b, 2) + r, |r| <= log(b,2)/2. +# +# 2. Approximate b^r by 7th-degree minimax polynomial p_b(r) on the interval [-log(b,2)/2, log(b,2)/2]. +# 3. Scale back: b^x = 2^N * p_b(r) -@inline exp_kernel(x::Float32) = @horner(x, 1.6666625440f-1, -2.7667332906f-3) +# For both, a little extra care needs to be taken if b^r is subnormal. +# The solution is to do the scaling back in 2 steps as just messing with the exponent wouldn't work. +for (func, base) in (:exp2=>Val(2), :exp=>Val(:ℯ), :exp10=>Val(10)) + @eval begin + ($func)(x::Real) = ($func)(float(x)) + function ($func)(x::T) where T<:Float64 + N_float = muladd(x, LogBo256INV($base, T), MAGIC_ROUND_CONST(T)) + N = reinterpret(uinttype(T), N_float) % Int32 + N_float -= MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV($base, T)) + r = muladd(N_float, LogBo256U($base, T), x) + r = muladd(N_float, LogBo256L($base, T), r) + k = N >> 8 + jU, jL = table_unpack(N&255 +1) + small_part = muladd(jU, expm1b_kernel($base, r), jL) + jU -# for values smaller than this threshold just use a Taylor expansion -@eval exp_small_thres(::Type{Float64}) = $(2.0^-28) -@eval exp_small_thres(::Type{Float32}) = $(2.0f0^-13) + if !(abs(x) <= SUBNORM_EXP($base, T)) + x >= MAX_EXP($base, T) && return Inf + x <= MIN_EXP($base, T) && return 0.0 + if k <= -53 + # The UInt64 forces promotion. (Only matters for 32 bit systems.) + twopk = (k + UInt64(53)) << 52 + return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53) + end + end + twopk = Int64(k) << 52 + return reinterpret(T, twopk + reinterpret(Int64, small_part)) + end -""" + function ($func)(x::T) where T<:Float32 + N_float = round(x*LogBINV($base, T)) + N = unsafe_trunc(Int32, N_float) + r = muladd(N_float, LogBU($base, T), x) + r = muladd(N_float, LogBL($base, T), r) + small_part = expb_kernel($base, r) + if !(abs(x) <= SUBNORM_EXP($base, T)) + x > MAX_EXP($base, T) && return Inf32 + x < MIN_EXP($base, T) && return 0.0f0 + if N<=Int32(-24) + twopk = reinterpret(T, (N+Int32(151)) << Int32(23)) + return (twopk*small_part)*(2f0^(-24)) + end + N == exponent_max(T) && return small_part * T(2.0) * T(2.0)^(exponent_max(T) - 1) + end + twopk = reinterpret(T, (N+Int32(127)) << Int32(23)) + return twopk*small_part + end + end +end +@doc """ exp(x) Compute the natural base exponential of `x`, in other words ``e^x``. @@ -68,71 +251,4 @@ Compute the natural base exponential of `x`, in other words ``e^x``. ```jldoctest julia> exp(1.0) 2.718281828459045 -``` -""" -exp(x::Real) = exp(float(x)) -function exp(x::T) where T<:Union{Float32,Float64} - xa = reinterpret(Unsigned, x) & ~sign_mask(T) - xsb = signbit(x) - - # filter out non-finite arguments - if xa > reinterpret(Unsigned, MAX_EXP(T)) - if xa >= exponent_mask(T) - xa & significand_mask(T) != 0 && return T(NaN) - return xsb ? T(0.0) : T(Inf) # exp(+-Inf) - end - x > MAX_EXP(T) && return T(Inf) - x < MIN_EXP(T) && return T(0.0) - end - # This implementation gives 2.7182818284590455 for exp(1.0) when T == - # Float64, which is well within the allowable error; however, - # 2.718281828459045 is closer to the true value so we prefer that answer, - # given that 1.0 is such an important argument value. - if x == T(1.0) && T == Float64 - return 2.718281828459045235360 - end - # compute approximation - if xa > reinterpret(Unsigned, T(0.5)*T(LN2)) # |x| > 0.5 log(2) - # argument reduction - if xa < reinterpret(Unsigned, T(1.5)*T(LN2)) # |x| < 1.5 log(2) - if xsb - k = -1 - hi = x + LN2U(T) - lo = -LN2L(T) - else - k = 1 - hi = x - LN2U(T) - lo = LN2L(T) - end - else - n = round(T(LOG2_E)*x) - k = unsafe_trunc(Int,n) - hi = muladd(n, -LN2U(T), x) - lo = n*LN2L(T) - end - # compute approximation on reduced argument - r = hi - lo - z = r*r - p = r - z*exp_kernel(z) - y = T(1.0) - ((lo - (r*p)/(T(2.0) - p)) - hi) - # scale back - if k > -significand_bits(T) - # multiply by 2.0 first to prevent overflow, which helps extends the range - k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1) - twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T)) - return y*twopk - else - # add significand_bits(T) + 1 to lift the range outside the subnormals - twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T)) - return y * twopk * T(2.0)^(-significand_bits(T) - 1) - end - elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres - # Taylor approximation for small values: exp(x) ≈ 1.0 + x - return T(1.0) + x - else - # primary range with k = 0, so compute approximation directly - z = x*x - p = x - z*exp_kernel(z) - return T(1.0) - ((x*p)/(p - T(2.0)) - x) - end -end +""" exp(x::Real) diff --git a/base/special/exp10.jl b/base/special/exp10.jl deleted file mode 100644 index c32d0a98702ee..0000000000000 --- a/base/special/exp10.jl +++ /dev/null @@ -1,139 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Method -# 1. Argument reduction: Reduce x to an r so that |r| <= 0.5*log10(2). Given x, -# find r and integer k such that -# -# x = k*log10(2) + r, |r| <= 0.5*log10(2). -# -# 2. Approximate exp10(r) by a polynomial on the interval [-0.5*log10(2), 0.5*log10(2)]: -# -# exp10(x) = 1.0 + polynomial(x), -# -# sup norm relative error within the interval of the polynomial approximations: -# Float64 : [2.7245504724394698952e-18; 2.7245529895753476720e-18] -# Float32 : [9.6026471477842205871e-10; 9.6026560194009888672e-10] -# -# 3. Scale back: exp10(x) = 2^k * exp10(r) - -# log2(10) -const LOG2_10 = 3.321928094887362347870319429489390175864831393024580612054756395815934776608624 -# log10(2) -const LOG10_2 = 3.010299956639811952137388947244930267681898814621085413104274611271081892744238e-01 -# log(10) -const LN10 = 2.302585092994045684017991454684364207601101488628772976033327900967572609677367 - -# log10(2) into upper and lower bits -LOG10_2U(::Type{Float64}) = 3.01025390625000000000e-1 -LOG10_2U(::Type{Float32}) = 3.00781250000000000000f-1 - -LOG10_2L(::Type{Float64}) = 4.60503898119521373889e-6 -LOG10_2L(::Type{Float32}) = 2.48745663981195213739f-4 - -# max and min arguments -MAX_EXP10(::Type{Float64}) = 3.08254715559916743851e2 # log 2^1023*(2-2^-52) -MAX_EXP10(::Type{Float32}) = 38.531839419103626f0 # log 2^127 *(2-2^-23) - -# one less than the min exponent since we can sqeeze a bit more from the exp10 function -MIN_EXP10(::Type{Float64}) = -3.23607245338779784854769e2 # log10 2^-1075 -MIN_EXP10(::Type{Float32}) = -45.15449934959718f0 # log10 2^-150 - -@inline exp10_kernel(x::Float64) = - @horner(x, 1.0, - 2.30258509299404590109361379290930926799774169921875, - 2.6509490552391992146397114993305876851081848144531, - 2.03467859229323178027470930828712880611419677734375, - 1.17125514891212478829629617393948137760162353515625, - 0.53938292928868392106522833273629657924175262451172, - 0.20699584873167015119932443667494226247072219848633, - 6.8089348259156870502017966373387025669217109680176e-2, - 1.9597690535095281527677713029333972372114658355713e-2, - 5.015553121397981796436571499953060992993414402008e-3, - 1.15474960721768829356725927226534622604958713054657e-3, - 1.55440426715227567738830671828509366605430841445923e-4, - 3.8731032432074128681303432086835414338565897196531e-5, - 2.3804466459036747669197886523306806338950991630554e-3, - 9.3881392238209649520573607528461934634833596646786e-5, - -2.64330486232183387018679354696359951049089431762695e-2) - -@inline exp10_kernel(x::Float32) = - @horner(x, 1.0f0, - 2.302585124969482421875f0, - 2.650949001312255859375f0, - 2.0346698760986328125f0, - 1.17125606536865234375f0, - 0.5400512218475341796875f0, - 0.20749187469482421875f0, - 5.2789829671382904052734375f-2) - -@eval exp10_small_thres(::Type{Float64}) = $(2.0^-29) -@eval exp10_small_thres(::Type{Float32}) = $(2.0f0^-14) - -""" - exp10(x) - -Compute ``10^x``. - -# Examples -```jldoctest -julia> exp10(2) -100.0 - -julia> exp10(0.2) -1.5848931924611136 -``` -""" -exp10(x::Real) = exp10(float(x)) -function exp10(x::T) where T<:Union{Float32,Float64} - xa = reinterpret(Unsigned, x) & ~sign_mask(T) - xsb = signbit(x) - - # filter out non-finite arguments - if xa > reinterpret(Unsigned, MAX_EXP10(T)) - if xa >= exponent_mask(T) - xa & significand_mask(T) != 0 && return T(NaN) - return xsb ? T(0.0) : T(Inf) # exp10(+-Inf) - end - x > MAX_EXP10(T) && return T(Inf) - x < MIN_EXP10(T) && return T(0.0) - end - # compute approximation - if xa > reinterpret(Unsigned, T(0.5)*T(LOG10_2)) # |x| > 0.5 log10(2). - # argument reduction - if xa < reinterpret(Unsigned, T(1.5)*T(LOG10_2)) # |x| <= 1.5 log10(2) - if xsb - k = -1 - r = LOG10_2U(T) + x - r = LOG10_2L(T) + r - else - k = 1 - r = x - LOG10_2U(T) - r = r - LOG10_2L(T) - end - else - n = round(T(LOG2_10)*x) - k = unsafe_trunc(Int,n) - r = muladd(n, -LOG10_2U(T), x) - r = muladd(n, -LOG10_2L(T), r) - end - # compute approximation on reduced argument - y = exp10_kernel(r) - # scale back - if k > -significand_bits(T) - # multiply by 2.0 first to prevent overflow, extending the range - k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1) - twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T)) - return y*twopk - else - # add significand_bits(T) + 1 to lift the range outside the subnormals - twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T)) - return y * twopk * T(2.0)^(-significand_bits(T) - 1) - end - elseif xa < reinterpret(Unsigned, exp10_small_thres(T)) # |x| < exp10_small_thres - # Taylor approximation for small values: exp10(x) ≈ 1.0 + log(10)*x - return muladd(x, T(LN10), T(1.0)) - else - # primary range with k = 0, so compute approximation directly - return exp10_kernel(x) - end -end