Improve Math.Round, Math.ILogB, and do some minor cleanup of Half, Si…

…ngle, and Double (#98040) * Simplify the fallback implementation for Math.Round * Improve some of the floating-point classification APIs * Improve the algorithm used for ILogB * Do some minor cleanup of functions in Half, Single, and Double * Fix a build failure * Ensure Half.IsNormal and Half.IsSubnormal casts to ushort so the domain stays correct * Make sure BitDecrement checks `IsFinite(x)` not `IsFinite(bits)`
dotnet · Feb 7, 2024 · 62d7c6a · 62d7c6a
1 parent bdd5d32
commit 62d7c6a
Show file tree

Hide file tree

Showing 7 changed files with 483 additions and 482 deletions.
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
@@ -2373,65 +2373,34 @@ double FloatingPointUtils::round(double x)
     //            MathF.Round(float), and FloatingPointUtils::round(float)
     // ************************************************************************************
 
-    // This is based on the 'Berkeley SoftFloat Release 3e' algorithm
+    // This represents the boundary at which point we can only represent whole integers
+    const double IntegerBoundary = 4503599627370496.0; // 2^52
 
-    uint64_t bits     = *reinterpret_cast<uint64_t*>(&x);
-    int32_t  exponent = (int32_t)(bits >> 52) & 0x07FF;
-
-    if (exponent <= 0x03FE)
-    {
-        if ((bits << 1) == 0)
-        {
-            // Exactly +/- zero should return the original value
-            return x;
-        }
-
-        // Any value less than or equal to 0.5 will always round to exactly zero
-        // and any value greater than 0.5 will always round to exactly one. However,
-        // we need to preserve the original sign for IEEE compliance.
-
-        double result = ((exponent == 0x03FE) && ((bits & UI64(0x000FFFFFFFFFFFFF)) != 0)) ? 1.0 : 0.0;
-        return _copysign(result, x);
-    }
-
-    if (exponent >= 0x0433)
+    if (fabs(x) >= IntegerBoundary)
     {
-        // Any value greater than or equal to 2^52 cannot have a fractional part,
-        // So it will always round to exactly itself.
-
+        // Values above this boundary don't have a fractional
+        // portion and so we can simply return them as-is.
         return x;
     }
 
-    // The absolute value should be greater than or equal to 1.0 and less than 2^52
-    assert((0x03FF <= exponent) && (exponent <= 0x0432));
-
-    // Determine the last bit that represents the integral portion of the value
-    // and the bits representing the fractional portion
-
-    uint64_t lastBitMask   = UI64(1) << (0x0433 - exponent);
-    uint64_t roundBitsMask = lastBitMask - 1;
-
-    // Increment the first fractional bit, which represents the midpoint between
-    // two integral values in the current window.
-
-    bits += lastBitMask >> 1;
-
-    if ((bits & roundBitsMask) == 0)
-    {
-        // If that overflowed and the rest of the fractional bits are zero
-        // then we were exactly x.5 and we want to round to the even result
-
-        bits &= ~lastBitMask;
-    }
-    else
-    {
-        // Otherwise, we just want to strip the fractional bits off, truncating
-        // to the current integer value.
-
-        bits &= ~roundBitsMask;
-    }
+    // Otherwise, since floating-point takes the inputs, performs
+    // the computation as if to infinite precision and unbounded
+    // range, and then rounds to the nearest representable result
+    // using the current rounding mode, we can rely on this to
+    // cheaply round.
+    //
+    // In particular, .NET doesn't support changing the rounding
+    // mode and defaults to "round to nearest, ties to even", thus
+    // by adding the original value to the IntegerBoundary we get
+    // an exactly represented whole integer that is precisely the
+    // IntegerBoundary greater in magnitude than the answer we want.
+    //
+    // We can then simply remove that offset to get the correct answer,
+    // noting that we also need to copy back the original sign to
+    // correctly handle -0.0
 
-    return *reinterpret_cast<double*>(&bits);
+    double temp = _copysign(IntegerBoundary, x);
+    return _copysign((x + temp) - temp, x);
 }
 
 // Windows x86 and Windows ARM/ARM64 may not define _copysignf() but they do define _copysign().
@@ -2455,65 +2424,40 @@ float FloatingPointUtils::round(float x)
     //            Math.Round(double), and FloatingPointUtils::round(double)
     // ************************************************************************************
 
-    // This is based on the 'Berkeley SoftFloat Release 3e' algorithm
-
-    uint32_t bits     = *reinterpret_cast<uint32_t*>(&x);
-    int32_t  exponent = (int32_t)(bits >> 23) & 0xFF;
-
-    if (exponent <= 0x7E)
-    {
-        if ((bits << 1) == 0)
-        {
-            // Exactly +/- zero should return the original value
-            return x;
-        }
-
-        // Any value less than or equal to 0.5 will always round to exactly zero
-        // and any value greater than 0.5 will always round to exactly one. However,
-        // we need to preserve the original sign for IEEE compliance.
+    // This code is based on `nearbyint` from amd/aocl-libm-ose
+    // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+    //
+    // Licensed under the BSD 3-Clause "New" or "Revised" License
+    // See THIRD-PARTY-NOTICES.TXT for the full license text
 
-        float result = ((exponent == 0x7E) && ((bits & 0x007FFFFF) != 0)) ? 1.0f : 0.0f;
-        return _copysignf(result, x);
-    }
+    // This represents the boundary at which point we can only represent whole integers
+    const float IntegerBoundary = 8388608.0f; // 2^23
 
-    if (exponent >= 0x96)
+    if (fabsf(x) >= IntegerBoundary)
     {
-        // Any value greater than or equal to 2^52 cannot have a fractional part,
-        // So it will always round to exactly itself.
-
+        // Values above this boundary don't have a fractional
+        // portion and so we can simply return them as-is.
         return x;
     }
 
-    // The absolute value should be greater than or equal to 1.0 and less than 2^52
-    assert((0x7F <= exponent) && (exponent <= 0x95));
-
-    // Determine the last bit that represents the integral portion of the value
-    // and the bits representing the fractional portion
-
-    uint32_t lastBitMask   = 1U << (0x96 - exponent);
-    uint32_t roundBitsMask = lastBitMask - 1;
-
-    // Increment the first fractional bit, which represents the midpoint between
-    // two integral values in the current window.
-
-    bits += lastBitMask >> 1;
-
-    if ((bits & roundBitsMask) == 0)
-    {
-        // If that overflowed and the rest of the fractional bits are zero
-        // then we were exactly x.5 and we want to round to the even result
-
-        bits &= ~lastBitMask;
-    }
-    else
-    {
-        // Otherwise, we just want to strip the fractional bits off, truncating
-        // to the current integer value.
-
-        bits &= ~roundBitsMask;
-    }
+    // Otherwise, since floating-point takes the inputs, performs
+    // the computation as if to infinite precision and unbounded
+    // range, and then rounds to the nearest representable result
+    // using the current rounding mode, we can rely on this to
+    // cheaply round.
+    //
+    // In particular, .NET doesn't support changing the rounding
+    // mode and defaults to "round to nearest, ties to even", thus
+    // by adding the original value to the IntegerBoundary we get
+    // an exactly represented whole integer that is precisely the
+    // IntegerBoundary greater in magnitude than the answer we want.
+    //
+    // We can then simply remove that offset to get the correct answer,
+    // noting that we also need to copy back the original sign to
+    // correctly handle -0.0
 
-    return *reinterpret_cast<float*>(&bits);
+    float temp = _copysignf(IntegerBoundary, x);
+    return _copysignf((x + temp) - temp, x);
 }
 
 bool FloatingPointUtils::isNormal(double x)