Skip to content

Commit

Permalink
Improve Math.Round, Math.ILogB, and do some minor cleanup of Half, Si…
Browse files Browse the repository at this point in the history
…ngle, and Double (#98040)

* Simplify the fallback implementation for Math.Round

* Improve some of the floating-point classification APIs

* Improve the algorithm used for ILogB

* Do some minor cleanup of functions in Half, Single, and Double

* Fix a build failure

* Ensure Half.IsNormal and Half.IsSubnormal casts to ushort so the domain stays correct

* Make sure BitDecrement checks `IsFinite(x)` not `IsFinite(bits)`
  • Loading branch information
tannergooding authored Feb 7, 2024
1 parent bdd5d32 commit 62d7c6a
Show file tree
Hide file tree
Showing 7 changed files with 483 additions and 482 deletions.
154 changes: 49 additions & 105 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2373,65 +2373,34 @@ double FloatingPointUtils::round(double x)
// MathF.Round(float), and FloatingPointUtils::round(float)
// ************************************************************************************

// This is based on the 'Berkeley SoftFloat Release 3e' algorithm
// This represents the boundary at which point we can only represent whole integers
const double IntegerBoundary = 4503599627370496.0; // 2^52

uint64_t bits = *reinterpret_cast<uint64_t*>(&x);
int32_t exponent = (int32_t)(bits >> 52) & 0x07FF;

if (exponent <= 0x03FE)
{
if ((bits << 1) == 0)
{
// Exactly +/- zero should return the original value
return x;
}

// Any value less than or equal to 0.5 will always round to exactly zero
// and any value greater than 0.5 will always round to exactly one. However,
// we need to preserve the original sign for IEEE compliance.

double result = ((exponent == 0x03FE) && ((bits & UI64(0x000FFFFFFFFFFFFF)) != 0)) ? 1.0 : 0.0;
return _copysign(result, x);
}

if (exponent >= 0x0433)
if (fabs(x) >= IntegerBoundary)
{
// Any value greater than or equal to 2^52 cannot have a fractional part,
// So it will always round to exactly itself.

// Values at or above this boundary (in magnitude) don't have a
// fractional portion and so we can simply return them as-is.
return x;
}

// The absolute value should be greater than or equal to 1.0 and less than 2^52
assert((0x03FF <= exponent) && (exponent <= 0x0432));

// Determine the last bit that represents the integral portion of the value
// and the bits representing the fractional portion

uint64_t lastBitMask = UI64(1) << (0x0433 - exponent);
uint64_t roundBitsMask = lastBitMask - 1;

// Increment the first fractional bit, which represents the midpoint between
// two integral values in the current window.

bits += lastBitMask >> 1;

if ((bits & roundBitsMask) == 0)
{
// If that overflowed and the rest of the fractional bits are zero
// then we were exactly x.5 and we want to round to the even result

bits &= ~lastBitMask;
}
else
{
// Otherwise, we just want to strip the fractional bits off, truncating
// to the current integer value.

bits &= ~roundBitsMask;
}
// Otherwise, since floating-point takes the inputs, performs
// the computation as if to infinite precision and unbounded
// range, and then rounds to the nearest representable result
// using the current rounding mode, we can rely on this to
// cheaply round.
//
// In particular, .NET doesn't support changing the rounding
// mode and defaults to "round to nearest, ties to even", thus
// by adding the original value to the IntegerBoundary we get
// an exactly represented whole integer that is precisely the
// IntegerBoundary greater in magnitude than the answer we want.
//
// We can then simply remove that offset to get the correct answer,
// noting that we also need to copy back the original sign to
// correctly handle -0.0

return *reinterpret_cast<double*>(&bits);
double temp = _copysign(IntegerBoundary, x);
return _copysign((x + temp) - temp, x);
}

// Windows x86 and Windows ARM/ARM64 may not define _copysignf() but they do define _copysign().
Expand All @@ -2455,65 +2424,40 @@ float FloatingPointUtils::round(float x)
// Math.Round(double), and FloatingPointUtils::round(double)
// ************************************************************************************

// This is based on the 'Berkeley SoftFloat Release 3e' algorithm

uint32_t bits = *reinterpret_cast<uint32_t*>(&x);
int32_t exponent = (int32_t)(bits >> 23) & 0xFF;

if (exponent <= 0x7E)
{
if ((bits << 1) == 0)
{
// Exactly +/- zero should return the original value
return x;
}

// Any value less than or equal to 0.5 will always round to exactly zero
// and any value greater than 0.5 will always round to exactly one. However,
// we need to preserve the original sign for IEEE compliance.
// This code is based on `nearbyint` from amd/aocl-libm-ose
// Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text

float result = ((exponent == 0x7E) && ((bits & 0x007FFFFF) != 0)) ? 1.0f : 0.0f;
return _copysignf(result, x);
}
// This represents the boundary at which point we can only represent whole integers
const float IntegerBoundary = 8388608.0f; // 2^23

if (exponent >= 0x96)
if (fabsf(x) >= IntegerBoundary)
{
// Any value greater than or equal to 2^23 cannot have a fractional part,
// So it will always round to exactly itself.

// Values at or above this boundary (in magnitude) don't have a
// fractional portion and so we can simply return them as-is.
return x;
}

// The absolute value should be greater than or equal to 1.0 and less than 2^23
assert((0x7F <= exponent) && (exponent <= 0x95));

// Determine the last bit that represents the integral portion of the value
// and the bits representing the fractional portion

uint32_t lastBitMask = 1U << (0x96 - exponent);
uint32_t roundBitsMask = lastBitMask - 1;

// Increment the first fractional bit, which represents the midpoint between
// two integral values in the current window.

bits += lastBitMask >> 1;

if ((bits & roundBitsMask) == 0)
{
// If that overflowed and the rest of the fractional bits are zero
// then we were exactly x.5 and we want to round to the even result

bits &= ~lastBitMask;
}
else
{
// Otherwise, we just want to strip the fractional bits off, truncating
// to the current integer value.

bits &= ~roundBitsMask;
}
// Otherwise, since floating-point takes the inputs, performs
// the computation as if to infinite precision and unbounded
// range, and then rounds to the nearest representable result
// using the current rounding mode, we can rely on this to
// cheaply round.
//
// In particular, .NET doesn't support changing the rounding
// mode and defaults to "round to nearest, ties to even", thus
// by adding the original value to the IntegerBoundary we get
// an exactly represented whole integer that is precisely the
// IntegerBoundary greater in magnitude than the answer we want.
//
// We can then simply remove that offset to get the correct answer,
// noting that we also need to copy back the original sign to
// correctly handle -0.0

return *reinterpret_cast<float*>(&bits);
float temp = _copysignf(IntegerBoundary, x);
return _copysignf((x + temp) - temp, x);
}

bool FloatingPointUtils::isNormal(double x)
Expand Down
Loading

0 comments on commit 62d7c6a

Please sign in to comment.