Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Math.Round, Math.ILogB, and do some minor cleanup of Half, Single, and Double #98040

Merged
merged 7 commits into from
Feb 7, 2024
154 changes: 49 additions & 105 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2373,65 +2373,34 @@ double FloatingPointUtils::round(double x)
// MathF.Round(float), and FloatingPointUtils::round(float)
// ************************************************************************************

// This is based on the 'Berkeley SoftFloat Release 3e' algorithm
// This represents the boundary at which point we can only represent whole integers
const double IntegerBoundary = 4503599627370496.0; // 2^52

uint64_t bits = *reinterpret_cast<uint64_t*>(&x);
int32_t exponent = (int32_t)(bits >> 52) & 0x07FF;

if (exponent <= 0x03FE)
{
if ((bits << 1) == 0)
{
// Exactly +/- zero should return the original value
return x;
}

// Any value less than or equal to 0.5 will always round to exactly zero
// and any value greater than 0.5 will always round to exactly one. However,
// we need to preserve the original sign for IEEE compliance.

double result = ((exponent == 0x03FE) && ((bits & UI64(0x000FFFFFFFFFFFFF)) != 0)) ? 1.0 : 0.0;
return _copysign(result, x);
}

if (exponent >= 0x0433)
if (fabs(x) >= IntegerBoundary)
{
// Any value greater than or equal to 2^52 cannot have a fractional part,
// So it will always round to exactly itself.

// Values above this boundary don't have a fractional
// portion and so we can simply return them as-is.
return x;
}

// The absolute value should be greater than or equal to 1.0 and less than 2^52
assert((0x03FF <= exponent) && (exponent <= 0x0432));

// Determine the last bit that represents the integral portion of the value
// and the bits representing the fractional portion

uint64_t lastBitMask = UI64(1) << (0x0433 - exponent);
uint64_t roundBitsMask = lastBitMask - 1;

// Increment the first fractional bit, which represents the midpoint between
// two integral values in the current window.

bits += lastBitMask >> 1;

if ((bits & roundBitsMask) == 0)
{
// If that overflowed and the rest of the fractional bits are zero
// then we were exactly x.5 and we want to round to the even result

bits &= ~lastBitMask;
}
else
{
// Otherwise, we just want to strip the fractional bits off, truncating
// to the current integer value.

bits &= ~roundBitsMask;
}
// Otherwise, since floating-point takes the inputs, performs
// the computation as if to infinite precision and unbounded
// range, and then rounds to the nearest representable result
// using the current rounding mode, we can rely on this to
// cheaply round.
//
// In particular, .NET doesn't support changing the rounding
// mode and defaults to "round to nearest, ties to even", thus
// by adding the original value to the IntegerBoundary we get
// an exactly represented whole integer that is precisely the
// IntegerBoundary greater in magnitude than the answer we want.
//
// We can then simply remove that offset to get the correct answer,
// noting that we also need to copy back the original sign to
// correctly handle -0.0

return *reinterpret_cast<double*>(&bits);
double temp = _copysign(IntegerBoundary, x);
return _copysign((x + temp) - temp, x);
}

// Windows x86 and Windows ARM/ARM64 may not define _copysignf() but they do define _copysign().
Expand All @@ -2455,65 +2424,40 @@ float FloatingPointUtils::round(float x)
// Math.Round(double), and FloatingPointUtils::round(double)
// ************************************************************************************

// This is based on the 'Berkeley SoftFloat Release 3e' algorithm

uint32_t bits = *reinterpret_cast<uint32_t*>(&x);
int32_t exponent = (int32_t)(bits >> 23) & 0xFF;

if (exponent <= 0x7E)
{
if ((bits << 1) == 0)
{
// Exactly +/- zero should return the original value
return x;
}

// Any value less than or equal to 0.5 will always round to exactly zero
// and any value greater than 0.5 will always round to exactly one. However,
// we need to preserve the original sign for IEEE compliance.
// This code is based on `nearbyint` from amd/aocl-libm-ose
// Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text

float result = ((exponent == 0x7E) && ((bits & 0x007FFFFF) != 0)) ? 1.0f : 0.0f;
return _copysignf(result, x);
}
// This represents the boundary at which point we can only represent whole integers
const float IntegerBoundary = 8388608.0f; // 2^23

if (exponent >= 0x96)
if (fabsf(x) >= IntegerBoundary)
{
// Any value greater than or equal to 2^52 cannot have a fractional part,
// So it will always round to exactly itself.

// Values above this boundary don't have a fractional
// portion and so we can simply return them as-is.
return x;
}

// The absolute value should be greater than or equal to 1.0 and less than 2^52
assert((0x7F <= exponent) && (exponent <= 0x95));

// Determine the last bit that represents the integral portion of the value
// and the bits representing the fractional portion

uint32_t lastBitMask = 1U << (0x96 - exponent);
uint32_t roundBitsMask = lastBitMask - 1;

// Increment the first fractional bit, which represents the midpoint between
// two integral values in the current window.

bits += lastBitMask >> 1;

if ((bits & roundBitsMask) == 0)
{
// If that overflowed and the rest of the fractional bits are zero
// then we were exactly x.5 and we want to round to the even result

bits &= ~lastBitMask;
}
else
{
// Otherwise, we just want to strip the fractional bits off, truncating
// to the current integer value.

bits &= ~roundBitsMask;
}
// Otherwise, since floating-point takes the inputs, performs
// the computation as if to infinite precision and unbounded
// range, and then rounds to the nearest representable result
// using the current rounding mode, we can rely on this to
// cheaply round.
//
// In particular, .NET doesn't support changing the rounding
// mode and defaults to "round to nearest, ties to even", thus
// by adding the original value to the IntegerBoundary we get
// an exactly represented whole integer that is precisely the
// IntegerBoundary greater in magnitude than the answer we want.
//
// We can then simply remove that offset to get the correct answer,
// noting that we also need to copy back the original sign to
// correctly handle -0.0

return *reinterpret_cast<float*>(&bits);
float temp = _copysignf(IntegerBoundary, x);
return _copysignf((x + temp) - temp, x);
}

bool FloatingPointUtils::isNormal(double x)
Expand Down
Loading
Loading