From 933388f5d11ec3582d81d4df48b650131cde8b17 Mon Sep 17 00:00:00 2001 From: Marshall Ward Date: Tue, 30 Jan 2024 15:28:29 -0500 Subject: [PATCH] Cuberoot: Refactor (re|de)scale functions Some modifications were made to the cuberoot rescale and descale functions: * The machine parameters were moved from function to module parameters. This could dangerously expose them to other functions, but it prevents multiple definitions of the same numbers. * The exponent is now cube-roote in rescale rather than descale, and the fact that the rescaled exponent is always -1 is now integrated into the expressions, rather than read and appended to the old value. * The exponent expressions are broken into more explicit steps, rather than combining multiple steps and assumptions into a single expression. We are counting on the compiler to simplify these! * The bias is no longer assumed to be a multiple of three! This is true for double precision but not single precision. * The bias is removed (and later restored) to the exponent terms before computing the new exponents after cuberoot. Although this reduces the number of bit-reading operations by one, I did not see any appreciable change in performance. We probably have more work to do than simple tweaks to bit operations. A new test of quasi-random number was also added to the cuberoot test suite. These numbers were able to detect the differences in GNU and Intel compiler output. Co-authored-by: Robert Hallberg --- src/framework/MOM_intrinsic_functions.F90 | 129 +++++++++++----------- 1 file changed, 62 insertions(+), 67 deletions(-) diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90 index 5d420057d4..16eac5539a 100644 --- a/src/framework/MOM_intrinsic_functions.F90 +++ b/src/framework/MOM_intrinsic_functions.F90 @@ -12,6 +12,19 @@ module MOM_intrinsic_functions public :: invcosh, cuberoot public :: intrinsic_functions_unit_tests +! Floating point model, if bit layout from high to low is (sign, exp, frac) + +integer, parameter :: bias = maxexponent(1.) - 1 + !< The double precision exponent offset +integer, parameter :: signbit = storage_size(1.) - 1 + !< Position of sign bit +integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.)) + !< Bit size of exponent +integer, parameter :: expbit = signbit - explen + !< Position of lowest exponent bit +integer, parameter :: fraclen = expbit + !< Length of fractional part + contains !> Evaluate the inverse cosh, either using a math library or an @@ -55,7 +68,7 @@ elemental function cuberoot(x) result(root) ! Return 0 for an input of 0, or NaN for a NaN input. root = x else - call rescale_exp(x, asx, e_x, s_x) + call rescale_cbrt(x, asx, e_x, s_x) ! Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method, ! noting that Halley's method onverges monotonically and needs no bounding. Halley's method is @@ -83,100 +96,75 @@ elemental function cuberoot(x) result(root) ! that is within the last bit of the true solution. root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2)) - root = descale_cbrt(root_asx, e_x, s_x) + root = descale(root_asx, e_x, s_x) endif end function cuberoot -!> Rescale `a` to the range [0.125, 1) while preserving its fractional term. -pure subroutine rescale_exp(a, x, e_a, s_a) +!> Rescale `a` to the range [0.125, 1) and compute its cube-root exponent. +pure subroutine rescale_cbrt(a, x, e_c, s_a) real, intent(in) :: a - !< The value to be rescaled + !< The real parameter to be rescaled for cube root real, intent(out) :: x !< The rescaled value of `a` - integer(kind=int64), intent(out) :: e_a - !< The biased exponent of `a` + integer(kind=int64), intent(out) :: e_c + !< The cuberoot-exponent of `a` integer(kind=int64), intent(out) :: s_a !< The sign bit of `a` - ! Floating point model, if format is (sign, exp, frac) - integer, parameter :: bias = maxexponent(1.) - 1 - !< The double precision exponent offset (assuming a balanced range) - integer, parameter :: signbit = storage_size(1.) - 1 - !< Position of sign bit - integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.)) - !< Bit size of exponent - integer, parameter :: expbit = signbit - explen - !< Position of lowest exponent bit - integer, parameter :: fraclen = expbit - !< Length of fractional part - integer(kind=int64) :: xb - !< A floating point number, bit-packed as an integer - integer(kind=int64) :: e_scaled - !< The new rescaled exponent of `a` (i.e. the exponent of `x`) - - ! Pack bits of `a` into `xb` and extract its exponent and sign + ! Floating point value of `a`, bit-packed as an integer + integer(kind=int64) :: e_a + ! Unscaled exponent of `a` + integer(kind=int64) :: e_x + ! Exponent of `x` (i.e. exponent of `a` after rescaling) + integer(kind=int64) :: e_div, e_mod + ! Quotient and remainder of `e = 3*(e/3) + e % 3`. + + ! Pack bits of `a` into `xb` and extract its exponent and sign. xb = transfer(a, 1_int64) s_a = ibits(xb, signbit, 1) - e_a = ibits(xb, expbit, explen) + e_a = ibits(xb, expbit, explen) - bias + + ! Decompose the exponent as `e = 3*(e//3) + modulo(e,3)`. + ! Fortran division is round-to-zero so we must reconstruct integer divison. + e_mod = modulo(e_a, 3_int64) + e_div = (e_a - e_mod)/3 - ! Decompose the exponent as `e = modulo(e,3) + 3*(e/3)` and extract the - ! rescaled exponent, now in {-3,-2,-1} - e_scaled = modulo(e_a, 3) - 3 + bias + ! 1. Compute the exponent of final cube root of `a`. + ! * `e = e_a/3 + 1 + cbrt(e_x)` but `cbrt(x)` exponent is always -1. + e_c = (e_div + 1) - 1 - ! Insert the new 11-bit exponent into `xb`, while also setting the sign bit - ! to zero, ensuring that `xb` is always positive. - call mvbits(e_scaled, 0, explen + 1, xb, fraclen) + ! 2. Construct the rescaled exponent, shifted to [0.125, 1). + e_x = e_mod - 3 - ! Transfer the final modified value to `x` + ! Insert the new 11-bit exponent into `xb` and write to `x`, extending the + ! bitcount to 12, so that the sign bit is zero and `xb` is always positive. + call mvbits(e_x + bias, 0, explen + 1, xb, fraclen) x = transfer(xb, 1.) -end subroutine rescale_exp +end subroutine rescale_cbrt -!> Descale a real number to its original base, and apply the cube root to the -!! remaining exponent. -pure function descale_cbrt(x, e_a, s_a) result(r) +!> Undo the rescaling of a real number back to its original base. +pure function descale(x, e_a, s_a) result(a) real, intent(in) :: x - !< Cube root of the rescaled value, which was rescaled to [0.125, 1.0) + !< The rescaled value which is to be restored. integer(kind=int64), intent(in) :: e_a - !< Exponent of the original value to be cube rooted + !< Exponent of the unscaled value integer(kind=int64), intent(in) :: s_a - !< Sign bit of the original value to be cube rooted - real :: r - !< Restored value with the cube root applied to its exponent - - ! Floating point model, if format is (sign, exp, frac) - integer, parameter :: bias = maxexponent(1.) - 1 - !< The double precision exponent offset (assuming a balanced range) - integer, parameter :: signbit = storage_size(1.) - 1 - !< Position of sign bit - integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.)) - !< Bit size of exponent - integer, parameter :: expbit = signbit - explen - !< Position of lowest exponent bit - integer, parameter :: fraclen = expbit - !< Length of fractional part + !< Sign bit of the unscaled value + real :: a + !< Restored value with the corrected exponent and sign integer(kind=int64) :: xb ! Bit-packed real number into integer form - integer(kind=int64) :: e_r - ! Exponent of the descaled value - ! Extract the exponent of the rescaled value, in {-3, -2, -1} + ! Apply the corrected exponent and sign to `x`. xb = transfer(x, 1_8) - e_r = ibits(xb, expbit, explen) - - ! Apply the cube root to the old exponent (after removing its bias) and add - ! to the rescaled exponent. Correct the previous -3 with a +1. - e_r = e_r + (e_a/3 - bias/3 + 1) - - ! Apply the corrected exponent and sign and convert back to real - call mvbits(e_r, 0, explen, xb, expbit) + call mvbits(e_a + bias, 0, explen, xb, expbit) call mvbits(s_a, 0, 1, xb, signbit) - r = transfer(xb, 1.) -end function descale_cbrt - + a = transfer(xb, 1.) +end function descale !> Returns true if any unit test of intrinsic_functions fails, or false if they all pass. @@ -186,6 +174,7 @@ logical function intrinsic_functions_unit_tests(verbose) ! Local variables real :: testval ! A test value for self-consistency testing [nondim] logical :: fail, v + integer :: n fail = .false. v = verbose @@ -200,6 +189,12 @@ logical function intrinsic_functions_unit_tests(verbose) fail = fail .or. Test_cuberoot(v, 0.125) fail = fail .or. Test_cuberoot(v, 0.965) + testval = 1.0e-99 + v = .false. + do n=-160,160 + fail = fail .or. Test_cuberoot(v, testval) + testval = (-2.908 * (1.414213562373 + 1.2345678901234e-5*n)) * testval + enddo end function intrinsic_functions_unit_tests !> True if the cube of cuberoot(val) does not closely match val. False otherwise.