From f39aeabd6a0490d78e0ef5b663afef27223ddb1c Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Tue, 22 Oct 2024 12:05:26 +0200 Subject: [PATCH 01/13] half base type --- core/base/extended_float.hpp | 393 +----------------- core/test/base/extended_float.cpp | 41 +- include/ginkgo/core/base/half.hpp | 669 ++++++++++++++++++++++++++++++ include/ginkgo/ginkgo.hpp | 1 + 4 files changed, 707 insertions(+), 397 deletions(-) create mode 100644 include/ginkgo/core/base/half.hpp diff --git a/core/base/extended_float.hpp b/core/base/extended_float.hpp index c14b5d1bd39..dd7d46c363d 100644 --- a/core/base/extended_float.hpp +++ b/core/base/extended_float.hpp @@ -9,6 +9,7 @@ #include #include +#include #include @@ -30,341 +31,6 @@ namespace gko { -template -class truncated; - - -namespace detail { - - -template -struct uint_of_impl {}; - -template -struct uint_of_impl> { - using type = uint16; -}; - -template -struct uint_of_impl> { - using type = uint32; -}; - -template -struct uint_of_impl> { - using type = uint64; -}; - -template -using uint_of = typename uint_of_impl::type; - - -template -struct basic_float_traits {}; - -template <> -struct basic_float_traits { - using type = float16; - static constexpr int sign_bits = 1; - static constexpr int significand_bits = 10; - static constexpr int exponent_bits = 5; - static constexpr bool rounds_to_nearest = true; -}; - -template <> -struct basic_float_traits { - using type = float32; - static constexpr int sign_bits = 1; - static constexpr int significand_bits = 23; - static constexpr int exponent_bits = 8; - static constexpr bool rounds_to_nearest = true; -}; - -template <> -struct basic_float_traits { - using type = float64; - static constexpr int sign_bits = 1; - static constexpr int significand_bits = 52; - static constexpr int exponent_bits = 11; - static constexpr bool rounds_to_nearest = true; -}; - -template -struct basic_float_traits> { - using type = truncated; - static constexpr int sign_bits = ComponentId == 0 ? 1 : 0; - static constexpr int exponent_bits = - ComponentId == 0 ? basic_float_traits::exponent_bits : 0; - static constexpr int significand_bits = - ComponentId == 0 ? sizeof(type) * byte_size - exponent_bits - 1 - : sizeof(type) * byte_size; - static constexpr bool rounds_to_nearest = false; -}; - - -template -constexpr UintType create_ones(int n) -{ - return (n == sizeof(UintType) * byte_size ? static_cast(0) - : static_cast(1) << n) - - static_cast(1); -} - -template -struct float_traits { - using type = typename basic_float_traits::type; - using bits_type = uint_of; - static constexpr int sign_bits = basic_float_traits::sign_bits; - static constexpr int significand_bits = - basic_float_traits::significand_bits; - static constexpr int exponent_bits = basic_float_traits::exponent_bits; - static constexpr bits_type significand_mask = - create_ones(significand_bits); - static constexpr bits_type exponent_mask = - create_ones(significand_bits + exponent_bits) - - significand_mask; - static constexpr bits_type bias_mask = - create_ones(significand_bits + exponent_bits - 1) - - significand_mask; - static constexpr bits_type sign_mask = - create_ones(sign_bits + significand_bits + exponent_bits) - - exponent_mask - significand_mask; - static constexpr bool rounds_to_nearest = - basic_float_traits::rounds_to_nearest; - - static constexpr auto eps = - 1.0 / (1ll << (significand_bits + rounds_to_nearest)); - - static constexpr bool is_inf(bits_type data) - { - return (data & exponent_mask) == exponent_mask && - (data & significand_mask) == bits_type{}; - } - - static constexpr bool is_nan(bits_type data) - { - return (data & exponent_mask) == exponent_mask && - (data & significand_mask) != bits_type{}; - } - - static constexpr bool is_denom(bits_type data) - { - return (data & exponent_mask) == bits_type{}; - } -}; - - -template -struct precision_converter; - -// upcasting implementation details -template -struct precision_converter { - using source_traits = float_traits; - using result_traits = float_traits; - using source_bits = typename source_traits::bits_type; - using result_bits = typename result_traits::bits_type; - - static_assert(source_traits::exponent_bits <= - result_traits::exponent_bits && - source_traits::significand_bits <= - result_traits::significand_bits, - "SourceType has to have both lower range and precision or " - "higher range and precision than ResultType"); - - static constexpr int significand_offset = - result_traits::significand_bits - source_traits::significand_bits; - static constexpr int exponent_offset = significand_offset; - static constexpr int sign_offset = result_traits::exponent_bits - - source_traits::exponent_bits + - exponent_offset; - static constexpr result_bits bias_change = - result_traits::bias_mask - - (static_cast(source_traits::bias_mask) << exponent_offset); - - static constexpr result_bits shift_significand(source_bits data) noexcept - { - return static_cast(data & source_traits::significand_mask) - << significand_offset; - } - - static constexpr result_bits shift_exponent(source_bits data) noexcept - { - return update_bias( - static_cast(data & source_traits::exponent_mask) - << exponent_offset); - } - - static constexpr result_bits shift_sign(source_bits data) noexcept - { - return static_cast(data & source_traits::sign_mask) - << sign_offset; - } - -private: - static constexpr result_bits update_bias(result_bits data) noexcept - { - return data == typename result_traits::bits_type{} ? data - : data + bias_change; - } -}; - -// downcasting implementation details -template -struct precision_converter { - using source_traits = float_traits; - using result_traits = float_traits; - using source_bits = typename source_traits::bits_type; - using result_bits = typename result_traits::bits_type; - - static_assert(source_traits::exponent_bits >= - result_traits::exponent_bits && - source_traits::significand_bits >= - result_traits::significand_bits, - "SourceType has to have both lower range and precision or " - "higher range and precision than ResultType"); - - static constexpr int significand_offset = - source_traits::significand_bits - result_traits::significand_bits; - static constexpr int exponent_offset = significand_offset; - static constexpr int sign_offset = source_traits::exponent_bits - - result_traits::exponent_bits + - exponent_offset; - static constexpr source_bits bias_change = - (source_traits::bias_mask >> exponent_offset) - - static_cast(result_traits::bias_mask); - - static constexpr result_bits shift_significand(source_bits data) noexcept - { - return static_cast( - (data & source_traits::significand_mask) >> significand_offset); - } - - static constexpr result_bits shift_exponent(source_bits data) noexcept - { - return static_cast(update_bias( - (data & source_traits::exponent_mask) >> exponent_offset)); - } - - static constexpr result_bits shift_sign(source_bits data) noexcept - { - return static_cast((data & source_traits::sign_mask) >> - sign_offset); - } - -private: - static constexpr source_bits update_bias(source_bits data) noexcept - { - return data <= bias_change ? typename source_traits::bits_type{} - : limit_exponent(data - bias_change); - } - - static constexpr source_bits limit_exponent(source_bits data) noexcept - { - return data >= static_cast(result_traits::exponent_mask) - ? static_cast(result_traits::exponent_mask) - : data; - } -}; - - -} // namespace detail - - -/** - * A class providing basic support for half precision floating point types. - * - * For now the only features are reduced storage compared to single precision - * and conversions from and to single precision floating point type. - */ -class half { -public: - half() noexcept = default; - - GKO_ATTRIBUTES half(float32 val) noexcept - { -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) - const auto tmp = __float2half_rn(val); - data_ = reinterpret_cast(tmp); -#else // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) - data_ = float2half(reinterpret_cast(val)); -#endif // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) - } - - GKO_ATTRIBUTES half(float64 val) noexcept : half(static_cast(val)) - {} - - GKO_ATTRIBUTES operator float32() const noexcept - { -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) - return __half2float(reinterpret_cast(data_)); -#else // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) - const auto bits = half2float(data_); - return reinterpret_cast(bits); -#endif // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) - } - - GKO_ATTRIBUTES operator float64() const noexcept - { - return static_cast(static_cast(*this)); - } - - GKO_ATTRIBUTES half operator-() const noexcept - { - auto res = *this; - // flip sign bit - res.data_ ^= f16_traits::sign_mask; - return res; - } - -private: - using f16_traits = detail::float_traits; - using f32_traits = detail::float_traits; - - static uint16 float2half(uint32 data_) noexcept - { - using conv = detail::precision_converter; - if (f32_traits::is_inf(data_)) { - return conv::shift_sign(data_) | f16_traits::exponent_mask; - } else if (f32_traits::is_nan(data_)) { - return conv::shift_sign(data_) | f16_traits::exponent_mask | - f16_traits::significand_mask; - } else { - const auto exp = conv::shift_exponent(data_); - if (f16_traits::is_inf(exp)) { - return conv::shift_sign(data_) | exp; - } else if (f16_traits::is_denom(exp)) { - // TODO: handle denormals - return conv::shift_sign(data_); - } else { - return conv::shift_sign(data_) | exp | - conv::shift_significand(data_); - } - } - } - - static uint32 half2float(uint16 data_) noexcept - { - using conv = detail::precision_converter; - if (f16_traits::is_inf(data_)) { - return conv::shift_sign(data_) | f32_traits::exponent_mask; - } else if (f16_traits::is_nan(data_)) { - return conv::shift_sign(data_) | f32_traits::exponent_mask | - f32_traits::significand_mask; - } else if (f16_traits::is_denom(data_)) { - // TODO: handle denormals - return conv::shift_sign(data_); - } else { - return conv::shift_sign(data_) | conv::shift_exponent(data_) | - conv::shift_significand(data_); - } - } - - uint16 data_; -}; - - /** * This template implements the truncated (or split) storage of a floating point * type. @@ -458,38 +124,6 @@ class truncated { namespace std { -template <> -class complex { -public: - using value_type = gko::half; - - complex(const value_type& real = 0.f, const value_type& imag = 0.f) - : real_(real), imag_(imag) - {} - - template - explicit complex(const complex& other) - : complex(static_cast(other.real()), - static_cast(other.imag())) - {} - - value_type real() const noexcept { return real_; } - - value_type imag() const noexcept { return imag_; } - - - operator std::complex() const noexcept - { - return std::complex(static_cast(real_), - static_cast(imag_)); - } - -private: - value_type real_; - value_type imag_; -}; - - template class complex> { public: @@ -521,31 +155,6 @@ class complex> { }; -template <> -struct is_scalar : std::true_type {}; - - -template <> -struct numeric_limits { - static constexpr bool is_specialized{true}; - static constexpr bool is_signed{true}; - static constexpr bool is_integer{false}; - static constexpr bool is_exact{false}; - static constexpr bool is_bounded{true}; - static constexpr bool is_modulo{false}; - static constexpr int digits{ - gko::detail::float_traits::significand_bits + 1}; - // 3/10 is approx. log_10(2) - static constexpr int digits10{digits * 3 / 10}; - - // Note: gko::half can't return gko::half here because it does not have - // a constexpr constructor. - static constexpr float epsilon() - { - return gko::detail::float_traits::eps; - } -}; - } // namespace std diff --git a/core/test/base/extended_float.cpp b/core/test/base/extended_float.cpp index 6148c7c350a..818843baa38 100644 --- a/core/test/base/extended_float.cpp +++ b/core/test/base/extended_float.cpp @@ -9,6 +9,8 @@ #include +#include + namespace { @@ -110,7 +112,12 @@ TEST_F(FloatToHalf, ConvertsNan) { half x = create_from_bits("0" "11111111" "00000000000000000000001"); + #if defined(SYCL_LANGUAGE_VERSION) + // Sycl put the 1000000000, but ours put mask + ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1000000000")); + #else ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1111111111")); + #endif } @@ -118,7 +125,12 @@ TEST_F(FloatToHalf, ConvertsNegNan) { half x = create_from_bits("1" "11111111" "00010000000000000000000"); + #if defined(SYCL_LANGUAGE_VERSION) + // Sycl put the 1000000000, but ours put mask + ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1000000000")); + #else ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1111111111")); + #endif } @@ -162,12 +174,21 @@ TEST_F(FloatToHalf, TruncatesSmallNumber) } -TEST_F(FloatToHalf, TruncatesLargeNumber) +TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven) { - half x = create_from_bits("1" "10001110" "10010011111000010000100"); - - ASSERT_EQ(get_bits(x), get_bits("1" "11110" "1001001111")); - + half neg_x = create_from_bits("1" "10001110" "10010011111000010000100"); + half neg_x2 = create_from_bits("1" "10001110" "10010011101000010000100"); + half x = create_from_bits("0" "10001110" "10010011111000010000100"); + half x2 = create_from_bits("0" "10001110" "10010011101000010000100"); + half x3 = create_from_bits("0" "10001110" "10010011101000000000000"); + half x4 = create_from_bits("0" "10001110" "10010011111000000000000"); + + EXPECT_EQ(get_bits(x), get_bits("0" "11110" "1001010000")); + EXPECT_EQ(get_bits(x2), get_bits("0" "11110" "1001001111")); + EXPECT_EQ(get_bits(x3), get_bits("0" "11110" "1001001110")); + EXPECT_EQ(get_bits(x4), get_bits("0" "11110" "1001010000")); + EXPECT_EQ(get_bits(neg_x), get_bits("1" "11110" "1001010000")); + EXPECT_EQ(get_bits(neg_x2), get_bits("1" "11110" "1001001111")); } @@ -216,7 +237,12 @@ TEST_F(HalfToFloat, ConvertsNan) { float x = create_from_bits("0" "11111" "0001001000"); + #if defined(SYCL_LANGUAGE_VERSION) + // sycl keeps significand + ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00010010000000000000000")); + #else ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "11111111111111111111111")); + #endif } @@ -224,7 +250,12 @@ TEST_F(HalfToFloat, ConvertsNegNan) { float x = create_from_bits("1" "11111" "0000000001"); + #if defined(SYCL_LANGUAGE_VERSION) + // sycl keeps significand + ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000010000000000000")); + #else ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "11111111111111111111111")); + #endif } diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp new file mode 100644 index 00000000000..25a38abb6eb --- /dev/null +++ b/include/ginkgo/core/base/half.hpp @@ -0,0 +1,669 @@ +// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef GKO_PUBLIC_CORE_BASE_HALF_HPP_ +#define GKO_PUBLIC_CORE_BASE_HALF_HPP_ + + +#include +#include +#include + +#include +#include + + +class __half; + + +namespace gko { + + +template +class truncated; + + +namespace detail { + + +template +struct uint_of_impl {}; + +template +struct uint_of_impl> { + using type = uint16; +}; + +template +struct uint_of_impl> { + using type = uint32; +}; + +template +struct uint_of_impl> { + using type = uint64; +}; + +template +using uint_of = typename uint_of_impl::type; + + +template +struct basic_float_traits {}; + +template <> +struct basic_float_traits { + using type = float16; + static constexpr int sign_bits = 1; + static constexpr int significand_bits = 10; + static constexpr int exponent_bits = 5; + static constexpr bool rounds_to_nearest = true; +}; + +template <> +struct basic_float_traits<__half> { + using type = __half; + static constexpr int sign_bits = 1; + static constexpr int significand_bits = 10; + static constexpr int exponent_bits = 5; + static constexpr bool rounds_to_nearest = true; +}; + +template <> +struct basic_float_traits { + using type = float32; + static constexpr int sign_bits = 1; + static constexpr int significand_bits = 23; + static constexpr int exponent_bits = 8; + static constexpr bool rounds_to_nearest = true; +}; + +template <> +struct basic_float_traits { + using type = float64; + static constexpr int sign_bits = 1; + static constexpr int significand_bits = 52; + static constexpr int exponent_bits = 11; + static constexpr bool rounds_to_nearest = true; +}; + +template +struct basic_float_traits> { + using type = truncated; + static constexpr int sign_bits = ComponentId == 0 ? 1 : 0; + static constexpr int exponent_bits = + ComponentId == 0 ? basic_float_traits::exponent_bits : 0; + static constexpr int significand_bits = + ComponentId == 0 ? sizeof(type) * byte_size - exponent_bits - 1 + : sizeof(type) * byte_size; + static constexpr bool rounds_to_nearest = false; +}; + + +template +constexpr UintType create_ones(int n) +{ + return (n == sizeof(UintType) * byte_size ? static_cast(0) + : static_cast(1) << n) - + static_cast(1); +} + + +template +struct float_traits { + using type = typename basic_float_traits::type; + using bits_type = uint_of; + static constexpr int sign_bits = basic_float_traits::sign_bits; + static constexpr int significand_bits = + basic_float_traits::significand_bits; + static constexpr int exponent_bits = basic_float_traits::exponent_bits; + static constexpr bits_type significand_mask = + create_ones(significand_bits); + static constexpr bits_type exponent_mask = + create_ones(significand_bits + exponent_bits) - + significand_mask; + static constexpr bits_type bias_mask = + create_ones(significand_bits + exponent_bits - 1) - + significand_mask; + static constexpr bits_type sign_mask = + create_ones(sign_bits + significand_bits + exponent_bits) - + exponent_mask - significand_mask; + static constexpr bool rounds_to_nearest = + basic_float_traits::rounds_to_nearest; + + static constexpr auto eps = + 1.0 / (1ll << (significand_bits + rounds_to_nearest)); + + static constexpr bool is_inf(bits_type data) + { + return (data & exponent_mask) == exponent_mask && + (data & significand_mask) == bits_type{}; + } + + static constexpr bool is_nan(bits_type data) + { + return (data & exponent_mask) == exponent_mask && + (data & significand_mask) != bits_type{}; + } + + static constexpr bool is_denom(bits_type data) + { + return (data & exponent_mask) == bits_type{}; + } +}; + + +template +struct precision_converter; + +// upcasting implementation details +template +struct precision_converter { + using source_traits = float_traits; + using result_traits = float_traits; + using source_bits = typename source_traits::bits_type; + using result_bits = typename result_traits::bits_type; + + static_assert(source_traits::exponent_bits <= + result_traits::exponent_bits && + source_traits::significand_bits <= + result_traits::significand_bits, + "SourceType has to have both lower range and precision or " + "higher range and precision than ResultType"); + + static constexpr int significand_offset = + result_traits::significand_bits - source_traits::significand_bits; + static constexpr int exponent_offset = significand_offset; + static constexpr int sign_offset = result_traits::exponent_bits - + source_traits::exponent_bits + + exponent_offset; + static constexpr result_bits bias_change = + result_traits::bias_mask - + (static_cast(source_traits::bias_mask) << exponent_offset); + + static constexpr result_bits shift_significand(source_bits data) noexcept + { + return static_cast(data & source_traits::significand_mask) + << significand_offset; + } + + static constexpr result_bits shift_exponent(source_bits data) noexcept + { + return update_bias( + static_cast(data & source_traits::exponent_mask) + << exponent_offset); + } + + static constexpr result_bits shift_sign(source_bits data) noexcept + { + return static_cast(data & source_traits::sign_mask) + << sign_offset; + } + +private: + static constexpr result_bits update_bias(result_bits data) noexcept + { + return data == typename result_traits::bits_type{} ? data + : data + bias_change; + } +}; + +// downcasting implementation details +template +struct precision_converter { + using source_traits = float_traits; + using result_traits = float_traits; + using source_bits = typename source_traits::bits_type; + using result_bits = typename result_traits::bits_type; + + static_assert(source_traits::exponent_bits >= + result_traits::exponent_bits && + source_traits::significand_bits >= + result_traits::significand_bits, + "SourceType has to have both lower range and precision or " + "higher range and precision than ResultType"); + + static constexpr int significand_offset = + source_traits::significand_bits - result_traits::significand_bits; + static constexpr int exponent_offset = significand_offset; + static constexpr int sign_offset = source_traits::exponent_bits - + result_traits::exponent_bits + + exponent_offset; + static constexpr source_bits bias_change = + (source_traits::bias_mask >> exponent_offset) - + static_cast(result_traits::bias_mask); + + static constexpr result_bits shift_significand(source_bits data) noexcept + { + return static_cast( + (data & source_traits::significand_mask) >> significand_offset); + } + + static constexpr result_bits shift_exponent(source_bits data) noexcept + { + return static_cast(update_bias( + (data & source_traits::exponent_mask) >> exponent_offset)); + } + + static constexpr result_bits shift_sign(source_bits data) noexcept + { + return static_cast((data & source_traits::sign_mask) >> + sign_offset); + } + +private: + static constexpr source_bits update_bias(source_bits data) noexcept + { + return data <= bias_change ? typename source_traits::bits_type{} + : limit_exponent(data - bias_change); + } + + static constexpr source_bits limit_exponent(source_bits data) noexcept + { + return data >= static_cast(result_traits::exponent_mask) + ? static_cast(result_traits::exponent_mask) + : data; + } +}; + + +} // namespace detail + + +/** + * A class providing basic support for half precision floating point types. + * + * For now the only features are reduced storage compared to single precision + * and conversions from and to single precision floating point type. + */ +class half { +public: + // create half value from the bits directly. + static constexpr half create_from_bits(uint16 bits) noexcept + { + half result; + result.data_ = bits; + return result; + } + + // TODO: NVHPC (host side) may not use zero initialization for the data + // member by default constructor in some cases. Not sure whether it is + // caused by something else in jacobi or isai. + constexpr half() noexcept : data_(0){}; + + template ::value>> + half(const T val) : data_(0) + { + this->float2half(static_cast(val)); + } + + template + half& operator=(const V val) + { + this->float2half(static_cast(val)); + return *this; + } + + operator float() const noexcept + { + const auto bits = half2float(data_); + float ans(0); + std::memcpy(&ans, &bits, sizeof(float)); + return ans; + } + + // can not use half operator _op(const half) for half + half + // operation will cast it to float and then do float operation such that it + // becomes float in the end. +#define HALF_OPERATOR(_op, _opeq) \ + friend half operator _op(const half lhf, const half rhf) \ + { \ + return static_cast(static_cast(lhf) \ + _op static_cast(rhf)); \ + } \ + half& operator _opeq(const half& hf) \ + { \ + auto result = *this _op hf; \ + data_ = result.data_; \ + return *this; \ + } + HALF_OPERATOR(+, +=) + HALF_OPERATOR(-, -=) + HALF_OPERATOR(*, *=) + HALF_OPERATOR(/, /=) + + // Do operation with different type + // If it is floating point, using floating point as type. + // If it is integer, using half as type +#define HALF_FRIEND_OPERATOR(_op, _opeq) \ + template \ + friend std::enable_if_t< \ + !std::is_same::value && std::is_scalar::value, \ + std::conditional_t::value, T, half>> \ + operator _op(const half hf, const T val) \ + { \ + using type = \ + std::conditional_t::value, T, half>; \ + auto result = static_cast(hf); \ + result _opeq static_cast(val); \ + return result; \ + } \ + template \ + friend std::enable_if_t< \ + !std::is_same::value && std::is_scalar::value, \ + std::conditional_t::value, T, half>> \ + operator _op(const T val, const half hf) \ + { \ + using type = \ + std::conditional_t::value, T, half>; \ + auto result = static_cast(val); \ + result _opeq static_cast(hf); \ + return result; \ + } + + HALF_FRIEND_OPERATOR(+, +=) + HALF_FRIEND_OPERATOR(-, -=) + HALF_FRIEND_OPERATOR(*, *=) + HALF_FRIEND_OPERATOR(/, /=) + + // the negative + half operator-() const + { + auto val = 0.0f - *this; + return static_cast(val); + } + +private: + using f16_traits = detail::float_traits; + using f32_traits = detail::float_traits; + + void float2half(float val) noexcept + { + uint32 bit_val(0); + std::memcpy(&bit_val, &val, sizeof(float)); + data_ = float2half(bit_val); + } + + static constexpr uint16 float2half(uint32 data_) noexcept + { + using conv = detail::precision_converter; + if (f32_traits::is_inf(data_)) { + return conv::shift_sign(data_) | f16_traits::exponent_mask; + } else if (f32_traits::is_nan(data_)) { + return conv::shift_sign(data_) | f16_traits::exponent_mask | + f16_traits::significand_mask; + } else { + const auto exp = conv::shift_exponent(data_); + if (f16_traits::is_inf(exp)) { + return conv::shift_sign(data_) | exp; + } else if (f16_traits::is_denom(exp)) { + // TODO: handle denormals + return conv::shift_sign(data_); + } else { + // Rounding to even + const auto result = conv::shift_sign(data_) | exp | + conv::shift_significand(data_); + const auto tail = + data_ & static_cast( + (1 << conv::significand_offset) - 1); + + constexpr auto half = static_cast( + 1 << (conv::significand_offset - 1)); + return result + + (tail > half || ((tail == half) && (result & 1))); + } + } + } + + static constexpr uint32 half2float(uint16 data_) noexcept + { + using conv = detail::precision_converter; + if (f16_traits::is_inf(data_)) { + return conv::shift_sign(data_) | f32_traits::exponent_mask; + } else if (f16_traits::is_nan(data_)) { + return conv::shift_sign(data_) | f32_traits::exponent_mask | + f32_traits::significand_mask; + } else if (f16_traits::is_denom(data_)) { + // TODO: handle denormals + return conv::shift_sign(data_); + } else { + return conv::shift_sign(data_) | conv::shift_exponent(data_) | + conv::shift_significand(data_); + } + } + + uint16 data_; +}; + + +} // namespace gko + + +namespace std { + + +template <> +class complex { +public: + using value_type = gko::half; + + complex(const value_type& real = value_type(0.f), + const value_type& imag = value_type(0.f)) + : real_(real), imag_(imag) + {} + + template ::value && + std::is_scalar::value>> + explicit complex(const T& real, const U& imag) + : real_(static_cast(real)), + imag_(static_cast(imag)) + {} + + template ::value>> + complex(const T& real) + : real_(static_cast(real)), + imag_(static_cast(0.f)) + {} + + // When using complex(real, imag), MSVC with CUDA try to recognize the + // complex is a member not constructor. + template ::value>> + explicit complex(const complex& other) + : real_(static_cast(other.real())), + imag_(static_cast(other.imag())) + {} + + value_type real() const noexcept { return real_; } + + value_type imag() const noexcept { return imag_; } + + operator std::complex() const noexcept + { + return std::complex(static_cast(real_), + static_cast(imag_)); + } + + template + complex& operator=(const V& val) + { + real_ = val; + imag_ = value_type(); + return *this; + } + + template + complex& operator=(const std::complex& val) + { + real_ = val.real(); + imag_ = val.imag(); + return *this; + } + + complex& operator+=(const value_type& real) + { + real_ += real; + return *this; + } + + complex& operator-=(const value_type& real) + { + real_ -= real; + return *this; + } + + complex& operator*=(const value_type& real) + { + real_ *= real; + imag_ *= real; + return *this; + } + + complex& operator/=(const value_type& real) + { + real_ /= real; + imag_ /= real; + return *this; + } + + template + complex& operator+=(const complex& val) + { + real_ += val.real(); + imag_ += val.imag(); + return *this; + } + + template + complex& operator-=(const complex& val) + { + real_ -= val.real(); + imag_ -= val.imag(); + return *this; + } + + template + complex& operator*=(const complex& val) + { + auto val_f = static_cast>(val); + auto result_f = static_cast>(*this); + result_f *= val_f; + real_ = result_f.real(); + imag_ = result_f.imag(); + return *this; + } + + template + complex& operator/=(const complex& val) + { + auto val_f = static_cast>(val); + auto result_f = static_cast>(*this); + result_f /= val_f; + real_ = result_f.real(); + imag_ = result_f.imag(); + return *this; + } + +// It's for MacOS. +// TODO: check whether mac compiler always use complex version even when real +// half +#define COMPLEX_HALF_OPERATOR(_op, _opeq) \ + friend complex operator _op(const complex lhf, \ + const complex rhf) \ + { \ + auto a = lhf; \ + a _opeq rhf; \ + return a; \ + } + + COMPLEX_HALF_OPERATOR(+, +=) + COMPLEX_HALF_OPERATOR(-, -=) + COMPLEX_HALF_OPERATOR(*, *=) + COMPLEX_HALF_OPERATOR(/, /=) + +private: + value_type real_; + value_type imag_; +}; + + +template <> +struct numeric_limits { + static constexpr bool is_specialized{true}; + static constexpr bool is_signed{true}; + static constexpr bool is_integer{false}; + static constexpr bool is_exact{false}; + static constexpr bool is_bounded{true}; + static constexpr bool is_modulo{false}; + static constexpr int digits{ + gko::detail::float_traits::significand_bits + 1}; + // 3/10 is approx. log_10(2) + static constexpr int digits10{digits * 3 / 10}; + + static constexpr gko::half epsilon() + { + constexpr auto bits = static_cast(0b0'00101'0000000000u); + return gko::half::create_from_bits(bits); + } + + static constexpr gko::half infinity() + { + constexpr auto bits = static_cast(0b0'11111'0000000000u); + return gko::half::create_from_bits(bits); + } + + static constexpr gko::half min() + { + constexpr auto bits = static_cast(0b0'00001'0000000000u); + return gko::half::create_from_bits(bits); + } + + static constexpr gko::half max() + { + constexpr auto bits = static_cast(0b0'11110'1111111111u); + return gko::half::create_from_bits(bits); + } + + static constexpr gko::half lowest() + { + constexpr auto bits = static_cast(0b1'11110'1111111111u); + return gko::half::create_from_bits(bits); + }; + + static constexpr gko::half quiet_NaN() + { + constexpr auto bits = static_cast(0b0'11111'1111111111u); + return gko::half::create_from_bits(bits); + } +}; + + +// complex using a template on operator= for any kind of complex, so we can +// do full specialization for half +template <> +inline complex& complex::operator=( + const std::complex& a) +{ + complex t(a.real(), a.imag()); + operator=(t); + return *this; +} + + +// For MSVC +template <> +inline complex& complex::operator=( + const std::complex& a) +{ + complex t(a.real(), a.imag()); + operator=(t); + return *this; +} + + +} // namespace std + + +#endif // GKO_PUBLIC_CORE_BASE_HALF_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index e9cda520a19..2234232905a 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include From c37b7beeb60241605bf9bde9f8d57cc40151482a Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Wed, 23 Oct 2024 00:31:30 +0200 Subject: [PATCH 02/13] half does not have constexpr constructor --- core/test/accessor/reduced_row_major_ginkgo.cpp | 17 ++++++++++------- core/test/utils.hpp | 11 ++++++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/core/test/accessor/reduced_row_major_ginkgo.cpp b/core/test/accessor/reduced_row_major_ginkgo.cpp index 7acad0b9638..41aaed54457 100644 --- a/core/test/accessor/reduced_row_major_ginkgo.cpp +++ b/core/test/accessor/reduced_row_major_ginkgo.cpp @@ -10,11 +10,12 @@ #include +#include // necessary for gko::half + #include "accessor/index_span.hpp" #include "accessor/range.hpp" #include "accessor/reduced_row_major.hpp" #include "accessor/utils.hpp" -#include "core/base/extended_float.hpp" // necessary for gko::half #include "core/test/utils.hpp" @@ -33,12 +34,7 @@ class ReducedStorage3d : public ::testing::Test { using st_type = typename std::tuple_element<1, decltype(ArithmeticStorageType{})>::type; using rcar_type = gko::acc::remove_complex_t; - static constexpr rcar_type delta{ - std::is_same::value - ? 0 - : std::numeric_limits< - gko::acc::remove_complex_t>::epsilon() * - 1e1}; + static const rcar_type delta; // Type for `check_accessor_correctness` to forward the indices using t = std::tuple; @@ -119,6 +115,13 @@ class ReducedStorage3d : public ::testing::Test { } }; +template +const typename ReducedStorage3d::rcar_type ReducedStorage3d::delta = + std::is_same::value + ? 0 + : std::numeric_limits>::epsilon() * + 1e1; + using ReducedStorage3dTypes = ::testing::Types, std::tuple, std::tuple, std::tuple, diff --git a/core/test/utils.hpp b/core/test/utils.hpp index cacc7191bbf..eee2900d731 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -365,15 +365,16 @@ template struct reduction_factor { using nc_output = remove_complex; using nc_precision = remove_complex; - static constexpr nc_output value{ - std::numeric_limits::epsilon() * nc_output{10} * - (gko::is_complex() ? nc_output{1.4142} : one())}; + + static const nc_output value; }; template -constexpr remove_complex - reduction_factor::value; +const remove_complex + reduction_factor::value = + std::numeric_limits::epsilon() * nc_output{10} * + (gko::is_complex() ? nc_output{1.4142} : one()); } // namespace test From afb108bb92c26ad11993c95f9ae90eb43b1e063d Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Tue, 22 Oct 2024 15:14:06 +0200 Subject: [PATCH 03/13] fix the undefined behavior and the issue from big-endian, and extract half to another test --- core/test/base/CMakeLists.txt | 1 + core/test/base/extended_float.cpp | 232 ++---------------------- core/test/base/half.cpp | 285 ++++++++++++++++++++++++++++++ 3 files changed, 304 insertions(+), 214 deletions(-) create mode 100644 core/test/base/half.cpp diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt index d7deeec6fb7..50306c61455 100644 --- a/core/test/base/CMakeLists.txt +++ b/core/test/base/CMakeLists.txt @@ -14,6 +14,7 @@ ginkgo_create_test(exception EXECUTABLE_NAME exception_test) # exception collide ginkgo_create_test(exception_helpers) ginkgo_create_test(extended_float) ginkgo_create_test(executor) +ginkgo_create_test(half) ginkgo_create_test(index_range) ginkgo_create_test(iterator_factory) ginkgo_create_test(lin_op) diff --git a/core/test/base/extended_float.cpp b/core/test/base/extended_float.cpp index 818843baa38..bdb7a58ed84 100644 --- a/core/test/base/extended_float.cpp +++ b/core/test/base/extended_float.cpp @@ -49,15 +49,28 @@ class ExtendedFloatTestBase : public ::testing::Test { static floating create_from_bits(const char (&s)[N]) { auto bits = std::bitset(s).to_ullong(); - return reinterpret_cast&>(bits); + // We cast to the same size of integer type first. + // Otherwise, the first memory chunk is different when we use + // reinterpret_cast or memcpy to get the smaller type out of unsigned + // long long. + using bits_type = + typename gko::detail::float_traits>::bits_type; + auto bits_val = static_cast(bits); + floating result; + static_assert(sizeof(floating) == sizeof(bits_type), + "the type should have the same size as its bits_type"); + std::memcpy(&result, &bits_val, sizeof(bits_type)); + return result; } template static std::bitset get_bits(T val) { - auto bits = - reinterpret_cast::bits_type&>( - val); + using bits_type = typename gko::detail::float_traits::bits_type; + bits_type bits; + static_assert(sizeof(T) == sizeof(bits_type), + "the type should have the same size as its bits_type"); + std::memcpy(&bits, &val, sizeof(T)); return std::bitset(bits); } @@ -69,218 +82,9 @@ class ExtendedFloatTestBase : public ::testing::Test { }; -class FloatToHalf : public ExtendedFloatTestBase {}; - - -// clang-format does terrible formatting of string literal concatenation -// clang-format off - - -TEST_F(FloatToHalf, ConvertsOne) -{ - half x = create_from_bits("0" "01111111" "00000000000000000000000"); - - ASSERT_EQ(get_bits(x), get_bits("0" "01111" "0000000000")); -} - - -TEST_F(FloatToHalf, ConvertsZero) -{ - half x = create_from_bits("0" "00000000" "00000000000000000000000"); - - ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000000")); -} - - -TEST_F(FloatToHalf, ConvertsInf) -{ - half x = create_from_bits("0" "11111111" "00000000000000000000000"); - - ASSERT_EQ(get_bits(x), get_bits("0" "11111" "0000000000")); -} - - -TEST_F(FloatToHalf, ConvertsNegInf) -{ - half x = create_from_bits("1" "11111111" "00000000000000000000000"); - - ASSERT_EQ(get_bits(x), get_bits("1" "11111" "0000000000")); -} - - -TEST_F(FloatToHalf, ConvertsNan) -{ - half x = create_from_bits("0" "11111111" "00000000000000000000001"); - - #if defined(SYCL_LANGUAGE_VERSION) - // Sycl put the 1000000000, but ours put mask - ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1000000000")); - #else - ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1111111111")); - #endif -} - - -TEST_F(FloatToHalf, ConvertsNegNan) -{ - half x = create_from_bits("1" "11111111" "00010000000000000000000"); - - #if defined(SYCL_LANGUAGE_VERSION) - // Sycl put the 1000000000, but ours put mask - ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1000000000")); - #else - ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1111111111")); - #endif -} - - -TEST_F(FloatToHalf, FlushesToZero) -{ - half x = create_from_bits("0" "00000111" "00010001000100000001000"); - - ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000000")); -} - - -TEST_F(FloatToHalf, FlushesToNegZero) -{ - half x = create_from_bits("1" "00000010" "00010001000100000001000"); - - ASSERT_EQ(get_bits(x), get_bits("1" "00000" "0000000000")); -} - - -TEST_F(FloatToHalf, FlushesToInf) -{ - half x = create_from_bits("0" "10100000" "10010000000000010000100"); - - ASSERT_EQ(get_bits(x), get_bits("0" "11111" "0000000000")); -} - - -TEST_F(FloatToHalf, FlushesToNegInf) -{ - half x = create_from_bits("1" "11000000" "10010000000000010000100"); - - ASSERT_EQ(get_bits(x), get_bits("1" "11111" "0000000000")); -} - - -TEST_F(FloatToHalf, TruncatesSmallNumber) -{ - half x = create_from_bits("0" "01110001" "10010000000000010000100"); - - ASSERT_EQ(get_bits(x), get_bits("0" "00001" "1001000000")); -} - - -TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven) -{ - half neg_x = create_from_bits("1" "10001110" "10010011111000010000100"); - half neg_x2 = create_from_bits("1" "10001110" "10010011101000010000100"); - half x = create_from_bits("0" "10001110" "10010011111000010000100"); - half x2 = create_from_bits("0" "10001110" "10010011101000010000100"); - half x3 = create_from_bits("0" "10001110" "10010011101000000000000"); - half x4 = create_from_bits("0" "10001110" "10010011111000000000000"); - - EXPECT_EQ(get_bits(x), get_bits("0" "11110" "1001010000")); - EXPECT_EQ(get_bits(x2), get_bits("0" "11110" "1001001111")); - EXPECT_EQ(get_bits(x3), get_bits("0" "11110" "1001001110")); - EXPECT_EQ(get_bits(x4), get_bits("0" "11110" "1001010000")); - EXPECT_EQ(get_bits(neg_x), get_bits("1" "11110" "1001010000")); - EXPECT_EQ(get_bits(neg_x2), get_bits("1" "11110" "1001001111")); -} - - -// clang-format on - - -class HalfToFloat : public ExtendedFloatTestBase {}; - - -// clang-format off - - -TEST_F(HalfToFloat, ConvertsOne) -{ - float x = create_from_bits("0" "01111" "0000000000"); - - ASSERT_EQ(get_bits(x), get_bits("0" "01111111" "00000000000000000000000")); -} - - -TEST_F(HalfToFloat, ConvertsZero) -{ - float x = create_from_bits("0" "00000" "0000000000"); - - ASSERT_EQ(get_bits(x), get_bits("0" "00000000" "00000000000000000000000")); -} - - -TEST_F(HalfToFloat, ConvertsInf) -{ - float x = create_from_bits("0" "11111" "0000000000"); - - ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00000000000000000000000")); -} - - -TEST_F(HalfToFloat, ConvertsNegInf) -{ - float x = create_from_bits("1" "11111" "0000000000"); - - ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000000000000000000")); -} - - -TEST_F(HalfToFloat, ConvertsNan) -{ - float x = create_from_bits("0" "11111" "0001001000"); - - #if defined(SYCL_LANGUAGE_VERSION) - // sycl keeps significand - ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00010010000000000000000")); - #else - ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "11111111111111111111111")); - #endif -} - - -TEST_F(HalfToFloat, ConvertsNegNan) -{ - float x = create_from_bits("1" "11111" "0000000001"); - - #if defined(SYCL_LANGUAGE_VERSION) - // sycl keeps significand - ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000010000000000000")); - #else - ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "11111111111111111111111")); - #endif -} - - -TEST_F(HalfToFloat, ExtendsSmallNumber) -{ - float x = create_from_bits("0" "00001" "1000010001"); - - ASSERT_EQ(get_bits(x), get_bits("0" "01110001" "10000100010000000000000")); -} - - -TEST_F(HalfToFloat, ExtendsLargeNumber) -{ - float x = create_from_bits("1" "11110" "1001001111"); - - ASSERT_EQ(get_bits(x), get_bits("1" "10001110" "10010011110000000000000")); -} - - -// clang-format on - - class TruncatedDouble : public ExtendedFloatTestBase {}; - +// clang-format does terrible formatting of string literal concatenation // clang-format off diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp new file mode 100644 index 00000000000..f20bac0d47a --- /dev/null +++ b/core/test/base/half.cpp @@ -0,0 +1,285 @@ +// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include +#include +#include + +#include + +#include + + +template +struct floating_impl; + +template <> +struct floating_impl<16> { + using type = gko::half; +}; + +template <> +struct floating_impl<32> { + using type = float; +}; + +template <> +struct floating_impl<64> { + using type = double; +}; + +template +using floating = typename floating_impl::type; + + +class ExtendedFloatTestBase : public ::testing::Test { +protected: + using half = gko::half; + + static constexpr auto byte_size = gko::byte_size; + + template + static floating create_from_bits(const char (&s)[N]) + { + auto bits = std::bitset(s).to_ullong(); + // We cast to the same size of integer type first. + // Otherwise, the first memory chunk is different when we use + // reinterpret_cast or memcpy to get the smaller type out of unsigned + // long long. + using bits_type = + typename gko::detail::float_traits>::bits_type; + auto bits_val = static_cast(bits); + floating result; + static_assert(sizeof(floating) == sizeof(bits_type), + "the type should have the same size as its bits_type"); + std::memcpy(&result, &bits_val, sizeof(bits_type)); + return result; + } + + template + static std::bitset get_bits(T val) + { + using bits_type = typename gko::detail::float_traits::bits_type; + bits_type bits; + static_assert(sizeof(T) == sizeof(bits_type), + "the type should have the same size as its bits_type"); + std::memcpy(&bits, &val, sizeof(T)); + return std::bitset(bits); + } + + template + static std::bitset get_bits(const char (&s)[N]) + { + return std::bitset(s); + } +}; + + +class FloatToHalf : public ExtendedFloatTestBase {}; + + +// clang-format does terrible formatting of string literal concatenation +// clang-format off + + +TEST_F(FloatToHalf, ConvertsOne) +{ + half x = create_from_bits("0" "01111111" "00000000000000000000000"); + + ASSERT_EQ(get_bits(x), get_bits("0" "01111" "0000000000")); +} + + +TEST_F(FloatToHalf, ConvertsZero) +{ + half x = create_from_bits("0" "00000000" "00000000000000000000000"); + + ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000000")); +} + + +TEST_F(FloatToHalf, ConvertsInf) +{ + half x = create_from_bits("0" "11111111" "00000000000000000000000"); + + ASSERT_EQ(get_bits(x), get_bits("0" "11111" "0000000000")); +} + + +TEST_F(FloatToHalf, ConvertsNegInf) +{ + half x = create_from_bits("1" "11111111" "00000000000000000000000"); + + ASSERT_EQ(get_bits(x), get_bits("1" "11111" "0000000000")); +} + + +TEST_F(FloatToHalf, ConvertsNan) +{ + half x = create_from_bits("0" "11111111" "00000000000000000000001"); + + #if defined(SYCL_LANGUAGE_VERSION) + // Sycl put the 1000000000, but ours put mask + ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1000000000")); + #else + ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1111111111")); + #endif +} + + +TEST_F(FloatToHalf, ConvertsNegNan) +{ + half x = create_from_bits("1" "11111111" "00010000000000000000000"); + + #if defined(SYCL_LANGUAGE_VERSION) + // Sycl put the 1000000000, but ours put mask + ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1000000000")); + #else + ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1111111111")); + #endif +} + + +TEST_F(FloatToHalf, FlushesToZero) +{ + half x = create_from_bits("0" "00000111" "00010001000100000001000"); + + ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000000")); +} + + +TEST_F(FloatToHalf, FlushesToNegZero) +{ + half x = create_from_bits("1" "00000010" "00010001000100000001000"); + + ASSERT_EQ(get_bits(x), get_bits("1" "00000" "0000000000")); +} + + +TEST_F(FloatToHalf, FlushesToInf) +{ + half x = create_from_bits("0" "10100000" "10010000000000010000100"); + + ASSERT_EQ(get_bits(x), get_bits("0" "11111" "0000000000")); +} + + +TEST_F(FloatToHalf, FlushesToNegInf) +{ + half x = create_from_bits("1" "11000000" "10010000000000010000100"); + + ASSERT_EQ(get_bits(x), get_bits("1" "11111" "0000000000")); +} + + +TEST_F(FloatToHalf, TruncatesSmallNumber) +{ + half x = create_from_bits("0" "01110001" "10010000000000010000100"); + + ASSERT_EQ(get_bits(x), get_bits("0" "00001" "1001000000")); +} + + +TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven) +{ + half neg_x = create_from_bits("1" "10001110" "10010011111000010000100"); + half neg_x2 = create_from_bits("1" "10001110" "10010011101000010000100"); + half x = create_from_bits("0" "10001110" "10010011111000010000100"); + half x2 = create_from_bits("0" "10001110" "10010011101000010000100"); + half x3 = create_from_bits("0" "10001110" "10010011101000000000000"); + half x4 = create_from_bits("0" "10001110" "10010011111000000000000"); + + EXPECT_EQ(get_bits(x), get_bits("0" "11110" "1001010000")); + EXPECT_EQ(get_bits(x2), get_bits("0" "11110" "1001001111")); + EXPECT_EQ(get_bits(x3), get_bits("0" "11110" "1001001110")); + EXPECT_EQ(get_bits(x4), get_bits("0" "11110" "1001010000")); + EXPECT_EQ(get_bits(neg_x), get_bits("1" "11110" "1001010000")); + EXPECT_EQ(get_bits(neg_x2), get_bits("1" "11110" "1001001111")); +} + + +// clang-format on + + +class HalfToFloat : public ExtendedFloatTestBase {}; + + +// clang-format off + + +TEST_F(HalfToFloat, ConvertsOne) +{ + float x = create_from_bits("0" "01111" "0000000000"); + + ASSERT_EQ(get_bits(x), get_bits("0" "01111111" "00000000000000000000000")); +} + + +TEST_F(HalfToFloat, ConvertsZero) +{ + float x = create_from_bits("0" "00000" "0000000000"); + + ASSERT_EQ(get_bits(x), get_bits("0" "00000000" "00000000000000000000000")); +} + + +TEST_F(HalfToFloat, ConvertsInf) +{ + float x = create_from_bits("0" "11111" "0000000000"); + + ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00000000000000000000000")); +} + + +TEST_F(HalfToFloat, ConvertsNegInf) +{ + float x = create_from_bits("1" "11111" "0000000000"); + + ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000000000000000000")); +} + + +TEST_F(HalfToFloat, ConvertsNan) +{ + float x = create_from_bits("0" "11111" "0001001000"); + + #if defined(SYCL_LANGUAGE_VERSION) + // sycl keeps significand + ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00010010000000000000000")); + #else + ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "11111111111111111111111")); + #endif +} + + +TEST_F(HalfToFloat, ConvertsNegNan) +{ + float x = create_from_bits("1" "11111" "0000000001"); + + #if defined(SYCL_LANGUAGE_VERSION) + // sycl keeps significand + ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000010000000000000")); + #else + ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "11111111111111111111111")); + #endif +} + + +TEST_F(HalfToFloat, ExtendsSmallNumber) +{ + float x = create_from_bits("0" "00001" "1000010001"); + + ASSERT_EQ(get_bits(x), get_bits("0" "01110001" "10000100010000000000000")); +} + + +TEST_F(HalfToFloat, ExtendsLargeNumber) +{ + float x = create_from_bits("1" "11110" "1001001111"); + + ASSERT_EQ(get_bits(x), get_bits("1" "10001110" "10010011110000000000000")); +} + + +// clang-format on From 3b46e410b45d9f706bbdc01631e4953255a01250 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Tue, 22 Oct 2024 17:34:34 +0200 Subject: [PATCH 04/13] jacobi use __half in device not gko::half now --- .../jacobi_advanced_apply_kernels.instantiate.cpp | 2 +- .../jacobi_generate_kernels.instantiate.cpp | 10 +++++----- common/cuda_hip/preconditioner/jacobi_kernels.cpp | 4 ++-- .../jacobi_simple_apply_kernels.instantiate.cpp | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.instantiate.cpp b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.instantiate.cpp index 0ecc3d0d44b..131c530d2ee 100644 --- a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.instantiate.cpp +++ b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.instantiate.cpp @@ -90,7 +90,7 @@ __launch_bounds__(warps_per_block* config::warp_size) advanced_adaptive_apply( ValueType, block_precisions[block_id], multiply_vec( subwarp, block_size, v, - reinterpret_cast( + reinterpret_cast*>( blocks + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id) + subwarp.thread_rank(), diff --git a/common/cuda_hip/preconditioner/jacobi_generate_kernels.instantiate.cpp b/common/cuda_hip/preconditioner/jacobi_generate_kernels.instantiate.cpp index d004309c622..fdb0ad11e9e 100644 --- a/common/cuda_hip/preconditioner/jacobi_generate_kernels.instantiate.cpp +++ b/common/cuda_hip/preconditioner/jacobi_generate_kernels.instantiate.cpp @@ -68,7 +68,7 @@ __device__ __forceinline__ bool validate_precision_reduction_feasibility( } } - return succeeded && block_cond >= 1.0 && + return succeeded && block_cond >= remove_complex{1.0} && block_cond * static_cast>( float_traits>::eps) < remove_complex{1e-3}; @@ -160,7 +160,7 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_generate( accuracy, block_cond, [&subwarp, &block_size, &row, &block_data, &storage_scheme, &block_id] { - using target = reduce_precision; + using target = device_type>; return validate_precision_reduction_feasibility< max_block_size, target>( subwarp, block_size, row, @@ -170,8 +170,8 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_generate( }, [&subwarp, &block_size, &row, &block_data, &storage_scheme, &block_id] { - using target = - reduce_precision>; + using target = device_type< + reduce_precision>>; return validate_precision_reduction_feasibility< max_block_size, target>( subwarp, block_size, row, @@ -195,7 +195,7 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_generate( ValueType, prec, copy_matrix( subwarp, block_size, row, 1, perm, trans_perm, - reinterpret_cast( + reinterpret_cast*>( block_data + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id), storage_scheme.get_stride())); diff --git a/common/cuda_hip/preconditioner/jacobi_kernels.cpp b/common/cuda_hip/preconditioner/jacobi_kernels.cpp index f3b099e7c18..6f2d4ae3974 100644 --- a/common/cuda_hip/preconditioner/jacobi_kernels.cpp +++ b/common/cuda_hip/preconditioner/jacobi_kernels.cpp @@ -206,11 +206,11 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_transpose_jacobi( GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, block_precisions[block_id], auto local_block = - reinterpret_cast( + reinterpret_cast*>( blocks + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id); auto local_out_block = - reinterpret_cast( + reinterpret_cast*>( out_blocks + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id); for (int i = rank; i < block_size * block_size; i += subwarp_size) { diff --git a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.instantiate.cpp b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.instantiate.cpp index 734385970e3..faf869718a6 100644 --- a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.instantiate.cpp +++ b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.instantiate.cpp @@ -84,7 +84,7 @@ __global__ void __launch_bounds__(warps_per_block* config::warp_size) ValueType, block_precisions[block_id], multiply_vec( subwarp, block_size, v, - reinterpret_cast( + reinterpret_cast*>( blocks + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id) + subwarp.thread_rank(), From 685331c0c8feedddf0ab41a683f6bd62460cc435 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Tue, 22 Oct 2024 18:02:57 +0200 Subject: [PATCH 05/13] type map --- accessor/cuda_helper.hpp | 15 ++++++++++++- accessor/hip_helper.hpp | 14 +++++++++++- cuda/base/types.hpp | 36 +++++++++++++++++++++++++------ hip/base/types.hip.hpp | 33 ++++++++++++++++++++++++---- include/ginkgo/core/base/math.hpp | 9 +++++++- 5 files changed, 94 insertions(+), 13 deletions(-) diff --git a/accessor/cuda_helper.hpp b/accessor/cuda_helper.hpp index 31d3599516d..3efc6eb22b7 100644 --- a/accessor/cuda_helper.hpp +++ b/accessor/cuda_helper.hpp @@ -17,7 +17,15 @@ #include "utils.hpp" +struct __half; + + namespace gko { + + +class half; + + namespace acc { namespace detail { @@ -27,6 +35,11 @@ struct cuda_type { using type = T; }; +template <> +struct cuda_type { + using type = __half; +}; + // Unpack cv and reference / pointer qualifiers template struct cuda_type { @@ -57,7 +70,7 @@ struct cuda_type { // Transform std::complex to thrust::complex template struct cuda_type> { - using type = thrust::complex; + using type = thrust::complex::type>; }; diff --git a/accessor/hip_helper.hpp b/accessor/hip_helper.hpp index 6b76b726c10..8827fd6eb11 100644 --- a/accessor/hip_helper.hpp +++ b/accessor/hip_helper.hpp @@ -17,7 +17,15 @@ #include "utils.hpp" +struct __half; + + namespace gko { + + +class half; + + namespace acc { namespace detail { @@ -53,11 +61,15 @@ struct hip_type { using type = typename hip_type::type&&; }; +template <> +struct hip_type { + using type = __half; +}; // Transform std::complex to thrust::complex template struct hip_type> { - using type = thrust::complex; + using type = thrust::complex::type>; }; diff --git a/cuda/base/types.hpp b/cuda/base/types.hpp index a4a2b877c28..05f07ceb8dd 100644 --- a/cuda/base/types.hpp +++ b/cuda/base/types.hpp @@ -14,20 +14,17 @@ #include #include +#include #include #include namespace gko { - namespace kernels { namespace cuda { - - namespace detail { - /** * @internal * @@ -124,6 +121,17 @@ struct culibs_type_impl> { using type = cuDoubleComplex; }; + +template <> +struct culibs_type_impl { + using type = __half; +}; + +template <> +struct culibs_type_impl> { + using type = __half2; +}; + template struct culibs_type_impl> { using type = typename culibs_type_impl>::type; @@ -154,9 +162,14 @@ struct cuda_type_impl { using type = volatile typename cuda_type_impl::type; }; +template <> +struct cuda_type_impl { + using type = __half; +}; + template struct cuda_type_impl> { - using type = thrust::complex; + using type = thrust::complex::type>; }; template <> @@ -169,6 +182,11 @@ struct cuda_type_impl { using type = thrust::complex; }; +template <> +struct cuda_type_impl<__half2> { + using type = thrust::complex<__half>; +}; + template struct cuda_struct_member_type_impl { using type = T; @@ -176,7 +194,12 @@ struct cuda_struct_member_type_impl { template struct cuda_struct_member_type_impl> { - using type = fake_complex; + using type = fake_complex::type>; +}; + +template <> +struct cuda_struct_member_type_impl { + using type = __half; }; template @@ -200,6 +223,7 @@ GKO_CUDA_DATA_TYPE(float, CUDA_R_32F); GKO_CUDA_DATA_TYPE(double, CUDA_R_64F); GKO_CUDA_DATA_TYPE(std::complex, CUDA_C_32F); GKO_CUDA_DATA_TYPE(std::complex, CUDA_C_64F); +GKO_CUDA_DATA_TYPE(std::complex, CUDA_C_16F); GKO_CUDA_DATA_TYPE(int32, CUDA_R_32I); GKO_CUDA_DATA_TYPE(int8, CUDA_R_8I); diff --git a/hip/base/types.hip.hpp b/hip/base/types.hip.hpp index bb0d4a2d0c9..c3982b7562e 100644 --- a/hip/base/types.hip.hpp +++ b/hip/base/types.hip.hpp @@ -21,14 +21,13 @@ #endif #include +#include #include #include "common/cuda_hip/base/runtime.hpp" namespace gko { - - namespace kernels { namespace hip { namespace detail { @@ -130,6 +129,17 @@ struct hiplibs_type_impl> { using type = hipDoubleComplex; }; +template <> +struct hiplibs_type_impl { + using type = __half; +}; + +template <> +struct hiplibs_type_impl> { + using type = __half2; +}; + + template struct hiplibs_type_impl> { using type = typename hiplibs_type_impl>::type; @@ -202,9 +212,14 @@ struct hip_type_impl { using type = volatile typename hip_type_impl::type; }; +template <> +struct hip_type_impl { + using type = __half; +}; + template struct hip_type_impl> { - using type = thrust::complex; + using type = thrust::complex::type>; }; template <> @@ -217,6 +232,11 @@ struct hip_type_impl { using type = thrust::complex; }; +template <> +struct hip_type_impl<__half2> { + using type = thrust::complex<__half>; +}; + template struct hip_struct_member_type_impl { using type = T; @@ -224,7 +244,12 @@ struct hip_struct_member_type_impl { template struct hip_struct_member_type_impl> { - using type = fake_complex; + using type = fake_complex::type>; +}; + +template <> +struct hip_struct_member_type_impl { + using type = __half; }; template diff --git a/include/ginkgo/core/base/math.hpp b/include/ginkgo/core/base/math.hpp index cd5e489b95d..5e15bb05d6a 100644 --- a/include/ginkgo/core/base/math.hpp +++ b/include/ginkgo/core/base/math.hpp @@ -21,6 +21,9 @@ namespace gko { +class half; + + // HIP should not see std::abs or std::sqrt, we want the custom implementation. // Hence, provide the using declaration only for some cases namespace kernels { @@ -151,8 +154,12 @@ struct is_complex_impl> template struct is_complex_or_scalar_impl : std::is_scalar {}; +template <> +struct is_complex_or_scalar_impl : std::true_type {}; + template -struct is_complex_or_scalar_impl> : std::is_scalar {}; +struct is_complex_or_scalar_impl> + : is_complex_or_scalar_impl {}; /** From 9c51790640fca65ec8d8a1c76ab27aa7478f7a08 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Wed, 23 Oct 2024 13:50:49 +0200 Subject: [PATCH 06/13] fix error: non-constant-expression cannot be narrowed --- reference/test/base/batch_multi_vector_kernels.cpp | 4 ++-- reference/test/matrix/coo_kernels.cpp | 14 ++++++++------ reference/test/matrix/csr_kernels.cpp | 14 ++++++++------ reference/test/matrix/dense_kernels.cpp | 4 ++-- reference/test/matrix/diagonal_kernels.cpp | 14 ++++++++------ reference/test/matrix/ell_kernels.cpp | 14 ++++++++------ reference/test/matrix/fbcsr_kernels.cpp | 14 ++++++++------ reference/test/matrix/hybrid_kernels.cpp | 14 ++++++++------ reference/test/matrix/sellp_kernels.cpp | 14 ++++++++------ test/mpi/matrix.cpp | 4 ++-- 10 files changed, 62 insertions(+), 48 deletions(-) diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index e673046a490..694ae491ef4 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -349,7 +349,7 @@ TYPED_TEST(MultiVector, ConvertsToPrecision) // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + : static_cast>(r::value); this->mtx_1->convert_to(tmp.get()); tmp->convert_to(res.get()); @@ -373,7 +373,7 @@ TYPED_TEST(MultiVector, MovesToPrecision) // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + : static_cast>(r::value); this->mtx_1->move_to(tmp.get()); tmp->move_to(res.get()); diff --git a/reference/test/matrix/coo_kernels.cpp b/reference/test/matrix/coo_kernels.cpp index 42b68d1cb4c..fcca61a33d4 100644 --- a/reference/test/matrix/coo_kernels.cpp +++ b/reference/test/matrix/coo_kernels.cpp @@ -85,9 +85,10 @@ TYPED_TEST(Coo, ConvertsToPrecision) auto tmp = OtherCoo::create(this->exec); auto res = Coo::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx->convert_to(tmp); tmp->convert_to(res); @@ -106,9 +107,10 @@ TYPED_TEST(Coo, MovesToPrecision) auto tmp = OtherCoo::create(this->exec); auto res = Coo::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx->move_to(tmp); tmp->move_to(res); diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 2d4c61786ad..2dd68bd9239 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -794,9 +794,10 @@ TYPED_TEST(Csr, ConvertsToPrecision) auto tmp = OtherCsr::create(this->exec); auto res = Csr::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); // use mtx2 as mtx's strategy would involve creating a CudaExecutor this->mtx2->convert_to(tmp); @@ -819,9 +820,10 @@ TYPED_TEST(Csr, MovesToPrecision) auto tmp = OtherCsr::create(this->exec); auto res = Csr::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); // use mtx2 as mtx's strategy would involve creating a CudaExecutor this->mtx2->move_to(tmp); diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 41294c89d49..51b0aa148fd 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -753,7 +753,7 @@ TYPED_TEST(Dense, ConvertsToPrecision) // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + : static_cast>(r::value); this->mtx1->convert_to(tmp); tmp->convert_to(res); @@ -773,7 +773,7 @@ TYPED_TEST(Dense, MovesToPrecision) // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + : static_cast>(r::value); this->mtx1->move_to(tmp); tmp->move_to(res); diff --git a/reference/test/matrix/diagonal_kernels.cpp b/reference/test/matrix/diagonal_kernels.cpp index 208c9d98639..b0932c7eb66 100644 --- a/reference/test/matrix/diagonal_kernels.cpp +++ b/reference/test/matrix/diagonal_kernels.cpp @@ -91,9 +91,10 @@ TYPED_TEST(Diagonal, ConvertsToPrecision) auto tmp = OtherDiagonal::create(this->exec); auto res = Diagonal::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->diag1->convert_to(tmp); tmp->convert_to(res); @@ -111,9 +112,10 @@ TYPED_TEST(Diagonal, MovesToPrecision) auto tmp = OtherDiagonal::create(this->exec); auto res = Diagonal::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->diag1->move_to(tmp); tmp->move_to(res); diff --git a/reference/test/matrix/ell_kernels.cpp b/reference/test/matrix/ell_kernels.cpp index c96dcae773a..e1eef9f087c 100644 --- a/reference/test/matrix/ell_kernels.cpp +++ b/reference/test/matrix/ell_kernels.cpp @@ -449,9 +449,10 @@ TYPED_TEST(Ell, ConvertsToPrecision) auto tmp = OtherEll::create(this->exec); auto res = Ell::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx1->convert_to(tmp); tmp->convert_to(res); @@ -470,9 +471,10 @@ TYPED_TEST(Ell, MovesToPrecision) auto tmp = OtherEll::create(this->exec); auto res = Ell::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx1->move_to(tmp); tmp->move_to(res); diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index cd82bade8b7..f7c6d2197ef 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -277,9 +277,10 @@ TYPED_TEST(Fbcsr, ConvertsToPrecision) auto tmp = OtherFbcsr::create(this->exec); auto res = Fbcsr::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx->convert_to(tmp); tmp->convert_to(res); @@ -298,9 +299,10 @@ TYPED_TEST(Fbcsr, MovesToPrecision) auto tmp = OtherFbcsr::create(this->exec); auto res = Fbcsr::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx->move_to(tmp); tmp->move_to(res); diff --git a/reference/test/matrix/hybrid_kernels.cpp b/reference/test/matrix/hybrid_kernels.cpp index 014b5bb1024..754e599b8fe 100644 --- a/reference/test/matrix/hybrid_kernels.cpp +++ b/reference/test/matrix/hybrid_kernels.cpp @@ -239,9 +239,10 @@ TYPED_TEST(Hybrid, ConvertsToPrecision) auto tmp = OtherHybrid::create(this->exec); auto res = Hybrid::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx1->convert_to(tmp); tmp->convert_to(res); @@ -260,9 +261,10 @@ TYPED_TEST(Hybrid, MovesToPrecision) auto tmp = OtherHybrid::create(this->exec); auto res = Hybrid::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx1->move_to(tmp); tmp->move_to(res); diff --git a/reference/test/matrix/sellp_kernels.cpp b/reference/test/matrix/sellp_kernels.cpp index 18cf793c7f3..a39d8e16832 100644 --- a/reference/test/matrix/sellp_kernels.cpp +++ b/reference/test/matrix/sellp_kernels.cpp @@ -195,9 +195,10 @@ TYPED_TEST(Sellp, ConvertsToPrecision) auto tmp = OtherSellp::create(this->exec); auto res = Sellp::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx1->convert_to(tmp); tmp->convert_to(res); @@ -216,9 +217,10 @@ TYPED_TEST(Sellp, MovesToPrecision) auto tmp = OtherSellp::create(this->exec); auto res = Sellp::create(this->exec); // If OtherType is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + auto residual = + r::value < r::value + ? gko::remove_complex{0} + : static_cast>(r::value); this->mtx1->move_to(tmp); tmp->move_to(res); diff --git a/test/mpi/matrix.cpp b/test/mpi/matrix.cpp index 0cfb3aca477..88fe4092668 100644 --- a/test/mpi/matrix.cpp +++ b/test/mpi/matrix.cpp @@ -741,7 +741,7 @@ TYPED_TEST(Matrix, CanConvertToNextPrecision) // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + : static_cast>(r::value); this->dist_mat->convert_to(tmp); tmp->convert_to(res); @@ -768,7 +768,7 @@ TYPED_TEST(Matrix, CanMoveToNextPrecision) // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} - : gko::remove_complex{r::value}; + : static_cast>(r::value); this->dist_mat->move_to(tmp); tmp->convert_to(res); From 6317e3f67fcc8a304eda5172372cbd215ef62243 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Wed, 23 Oct 2024 13:57:12 +0200 Subject: [PATCH 07/13] update gdb-ginkgo Co-authored-by: Marcel Koch --- dev_tools/scripts/gdb-ginkgo.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/dev_tools/scripts/gdb-ginkgo.py b/dev_tools/scripts/gdb-ginkgo.py index d3de8f09a25..122d177031f 100644 --- a/dev_tools/scripts/gdb-ginkgo.py +++ b/dev_tools/scripts/gdb-ginkgo.py @@ -51,6 +51,7 @@ def next(self): _versioned_namespace = '__8::' + # new version adapted from https://gcc.gnu.org/pipermail/gcc-cvs/2021-November/356230.html # necessary due to empty class optimization def is_specialization_of(x, template_name): @@ -64,6 +65,7 @@ def is_specialization_of(x, template_name): expr = '^std::{}<.*>$'.format(template_name) return re.match(expr, x) is not None + def get_template_arg_list(type_obj): "Return a type's template arguments as a list" n = 0 @@ -75,6 +77,7 @@ def get_template_arg_list(type_obj): return template_args n += 1 + def _tuple_impl_get(val): "Return the tuple element stored in a _Tuple_impl base class." bases = val.type.fields() @@ -95,6 +98,7 @@ def _tuple_impl_get(val): else: raise ValueError("Unsupported implementation for std::tuple: %s" % str(val.type)) + def tuple_get(n, val): "Return the result of std::get(val) on a std::tuple" tuple_size = len(get_template_arg_list(val.type)) @@ -108,6 +112,7 @@ def tuple_get(n, val): n -= 1 return _tuple_impl_get(node) + def get_unique_ptr_data_ptr(val): "Return the result of val.get() on a std::unique_ptr" # std::unique_ptr contains a std::tuple, @@ -220,12 +225,28 @@ def display_hint(self): return 'array' +class GkoHalfPrinter: + "Print a gko::half" + + def __init__(self, val): + # GDB doesn't seem to consider the user-defined conversion in its Value.cast, + # so we need to call the conversion operator explicitly + address = hex(val.address) + self.float_val = gdb.parse_and_eval(f"reinterpret_cast({address})->operator float()") + + def to_string(self): + self.float_val.fetch_lazy() + return self.float_val + + def lookup_type(val): if not str(val.type.unqualified()).startswith('gko::'): return None suffix = str(val.type.unqualified())[5:] if suffix.startswith('array<') and val.type.code == gdb.TYPE_CODE_STRUCT: return GkoArrayPrinter(val) + if suffix.startswith("half") and val.type.code == gdb.TYPE_CODE_STRUCT: + return GkoHalfPrinter(val) return None From b8f45849ec01dfb288e391e91327343b9128dd23 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Thu, 24 Oct 2024 02:28:45 +0200 Subject: [PATCH 08/13] make half not rely on type --- core/test/base/half.cpp | 2 +- include/ginkgo/core/base/half.hpp | 51 ++++++++++++++++-------------- include/ginkgo/core/base/types.hpp | 5 ++- 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp index f20bac0d47a..51d3e60ce40 100644 --- a/core/test/base/half.cpp +++ b/core/test/base/half.cpp @@ -37,7 +37,7 @@ class ExtendedFloatTestBase : public ::testing::Test { protected: using half = gko::half; - static constexpr auto byte_size = gko::byte_size; + static constexpr auto byte_size = gko::detail::byte_size; template static floating create_from_bits(const char (&s)[N]) diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp index 25a38abb6eb..fb5761c51fb 100644 --- a/include/ginkgo/core/base/half.hpp +++ b/include/ginkgo/core/base/half.hpp @@ -6,13 +6,12 @@ #define GKO_PUBLIC_CORE_BASE_HALF_HPP_ +#include #include +#include #include #include -#include -#include - class __half; @@ -20,29 +19,34 @@ class __half; namespace gko { -template +template class truncated; +class half; + + namespace detail { +constexpr std::size_t byte_size = CHAR_BIT; + template struct uint_of_impl {}; template struct uint_of_impl> { - using type = uint16; + using type = std::uint16_t; }; template struct uint_of_impl> { - using type = uint32; + using type = std::uint32_t; }; template struct uint_of_impl> { - using type = uint64; + using type = std::uint64_t; }; template @@ -53,8 +57,8 @@ template struct basic_float_traits {}; template <> -struct basic_float_traits { - using type = float16; +struct basic_float_traits { + using type = half; static constexpr int sign_bits = 1; static constexpr int significand_bits = 10; static constexpr int exponent_bits = 5; @@ -71,8 +75,8 @@ struct basic_float_traits<__half> { }; template <> -struct basic_float_traits { - using type = float32; +struct basic_float_traits { + using type = float; static constexpr int sign_bits = 1; static constexpr int significand_bits = 23; static constexpr int exponent_bits = 8; @@ -80,15 +84,16 @@ struct basic_float_traits { }; template <> -struct basic_float_traits { - using type = float64; +struct basic_float_traits { + using type = double; static constexpr int sign_bits = 1; static constexpr int significand_bits = 52; static constexpr int exponent_bits = 11; static constexpr bool rounds_to_nearest = true; }; -template +template struct basic_float_traits> { using type = truncated; static constexpr int sign_bits = ComponentId == 0 ? 1 : 0; @@ -281,7 +286,7 @@ struct precision_converter { class half { public: // create half value from the bits directly. - static constexpr half create_from_bits(uint16 bits) noexcept + static constexpr half create_from_bits(std::uint16_t bits) noexcept { half result; result.data_ = bits; @@ -376,19 +381,19 @@ class half { } private: - using f16_traits = detail::float_traits; - using f32_traits = detail::float_traits; + using f16_traits = detail::float_traits; + using f32_traits = detail::float_traits; void float2half(float val) noexcept { - uint32 bit_val(0); + std::uint32_t bit_val(0); std::memcpy(&bit_val, &val, sizeof(float)); data_ = float2half(bit_val); } - static constexpr uint16 float2half(uint32 data_) noexcept + static constexpr std::uint16_t float2half(std::uint32_t data_) noexcept { - using conv = detail::precision_converter; + using conv = detail::precision_converter; if (f32_traits::is_inf(data_)) { return conv::shift_sign(data_) | f16_traits::exponent_mask; } else if (f32_traits::is_nan(data_)) { @@ -417,9 +422,9 @@ class half { } } - static constexpr uint32 half2float(uint16 data_) noexcept + static constexpr std::uint32_t half2float(std::uint16_t data_) noexcept { - using conv = detail::precision_converter; + using conv = detail::precision_converter; if (f16_traits::is_inf(data_)) { return conv::shift_sign(data_) | f32_traits::exponent_mask; } else if (f16_traits::is_nan(data_)) { @@ -434,7 +439,7 @@ class half { } } - uint16 data_; + std::uint16_t data_; }; diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 72dd8a93584..1d5963c0fe8 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -17,6 +17,8 @@ #include #include +#include + #ifdef __HIPCC__ #include @@ -138,9 +140,6 @@ using uint64 = std::uint64_t; using uintptr = std::uintptr_t; -class half; - - /** * Half precision floating point type. */ From 8508fbb97010e1b8a819999a5989c61b3bf4c4f8 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Thu, 24 Oct 2024 14:02:22 +0200 Subject: [PATCH 09/13] collect the reused part and undef after usage Co-authored-by: Marcel Koch --- .pre-commit-config.yaml | 4 + core/test/base/extended_float.cpp | 108 ++++------------------ core/test/base/floating_bit_helper.hpp | 82 +++++++++++++++++ core/test/base/half.cpp | 123 +++++-------------------- include/ginkgo/core/base/half.hpp | 7 ++ 5 files changed, 133 insertions(+), 191 deletions(-) create mode 100644 core/test/base/floating_bit_helper.hpp diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fca3a1ef28f..8eccb113759 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,8 @@ repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: end-of-file-fixer - repo: https://github.com/pre-commit/mirrors-clang-format rev: 'v14.0.0' # The default in Ubuntu 22.04, which is used in our CI hooks: diff --git a/core/test/base/extended_float.cpp b/core/test/base/extended_float.cpp index bdb7a58ed84..764f5fc0c8d 100644 --- a/core/test/base/extended_float.cpp +++ b/core/test/base/extended_float.cpp @@ -9,86 +9,22 @@ #include -#include - - -namespace { - - -template -struct floating_impl; - -template <> -struct floating_impl<16> { - using type = gko::half; -}; - -template <> -struct floating_impl<32> { - using type = float; -}; - -template <> -struct floating_impl<64> { - using type = double; -}; - -template -using floating = typename floating_impl::type; - - -class ExtendedFloatTestBase : public ::testing::Test { -protected: - using half = gko::half; - template - using truncated = gko::truncated; - - static constexpr auto byte_size = gko::byte_size; - - template - static floating create_from_bits(const char (&s)[N]) - { - auto bits = std::bitset(s).to_ullong(); - // We cast to the same size of integer type first. - // Otherwise, the first memory chunk is different when we use - // reinterpret_cast or memcpy to get the smaller type out of unsigned - // long long. - using bits_type = - typename gko::detail::float_traits>::bits_type; - auto bits_val = static_cast(bits); - floating result; - static_assert(sizeof(floating) == sizeof(bits_type), - "the type should have the same size as its bits_type"); - std::memcpy(&result, &bits_val, sizeof(bits_type)); - return result; - } - - template - static std::bitset get_bits(T val) - { - using bits_type = typename gko::detail::float_traits::bits_type; - bits_type bits; - static_assert(sizeof(T) == sizeof(bits_type), - "the type should have the same size as its bits_type"); - std::memcpy(&bits, &val, sizeof(T)); - return std::bitset(bits); - } - - template - static std::bitset get_bits(const char (&s)[N]) - { - return std::bitset(s); - } -}; - - -class TruncatedDouble : public ExtendedFloatTestBase {}; +#include "core/test/base/floating_bit_helper.hpp" + + +using namespace floating_bit_helper; + +using half = gko::half; + +template +using truncated = gko::truncated; + // clang-format does terrible formatting of string literal concatenation // clang-format off -TEST_F(TruncatedDouble, SplitsDoubleToHalves) +TEST(TruncatedDouble, SplitsDoubleToHalves) { double x = create_from_bits("1" "11110100100" "1111" "1000110110110101" "1100101011010101" "1001011101110111"); @@ -102,7 +38,7 @@ TEST_F(TruncatedDouble, SplitsDoubleToHalves) } -TEST_F(TruncatedDouble, AssemblesDoubleFromHalves) +TEST(TruncatedDouble, AssemblesDoubleFromHalves) { double x = create_from_bits("1" "11110100100" "1111" "1000110110110101" "1100101011010101" "1001011101110111"); @@ -121,7 +57,7 @@ TEST_F(TruncatedDouble, AssemblesDoubleFromHalves) } -TEST_F(TruncatedDouble, SplitsDoubleToQuarters) +TEST(TruncatedDouble, SplitsDoubleToQuarters) { double x = create_from_bits("1" "11110100100" "1111" "1000110110110101" "1100101011010101" "1001011101110111"); @@ -138,7 +74,7 @@ TEST_F(TruncatedDouble, SplitsDoubleToQuarters) } -TEST_F(TruncatedDouble, AssemblesDoubleFromQuarters) +TEST(TruncatedDouble, AssemblesDoubleFromQuarters) { double x = create_from_bits("1" "11110100100" "1111" "1000110110110101" "1100101011010101" "1001011101110111"); @@ -167,16 +103,7 @@ TEST_F(TruncatedDouble, AssemblesDoubleFromQuarters) } -// clang-format on - - -class TruncatedFloat : public ExtendedFloatTestBase {}; - - -// clang-format off - - -TEST_F(TruncatedFloat, SplitsFloatToHalves) +TEST(TruncatedFloat, SplitsFloatToHalves) { float x = create_from_bits("1" "11110100" "1001111" "1000110110110101"); @@ -188,7 +115,7 @@ TEST_F(TruncatedFloat, SplitsFloatToHalves) } -TEST_F(TruncatedFloat, AssemblesFloatFromHalves) +TEST(TruncatedFloat, AssemblesFloatFromHalves) { float x = create_from_bits("1" "11110100" "1001111" "1000110110110101"); auto p1 = static_cast>(x); @@ -205,6 +132,3 @@ TEST_F(TruncatedFloat, AssemblesFloatFromHalves) // clang-format on - - -} // namespace diff --git a/core/test/base/floating_bit_helper.hpp b/core/test/base/floating_bit_helper.hpp new file mode 100644 index 00000000000..bbdc76ee9c2 --- /dev/null +++ b/core/test/base/floating_bit_helper.hpp @@ -0,0 +1,82 @@ +// SPDX-FileCopyrightText: 2024 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef GKO_CORE_TEST_BASE_FLOATING_BIT_HELPER_HPP_ +#define GKO_CORE_TEST_BASE_FLOATING_BIT_HELPER_HPP_ + + +#include +#include + +#include + +namespace floating_bit_helper { + + +constexpr auto byte_size = gko::detail::byte_size; + + +template +struct floating_impl; + +template <> +struct floating_impl<16> { + using type = gko::half; +}; + +template <> +struct floating_impl<32> { + using type = float; +}; + +template <> +struct floating_impl<64> { + using type = double; +}; + + +template +using floating = typename floating_impl::type; + + +template +floating create_from_bits(const char (&s)[N]) +{ + auto bits = std::bitset(s).to_ullong(); + // We cast to the same size of integer type first. + // Otherwise, the first memory chunk is different when we use + // reinterpret_cast or memcpy to get the smaller type out of unsigned + // long long. + using bits_type = + typename gko::detail::float_traits>::bits_type; + auto bits_val = static_cast(bits); + floating result; + static_assert(sizeof(floating) == sizeof(bits_type), + "the type should have the same size as its bits_type"); + std::memcpy(&result, &bits_val, sizeof(bits_type)); + return result; +} + + +template +std::bitset get_bits(T val) +{ + using bits_type = typename gko::detail::float_traits::bits_type; + bits_type bits; + static_assert(sizeof(T) == sizeof(bits_type), + "the type should have the same size as its bits_type"); + std::memcpy(&bits, &val, sizeof(T)); + return std::bitset(bits); +} + +template +std::bitset get_bits(const char (&s)[N]) +{ + return std::bitset(s); +} + + +} // namespace floating_bit_helper + +#endif // GKO_CORE_TEST_BASE_FLOATING_BIT_HELPER_HPP_ diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp index 51d3e60ce40..39c47c49e15 100644 --- a/core/test/base/half.cpp +++ b/core/test/base/half.cpp @@ -2,88 +2,22 @@ // // SPDX-License-Identifier: BSD-3-Clause -#include -#include -#include - #include #include +#include "core/test/base/floating_bit_helper.hpp" + -template -struct floating_impl; - -template <> -struct floating_impl<16> { - using type = gko::half; -}; - -template <> -struct floating_impl<32> { - using type = float; -}; - -template <> -struct floating_impl<64> { - using type = double; -}; - -template -using floating = typename floating_impl::type; - - -class ExtendedFloatTestBase : public ::testing::Test { -protected: - using half = gko::half; - - static constexpr auto byte_size = gko::detail::byte_size; - - template - static floating create_from_bits(const char (&s)[N]) - { - auto bits = std::bitset(s).to_ullong(); - // We cast to the same size of integer type first. - // Otherwise, the first memory chunk is different when we use - // reinterpret_cast or memcpy to get the smaller type out of unsigned - // long long. - using bits_type = - typename gko::detail::float_traits>::bits_type; - auto bits_val = static_cast(bits); - floating result; - static_assert(sizeof(floating) == sizeof(bits_type), - "the type should have the same size as its bits_type"); - std::memcpy(&result, &bits_val, sizeof(bits_type)); - return result; - } - - template - static std::bitset get_bits(T val) - { - using bits_type = typename gko::detail::float_traits::bits_type; - bits_type bits; - static_assert(sizeof(T) == sizeof(bits_type), - "the type should have the same size as its bits_type"); - std::memcpy(&bits, &val, sizeof(T)); - return std::bitset(bits); - } - - template - static std::bitset get_bits(const char (&s)[N]) - { - return std::bitset(s); - } -}; - - -class FloatToHalf : public ExtendedFloatTestBase {}; +using half = gko::half; +using namespace floating_bit_helper; // clang-format does terrible formatting of string literal concatenation // clang-format off -TEST_F(FloatToHalf, ConvertsOne) +TEST(FloatToHalf, ConvertsOne) { half x = create_from_bits("0" "01111111" "00000000000000000000000"); @@ -91,7 +25,7 @@ TEST_F(FloatToHalf, ConvertsOne) } -TEST_F(FloatToHalf, ConvertsZero) +TEST(FloatToHalf, ConvertsZero) { half x = create_from_bits("0" "00000000" "00000000000000000000000"); @@ -99,7 +33,7 @@ TEST_F(FloatToHalf, ConvertsZero) } -TEST_F(FloatToHalf, ConvertsInf) +TEST(FloatToHalf, ConvertsInf) { half x = create_from_bits("0" "11111111" "00000000000000000000000"); @@ -107,7 +41,7 @@ TEST_F(FloatToHalf, ConvertsInf) } -TEST_F(FloatToHalf, ConvertsNegInf) +TEST(FloatToHalf, ConvertsNegInf) { half x = create_from_bits("1" "11111111" "00000000000000000000000"); @@ -115,7 +49,7 @@ TEST_F(FloatToHalf, ConvertsNegInf) } -TEST_F(FloatToHalf, ConvertsNan) +TEST(FloatToHalf, ConvertsNan) { half x = create_from_bits("0" "11111111" "00000000000000000000001"); @@ -128,7 +62,7 @@ TEST_F(FloatToHalf, ConvertsNan) } -TEST_F(FloatToHalf, ConvertsNegNan) +TEST(FloatToHalf, ConvertsNegNan) { half x = create_from_bits("1" "11111111" "00010000000000000000000"); @@ -141,7 +75,7 @@ TEST_F(FloatToHalf, ConvertsNegNan) } -TEST_F(FloatToHalf, FlushesToZero) +TEST(FloatToHalf, FlushesToZero) { half x = create_from_bits("0" "00000111" "00010001000100000001000"); @@ -149,7 +83,7 @@ TEST_F(FloatToHalf, FlushesToZero) } -TEST_F(FloatToHalf, FlushesToNegZero) +TEST(FloatToHalf, FlushesToNegZero) { half x = create_from_bits("1" "00000010" "00010001000100000001000"); @@ -157,7 +91,7 @@ TEST_F(FloatToHalf, FlushesToNegZero) } -TEST_F(FloatToHalf, FlushesToInf) +TEST(FloatToHalf, FlushesToInf) { half x = create_from_bits("0" "10100000" "10010000000000010000100"); @@ -165,7 +99,7 @@ TEST_F(FloatToHalf, FlushesToInf) } -TEST_F(FloatToHalf, FlushesToNegInf) +TEST(FloatToHalf, FlushesToNegInf) { half x = create_from_bits("1" "11000000" "10010000000000010000100"); @@ -173,7 +107,7 @@ TEST_F(FloatToHalf, FlushesToNegInf) } -TEST_F(FloatToHalf, TruncatesSmallNumber) +TEST(FloatToHalf, TruncatesSmallNumber) { half x = create_from_bits("0" "01110001" "10010000000000010000100"); @@ -181,7 +115,7 @@ TEST_F(FloatToHalf, TruncatesSmallNumber) } -TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven) +TEST(FloatToHalf, TruncatesLargeNumberRoundToEven) { half neg_x = create_from_bits("1" "10001110" "10010011111000010000100"); half neg_x2 = create_from_bits("1" "10001110" "10010011101000010000100"); @@ -199,16 +133,7 @@ TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven) } -// clang-format on - - -class HalfToFloat : public ExtendedFloatTestBase {}; - - -// clang-format off - - -TEST_F(HalfToFloat, ConvertsOne) +TEST(HalfToFloat, ConvertsOne) { float x = create_from_bits("0" "01111" "0000000000"); @@ -216,7 +141,7 @@ TEST_F(HalfToFloat, ConvertsOne) } -TEST_F(HalfToFloat, ConvertsZero) +TEST(HalfToFloat, ConvertsZero) { float x = create_from_bits("0" "00000" "0000000000"); @@ -224,7 +149,7 @@ TEST_F(HalfToFloat, ConvertsZero) } -TEST_F(HalfToFloat, ConvertsInf) +TEST(HalfToFloat, ConvertsInf) { float x = create_from_bits("0" "11111" "0000000000"); @@ -232,7 +157,7 @@ TEST_F(HalfToFloat, ConvertsInf) } -TEST_F(HalfToFloat, ConvertsNegInf) +TEST(HalfToFloat, ConvertsNegInf) { float x = create_from_bits("1" "11111" "0000000000"); @@ -240,7 +165,7 @@ TEST_F(HalfToFloat, ConvertsNegInf) } -TEST_F(HalfToFloat, ConvertsNan) +TEST(HalfToFloat, ConvertsNan) { float x = create_from_bits("0" "11111" "0001001000"); @@ -253,7 +178,7 @@ TEST_F(HalfToFloat, ConvertsNan) } -TEST_F(HalfToFloat, ConvertsNegNan) +TEST(HalfToFloat, ConvertsNegNan) { float x = create_from_bits("1" "11111" "0000000001"); @@ -266,7 +191,7 @@ TEST_F(HalfToFloat, ConvertsNegNan) } -TEST_F(HalfToFloat, ExtendsSmallNumber) +TEST(HalfToFloat, ExtendsSmallNumber) { float x = create_from_bits("0" "00001" "1000010001"); @@ -274,7 +199,7 @@ TEST_F(HalfToFloat, ExtendsSmallNumber) } -TEST_F(HalfToFloat, ExtendsLargeNumber) +TEST(HalfToFloat, ExtendsLargeNumber) { float x = create_from_bits("1" "11110" "1001001111"); diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp index fb5761c51fb..b559ad2cfc5 100644 --- a/include/ginkgo/core/base/half.hpp +++ b/include/ginkgo/core/base/half.hpp @@ -334,11 +334,14 @@ class half { data_ = result.data_; \ return *this; \ } + HALF_OPERATOR(+, +=) HALF_OPERATOR(-, -=) HALF_OPERATOR(*, *=) HALF_OPERATOR(/, /=) +#undef HALF_OPERATOR + // Do operation with different type // If it is floating point, using floating point as type. // If it is integer, using half as type @@ -373,6 +376,8 @@ class half { HALF_FRIEND_OPERATOR(*, *=) HALF_FRIEND_OPERATOR(/, /=) +#undef HALF_FRIEND_OPERATOR + // the negative half operator-() const { @@ -588,6 +593,8 @@ class complex { COMPLEX_HALF_OPERATOR(*, *=) COMPLEX_HALF_OPERATOR(/, /=) +#undef COMPLEX_HALF_OPERATOR + private: value_type real_; value_type imag_; From 03eb0227d3f167d33c5f28dd38a01b1b50a15545 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Thu, 24 Oct 2024 12:14:54 +0200 Subject: [PATCH 10/13] use memcpy not std::memcpy in hip --- hip/components/memory.hip.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hip/components/memory.hip.hpp b/hip/components/memory.hip.hpp index d8238c11795..8a98ee822b8 100644 --- a/hip/components/memory.hip.hpp +++ b/hip/components/memory.hip.hpp @@ -99,7 +99,7 @@ __device__ __forceinline__ ValueType load_generic(const ValueType* ptr) auto cast_value = HIP_ATOMIC_LOAD(reinterpret_cast(ptr), memorder, scope); ValueType result{}; - std::memcpy(&result, &cast_value, sizeof(ValueType)); + memcpy(&result, &cast_value, sizeof(ValueType)); return result; } @@ -122,7 +122,7 @@ __device__ __forceinline__ void store_generic(ValueType* ptr, ValueType value) static_assert(sizeof(atomic_type) == sizeof(ValueType), "invalid map"); static_assert(alignof(atomic_type) == alignof(ValueType), "invalid map"); atomic_type cast_value{}; - std::memcpy(&cast_value, &value, sizeof(ValueType)); + memcpy(&cast_value, &value, sizeof(ValueType)); HIP_ATOMIC_STORE(reinterpret_cast(ptr), cast_value, memorder, scope); } From 1f0f6195dcc2ee47d6c1c1a3c826aec1d40e71ef Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Mon, 18 Nov 2024 11:10:13 +0100 Subject: [PATCH 11/13] add alignment --- core/test/base/half.cpp | 8 ++++++++ include/ginkgo/core/base/half.hpp | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp index 39c47c49e15..7fcd0ffa70f 100644 --- a/core/test/base/half.cpp +++ b/core/test/base/half.cpp @@ -2,6 +2,8 @@ // // SPDX-License-Identifier: BSD-3-Clause +#include + #include #include @@ -13,6 +15,12 @@ using half = gko::half; using namespace floating_bit_helper; +TEST(Half, SizeAndAlign) +{ + ASSERT_EQ(sizeof(half), sizeof(std::uint16_t)); + ASSERT_EQ(alignof(half), alignof(std::uint16_t)); +} + // clang-format does terrible formatting of string literal concatenation // clang-format off diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp index b559ad2cfc5..bd04d2da832 100644 --- a/include/ginkgo/core/base/half.hpp +++ b/include/ginkgo/core/base/half.hpp @@ -283,7 +283,7 @@ struct precision_converter { * For now the only features are reduced storage compared to single precision * and conversions from and to single precision floating point type. */ -class half { +class alignas(std::uint16_t) half { public: // create half value from the bits directly. static constexpr half create_from_bits(std::uint16_t bits) noexcept From acbae4a56d3f49e866c6d04c619b7c1629dc3e23 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Mon, 18 Nov 2024 13:45:44 +0100 Subject: [PATCH 12/13] delete the sycl half test as we do not enable it directly --- core/test/base/half.cpp | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp index 7fcd0ffa70f..82732c62d16 100644 --- a/core/test/base/half.cpp +++ b/core/test/base/half.cpp @@ -61,12 +61,7 @@ TEST(FloatToHalf, ConvertsNan) { half x = create_from_bits("0" "11111111" "00000000000000000000001"); - #if defined(SYCL_LANGUAGE_VERSION) - // Sycl put the 1000000000, but ours put mask - ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1000000000")); - #else ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1111111111")); - #endif } @@ -74,12 +69,7 @@ TEST(FloatToHalf, ConvertsNegNan) { half x = create_from_bits("1" "11111111" "00010000000000000000000"); - #if defined(SYCL_LANGUAGE_VERSION) - // Sycl put the 1000000000, but ours put mask - ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1000000000")); - #else ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1111111111")); - #endif } @@ -177,12 +167,7 @@ TEST(HalfToFloat, ConvertsNan) { float x = create_from_bits("0" "11111" "0001001000"); - #if defined(SYCL_LANGUAGE_VERSION) - // sycl keeps significand - ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00010010000000000000000")); - #else ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "11111111111111111111111")); - #endif } @@ -190,12 +175,7 @@ TEST(HalfToFloat, ConvertsNegNan) { float x = create_from_bits("1" "11111" "0000000001"); - #if defined(SYCL_LANGUAGE_VERSION) - // sycl keeps significand - ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000010000000000000")); - #else ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "11111111111111111111111")); - #endif } From b5afcacb81808782226d8560de176f39b4828bf0 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Sat, 30 Nov 2024 00:25:05 +0100 Subject: [PATCH 13/13] use reference for half when it is possible --- include/ginkgo/core/base/half.hpp | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp index bd04d2da832..b6d11dd7c64 100644 --- a/include/ginkgo/core/base/half.hpp +++ b/include/ginkgo/core/base/half.hpp @@ -286,7 +286,7 @@ struct precision_converter { class alignas(std::uint16_t) half { public: // create half value from the bits directly. - static constexpr half create_from_bits(std::uint16_t bits) noexcept + static constexpr half create_from_bits(const std::uint16_t& bits) noexcept { half result; result.data_ = bits; @@ -299,13 +299,13 @@ class alignas(std::uint16_t) half { constexpr half() noexcept : data_(0){}; template ::value>> - half(const T val) : data_(0) + half(const T& val) : data_(0) { this->float2half(static_cast(val)); } template - half& operator=(const V val) + half& operator=(const V& val) { this->float2half(static_cast(val)); return *this; @@ -323,7 +323,7 @@ class alignas(std::uint16_t) half { // operation will cast it to float and then do float operation such that it // becomes float in the end. #define HALF_OPERATOR(_op, _opeq) \ - friend half operator _op(const half lhf, const half rhf) \ + friend half operator _op(const half& lhf, const half& rhf) \ { \ return static_cast(static_cast(lhf) \ _op static_cast(rhf)); \ @@ -350,7 +350,7 @@ class alignas(std::uint16_t) half { friend std::enable_if_t< \ !std::is_same::value && std::is_scalar::value, \ std::conditional_t::value, T, half>> \ - operator _op(const half hf, const T val) \ + operator _op(const half& hf, const T& val) \ { \ using type = \ std::conditional_t::value, T, half>; \ @@ -362,7 +362,7 @@ class alignas(std::uint16_t) half { friend std::enable_if_t< \ !std::is_same::value && std::is_scalar::value, \ std::conditional_t::value, T, half>> \ - operator _op(const T val, const half hf) \ + operator _op(const T& val, const half& hf) \ { \ using type = \ std::conditional_t::value, T, half>; \ @@ -389,7 +389,7 @@ class alignas(std::uint16_t) half { using f16_traits = detail::float_traits; using f32_traits = detail::float_traits; - void float2half(float val) noexcept + void float2half(const float& val) noexcept { std::uint32_t bit_val(0); std::memcpy(&bit_val, &val, sizeof(float)); @@ -576,16 +576,12 @@ class complex { return *this; } -// It's for MacOS. -// TODO: check whether mac compiler always use complex version even when real -// half -#define COMPLEX_HALF_OPERATOR(_op, _opeq) \ - friend complex operator _op(const complex lhf, \ - const complex rhf) \ - { \ - auto a = lhf; \ - a _opeq rhf; \ - return a; \ +#define COMPLEX_HALF_OPERATOR(_op, _opeq) \ + friend complex operator _op(const complex& lhf, const complex& rhf) \ + { \ + auto a = lhf; \ + a _opeq rhf; \ + return a; \ } COMPLEX_HALF_OPERATOR(+, +=)