From f39aeabd6a0490d78e0ef5b663afef27223ddb1c Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Tue, 22 Oct 2024 12:05:26 +0200
Subject: [PATCH 01/13] half base type

---
 core/base/extended_float.hpp      | 393 +-----------------
 core/test/base/extended_float.cpp |  41 +-
 include/ginkgo/core/base/half.hpp | 669 ++++++++++++++++++++++++++++++
 include/ginkgo/ginkgo.hpp         |   1 +
 4 files changed, 707 insertions(+), 397 deletions(-)
 create mode 100644 include/ginkgo/core/base/half.hpp
diff --git a/core/base/extended_float.hpp b/core/base/extended_float.hpp
index c14b5d1bd39..dd7d46c363d 100644
--- a/core/base/extended_float.hpp
+++ b/core/base/extended_float.hpp
@@ -9,6 +9,7 @@
 #include <limits>
 #include <type_traits>
 
+#include <ginkgo/core/base/half.hpp>
 #include <ginkgo/core/base/types.hpp>
 
 
@@ -30,341 +31,6 @@
 namespace gko {
 
 
-template <typename, size_type, size_type>
-class truncated;
-
-
-namespace detail {
-
-
-template <std::size_t, typename = void>
-struct uint_of_impl {};
-
-template <std::size_t Bits>
-struct uint_of_impl<Bits, std::enable_if_t<(Bits <= 16)>> {
-    using type = uint16;
-};
-
-template <std::size_t Bits>
-struct uint_of_impl<Bits, std::enable_if_t<(16 < Bits && Bits <= 32)>> {
-    using type = uint32;
-};
-
-template <std::size_t Bits>
-struct uint_of_impl<Bits, std::enable_if_t<(32 < Bits)>> {
-    using type = uint64;
-};
-
-template <std::size_t Bits>
-using uint_of = typename uint_of_impl<Bits>::type;
-
-
-template <typename T>
-struct basic_float_traits {};
-
-template <>
-struct basic_float_traits<float16> {
-    using type = float16;
-    static constexpr int sign_bits = 1;
-    static constexpr int significand_bits = 10;
-    static constexpr int exponent_bits = 5;
-    static constexpr bool rounds_to_nearest = true;
-};
-
-template <>
-struct basic_float_traits<float32> {
-    using type = float32;
-    static constexpr int sign_bits = 1;
-    static constexpr int significand_bits = 23;
-    static constexpr int exponent_bits = 8;
-    static constexpr bool rounds_to_nearest = true;
-};
-
-template <>
-struct basic_float_traits<float64> {
-    using type = float64;
-    static constexpr int sign_bits = 1;
-    static constexpr int significand_bits = 52;
-    static constexpr int exponent_bits = 11;
-    static constexpr bool rounds_to_nearest = true;
-};
-
-template <typename FloatType, size_type NumComponents, size_type ComponentId>
-struct basic_float_traits<truncated<FloatType, NumComponents, ComponentId>> {
-    using type = truncated<FloatType, NumComponents, ComponentId>;
-    static constexpr int sign_bits = ComponentId == 0 ? 1 : 0;
-    static constexpr int exponent_bits =
-        ComponentId == 0 ? basic_float_traits<FloatType>::exponent_bits : 0;
-    static constexpr int significand_bits =
-        ComponentId == 0 ? sizeof(type) * byte_size - exponent_bits - 1
-                         : sizeof(type) * byte_size;
-    static constexpr bool rounds_to_nearest = false;
-};
-
-
-template <typename UintType>
-constexpr UintType create_ones(int n)
-{
-    return (n == sizeof(UintType) * byte_size ? static_cast<UintType>(0)
-                                              : static_cast<UintType>(1) << n) -
-           static_cast<UintType>(1);
-}
-
-template <typename T>
-struct float_traits {
-    using type = typename basic_float_traits<T>::type;
-    using bits_type = uint_of<sizeof(type) * byte_size>;
-    static constexpr int sign_bits = basic_float_traits<T>::sign_bits;
-    static constexpr int significand_bits =
-        basic_float_traits<T>::significand_bits;
-    static constexpr int exponent_bits = basic_float_traits<T>::exponent_bits;
-    static constexpr bits_type significand_mask =
-        create_ones<bits_type>(significand_bits);
-    static constexpr bits_type exponent_mask =
-        create_ones<bits_type>(significand_bits + exponent_bits) -
-        significand_mask;
-    static constexpr bits_type bias_mask =
-        create_ones<bits_type>(significand_bits + exponent_bits - 1) -
-        significand_mask;
-    static constexpr bits_type sign_mask =
-        create_ones<bits_type>(sign_bits + significand_bits + exponent_bits) -
-        exponent_mask - significand_mask;
-    static constexpr bool rounds_to_nearest =
-        basic_float_traits<T>::rounds_to_nearest;
-
-    static constexpr auto eps =
-        1.0 / (1ll << (significand_bits + rounds_to_nearest));
-
-    static constexpr bool is_inf(bits_type data)
-    {
-        return (data & exponent_mask) == exponent_mask &&
-               (data & significand_mask) == bits_type{};
-    }
-
-    static constexpr bool is_nan(bits_type data)
-    {
-        return (data & exponent_mask) == exponent_mask &&
-               (data & significand_mask) != bits_type{};
-    }
-
-    static constexpr bool is_denom(bits_type data)
-    {
-        return (data & exponent_mask) == bits_type{};
-    }
-};
-
-
-template <typename SourceType, typename ResultType,
-          bool = (sizeof(SourceType) <= sizeof(ResultType))>
-struct precision_converter;
-
-// upcasting implementation details
-template <typename SourceType, typename ResultType>
-struct precision_converter<SourceType, ResultType, true> {
-    using source_traits = float_traits<SourceType>;
-    using result_traits = float_traits<ResultType>;
-    using source_bits = typename source_traits::bits_type;
-    using result_bits = typename result_traits::bits_type;
-
-    static_assert(source_traits::exponent_bits <=
-                          result_traits::exponent_bits &&
-                      source_traits::significand_bits <=
-                          result_traits::significand_bits,
-                  "SourceType has to have both lower range and precision or "
-                  "higher range and precision than ResultType");
-
-    static constexpr int significand_offset =
-        result_traits::significand_bits - source_traits::significand_bits;
-    static constexpr int exponent_offset = significand_offset;
-    static constexpr int sign_offset = result_traits::exponent_bits -
-                                       source_traits::exponent_bits +
-                                       exponent_offset;
-    static constexpr result_bits bias_change =
-        result_traits::bias_mask -
-        (static_cast<result_bits>(source_traits::bias_mask) << exponent_offset);
-
-    static constexpr result_bits shift_significand(source_bits data) noexcept
-    {
-        return static_cast<result_bits>(data & source_traits::significand_mask)
-               << significand_offset;
-    }
-
-    static constexpr result_bits shift_exponent(source_bits data) noexcept
-    {
-        return update_bias(
-            static_cast<result_bits>(data & source_traits::exponent_mask)
-            << exponent_offset);
-    }
-
-    static constexpr result_bits shift_sign(source_bits data) noexcept
-    {
-        return static_cast<result_bits>(data & source_traits::sign_mask)
-               << sign_offset;
-    }
-
-private:
-    static constexpr result_bits update_bias(result_bits data) noexcept
-    {
-        return data == typename result_traits::bits_type{} ? data
-                                                           : data + bias_change;
-    }
-};
-
-// downcasting implementation details
-template <typename SourceType, typename ResultType>
-struct precision_converter<SourceType, ResultType, false> {
-    using source_traits = float_traits<SourceType>;
-    using result_traits = float_traits<ResultType>;
-    using source_bits = typename source_traits::bits_type;
-    using result_bits = typename result_traits::bits_type;
-
-    static_assert(source_traits::exponent_bits >=
-                          result_traits::exponent_bits &&
-                      source_traits::significand_bits >=
-                          result_traits::significand_bits,
-                  "SourceType has to have both lower range and precision or "
-                  "higher range and precision than ResultType");
-
-    static constexpr int significand_offset =
-        source_traits::significand_bits - result_traits::significand_bits;
-    static constexpr int exponent_offset = significand_offset;
-    static constexpr int sign_offset = source_traits::exponent_bits -
-                                       result_traits::exponent_bits +
-                                       exponent_offset;
-    static constexpr source_bits bias_change =
-        (source_traits::bias_mask >> exponent_offset) -
-        static_cast<source_bits>(result_traits::bias_mask);
-
-    static constexpr result_bits shift_significand(source_bits data) noexcept
-    {
-        return static_cast<result_bits>(
-            (data & source_traits::significand_mask) >> significand_offset);
-    }
-
-    static constexpr result_bits shift_exponent(source_bits data) noexcept
-    {
-        return static_cast<result_bits>(update_bias(
-            (data & source_traits::exponent_mask) >> exponent_offset));
-    }
-
-    static constexpr result_bits shift_sign(source_bits data) noexcept
-    {
-        return static_cast<result_bits>((data & source_traits::sign_mask) >>
-                                        sign_offset);
-    }
-
-private:
-    static constexpr source_bits update_bias(source_bits data) noexcept
-    {
-        return data <= bias_change ? typename source_traits::bits_type{}
-                                   : limit_exponent(data - bias_change);
-    }
-
-    static constexpr source_bits limit_exponent(source_bits data) noexcept
-    {
-        return data >= static_cast<source_bits>(result_traits::exponent_mask)
-                   ? static_cast<source_bits>(result_traits::exponent_mask)
-                   : data;
-    }
-};
-
-
-}  // namespace detail
-
-
-/**
- * A class providing basic support for half precision floating point types.
- *
- * For now the only features are reduced storage compared to single precision
- * and conversions from and to single precision floating point type.
- */
-class half {
-public:
-    half() noexcept = default;
-
-    GKO_ATTRIBUTES half(float32 val) noexcept
-    {
-#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
-        const auto tmp = __float2half_rn(val);
-        data_ = reinterpret_cast<const uint16&>(tmp);
-#else   // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
-        data_ = float2half(reinterpret_cast<const uint32&>(val));
-#endif  // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
-    }
-
-    GKO_ATTRIBUTES half(float64 val) noexcept : half(static_cast<float32>(val))
-    {}
-
-    GKO_ATTRIBUTES operator float32() const noexcept
-    {
-#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
-        return __half2float(reinterpret_cast<const __half&>(data_));
-#else   // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
-        const auto bits = half2float(data_);
-        return reinterpret_cast<const float32&>(bits);
-#endif  // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
-    }
-
-    GKO_ATTRIBUTES operator float64() const noexcept
-    {
-        return static_cast<float64>(static_cast<float32>(*this));
-    }
-
-    GKO_ATTRIBUTES half operator-() const noexcept
-    {
-        auto res = *this;
-        // flip sign bit
-        res.data_ ^= f16_traits::sign_mask;
-        return res;
-    }
-
-private:
-    using f16_traits = detail::float_traits<float16>;
-    using f32_traits = detail::float_traits<float32>;
-
-    static uint16 float2half(uint32 data_) noexcept
-    {
-        using conv = detail::precision_converter<float32, float16>;
-        if (f32_traits::is_inf(data_)) {
-            return conv::shift_sign(data_) | f16_traits::exponent_mask;
-        } else if (f32_traits::is_nan(data_)) {
-            return conv::shift_sign(data_) | f16_traits::exponent_mask |
-                   f16_traits::significand_mask;
-        } else {
-            const auto exp = conv::shift_exponent(data_);
-            if (f16_traits::is_inf(exp)) {
-                return conv::shift_sign(data_) | exp;
-            } else if (f16_traits::is_denom(exp)) {
-                // TODO: handle denormals
-                return conv::shift_sign(data_);
-            } else {
-                return conv::shift_sign(data_) | exp |
-                       conv::shift_significand(data_);
-            }
-        }
-    }
-
-    static uint32 half2float(uint16 data_) noexcept
-    {
-        using conv = detail::precision_converter<float16, float32>;
-        if (f16_traits::is_inf(data_)) {
-            return conv::shift_sign(data_) | f32_traits::exponent_mask;
-        } else if (f16_traits::is_nan(data_)) {
-            return conv::shift_sign(data_) | f32_traits::exponent_mask |
-                   f32_traits::significand_mask;
-        } else if (f16_traits::is_denom(data_)) {
-            // TODO: handle denormals
-            return conv::shift_sign(data_);
-        } else {
-            return conv::shift_sign(data_) | conv::shift_exponent(data_) |
-                   conv::shift_significand(data_);
-        }
-    }
-
-    uint16 data_;
-};
-
-
 /**
  * This template implements the truncated (or split) storage of a floating point
  * type.
@@ -458,38 +124,6 @@ class truncated {
 namespace std {
 
 
-template <>
-class complex<gko::half> {
-public:
-    using value_type = gko::half;
-
-    complex(const value_type& real = 0.f, const value_type& imag = 0.f)
-        : real_(real), imag_(imag)
-    {}
-
-    template <typename U>
-    explicit complex(const complex<U>& other)
-        : complex(static_cast<value_type>(other.real()),
-                  static_cast<value_type>(other.imag()))
-    {}
-
-    value_type real() const noexcept { return real_; }
-
-    value_type imag() const noexcept { return imag_; }
-
-
-    operator std::complex<gko::float32>() const noexcept
-    {
-        return std::complex<gko::float32>(static_cast<gko::float32>(real_),
-                                          static_cast<gko::float32>(imag_));
-    }
-
-private:
-    value_type real_;
-    value_type imag_;
-};
-
-
 template <typename T, gko::size_type NumComponents>
 class complex<gko::truncated<T, NumComponents>> {
 public:
@@ -521,31 +155,6 @@ class complex<gko::truncated<T, NumComponents>> {
 };
 
 
-template <>
-struct is_scalar<gko::half> : std::true_type {};
-
-
-template <>
-struct numeric_limits<gko::half> {
-    static constexpr bool is_specialized{true};
-    static constexpr bool is_signed{true};
-    static constexpr bool is_integer{false};
-    static constexpr bool is_exact{false};
-    static constexpr bool is_bounded{true};
-    static constexpr bool is_modulo{false};
-    static constexpr int digits{
-        gko::detail::float_traits<gko::half>::significand_bits + 1};
-    // 3/10 is approx. log_10(2)
-    static constexpr int digits10{digits * 3 / 10};
-
-    // Note: gko::half can't return gko::half here because it does not have
-    //       a constexpr constructor.
-    static constexpr float epsilon()
-    {
-        return gko::detail::float_traits<gko::half>::eps;
-    }
-};
-
 }  // namespace std
 
 
diff --git a/core/test/base/extended_float.cpp b/core/test/base/extended_float.cpp
index 6148c7c350a..818843baa38 100644
--- a/core/test/base/extended_float.cpp
+++ b/core/test/base/extended_float.cpp
@@ -9,6 +9,8 @@
 
 #include <gtest/gtest.h>
 
+#include <ginkgo/core/base/half.hpp>
+
 
 namespace {
 
@@ -110,7 +112,12 @@ TEST_F(FloatToHalf, ConvertsNan)
 {
     half x = create_from_bits("0" "11111111" "00000000000000000000001");
 
+    #if defined(SYCL_LANGUAGE_VERSION) 
+    // Sycl put the 1000000000, but ours put mask
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1000000000"));
+    #else
     ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1111111111"));
+    #endif
 }
 
 
@@ -118,7 +125,12 @@ TEST_F(FloatToHalf, ConvertsNegNan)
 {
     half x = create_from_bits("1" "11111111" "00010000000000000000000");
 
+    #if defined(SYCL_LANGUAGE_VERSION)
+    // Sycl put the 1000000000, but ours put mask
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1000000000"));
+    #else
     ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1111111111"));
+    #endif
 }
 
 
@@ -162,12 +174,21 @@ TEST_F(FloatToHalf, TruncatesSmallNumber)
 }
 
 
-TEST_F(FloatToHalf, TruncatesLargeNumber)
+TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven)
 {
-    half x = create_from_bits("1" "10001110" "10010011111000010000100");
-
-    ASSERT_EQ(get_bits(x), get_bits("1" "11110" "1001001111"));
-
+    half neg_x = create_from_bits("1" "10001110" "10010011111000010000100");
+    half neg_x2 = create_from_bits("1" "10001110" "10010011101000010000100");
+    half x = create_from_bits("0" "10001110" "10010011111000010000100");
+    half x2 = create_from_bits("0" "10001110" "10010011101000010000100");
+    half x3 = create_from_bits("0" "10001110" "10010011101000000000000");
+    half x4 = create_from_bits("0" "10001110" "10010011111000000000000");
+
+    EXPECT_EQ(get_bits(x), get_bits("0" "11110" "1001010000"));
+    EXPECT_EQ(get_bits(x2), get_bits("0" "11110" "1001001111"));
+    EXPECT_EQ(get_bits(x3), get_bits("0" "11110" "1001001110"));
+    EXPECT_EQ(get_bits(x4), get_bits("0" "11110" "1001010000"));
+    EXPECT_EQ(get_bits(neg_x), get_bits("1" "11110" "1001010000"));
+    EXPECT_EQ(get_bits(neg_x2), get_bits("1" "11110" "1001001111"));
 }
 
 
@@ -216,7 +237,12 @@ TEST_F(HalfToFloat, ConvertsNan)
 {
     float x = create_from_bits("0" "11111" "0001001000");
 
+    #if defined(SYCL_LANGUAGE_VERSION) 
+    // sycl keeps significand
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00010010000000000000000"));
+    #else
     ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "11111111111111111111111"));
+    #endif
 }
 
 
@@ -224,7 +250,12 @@ TEST_F(HalfToFloat, ConvertsNegNan)
 {
     float x = create_from_bits("1" "11111" "0000000001");
 
+    #if defined(SYCL_LANGUAGE_VERSION) 
+    // sycl keeps significand
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000010000000000000"));
+    #else
     ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "11111111111111111111111"));
+    #endif
 }
 
 
diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp
new file mode 100644
index 00000000000..25a38abb6eb
--- /dev/null
+++ b/include/ginkgo/core/base/half.hpp
@@ -0,0 +1,669 @@
+// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef GKO_PUBLIC_CORE_BASE_HALF_HPP_
+#define GKO_PUBLIC_CORE_BASE_HALF_HPP_
+
+
+#include <complex>
+#include <cstring>
+#include <type_traits>
+
+#include <ginkgo/core/base/std_extensions.hpp>
+#include <ginkgo/core/base/types.hpp>
+
+
+class __half;
+
+
+namespace gko {
+
+
+template <typename, size_type, size_type>
+class truncated;
+
+
+namespace detail {
+
+
+template <std::size_t, typename = void>
+struct uint_of_impl {};
+
+template <std::size_t Bits>
+struct uint_of_impl<Bits, std::enable_if_t<(Bits <= 16)>> {
+    using type = uint16;
+};
+
+template <std::size_t Bits>
+struct uint_of_impl<Bits, std::enable_if_t<(16 < Bits && Bits <= 32)>> {
+    using type = uint32;
+};
+
+template <std::size_t Bits>
+struct uint_of_impl<Bits, std::enable_if_t<(32 < Bits) && (Bits <= 64)>> {
+    using type = uint64;
+};
+
+template <std::size_t Bits>
+using uint_of = typename uint_of_impl<Bits>::type;
+
+
+template <typename T>
+struct basic_float_traits {};
+
+template <>
+struct basic_float_traits<float16> {
+    using type = float16;
+    static constexpr int sign_bits = 1;
+    static constexpr int significand_bits = 10;
+    static constexpr int exponent_bits = 5;
+    static constexpr bool rounds_to_nearest = true;
+};
+
+template <>
+struct basic_float_traits<__half> {
+    using type = __half;
+    static constexpr int sign_bits = 1;
+    static constexpr int significand_bits = 10;
+    static constexpr int exponent_bits = 5;
+    static constexpr bool rounds_to_nearest = true;
+};
+
+template <>
+struct basic_float_traits<float32> {
+    using type = float32;
+    static constexpr int sign_bits = 1;
+    static constexpr int significand_bits = 23;
+    static constexpr int exponent_bits = 8;
+    static constexpr bool rounds_to_nearest = true;
+};
+
+template <>
+struct basic_float_traits<float64> {
+    using type = float64;
+    static constexpr int sign_bits = 1;
+    static constexpr int significand_bits = 52;
+    static constexpr int exponent_bits = 11;
+    static constexpr bool rounds_to_nearest = true;
+};
+
+template <typename FloatType, size_type NumComponents, size_type ComponentId>
+struct basic_float_traits<truncated<FloatType, NumComponents, ComponentId>> {
+    using type = truncated<FloatType, NumComponents, ComponentId>;
+    static constexpr int sign_bits = ComponentId == 0 ? 1 : 0;
+    static constexpr int exponent_bits =
+        ComponentId == 0 ? basic_float_traits<FloatType>::exponent_bits : 0;
+    static constexpr int significand_bits =
+        ComponentId == 0 ? sizeof(type) * byte_size - exponent_bits - 1
+                         : sizeof(type) * byte_size;
+    static constexpr bool rounds_to_nearest = false;
+};
+
+
+template <typename UintType>
+constexpr UintType create_ones(int n)
+{
+    return (n == sizeof(UintType) * byte_size ? static_cast<UintType>(0)
+                                              : static_cast<UintType>(1) << n) -
+           static_cast<UintType>(1);
+}
+
+
+template <typename T>
+struct float_traits {
+    using type = typename basic_float_traits<T>::type;
+    using bits_type = uint_of<sizeof(type) * byte_size>;
+    static constexpr int sign_bits = basic_float_traits<T>::sign_bits;
+    static constexpr int significand_bits =
+        basic_float_traits<T>::significand_bits;
+    static constexpr int exponent_bits = basic_float_traits<T>::exponent_bits;
+    static constexpr bits_type significand_mask =
+        create_ones<bits_type>(significand_bits);
+    static constexpr bits_type exponent_mask =
+        create_ones<bits_type>(significand_bits + exponent_bits) -
+        significand_mask;
+    static constexpr bits_type bias_mask =
+        create_ones<bits_type>(significand_bits + exponent_bits - 1) -
+        significand_mask;
+    static constexpr bits_type sign_mask =
+        create_ones<bits_type>(sign_bits + significand_bits + exponent_bits) -
+        exponent_mask - significand_mask;
+    static constexpr bool rounds_to_nearest =
+        basic_float_traits<T>::rounds_to_nearest;
+
+    static constexpr auto eps =
+        1.0 / (1ll << (significand_bits + rounds_to_nearest));
+
+    static constexpr bool is_inf(bits_type data)
+    {
+        return (data & exponent_mask) == exponent_mask &&
+               (data & significand_mask) == bits_type{};
+    }
+
+    static constexpr bool is_nan(bits_type data)
+    {
+        return (data & exponent_mask) == exponent_mask &&
+               (data & significand_mask) != bits_type{};
+    }
+
+    static constexpr bool is_denom(bits_type data)
+    {
+        return (data & exponent_mask) == bits_type{};
+    }
+};
+
+
+template <typename SourceType, typename ResultType,
+          bool = (sizeof(SourceType) <= sizeof(ResultType))>
+struct precision_converter;
+
+// upcasting implementation details
+template <typename SourceType, typename ResultType>
+struct precision_converter<SourceType, ResultType, true> {
+    using source_traits = float_traits<SourceType>;
+    using result_traits = float_traits<ResultType>;
+    using source_bits = typename source_traits::bits_type;
+    using result_bits = typename result_traits::bits_type;
+
+    static_assert(source_traits::exponent_bits <=
+                          result_traits::exponent_bits &&
+                      source_traits::significand_bits <=
+                          result_traits::significand_bits,
+                  "SourceType has to have both lower range and precision or "
+                  "higher range and precision than ResultType");
+
+    static constexpr int significand_offset =
+        result_traits::significand_bits - source_traits::significand_bits;
+    static constexpr int exponent_offset = significand_offset;
+    static constexpr int sign_offset = result_traits::exponent_bits -
+                                       source_traits::exponent_bits +
+                                       exponent_offset;
+    static constexpr result_bits bias_change =
+        result_traits::bias_mask -
+        (static_cast<result_bits>(source_traits::bias_mask) << exponent_offset);
+
+    static constexpr result_bits shift_significand(source_bits data) noexcept
+    {
+        return static_cast<result_bits>(data & source_traits::significand_mask)
+               << significand_offset;
+    }
+
+    static constexpr result_bits shift_exponent(source_bits data) noexcept
+    {
+        return update_bias(
+            static_cast<result_bits>(data & source_traits::exponent_mask)
+            << exponent_offset);
+    }
+
+    static constexpr result_bits shift_sign(source_bits data) noexcept
+    {
+        return static_cast<result_bits>(data & source_traits::sign_mask)
+               << sign_offset;
+    }
+
+private:
+    static constexpr result_bits update_bias(result_bits data) noexcept
+    {
+        return data == typename result_traits::bits_type{} ? data
+                                                           : data + bias_change;
+    }
+};
+
+// downcasting implementation details
+template <typename SourceType, typename ResultType>
+struct precision_converter<SourceType, ResultType, false> {
+    using source_traits = float_traits<SourceType>;
+    using result_traits = float_traits<ResultType>;
+    using source_bits = typename source_traits::bits_type;
+    using result_bits = typename result_traits::bits_type;
+
+    static_assert(source_traits::exponent_bits >=
+                          result_traits::exponent_bits &&
+                      source_traits::significand_bits >=
+                          result_traits::significand_bits,
+                  "SourceType has to have both lower range and precision or "
+                  "higher range and precision than ResultType");
+
+    static constexpr int significand_offset =
+        source_traits::significand_bits - result_traits::significand_bits;
+    static constexpr int exponent_offset = significand_offset;
+    static constexpr int sign_offset = source_traits::exponent_bits -
+                                       result_traits::exponent_bits +
+                                       exponent_offset;
+    static constexpr source_bits bias_change =
+        (source_traits::bias_mask >> exponent_offset) -
+        static_cast<source_bits>(result_traits::bias_mask);
+
+    static constexpr result_bits shift_significand(source_bits data) noexcept
+    {
+        return static_cast<result_bits>(
+            (data & source_traits::significand_mask) >> significand_offset);
+    }
+
+    static constexpr result_bits shift_exponent(source_bits data) noexcept
+    {
+        return static_cast<result_bits>(update_bias(
+            (data & source_traits::exponent_mask) >> exponent_offset));
+    }
+
+    static constexpr result_bits shift_sign(source_bits data) noexcept
+    {
+        return static_cast<result_bits>((data & source_traits::sign_mask) >>
+                                        sign_offset);
+    }
+
+private:
+    static constexpr source_bits update_bias(source_bits data) noexcept
+    {
+        return data <= bias_change ? typename source_traits::bits_type{}
+                                   : limit_exponent(data - bias_change);
+    }
+
+    static constexpr source_bits limit_exponent(source_bits data) noexcept
+    {
+        return data >= static_cast<source_bits>(result_traits::exponent_mask)
+                   ? static_cast<source_bits>(result_traits::exponent_mask)
+                   : data;
+    }
+};
+
+
+}  // namespace detail
+
+
+/**
+ * A class providing basic support for half precision floating point types.
+ *
+ * For now the only features are reduced storage compared to single precision
+ * and conversions from and to single precision floating point type.
+ */
+class half {
+public:
+    // create half value from the bits directly.
+    static constexpr half create_from_bits(uint16 bits) noexcept
+    {
+        half result;
+        result.data_ = bits;
+        return result;
+    }
+
+    // TODO: NVHPC (host side) may not use zero initialization for the data
+    // member by default constructor in some cases. Not sure whether it is
+    // caused by something else in jacobi or isai.
+    constexpr half() noexcept : data_(0){};
+
+    template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
+    half(const T val) : data_(0)
+    {
+        this->float2half(static_cast<float>(val));
+    }
+
+    template <typename V>
+    half& operator=(const V val)
+    {
+        this->float2half(static_cast<float>(val));
+        return *this;
+    }
+
+    operator float() const noexcept
+    {
+        const auto bits = half2float(data_);
+        float ans(0);
+        std::memcpy(&ans, &bits, sizeof(float));
+        return ans;
+    }
+
+    // can not use half operator _op(const half) for half + half
+    // operation will cast it to float and then do float operation such that it
+    // becomes float in the end.
+#define HALF_OPERATOR(_op, _opeq)                                  \
+    friend half operator _op(const half lhf, const half rhf)       \
+    {                                                              \
+        return static_cast<half>(static_cast<float>(lhf)           \
+                                     _op static_cast<float>(rhf)); \
+    }                                                              \
+    half& operator _opeq(const half& hf)                           \
+    {                                                              \
+        auto result = *this _op hf;                                \
+        data_ = result.data_;                                      \
+        return *this;                                              \
+    }
+    HALF_OPERATOR(+, +=)
+    HALF_OPERATOR(-, -=)
+    HALF_OPERATOR(*, *=)
+    HALF_OPERATOR(/, /=)
+
+    // Do operation with different type
+    // If it is floating point, using floating point as type.
+    // If it is integer, using half as type
+#define HALF_FRIEND_OPERATOR(_op, _opeq)                                   \
+    template <typename T>                                                  \
+    friend std::enable_if_t<                                               \
+        !std::is_same<T, half>::value && std::is_scalar<T>::value,         \
+        std::conditional_t<std::is_floating_point<T>::value, T, half>>     \
+    operator _op(const half hf, const T val)                               \
+    {                                                                      \
+        using type =                                                       \
+            std::conditional_t<std::is_floating_point<T>::value, T, half>; \
+        auto result = static_cast<type>(hf);                               \
+        result _opeq static_cast<type>(val);                               \
+        return result;                                                     \
+    }                                                                      \
+    template <typename T>                                                  \
+    friend std::enable_if_t<                                               \
+        !std::is_same<T, half>::value && std::is_scalar<T>::value,         \
+        std::conditional_t<std::is_floating_point<T>::value, T, half>>     \
+    operator _op(const T val, const half hf)                               \
+    {                                                                      \
+        using type =                                                       \
+            std::conditional_t<std::is_floating_point<T>::value, T, half>; \
+        auto result = static_cast<type>(val);                              \
+        result _opeq static_cast<type>(hf);                                \
+        return result;                                                     \
+    }
+
+    HALF_FRIEND_OPERATOR(+, +=)
+    HALF_FRIEND_OPERATOR(-, -=)
+    HALF_FRIEND_OPERATOR(*, *=)
+    HALF_FRIEND_OPERATOR(/, /=)
+
+    // the negative
+    half operator-() const
+    {
+        auto val = 0.0f - *this;
+        return static_cast<half>(val);
+    }
+
+private:
+    using f16_traits = detail::float_traits<float16>;
+    using f32_traits = detail::float_traits<float32>;
+
+    void float2half(float val) noexcept
+    {
+        uint32 bit_val(0);
+        std::memcpy(&bit_val, &val, sizeof(float));
+        data_ = float2half(bit_val);
+    }
+
+    static constexpr uint16 float2half(uint32 data_) noexcept
+    {
+        using conv = detail::precision_converter<float32, float16>;
+        if (f32_traits::is_inf(data_)) {
+            return conv::shift_sign(data_) | f16_traits::exponent_mask;
+        } else if (f32_traits::is_nan(data_)) {
+            return conv::shift_sign(data_) | f16_traits::exponent_mask |
+                   f16_traits::significand_mask;
+        } else {
+            const auto exp = conv::shift_exponent(data_);
+            if (f16_traits::is_inf(exp)) {
+                return conv::shift_sign(data_) | exp;
+            } else if (f16_traits::is_denom(exp)) {
+                // TODO: handle denormals
+                return conv::shift_sign(data_);
+            } else {
+                // Rounding to even
+                const auto result = conv::shift_sign(data_) | exp |
+                                    conv::shift_significand(data_);
+                const auto tail =
+                    data_ & static_cast<f32_traits::bits_type>(
+                                (1 << conv::significand_offset) - 1);
+
+                constexpr auto half = static_cast<f32_traits::bits_type>(
+                    1 << (conv::significand_offset - 1));
+                return result +
+                       (tail > half || ((tail == half) && (result & 1)));
+            }
+        }
+    }
+
+    static constexpr uint32 half2float(uint16 data_) noexcept
+    {
+        using conv = detail::precision_converter<float16, float32>;
+        if (f16_traits::is_inf(data_)) {
+            return conv::shift_sign(data_) | f32_traits::exponent_mask;
+        } else if (f16_traits::is_nan(data_)) {
+            return conv::shift_sign(data_) | f32_traits::exponent_mask |
+                   f32_traits::significand_mask;
+        } else if (f16_traits::is_denom(data_)) {
+            // TODO: handle denormals
+            return conv::shift_sign(data_);
+        } else {
+            return conv::shift_sign(data_) | conv::shift_exponent(data_) |
+                   conv::shift_significand(data_);
+        }
+    }
+
+    uint16 data_;
+};
+
+
+}  // namespace gko
+
+
+namespace std {
+
+
+template <>
+class complex<gko::half> {
+public:
+    using value_type = gko::half;
+
+    complex(const value_type& real = value_type(0.f),
+            const value_type& imag = value_type(0.f))
+        : real_(real), imag_(imag)
+    {}
+
+    template <typename T, typename U,
+              typename = std::enable_if_t<std::is_scalar<T>::value &&
+                                          std::is_scalar<U>::value>>
+    explicit complex(const T& real, const U& imag)
+        : real_(static_cast<value_type>(real)),
+          imag_(static_cast<value_type>(imag))
+    {}
+
+    template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
+    complex(const T& real)
+        : real_(static_cast<value_type>(real)),
+          imag_(static_cast<value_type>(0.f))
+    {}
+
+    // When using complex(real, imag), MSVC with CUDA try to recognize the
+    // complex is a member not constructor.
+    template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
+    explicit complex(const complex<T>& other)
+        : real_(static_cast<value_type>(other.real())),
+          imag_(static_cast<value_type>(other.imag()))
+    {}
+
+    value_type real() const noexcept { return real_; }
+
+    value_type imag() const noexcept { return imag_; }
+
+    operator std::complex<float>() const noexcept
+    {
+        return std::complex<float>(static_cast<float>(real_),
+                                   static_cast<float>(imag_));
+    }
+
+    template <typename V>
+    complex& operator=(const V& val)
+    {
+        real_ = val;
+        imag_ = value_type();
+        return *this;
+    }
+
+    template <typename V>
+    complex& operator=(const std::complex<V>& val)
+    {
+        real_ = val.real();
+        imag_ = val.imag();
+        return *this;
+    }
+
+    complex& operator+=(const value_type& real)
+    {
+        real_ += real;
+        return *this;
+    }
+
+    complex& operator-=(const value_type& real)
+    {
+        real_ -= real;
+        return *this;
+    }
+
+    complex& operator*=(const value_type& real)
+    {
+        real_ *= real;
+        imag_ *= real;
+        return *this;
+    }
+
+    complex& operator/=(const value_type& real)
+    {
+        real_ /= real;
+        imag_ /= real;
+        return *this;
+    }
+
+    template <typename T>
+    complex& operator+=(const complex<T>& val)
+    {
+        real_ += val.real();
+        imag_ += val.imag();
+        return *this;
+    }
+
+    template <typename T>
+    complex& operator-=(const complex<T>& val)
+    {
+        real_ -= val.real();
+        imag_ -= val.imag();
+        return *this;
+    }
+
+    template <typename T>
+    complex& operator*=(const complex<T>& val)
+    {
+        auto val_f = static_cast<std::complex<float>>(val);
+        auto result_f = static_cast<std::complex<float>>(*this);
+        result_f *= val_f;
+        real_ = result_f.real();
+        imag_ = result_f.imag();
+        return *this;
+    }
+
+    template <typename T>
+    complex& operator/=(const complex<T>& val)
+    {
+        auto val_f = static_cast<std::complex<float>>(val);
+        auto result_f = static_cast<std::complex<float>>(*this);
+        result_f /= val_f;
+        real_ = result_f.real();
+        imag_ = result_f.imag();
+        return *this;
+    }
+
+// It's for MacOS.
+// TODO: check whether mac compiler always use complex version even when real
+// half
+#define COMPLEX_HALF_OPERATOR(_op, _opeq)                                \
+    friend complex<gko::half> operator _op(const complex<gko::half> lhf, \
+                                           const complex<gko::half> rhf) \
+    {                                                                    \
+        auto a = lhf;                                                    \
+        a _opeq rhf;                                                     \
+        return a;                                                        \
+    }
+
+    COMPLEX_HALF_OPERATOR(+, +=)
+    COMPLEX_HALF_OPERATOR(-, -=)
+    COMPLEX_HALF_OPERATOR(*, *=)
+    COMPLEX_HALF_OPERATOR(/, /=)
+
+private:
+    value_type real_;
+    value_type imag_;
+};
+
+
+template <>
+struct numeric_limits<gko::half> {
+    static constexpr bool is_specialized{true};
+    static constexpr bool is_signed{true};
+    static constexpr bool is_integer{false};
+    static constexpr bool is_exact{false};
+    static constexpr bool is_bounded{true};
+    static constexpr bool is_modulo{false};
+    static constexpr int digits{
+        gko::detail::float_traits<gko::half>::significand_bits + 1};
+    // 3/10 is approx. log_10(2)
+    static constexpr int digits10{digits * 3 / 10};
+
+    static constexpr gko::half epsilon()
+    {
+        constexpr auto bits = static_cast<std::uint16_t>(0b0'00101'0000000000u);
+        return gko::half::create_from_bits(bits);
+    }
+
+    static constexpr gko::half infinity()
+    {
+        constexpr auto bits = static_cast<std::uint16_t>(0b0'11111'0000000000u);
+        return gko::half::create_from_bits(bits);
+    }
+
+    static constexpr gko::half min()
+    {
+        constexpr auto bits = static_cast<std::uint16_t>(0b0'00001'0000000000u);
+        return gko::half::create_from_bits(bits);
+    }
+
+    static constexpr gko::half max()
+    {
+        constexpr auto bits = static_cast<std::uint16_t>(0b0'11110'1111111111u);
+        return gko::half::create_from_bits(bits);
+    }
+
+    static constexpr gko::half lowest()
+    {
+        constexpr auto bits = static_cast<std::uint16_t>(0b1'11110'1111111111u);
+        return gko::half::create_from_bits(bits);
+    };
+
+    static constexpr gko::half quiet_NaN()
+    {
+        constexpr auto bits = static_cast<std::uint16_t>(0b0'11111'1111111111u);
+        return gko::half::create_from_bits(bits);
+    }
+};
+
+
+// complex using a template on operator= for any kind of complex<T>, so we can
+// do full specialization for half
+template <>
+inline complex<double>& complex<double>::operator=(
+    const std::complex<gko::half>& a)
+{
+    complex<double> t(a.real(), a.imag());
+    operator=(t);
+    return *this;
+}
+
+
+// For MSVC
+template <>
+inline complex<float>& complex<float>::operator=(
+    const std::complex<gko::half>& a)
+{
+    complex<float> t(a.real(), a.imag());
+    operator=(t);
+    return *this;
+}
+
+
+}  // namespace std
+
+
+#endif  // GKO_PUBLIC_CORE_BASE_HALF_HPP_
diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp
index e9cda520a19..2234232905a 100644
--- a/include/ginkgo/ginkgo.hpp
+++ b/include/ginkgo/ginkgo.hpp
@@ -25,6 +25,7 @@
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
 #include <ginkgo/core/base/fwd_decls.hpp>
+#include <ginkgo/core/base/half.hpp>
 #include <ginkgo/core/base/index_set.hpp>
 #include <ginkgo/core/base/intrinsics.hpp>
 #include <ginkgo/core/base/lin_op.hpp>

From c37b7beeb60241605bf9bde9f8d57cc40151482a Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Wed, 23 Oct 2024 00:31:30 +0200
Subject: [PATCH 02/13] half does not have constexpr constructor

---
 core/test/accessor/reduced_row_major_ginkgo.cpp | 17 ++++++++++-------
 core/test/utils.hpp                             | 11 ++++++-----
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/core/test/accessor/reduced_row_major_ginkgo.cpp b/core/test/accessor/reduced_row_major_ginkgo.cpp
index 7acad0b9638..41aaed54457 100644
--- a/core/test/accessor/reduced_row_major_ginkgo.cpp
+++ b/core/test/accessor/reduced_row_major_ginkgo.cpp
@@ -10,11 +10,12 @@
 
 #include <gtest/gtest.h>
 
+#include <ginkgo/core/base/half.hpp>  // necessary for gko::half
+
 #include "accessor/index_span.hpp"
 #include "accessor/range.hpp"
 #include "accessor/reduced_row_major.hpp"
 #include "accessor/utils.hpp"
-#include "core/base/extended_float.hpp"  // necessary for gko::half
 #include "core/test/utils.hpp"
 
 
@@ -33,12 +34,7 @@ class ReducedStorage3d : public ::testing::Test {
     using st_type =
         typename std::tuple_element<1, decltype(ArithmeticStorageType{})>::type;
     using rcar_type = gko::acc::remove_complex_t<ar_type>;
-    static constexpr rcar_type delta{
-        std::is_same<ar_type, st_type>::value
-            ? 0
-            : std::numeric_limits<
-                  gko::acc::remove_complex_t<st_type>>::epsilon() *
-                  1e1};
+    static const rcar_type delta;
 
     // Type for `check_accessor_correctness` to forward the indices
     using t = std::tuple<int, int, int>;
@@ -119,6 +115,13 @@ class ReducedStorage3d : public ::testing::Test {
     }
 };
 
+template <typename T>
+const typename ReducedStorage3d<T>::rcar_type ReducedStorage3d<T>::delta =
+    std::is_same<ar_type, st_type>::value
+        ? 0
+        : std::numeric_limits<gko::acc::remove_complex_t<st_type>>::epsilon() *
+              1e1;
+
 using ReducedStorage3dTypes =
     ::testing::Types<std::tuple<double, double>, std::tuple<double, float>,
                      std::tuple<float, float>, std::tuple<double, gko::half>,
diff --git a/core/test/utils.hpp b/core/test/utils.hpp
index cacc7191bbf..eee2900d731 100644
--- a/core/test/utils.hpp
+++ b/core/test/utils.hpp
@@ -365,15 +365,16 @@ template <typename Precision, typename OutputType>
 struct reduction_factor {
     using nc_output = remove_complex<OutputType>;
     using nc_precision = remove_complex<Precision>;
-    static constexpr nc_output value{
-        std::numeric_limits<nc_precision>::epsilon() * nc_output{10} *
-        (gko::is_complex<Precision>() ? nc_output{1.4142} : one<nc_output>())};
+
+    static const nc_output value;
 };
 
 
 template <typename Precision, typename OutputType>
-constexpr remove_complex<OutputType>
-    reduction_factor<Precision, OutputType>::value;
+const remove_complex<OutputType>
+    reduction_factor<Precision, OutputType>::value =
+        std::numeric_limits<nc_precision>::epsilon() * nc_output{10} *
+        (gko::is_complex<Precision>() ? nc_output{1.4142} : one<nc_output>());
 
 
 }  // namespace test

From afb108bb92c26ad11993c95f9ae90eb43b1e063d Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Tue, 22 Oct 2024 15:14:06 +0200
Subject: [PATCH 03/13] fix the undefined behavior and the issue from
 big-endian, and extract half to another test

---
 core/test/base/CMakeLists.txt     |   1 +
 core/test/base/extended_float.cpp | 232 ++----------------------
 core/test/base/half.cpp           | 285 ++++++++++++++++++++++++++++++
 3 files changed, 304 insertions(+), 214 deletions(-)
 create mode 100644 core/test/base/half.cpp

diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt
index d7deeec6fb7..50306c61455 100644
--- a/core/test/base/CMakeLists.txt
+++ b/core/test/base/CMakeLists.txt
@@ -14,6 +14,7 @@ ginkgo_create_test(exception EXECUTABLE_NAME exception_test) # exception collide
 ginkgo_create_test(exception_helpers)
 ginkgo_create_test(extended_float)
 ginkgo_create_test(executor)
+ginkgo_create_test(half)
 ginkgo_create_test(index_range)
 ginkgo_create_test(iterator_factory)
 ginkgo_create_test(lin_op)
diff --git a/core/test/base/extended_float.cpp b/core/test/base/extended_float.cpp
index 818843baa38..bdb7a58ed84 100644
--- a/core/test/base/extended_float.cpp
+++ b/core/test/base/extended_float.cpp
@@ -49,15 +49,28 @@ class ExtendedFloatTestBase : public ::testing::Test {
     static floating<N - 1> create_from_bits(const char (&s)[N])
     {
         auto bits = std::bitset<N - 1>(s).to_ullong();
-        return reinterpret_cast<floating<N - 1>&>(bits);
+        // We cast to the same size of integer type first.
+        // Otherwise, the first memory chunk is different when we use
+        // reinterpret_cast or memcpy to get the smaller type out of unsigned
+        // long long.
+        using bits_type =
+            typename gko::detail::float_traits<floating<N - 1>>::bits_type;
+        auto bits_val = static_cast<bits_type>(bits);
+        floating<N - 1> result;
+        static_assert(sizeof(floating<N - 1>) == sizeof(bits_type),
+                      "the type should have the same size as its bits_type");
+        std::memcpy(&result, &bits_val, sizeof(bits_type));
+        return result;
     }
 
     template <typename T>
     static std::bitset<sizeof(T) * byte_size> get_bits(T val)
     {
-        auto bits =
-            reinterpret_cast<typename gko::detail::float_traits<T>::bits_type&>(
-                val);
+        using bits_type = typename gko::detail::float_traits<T>::bits_type;
+        bits_type bits;
+        static_assert(sizeof(T) == sizeof(bits_type),
+                      "the type should have the same size as its bits_type");
+        std::memcpy(&bits, &val, sizeof(T));
         return std::bitset<sizeof(T) * byte_size>(bits);
     }
 
@@ -69,218 +82,9 @@ class ExtendedFloatTestBase : public ::testing::Test {
 };
 
 
-class FloatToHalf : public ExtendedFloatTestBase {};
-
-
-// clang-format does terrible formatting of string literal concatenation
-// clang-format off
-
-
-TEST_F(FloatToHalf, ConvertsOne)
-{
-    half x = create_from_bits("0" "01111111" "00000000000000000000000");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "01111" "0000000000"));
-}
-
-
-TEST_F(FloatToHalf, ConvertsZero)
-{
-    half x = create_from_bits("0" "00000000" "00000000000000000000000");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000000"));
-}
-
-
-TEST_F(FloatToHalf, ConvertsInf)
-{
-    half x = create_from_bits("0" "11111111" "00000000000000000000000");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "0000000000"));
-}
-
-
-TEST_F(FloatToHalf, ConvertsNegInf)
-{
-    half x = create_from_bits("1" "11111111" "00000000000000000000000");
-
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "0000000000"));
-}
-
-
-TEST_F(FloatToHalf, ConvertsNan)
-{
-    half x = create_from_bits("0" "11111111" "00000000000000000000001");
-
-    #if defined(SYCL_LANGUAGE_VERSION) 
-    // Sycl put the 1000000000, but ours put mask
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1000000000"));
-    #else
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1111111111"));
-    #endif
-}
-
-
-TEST_F(FloatToHalf, ConvertsNegNan)
-{
-    half x = create_from_bits("1" "11111111" "00010000000000000000000");
-
-    #if defined(SYCL_LANGUAGE_VERSION)
-    // Sycl put the 1000000000, but ours put mask
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1000000000"));
-    #else
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1111111111"));
-    #endif
-}
-
-
-TEST_F(FloatToHalf, FlushesToZero)
-{
-    half x = create_from_bits("0" "00000111" "00010001000100000001000");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000000"));
-}
-
-
-TEST_F(FloatToHalf, FlushesToNegZero)
-{
-    half x = create_from_bits("1" "00000010" "00010001000100000001000");
-
-    ASSERT_EQ(get_bits(x), get_bits("1" "00000" "0000000000"));
-}
-
-
-TEST_F(FloatToHalf, FlushesToInf)
-{
-    half x = create_from_bits("0" "10100000" "10010000000000010000100");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "0000000000"));
-}
-
-
-TEST_F(FloatToHalf, FlushesToNegInf)
-{
-    half x = create_from_bits("1" "11000000" "10010000000000010000100");
-
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "0000000000"));
-}
-
-
-TEST_F(FloatToHalf, TruncatesSmallNumber)
-{
-    half x = create_from_bits("0" "01110001" "10010000000000010000100");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "00001" "1001000000"));
-}
-
-
-TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven)
-{
-    half neg_x = create_from_bits("1" "10001110" "10010011111000010000100");
-    half neg_x2 = create_from_bits("1" "10001110" "10010011101000010000100");
-    half x = create_from_bits("0" "10001110" "10010011111000010000100");
-    half x2 = create_from_bits("0" "10001110" "10010011101000010000100");
-    half x3 = create_from_bits("0" "10001110" "10010011101000000000000");
-    half x4 = create_from_bits("0" "10001110" "10010011111000000000000");
-
-    EXPECT_EQ(get_bits(x), get_bits("0" "11110" "1001010000"));
-    EXPECT_EQ(get_bits(x2), get_bits("0" "11110" "1001001111"));
-    EXPECT_EQ(get_bits(x3), get_bits("0" "11110" "1001001110"));
-    EXPECT_EQ(get_bits(x4), get_bits("0" "11110" "1001010000"));
-    EXPECT_EQ(get_bits(neg_x), get_bits("1" "11110" "1001010000"));
-    EXPECT_EQ(get_bits(neg_x2), get_bits("1" "11110" "1001001111"));
-}
-
-
-// clang-format on
-
-
-class HalfToFloat : public ExtendedFloatTestBase {};
-
-
-// clang-format off
-
-
-TEST_F(HalfToFloat, ConvertsOne)
-{
-    float x = create_from_bits("0" "01111" "0000000000");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "01111111" "00000000000000000000000"));
-}
-
-
-TEST_F(HalfToFloat, ConvertsZero)
-{
-    float x = create_from_bits("0" "00000" "0000000000");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "00000000" "00000000000000000000000"));
-}
-
-
-TEST_F(HalfToFloat, ConvertsInf)
-{
-    float x = create_from_bits("0" "11111" "0000000000");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00000000000000000000000"));
-}
-
-
-TEST_F(HalfToFloat, ConvertsNegInf)
-{
-    float x = create_from_bits("1" "11111" "0000000000");
-
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000000000000000000"));
-}
-
-
-TEST_F(HalfToFloat, ConvertsNan)
-{
-    float x = create_from_bits("0" "11111" "0001001000");
-
-    #if defined(SYCL_LANGUAGE_VERSION) 
-    // sycl keeps significand
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00010010000000000000000"));
-    #else
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "11111111111111111111111"));
-    #endif
-}
-
-
-TEST_F(HalfToFloat, ConvertsNegNan)
-{
-    float x = create_from_bits("1" "11111" "0000000001");
-
-    #if defined(SYCL_LANGUAGE_VERSION) 
-    // sycl keeps significand
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000010000000000000"));
-    #else
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "11111111111111111111111"));
-    #endif
-}
-
-
-TEST_F(HalfToFloat, ExtendsSmallNumber)
-{
-    float x = create_from_bits("0" "00001" "1000010001");
-
-    ASSERT_EQ(get_bits(x), get_bits("0" "01110001" "10000100010000000000000"));
-}
-
-
-TEST_F(HalfToFloat, ExtendsLargeNumber)
-{
-    float x = create_from_bits("1" "11110" "1001001111");
-
-    ASSERT_EQ(get_bits(x), get_bits("1" "10001110" "10010011110000000000000"));
-}
-
-
-// clang-format on
-
-
 class TruncatedDouble : public ExtendedFloatTestBase {};
 
-
+// clang-format does terrible formatting of string literal concatenation
 // clang-format off
 
 
diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp
new file mode 100644
index 00000000000..f20bac0d47a
--- /dev/null
+++ b/core/test/base/half.cpp
@@ -0,0 +1,285 @@
+// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include <bitset>
+#include <cstring>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include <ginkgo/core/base/half.hpp>
+
+
+template <std::size_t N>
+struct floating_impl;
+
+template <>
+struct floating_impl<16> {
+    using type = gko::half;
+};
+
+template <>
+struct floating_impl<32> {
+    using type = float;
+};
+
+template <>
+struct floating_impl<64> {
+    using type = double;
+};
+
+template <std::size_t N>
+using floating = typename floating_impl<N>::type;
+
+
+class ExtendedFloatTestBase : public ::testing::Test {
+protected:
+    using half = gko::half;
+
+    static constexpr auto byte_size = gko::byte_size;
+
+    template <std::size_t N>
+    static floating<N - 1> create_from_bits(const char (&s)[N])
+    {
+        auto bits = std::bitset<N - 1>(s).to_ullong();
+        // We cast to the same size of integer type first.
+        // Otherwise, the first memory chunk is different when we use
+        // reinterpret_cast or memcpy to get the smaller type out of unsigned
+        // long long.
+        using bits_type =
+            typename gko::detail::float_traits<floating<N - 1>>::bits_type;
+        auto bits_val = static_cast<bits_type>(bits);
+        floating<N - 1> result;
+        static_assert(sizeof(floating<N - 1>) == sizeof(bits_type),
+                      "the type should have the same size as its bits_type");
+        std::memcpy(&result, &bits_val, sizeof(bits_type));
+        return result;
+    }
+
+    template <typename T>
+    static std::bitset<sizeof(T) * byte_size> get_bits(T val)
+    {
+        using bits_type = typename gko::detail::float_traits<T>::bits_type;
+        bits_type bits;
+        static_assert(sizeof(T) == sizeof(bits_type),
+                      "the type should have the same size as its bits_type");
+        std::memcpy(&bits, &val, sizeof(T));
+        return std::bitset<sizeof(T) * byte_size>(bits);
+    }
+
+    template <std::size_t N>
+    static std::bitset<N - 1> get_bits(const char (&s)[N])
+    {
+        return std::bitset<N - 1>(s);
+    }
+};
+
+
+class FloatToHalf : public ExtendedFloatTestBase {};
+
+
+// clang-format does terrible formatting of string literal concatenation
+// clang-format off
+
+
+TEST_F(FloatToHalf, ConvertsOne)
+{
+    half x = create_from_bits("0" "01111111" "00000000000000000000000");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "01111" "0000000000"));
+}
+
+
+TEST_F(FloatToHalf, ConvertsZero)
+{
+    half x = create_from_bits("0" "00000000" "00000000000000000000000");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000000"));
+}
+
+
+TEST_F(FloatToHalf, ConvertsInf)
+{
+    half x = create_from_bits("0" "11111111" "00000000000000000000000");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "0000000000"));
+}
+
+
+TEST_F(FloatToHalf, ConvertsNegInf)
+{
+    half x = create_from_bits("1" "11111111" "00000000000000000000000");
+
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "0000000000"));
+}
+
+
+TEST_F(FloatToHalf, ConvertsNan)
+{
+    half x = create_from_bits("0" "11111111" "00000000000000000000001");
+
+    #if defined(SYCL_LANGUAGE_VERSION) 
+    // Sycl put the 1000000000, but ours put mask
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1000000000"));
+    #else
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1111111111"));
+    #endif
+}
+
+
+TEST_F(FloatToHalf, ConvertsNegNan)
+{
+    half x = create_from_bits("1" "11111111" "00010000000000000000000");
+
+    #if defined(SYCL_LANGUAGE_VERSION)
+    // Sycl put the 1000000000, but ours put mask
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1000000000"));
+    #else
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1111111111"));
+    #endif
+}
+
+
+TEST_F(FloatToHalf, FlushesToZero)
+{
+    half x = create_from_bits("0" "00000111" "00010001000100000001000");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000000"));
+}
+
+
+TEST_F(FloatToHalf, FlushesToNegZero)
+{
+    half x = create_from_bits("1" "00000010" "00010001000100000001000");
+
+    ASSERT_EQ(get_bits(x), get_bits("1" "00000" "0000000000"));
+}
+
+
+TEST_F(FloatToHalf, FlushesToInf)
+{
+    half x = create_from_bits("0" "10100000" "10010000000000010000100");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "0000000000"));
+}
+
+
+TEST_F(FloatToHalf, FlushesToNegInf)
+{
+    half x = create_from_bits("1" "11000000" "10010000000000010000100");
+
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "0000000000"));
+}
+
+
+TEST_F(FloatToHalf, TruncatesSmallNumber)
+{
+    half x = create_from_bits("0" "01110001" "10010000000000010000100");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "00001" "1001000000"));
+}
+
+
+TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven)
+{
+    half neg_x = create_from_bits("1" "10001110" "10010011111000010000100");
+    half neg_x2 = create_from_bits("1" "10001110" "10010011101000010000100");
+    half x = create_from_bits("0" "10001110" "10010011111000010000100");
+    half x2 = create_from_bits("0" "10001110" "10010011101000010000100");
+    half x3 = create_from_bits("0" "10001110" "10010011101000000000000");
+    half x4 = create_from_bits("0" "10001110" "10010011111000000000000");
+
+    EXPECT_EQ(get_bits(x), get_bits("0" "11110" "1001010000"));
+    EXPECT_EQ(get_bits(x2), get_bits("0" "11110" "1001001111"));
+    EXPECT_EQ(get_bits(x3), get_bits("0" "11110" "1001001110"));
+    EXPECT_EQ(get_bits(x4), get_bits("0" "11110" "1001010000"));
+    EXPECT_EQ(get_bits(neg_x), get_bits("1" "11110" "1001010000"));
+    EXPECT_EQ(get_bits(neg_x2), get_bits("1" "11110" "1001001111"));
+}
+
+
+// clang-format on
+
+
+class HalfToFloat : public ExtendedFloatTestBase {};
+
+
+// clang-format off
+
+
+TEST_F(HalfToFloat, ConvertsOne)
+{
+    float x = create_from_bits("0" "01111" "0000000000");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "01111111" "00000000000000000000000"));
+}
+
+
+TEST_F(HalfToFloat, ConvertsZero)
+{
+    float x = create_from_bits("0" "00000" "0000000000");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "00000000" "00000000000000000000000"));
+}
+
+
+TEST_F(HalfToFloat, ConvertsInf)
+{
+    float x = create_from_bits("0" "11111" "0000000000");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00000000000000000000000"));
+}
+
+
+TEST_F(HalfToFloat, ConvertsNegInf)
+{
+    float x = create_from_bits("1" "11111" "0000000000");
+
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000000000000000000"));
+}
+
+
+TEST_F(HalfToFloat, ConvertsNan)
+{
+    float x = create_from_bits("0" "11111" "0001001000");
+
+    #if defined(SYCL_LANGUAGE_VERSION) 
+    // sycl keeps significand
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00010010000000000000000"));
+    #else
+    ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "11111111111111111111111"));
+    #endif
+}
+
+
+TEST_F(HalfToFloat, ConvertsNegNan)
+{
+    float x = create_from_bits("1" "11111" "0000000001");
+
+    #if defined(SYCL_LANGUAGE_VERSION) 
+    // sycl keeps significand
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000010000000000000"));
+    #else
+    ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "11111111111111111111111"));
+    #endif
+}
+
+
+TEST_F(HalfToFloat, ExtendsSmallNumber)
+{
+    float x = create_from_bits("0" "00001" "1000010001");
+
+    ASSERT_EQ(get_bits(x), get_bits("0" "01110001" "10000100010000000000000"));
+}
+
+
+TEST_F(HalfToFloat, ExtendsLargeNumber)
+{
+    float x = create_from_bits("1" "11110" "1001001111");
+
+    ASSERT_EQ(get_bits(x), get_bits("1" "10001110" "10010011110000000000000"));
+}
+
+
+// clang-format on

From 3b46e410b45d9f706bbdc01631e4953255a01250 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Tue, 22 Oct 2024 17:34:34 +0200
Subject: [PATCH 04/13] jacobi use __half in device not gko::half now

---
 .../jacobi_advanced_apply_kernels.instantiate.cpp      |  2 +-
 .../jacobi_generate_kernels.instantiate.cpp            | 10 +++++-----
 common/cuda_hip/preconditioner/jacobi_kernels.cpp      |  4 ++--
 .../jacobi_simple_apply_kernels.instantiate.cpp        |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.instantiate.cpp b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.instantiate.cpp
index 0ecc3d0d44b..131c530d2ee 100644
--- a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.instantiate.cpp
+++ b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernels.instantiate.cpp
@@ -90,7 +90,7 @@ __launch_bounds__(warps_per_block* config::warp_size) advanced_adaptive_apply(
         ValueType, block_precisions[block_id],
         multiply_vec<max_block_size>(
             subwarp, block_size, v,
-            reinterpret_cast<const resolved_precision*>(
+            reinterpret_cast<const device_type<resolved_precision>*>(
                 blocks + storage_scheme.get_group_offset(block_id)) +
                 storage_scheme.get_block_offset(block_id) +
                 subwarp.thread_rank(),
diff --git a/common/cuda_hip/preconditioner/jacobi_generate_kernels.instantiate.cpp b/common/cuda_hip/preconditioner/jacobi_generate_kernels.instantiate.cpp
index d004309c622..fdb0ad11e9e 100644
--- a/common/cuda_hip/preconditioner/jacobi_generate_kernels.instantiate.cpp
+++ b/common/cuda_hip/preconditioner/jacobi_generate_kernels.instantiate.cpp
@@ -68,7 +68,7 @@ __device__ __forceinline__ bool validate_precision_reduction_feasibility(
         }
     }
 
-    return succeeded && block_cond >= 1.0 &&
+    return succeeded && block_cond >= remove_complex<ValueType>{1.0} &&
            block_cond * static_cast<remove_complex<ValueType>>(
                             float_traits<remove_complex<ValueType>>::eps) <
                remove_complex<ValueType>{1e-3};
@@ -160,7 +160,7 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_generate(
                 accuracy, block_cond,
                 [&subwarp, &block_size, &row, &block_data, &storage_scheme,
                  &block_id] {
-                    using target = reduce_precision<ValueType>;
+                    using target = device_type<reduce_precision<ValueType>>;
                     return validate_precision_reduction_feasibility<
                         max_block_size, target>(
                         subwarp, block_size, row,
@@ -170,8 +170,8 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_generate(
                 },
                 [&subwarp, &block_size, &row, &block_data, &storage_scheme,
                  &block_id] {
-                    using target =
-                        reduce_precision<reduce_precision<ValueType>>;
+                    using target = device_type<
+                        reduce_precision<reduce_precision<ValueType>>>;
                     return validate_precision_reduction_feasibility<
                         max_block_size, target>(
                         subwarp, block_size, row,
@@ -195,7 +195,7 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_generate(
             ValueType, prec,
             copy_matrix<max_block_size, and_transpose>(
                 subwarp, block_size, row, 1, perm, trans_perm,
-                reinterpret_cast<resolved_precision*>(
+                reinterpret_cast<device_type<resolved_precision>*>(
                     block_data + storage_scheme.get_group_offset(block_id)) +
                     storage_scheme.get_block_offset(block_id),
                 storage_scheme.get_stride()));
diff --git a/common/cuda_hip/preconditioner/jacobi_kernels.cpp b/common/cuda_hip/preconditioner/jacobi_kernels.cpp
index f3b099e7c18..6f2d4ae3974 100644
--- a/common/cuda_hip/preconditioner/jacobi_kernels.cpp
+++ b/common/cuda_hip/preconditioner/jacobi_kernels.cpp
@@ -206,11 +206,11 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_transpose_jacobi(
     GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION(
         ValueType, block_precisions[block_id],
         auto local_block =
-            reinterpret_cast<const resolved_precision*>(
+            reinterpret_cast<const device_type<resolved_precision>*>(
                 blocks + storage_scheme.get_group_offset(block_id)) +
             storage_scheme.get_block_offset(block_id);
         auto local_out_block =
-            reinterpret_cast<resolved_precision*>(
+            reinterpret_cast<device_type<resolved_precision>*>(
                 out_blocks + storage_scheme.get_group_offset(block_id)) +
             storage_scheme.get_block_offset(block_id);
         for (int i = rank; i < block_size * block_size; i += subwarp_size) {
diff --git a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.instantiate.cpp b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.instantiate.cpp
index 734385970e3..faf869718a6 100644
--- a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.instantiate.cpp
+++ b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernels.instantiate.cpp
@@ -84,7 +84,7 @@ __global__ void __launch_bounds__(warps_per_block* config::warp_size)
         ValueType, block_precisions[block_id],
         multiply_vec<max_block_size>(
             subwarp, block_size, v,
-            reinterpret_cast<const resolved_precision*>(
+            reinterpret_cast<const device_type<resolved_precision>*>(
                 blocks + storage_scheme.get_group_offset(block_id)) +
                 storage_scheme.get_block_offset(block_id) +
                 subwarp.thread_rank(),

From 685331c0c8feedddf0ab41a683f6bd62460cc435 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Tue, 22 Oct 2024 18:02:57 +0200
Subject: [PATCH 05/13] type map

---
 accessor/cuda_helper.hpp          | 15 ++++++++++++-
 accessor/hip_helper.hpp           | 14 +++++++++++-
 cuda/base/types.hpp               | 36 +++++++++++++++++++++++++------
 hip/base/types.hip.hpp            | 33 ++++++++++++++++++++++++----
 include/ginkgo/core/base/math.hpp |  9 +++++++-
 5 files changed, 94 insertions(+), 13 deletions(-)

diff --git a/accessor/cuda_helper.hpp b/accessor/cuda_helper.hpp
index 31d3599516d..3efc6eb22b7 100644
--- a/accessor/cuda_helper.hpp
+++ b/accessor/cuda_helper.hpp
@@ -17,7 +17,15 @@
 #include "utils.hpp"
 
 
+struct __half;
+
+
 namespace gko {
+
+
+class half;
+
+
 namespace acc {
 namespace detail {
 
@@ -27,6 +35,11 @@ struct cuda_type {
     using type = T;
 };
 
+template <>
+struct cuda_type<gko::half> {
+    using type = __half;
+};
+
 // Unpack cv and reference / pointer qualifiers
 template <typename T>
 struct cuda_type<const T> {
@@ -57,7 +70,7 @@ struct cuda_type<T&&> {
 // Transform std::complex to thrust::complex
 template <typename T>
 struct cuda_type<std::complex<T>> {
-    using type = thrust::complex<T>;
+    using type = thrust::complex<typename cuda_type<T>::type>;
 };
 
 
diff --git a/accessor/hip_helper.hpp b/accessor/hip_helper.hpp
index 6b76b726c10..8827fd6eb11 100644
--- a/accessor/hip_helper.hpp
+++ b/accessor/hip_helper.hpp
@@ -17,7 +17,15 @@
 #include "utils.hpp"
 
 
+struct __half;
+
+
 namespace gko {
+
+
+class half;
+
+
 namespace acc {
 namespace detail {
 
@@ -53,11 +61,15 @@ struct hip_type<T&&> {
     using type = typename hip_type<T>::type&&;
 };
 
+template <>
+struct hip_type<gko::half> {
+    using type = __half;
+};
 
 // Transform std::complex to thrust::complex
 template <typename T>
 struct hip_type<std::complex<T>> {
-    using type = thrust::complex<T>;
+    using type = thrust::complex<typename hip_type<T>::type>;
 };
 
 
diff --git a/cuda/base/types.hpp b/cuda/base/types.hpp
index a4a2b877c28..05f07ceb8dd 100644
--- a/cuda/base/types.hpp
+++ b/cuda/base/types.hpp
@@ -14,20 +14,17 @@
 #include <cusparse.h>
 #include <thrust/complex.h>
 
+#include <ginkgo/core/base/half.hpp>
 #include <ginkgo/core/base/matrix_data.hpp>
 #include <ginkgo/core/base/types.hpp>
 
 
 namespace gko {
 
-
 namespace kernels {
 namespace cuda {
-
-
 namespace detail {
 
-
 /**
  * @internal
  *
@@ -124,6 +121,17 @@ struct culibs_type_impl<std::complex<double>> {
     using type = cuDoubleComplex;
 };
 
+
+template <>
+struct culibs_type_impl<half> {
+    using type = __half;
+};
+
+template <>
+struct culibs_type_impl<std::complex<half>> {
+    using type = __half2;
+};
+
 template <typename T>
 struct culibs_type_impl<thrust::complex<T>> {
     using type = typename culibs_type_impl<std::complex<T>>::type;
@@ -154,9 +162,14 @@ struct cuda_type_impl<volatile T> {
     using type = volatile typename cuda_type_impl<T>::type;
 };
 
+template <>
+struct cuda_type_impl<half> {
+    using type = __half;
+};
+
 template <typename T>
 struct cuda_type_impl<std::complex<T>> {
-    using type = thrust::complex<T>;
+    using type = thrust::complex<typename cuda_type_impl<T>::type>;
 };
 
 template <>
@@ -169,6 +182,11 @@ struct cuda_type_impl<cuComplex> {
     using type = thrust::complex<float>;
 };
 
+template <>
+struct cuda_type_impl<__half2> {
+    using type = thrust::complex<__half>;
+};
+
 template <typename T>
 struct cuda_struct_member_type_impl {
     using type = T;
@@ -176,7 +194,12 @@ struct cuda_struct_member_type_impl {
 
 template <typename T>
 struct cuda_struct_member_type_impl<std::complex<T>> {
-    using type = fake_complex<T>;
+    using type = fake_complex<typename cuda_struct_member_type_impl<T>::type>;
+};
+
+template <>
+struct cuda_struct_member_type_impl<gko::half> {
+    using type = __half;
 };
 
 template <typename ValueType, typename IndexType>
@@ -200,6 +223,7 @@ GKO_CUDA_DATA_TYPE(float, CUDA_R_32F);
 GKO_CUDA_DATA_TYPE(double, CUDA_R_64F);
 GKO_CUDA_DATA_TYPE(std::complex<float>, CUDA_C_32F);
 GKO_CUDA_DATA_TYPE(std::complex<double>, CUDA_C_64F);
+GKO_CUDA_DATA_TYPE(std::complex<float16>, CUDA_C_16F);
 GKO_CUDA_DATA_TYPE(int32, CUDA_R_32I);
 GKO_CUDA_DATA_TYPE(int8, CUDA_R_8I);
 
diff --git a/hip/base/types.hip.hpp b/hip/base/types.hip.hpp
index bb0d4a2d0c9..c3982b7562e 100644
--- a/hip/base/types.hip.hpp
+++ b/hip/base/types.hip.hpp
@@ -21,14 +21,13 @@
 #endif
 #include <thrust/complex.h>
 
+#include <ginkgo/core/base/half.hpp>
 #include <ginkgo/core/base/matrix_data.hpp>
 
 #include "common/cuda_hip/base/runtime.hpp"
 
 
 namespace gko {
-
-
 namespace kernels {
 namespace hip {
 namespace detail {
@@ -130,6 +129,17 @@ struct hiplibs_type_impl<std::complex<double>> {
     using type = hipDoubleComplex;
 };
 
+template <>
+struct hiplibs_type_impl<half> {
+    using type = __half;
+};
+
+template <>
+struct hiplibs_type_impl<std::complex<half>> {
+    using type = __half2;
+};
+
+
 template <typename T>
 struct hiplibs_type_impl<thrust::complex<T>> {
     using type = typename hiplibs_type_impl<std::complex<T>>::type;
@@ -202,9 +212,14 @@ struct hip_type_impl<volatile T> {
     using type = volatile typename hip_type_impl<T>::type;
 };
 
+template <>
+struct hip_type_impl<gko::half> {
+    using type = __half;
+};
+
 template <typename T>
 struct hip_type_impl<std::complex<T>> {
-    using type = thrust::complex<T>;
+    using type = thrust::complex<typename hip_type_impl<T>::type>;
 };
 
 template <>
@@ -217,6 +232,11 @@ struct hip_type_impl<hipComplex> {
     using type = thrust::complex<float>;
 };
 
+template <>
+struct hip_type_impl<__half2> {
+    using type = thrust::complex<__half>;
+};
+
 template <typename T>
 struct hip_struct_member_type_impl {
     using type = T;
@@ -224,7 +244,12 @@ struct hip_struct_member_type_impl {
 
 template <typename T>
 struct hip_struct_member_type_impl<std::complex<T>> {
-    using type = fake_complex<T>;
+    using type = fake_complex<typename hip_struct_member_type_impl<T>::type>;
+};
+
+template <>
+struct hip_struct_member_type_impl<gko::half> {
+    using type = __half;
 };
 
 template <typename ValueType, typename IndexType>
diff --git a/include/ginkgo/core/base/math.hpp b/include/ginkgo/core/base/math.hpp
index cd5e489b95d..5e15bb05d6a 100644
--- a/include/ginkgo/core/base/math.hpp
+++ b/include/ginkgo/core/base/math.hpp
@@ -21,6 +21,9 @@
 namespace gko {
 
 
+class half;
+
+
 // HIP should not see std::abs or std::sqrt, we want the custom implementation.
 // Hence, provide the using declaration only for some cases
 namespace kernels {
@@ -151,8 +154,12 @@ struct is_complex_impl<std::complex<T>>
 template <typename T>
 struct is_complex_or_scalar_impl : std::is_scalar<T> {};
 
+template <>
+struct is_complex_or_scalar_impl<half> : std::true_type {};
+
 template <typename T>
-struct is_complex_or_scalar_impl<std::complex<T>> : std::is_scalar<T> {};
+struct is_complex_or_scalar_impl<std::complex<T>>
+    : is_complex_or_scalar_impl<T> {};
 
 
 /**

From 9c51790640fca65ec8d8a1c76ab27aa7478f7a08 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Wed, 23 Oct 2024 13:50:49 +0200
Subject: [PATCH 06/13] fix error: non-constant-expression cannot be narrowed

---
 reference/test/base/batch_multi_vector_kernels.cpp |  4 ++--
 reference/test/matrix/coo_kernels.cpp              | 14 ++++++++------
 reference/test/matrix/csr_kernels.cpp              | 14 ++++++++------
 reference/test/matrix/dense_kernels.cpp            |  4 ++--
 reference/test/matrix/diagonal_kernels.cpp         | 14 ++++++++------
 reference/test/matrix/ell_kernels.cpp              | 14 ++++++++------
 reference/test/matrix/fbcsr_kernels.cpp            | 14 ++++++++------
 reference/test/matrix/hybrid_kernels.cpp           | 14 ++++++++------
 reference/test/matrix/sellp_kernels.cpp            | 14 ++++++++------
 test/mpi/matrix.cpp                                |  4 ++--
 10 files changed, 62 insertions(+), 48 deletions(-)

diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp
index e673046a490..694ae491ef4 100644
--- a/reference/test/base/batch_multi_vector_kernels.cpp
+++ b/reference/test/base/batch_multi_vector_kernels.cpp
@@ -349,7 +349,7 @@ TYPED_TEST(MultiVector, ConvertsToPrecision)
     // If OtherT is more precise: 0, otherwise r
     auto residual = r<OtherT>::value < r<T>::value
                         ? gko::remove_complex<T>{0}
-                        : gko::remove_complex<T>{r<OtherT>::value};
+                        : static_cast<gko::remove_complex<T>>(r<OtherT>::value);
 
     this->mtx_1->convert_to(tmp.get());
     tmp->convert_to(res.get());
@@ -373,7 +373,7 @@ TYPED_TEST(MultiVector, MovesToPrecision)
     // If OtherT is more precise: 0, otherwise r
     auto residual = r<OtherT>::value < r<T>::value
                         ? gko::remove_complex<T>{0}
-                        : gko::remove_complex<T>{r<OtherT>::value};
+                        : static_cast<gko::remove_complex<T>>(r<OtherT>::value);
 
     this->mtx_1->move_to(tmp.get());
     tmp->move_to(res.get());
diff --git a/reference/test/matrix/coo_kernels.cpp b/reference/test/matrix/coo_kernels.cpp
index 42b68d1cb4c..fcca61a33d4 100644
--- a/reference/test/matrix/coo_kernels.cpp
+++ b/reference/test/matrix/coo_kernels.cpp
@@ -85,9 +85,10 @@ TYPED_TEST(Coo, ConvertsToPrecision)
     auto tmp = OtherCoo::create(this->exec);
     auto res = Coo::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx->convert_to(tmp);
     tmp->convert_to(res);
@@ -106,9 +107,10 @@ TYPED_TEST(Coo, MovesToPrecision)
     auto tmp = OtherCoo::create(this->exec);
     auto res = Coo::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx->move_to(tmp);
     tmp->move_to(res);
diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp
index 2d4c61786ad..2dd68bd9239 100644
--- a/reference/test/matrix/csr_kernels.cpp
+++ b/reference/test/matrix/csr_kernels.cpp
@@ -794,9 +794,10 @@ TYPED_TEST(Csr, ConvertsToPrecision)
     auto tmp = OtherCsr::create(this->exec);
     auto res = Csr::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     // use mtx2 as mtx's strategy would involve creating a CudaExecutor
     this->mtx2->convert_to(tmp);
@@ -819,9 +820,10 @@ TYPED_TEST(Csr, MovesToPrecision)
     auto tmp = OtherCsr::create(this->exec);
     auto res = Csr::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     // use mtx2 as mtx's strategy would involve creating a CudaExecutor
     this->mtx2->move_to(tmp);
diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp
index 41294c89d49..51b0aa148fd 100644
--- a/reference/test/matrix/dense_kernels.cpp
+++ b/reference/test/matrix/dense_kernels.cpp
@@ -753,7 +753,7 @@ TYPED_TEST(Dense, ConvertsToPrecision)
     // If OtherT is more precise: 0, otherwise r
     auto residual = r<OtherT>::value < r<T>::value
                         ? gko::remove_complex<T>{0}
-                        : gko::remove_complex<T>{r<OtherT>::value};
+                        : static_cast<gko::remove_complex<T>>(r<OtherT>::value);
 
     this->mtx1->convert_to(tmp);
     tmp->convert_to(res);
@@ -773,7 +773,7 @@ TYPED_TEST(Dense, MovesToPrecision)
     // If OtherT is more precise: 0, otherwise r
     auto residual = r<OtherT>::value < r<T>::value
                         ? gko::remove_complex<T>{0}
-                        : gko::remove_complex<T>{r<OtherT>::value};
+                        : static_cast<gko::remove_complex<T>>(r<OtherT>::value);
 
     this->mtx1->move_to(tmp);
     tmp->move_to(res);
diff --git a/reference/test/matrix/diagonal_kernels.cpp b/reference/test/matrix/diagonal_kernels.cpp
index 208c9d98639..b0932c7eb66 100644
--- a/reference/test/matrix/diagonal_kernels.cpp
+++ b/reference/test/matrix/diagonal_kernels.cpp
@@ -91,9 +91,10 @@ TYPED_TEST(Diagonal, ConvertsToPrecision)
     auto tmp = OtherDiagonal::create(this->exec);
     auto res = Diagonal::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->diag1->convert_to(tmp);
     tmp->convert_to(res);
@@ -111,9 +112,10 @@ TYPED_TEST(Diagonal, MovesToPrecision)
     auto tmp = OtherDiagonal::create(this->exec);
     auto res = Diagonal::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->diag1->move_to(tmp);
     tmp->move_to(res);
diff --git a/reference/test/matrix/ell_kernels.cpp b/reference/test/matrix/ell_kernels.cpp
index c96dcae773a..e1eef9f087c 100644
--- a/reference/test/matrix/ell_kernels.cpp
+++ b/reference/test/matrix/ell_kernels.cpp
@@ -449,9 +449,10 @@ TYPED_TEST(Ell, ConvertsToPrecision)
     auto tmp = OtherEll::create(this->exec);
     auto res = Ell::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx1->convert_to(tmp);
     tmp->convert_to(res);
@@ -470,9 +471,10 @@ TYPED_TEST(Ell, MovesToPrecision)
     auto tmp = OtherEll::create(this->exec);
     auto res = Ell::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx1->move_to(tmp);
     tmp->move_to(res);
diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp
index cd82bade8b7..f7c6d2197ef 100644
--- a/reference/test/matrix/fbcsr_kernels.cpp
+++ b/reference/test/matrix/fbcsr_kernels.cpp
@@ -277,9 +277,10 @@ TYPED_TEST(Fbcsr, ConvertsToPrecision)
     auto tmp = OtherFbcsr::create(this->exec);
     auto res = Fbcsr::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx->convert_to(tmp);
     tmp->convert_to(res);
@@ -298,9 +299,10 @@ TYPED_TEST(Fbcsr, MovesToPrecision)
     auto tmp = OtherFbcsr::create(this->exec);
     auto res = Fbcsr::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx->move_to(tmp);
     tmp->move_to(res);
diff --git a/reference/test/matrix/hybrid_kernels.cpp b/reference/test/matrix/hybrid_kernels.cpp
index 014b5bb1024..754e599b8fe 100644
--- a/reference/test/matrix/hybrid_kernels.cpp
+++ b/reference/test/matrix/hybrid_kernels.cpp
@@ -239,9 +239,10 @@ TYPED_TEST(Hybrid, ConvertsToPrecision)
     auto tmp = OtherHybrid::create(this->exec);
     auto res = Hybrid::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx1->convert_to(tmp);
     tmp->convert_to(res);
@@ -260,9 +261,10 @@ TYPED_TEST(Hybrid, MovesToPrecision)
     auto tmp = OtherHybrid::create(this->exec);
     auto res = Hybrid::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx1->move_to(tmp);
     tmp->move_to(res);
diff --git a/reference/test/matrix/sellp_kernels.cpp b/reference/test/matrix/sellp_kernels.cpp
index 18cf793c7f3..a39d8e16832 100644
--- a/reference/test/matrix/sellp_kernels.cpp
+++ b/reference/test/matrix/sellp_kernels.cpp
@@ -195,9 +195,10 @@ TYPED_TEST(Sellp, ConvertsToPrecision)
     auto tmp = OtherSellp::create(this->exec);
     auto res = Sellp::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx1->convert_to(tmp);
     tmp->convert_to(res);
@@ -216,9 +217,10 @@ TYPED_TEST(Sellp, MovesToPrecision)
     auto tmp = OtherSellp::create(this->exec);
     auto res = Sellp::create(this->exec);
     // If OtherType is more precise: 0, otherwise r
-    auto residual = r<OtherType>::value < r<ValueType>::value
-                        ? gko::remove_complex<ValueType>{0}
-                        : gko::remove_complex<ValueType>{r<OtherType>::value};
+    auto residual =
+        r<OtherType>::value < r<ValueType>::value
+            ? gko::remove_complex<ValueType>{0}
+            : static_cast<gko::remove_complex<ValueType>>(r<OtherType>::value);
 
     this->mtx1->move_to(tmp);
     tmp->move_to(res);
diff --git a/test/mpi/matrix.cpp b/test/mpi/matrix.cpp
index 0cfb3aca477..88fe4092668 100644
--- a/test/mpi/matrix.cpp
+++ b/test/mpi/matrix.cpp
@@ -741,7 +741,7 @@ TYPED_TEST(Matrix, CanConvertToNextPrecision)
     // If OtherT is more precise: 0, otherwise r
     auto residual = r<OtherT>::value < r<T>::value
                         ? gko::remove_complex<T>{0}
-                        : gko::remove_complex<T>{r<OtherT>::value};
+                        : static_cast<gko::remove_complex<T>>(r<OtherT>::value);
 
     this->dist_mat->convert_to(tmp);
     tmp->convert_to(res);
@@ -768,7 +768,7 @@ TYPED_TEST(Matrix, CanMoveToNextPrecision)
     // If OtherT is more precise: 0, otherwise r
     auto residual = r<OtherT>::value < r<T>::value
                         ? gko::remove_complex<T>{0}
-                        : gko::remove_complex<T>{r<OtherT>::value};
+                        : static_cast<gko::remove_complex<T>>(r<OtherT>::value);
 
     this->dist_mat->move_to(tmp);
     tmp->convert_to(res);

From 6317e3f67fcc8a304eda5172372cbd215ef62243 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Wed, 23 Oct 2024 13:57:12 +0200
Subject: [PATCH 07/13] update gdb-ginkgo

Co-authored-by: Marcel Koch <marcel.koch@kit.edu>
---
 dev_tools/scripts/gdb-ginkgo.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/dev_tools/scripts/gdb-ginkgo.py b/dev_tools/scripts/gdb-ginkgo.py
index d3de8f09a25..122d177031f 100644
--- a/dev_tools/scripts/gdb-ginkgo.py
+++ b/dev_tools/scripts/gdb-ginkgo.py
@@ -51,6 +51,7 @@ def next(self):
 
 _versioned_namespace = '__8::'
 
+
 # new version adapted from https://gcc.gnu.org/pipermail/gcc-cvs/2021-November/356230.html
 # necessary due to empty class optimization
 def is_specialization_of(x, template_name):
@@ -64,6 +65,7 @@ def is_specialization_of(x, template_name):
         expr = '^std::{}<.*>$'.format(template_name)
     return re.match(expr, x) is not None
 
+
 def get_template_arg_list(type_obj):
     "Return a type's template arguments as a list"
     n = 0
@@ -75,6 +77,7 @@ def get_template_arg_list(type_obj):
             return template_args
         n += 1
 
+
 def _tuple_impl_get(val):
     "Return the tuple element stored in a _Tuple_impl<N, T> base class."
     bases = val.type.fields()
@@ -95,6 +98,7 @@ def _tuple_impl_get(val):
     else:
         raise ValueError("Unsupported implementation for std::tuple: %s" % str(val.type))
 
+
 def tuple_get(n, val):
     "Return the result of std::get<n>(val) on a std::tuple"
     tuple_size = len(get_template_arg_list(val.type))
@@ -108,6 +112,7 @@ def tuple_get(n, val):
         n -= 1
     return _tuple_impl_get(node)
 
+
 def get_unique_ptr_data_ptr(val):
     "Return the result of val.get() on a std::unique_ptr"
     # std::unique_ptr<T, D> contains a std::tuple<D::pointer, D>,
@@ -220,12 +225,28 @@ def display_hint(self):
         return 'array'
 
 
+class GkoHalfPrinter:
+    "Print a gko::half"
+
+    def __init__(self, val):
+        # GDB doesn't seem to consider the user-defined conversion in its Value.cast,
+        # so we need to call the conversion operator explicitly
+        address = hex(val.address)
+        self.float_val = gdb.parse_and_eval(f"reinterpret_cast<gko::half*>({address})->operator float()")
+
+    def to_string(self):
+        self.float_val.fetch_lazy()
+        return self.float_val
+
+
 def lookup_type(val):
     if not str(val.type.unqualified()).startswith('gko::'):
         return None
     suffix = str(val.type.unqualified())[5:]
     if suffix.startswith('array<') and val.type.code == gdb.TYPE_CODE_STRUCT:
         return GkoArrayPrinter(val)
+    if suffix.startswith("half") and val.type.code == gdb.TYPE_CODE_STRUCT:
+        return GkoHalfPrinter(val)
     return None
 
 

From b8f45849ec01dfb288e391e91327343b9128dd23 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Thu, 24 Oct 2024 02:28:45 +0200
Subject: [PATCH 08/13] make half not rely on type

---
 core/test/base/half.cpp            |  2 +-
 include/ginkgo/core/base/half.hpp  | 51 ++++++++++++++++--------------
 include/ginkgo/core/base/types.hpp |  5 ++-
 3 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp
index f20bac0d47a..51d3e60ce40 100644
--- a/core/test/base/half.cpp
+++ b/core/test/base/half.cpp
@@ -37,7 +37,7 @@ class ExtendedFloatTestBase : public ::testing::Test {
 protected:
     using half = gko::half;
 
-    static constexpr auto byte_size = gko::byte_size;
+    static constexpr auto byte_size = gko::detail::byte_size;
 
     template <std::size_t N>
     static floating<N - 1> create_from_bits(const char (&s)[N])
diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp
index 25a38abb6eb..fb5761c51fb 100644
--- a/include/ginkgo/core/base/half.hpp
+++ b/include/ginkgo/core/base/half.hpp
@@ -6,13 +6,12 @@
 #define GKO_PUBLIC_CORE_BASE_HALF_HPP_
 
 
+#include <climits>
 #include <complex>
+#include <cstdint>
 #include <cstring>
 #include <type_traits>
 
-#include <ginkgo/core/base/std_extensions.hpp>
-#include <ginkgo/core/base/types.hpp>
-
 
 class __half;
 
@@ -20,29 +19,34 @@ class __half;
 namespace gko {
 
 
-template <typename, size_type, size_type>
+template <typename, std::size_t, std::size_t>
 class truncated;
 
 
+class half;
+
+
 namespace detail {
 
 
+constexpr std::size_t byte_size = CHAR_BIT;
+
 template <std::size_t, typename = void>
 struct uint_of_impl {};
 
 template <std::size_t Bits>
 struct uint_of_impl<Bits, std::enable_if_t<(Bits <= 16)>> {
-    using type = uint16;
+    using type = std::uint16_t;
 };
 
 template <std::size_t Bits>
 struct uint_of_impl<Bits, std::enable_if_t<(16 < Bits && Bits <= 32)>> {
-    using type = uint32;
+    using type = std::uint32_t;
 };
 
 template <std::size_t Bits>
 struct uint_of_impl<Bits, std::enable_if_t<(32 < Bits) && (Bits <= 64)>> {
-    using type = uint64;
+    using type = std::uint64_t;
 };
 
 template <std::size_t Bits>
@@ -53,8 +57,8 @@ template <typename T>
 struct basic_float_traits {};
 
 template <>
-struct basic_float_traits<float16> {
-    using type = float16;
+struct basic_float_traits<half> {
+    using type = half;
     static constexpr int sign_bits = 1;
     static constexpr int significand_bits = 10;
     static constexpr int exponent_bits = 5;
@@ -71,8 +75,8 @@ struct basic_float_traits<__half> {
 };
 
 template <>
-struct basic_float_traits<float32> {
-    using type = float32;
+struct basic_float_traits<float> {
+    using type = float;
     static constexpr int sign_bits = 1;
     static constexpr int significand_bits = 23;
     static constexpr int exponent_bits = 8;
@@ -80,15 +84,16 @@ struct basic_float_traits<float32> {
 };
 
 template <>
-struct basic_float_traits<float64> {
-    using type = float64;
+struct basic_float_traits<double> {
+    using type = double;
     static constexpr int sign_bits = 1;
     static constexpr int significand_bits = 52;
     static constexpr int exponent_bits = 11;
     static constexpr bool rounds_to_nearest = true;
 };
 
-template <typename FloatType, size_type NumComponents, size_type ComponentId>
+template <typename FloatType, std::size_t NumComponents,
+          std::size_t ComponentId>
 struct basic_float_traits<truncated<FloatType, NumComponents, ComponentId>> {
     using type = truncated<FloatType, NumComponents, ComponentId>;
     static constexpr int sign_bits = ComponentId == 0 ? 1 : 0;
@@ -281,7 +286,7 @@ struct precision_converter<SourceType, ResultType, false> {
 class half {
 public:
     // create half value from the bits directly.
-    static constexpr half create_from_bits(uint16 bits) noexcept
+    static constexpr half create_from_bits(std::uint16_t bits) noexcept
     {
         half result;
         result.data_ = bits;
@@ -376,19 +381,19 @@ class half {
     }
 
 private:
-    using f16_traits = detail::float_traits<float16>;
-    using f32_traits = detail::float_traits<float32>;
+    using f16_traits = detail::float_traits<half>;
+    using f32_traits = detail::float_traits<float>;
 
     void float2half(float val) noexcept
     {
-        uint32 bit_val(0);
+        std::uint32_t bit_val(0);
         std::memcpy(&bit_val, &val, sizeof(float));
         data_ = float2half(bit_val);
     }
 
-    static constexpr uint16 float2half(uint32 data_) noexcept
+    static constexpr std::uint16_t float2half(std::uint32_t data_) noexcept
     {
-        using conv = detail::precision_converter<float32, float16>;
+        using conv = detail::precision_converter<float, half>;
         if (f32_traits::is_inf(data_)) {
             return conv::shift_sign(data_) | f16_traits::exponent_mask;
         } else if (f32_traits::is_nan(data_)) {
@@ -417,9 +422,9 @@ class half {
         }
     }
 
-    static constexpr uint32 half2float(uint16 data_) noexcept
+    static constexpr std::uint32_t half2float(std::uint16_t data_) noexcept
     {
-        using conv = detail::precision_converter<float16, float32>;
+        using conv = detail::precision_converter<half, float>;
         if (f16_traits::is_inf(data_)) {
             return conv::shift_sign(data_) | f32_traits::exponent_mask;
         } else if (f16_traits::is_nan(data_)) {
@@ -434,7 +439,7 @@ class half {
         }
     }
 
-    uint16 data_;
+    std::uint16_t data_;
 };
 
 
diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp
index 72dd8a93584..1d5963c0fe8 100644
--- a/include/ginkgo/core/base/types.hpp
+++ b/include/ginkgo/core/base/types.hpp
@@ -17,6 +17,8 @@
 #include <string>
 #include <type_traits>
 
+#include <ginkgo/core/base/half.hpp>
+
 
 #ifdef __HIPCC__
 #include <hip/hip_runtime.h>
@@ -138,9 +140,6 @@ using uint64 = std::uint64_t;
 using uintptr = std::uintptr_t;
 
 
-class half;
-
-
 /**
  * Half precision floating point type.
  */

From 8508fbb97010e1b8a819999a5989c61b3bf4c4f8 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Thu, 24 Oct 2024 14:02:22 +0200
Subject: [PATCH 09/13] collect the reused part and undef after usage

Co-authored-by: Marcel Koch <marcel.koch@kit.edu>
---
 .pre-commit-config.yaml                |   4 +
 core/test/base/extended_float.cpp      | 108 ++++------------------
 core/test/base/floating_bit_helper.hpp |  82 +++++++++++++++++
 core/test/base/half.cpp                | 123 +++++--------------------
 include/ginkgo/core/base/half.hpp      |   7 ++
 5 files changed, 133 insertions(+), 191 deletions(-)
 create mode 100644 core/test/base/floating_bit_helper.hpp

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fca3a1ef28f..8eccb113759 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,4 +1,8 @@
 repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+    -  id: end-of-file-fixer
 - repo: https://github.com/pre-commit/mirrors-clang-format
   rev: 'v14.0.0'  # The default in Ubuntu 22.04, which is used in our CI
   hooks:
diff --git a/core/test/base/extended_float.cpp b/core/test/base/extended_float.cpp
index bdb7a58ed84..764f5fc0c8d 100644
--- a/core/test/base/extended_float.cpp
+++ b/core/test/base/extended_float.cpp
@@ -9,86 +9,22 @@
 
 #include <gtest/gtest.h>
 
-#include <ginkgo/core/base/half.hpp>
-
-
-namespace {
-
-
-template <std::size_t N>
-struct floating_impl;
-
-template <>
-struct floating_impl<16> {
-    using type = gko::half;
-};
-
-template <>
-struct floating_impl<32> {
-    using type = float;
-};
-
-template <>
-struct floating_impl<64> {
-    using type = double;
-};
-
-template <std::size_t N>
-using floating = typename floating_impl<N>::type;
-
-
-class ExtendedFloatTestBase : public ::testing::Test {
-protected:
-    using half = gko::half;
-    template <typename T, std::size_t NumComponents, std::size_t ComponentId>
-    using truncated = gko::truncated<T, NumComponents, ComponentId>;
-
-    static constexpr auto byte_size = gko::byte_size;
-
-    template <std::size_t N>
-    static floating<N - 1> create_from_bits(const char (&s)[N])
-    {
-        auto bits = std::bitset<N - 1>(s).to_ullong();
-        // We cast to the same size of integer type first.
-        // Otherwise, the first memory chunk is different when we use
-        // reinterpret_cast or memcpy to get the smaller type out of unsigned
-        // long long.
-        using bits_type =
-            typename gko::detail::float_traits<floating<N - 1>>::bits_type;
-        auto bits_val = static_cast<bits_type>(bits);
-        floating<N - 1> result;
-        static_assert(sizeof(floating<N - 1>) == sizeof(bits_type),
-                      "the type should have the same size as its bits_type");
-        std::memcpy(&result, &bits_val, sizeof(bits_type));
-        return result;
-    }
-
-    template <typename T>
-    static std::bitset<sizeof(T) * byte_size> get_bits(T val)
-    {
-        using bits_type = typename gko::detail::float_traits<T>::bits_type;
-        bits_type bits;
-        static_assert(sizeof(T) == sizeof(bits_type),
-                      "the type should have the same size as its bits_type");
-        std::memcpy(&bits, &val, sizeof(T));
-        return std::bitset<sizeof(T) * byte_size>(bits);
-    }
-
-    template <std::size_t N>
-    static std::bitset<N - 1> get_bits(const char (&s)[N])
-    {
-        return std::bitset<N - 1>(s);
-    }
-};
-
-
-class TruncatedDouble : public ExtendedFloatTestBase {};
+#include "core/test/base/floating_bit_helper.hpp"
+
+
+using namespace floating_bit_helper;
+
+using half = gko::half;
+
+template <typename T, std::size_t NumComponents, std::size_t ComponentId>
+using truncated = gko::truncated<T, NumComponents, ComponentId>;
+
 
 // clang-format does terrible formatting of string literal concatenation
 // clang-format off
 
 
-TEST_F(TruncatedDouble, SplitsDoubleToHalves)
+TEST(TruncatedDouble, SplitsDoubleToHalves)
 {
     double x = create_from_bits("1" "11110100100" "1111" "1000110110110101"
                                 "1100101011010101" "1001011101110111");
@@ -102,7 +38,7 @@ TEST_F(TruncatedDouble, SplitsDoubleToHalves)
 }
 
 
-TEST_F(TruncatedDouble, AssemblesDoubleFromHalves)
+TEST(TruncatedDouble, AssemblesDoubleFromHalves)
 {
     double x = create_from_bits("1" "11110100100" "1111" "1000110110110101"
                                 "1100101011010101" "1001011101110111");
@@ -121,7 +57,7 @@ TEST_F(TruncatedDouble, AssemblesDoubleFromHalves)
 }
 
 
-TEST_F(TruncatedDouble, SplitsDoubleToQuarters)
+TEST(TruncatedDouble, SplitsDoubleToQuarters)
 {
     double x = create_from_bits("1" "11110100100" "1111" "1000110110110101"
                                 "1100101011010101" "1001011101110111");
@@ -138,7 +74,7 @@ TEST_F(TruncatedDouble, SplitsDoubleToQuarters)
 }
 
 
-TEST_F(TruncatedDouble, AssemblesDoubleFromQuarters)
+TEST(TruncatedDouble, AssemblesDoubleFromQuarters)
 {
     double x = create_from_bits("1" "11110100100" "1111" "1000110110110101"
                                 "1100101011010101" "1001011101110111");
@@ -167,16 +103,7 @@ TEST_F(TruncatedDouble, AssemblesDoubleFromQuarters)
 }
 
 
-// clang-format on
-
-
-class TruncatedFloat : public ExtendedFloatTestBase {};
-
-
-// clang-format off
-
-
-TEST_F(TruncatedFloat, SplitsFloatToHalves)
+TEST(TruncatedFloat, SplitsFloatToHalves)
 {
     float x = create_from_bits("1" "11110100" "1001111" "1000110110110101");
 
@@ -188,7 +115,7 @@ TEST_F(TruncatedFloat, SplitsFloatToHalves)
 }
 
 
-TEST_F(TruncatedFloat, AssemblesFloatFromHalves)
+TEST(TruncatedFloat, AssemblesFloatFromHalves)
 {
     float x = create_from_bits("1" "11110100" "1001111" "1000110110110101");
     auto p1 = static_cast<truncated<float, 2, 0>>(x);
@@ -205,6 +132,3 @@ TEST_F(TruncatedFloat, AssemblesFloatFromHalves)
 
 
 // clang-format on
-
-
-}  // namespace
diff --git a/core/test/base/floating_bit_helper.hpp b/core/test/base/floating_bit_helper.hpp
new file mode 100644
index 00000000000..bbdc76ee9c2
--- /dev/null
+++ b/core/test/base/floating_bit_helper.hpp
@@ -0,0 +1,82 @@
+// SPDX-FileCopyrightText: 2024 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef GKO_CORE_TEST_BASE_FLOATING_BIT_HELPER_HPP_
+#define GKO_CORE_TEST_BASE_FLOATING_BIT_HELPER_HPP_
+
+
+#include <bitset>
+#include <cstring>
+
+#include <ginkgo/core/base/half.hpp>
+
+namespace floating_bit_helper {
+
+
+constexpr auto byte_size = gko::detail::byte_size;
+
+
+template <std::size_t N>
+struct floating_impl;
+
+template <>
+struct floating_impl<16> {
+    using type = gko::half;
+};
+
+template <>
+struct floating_impl<32> {
+    using type = float;
+};
+
+template <>
+struct floating_impl<64> {
+    using type = double;
+};
+
+
+template <std::size_t N>
+using floating = typename floating_impl<N>::type;
+
+
+template <std::size_t N>
+floating<N - 1> create_from_bits(const char (&s)[N])
+{
+    auto bits = std::bitset<N - 1>(s).to_ullong();
+    // We cast to the same size of integer type first.
+    // Otherwise, the first memory chunk is different when we use
+    // reinterpret_cast or memcpy to get the smaller type out of unsigned
+    // long long.
+    using bits_type =
+        typename gko::detail::float_traits<floating<N - 1>>::bits_type;
+    auto bits_val = static_cast<bits_type>(bits);
+    floating<N - 1> result;
+    static_assert(sizeof(floating<N - 1>) == sizeof(bits_type),
+                  "the type should have the same size as its bits_type");
+    std::memcpy(&result, &bits_val, sizeof(bits_type));
+    return result;
+}
+
+
+template <typename T>
+std::bitset<sizeof(T) * byte_size> get_bits(T val)
+{
+    using bits_type = typename gko::detail::float_traits<T>::bits_type;
+    bits_type bits;
+    static_assert(sizeof(T) == sizeof(bits_type),
+                  "the type should have the same size as its bits_type");
+    std::memcpy(&bits, &val, sizeof(T));
+    return std::bitset<sizeof(T) * byte_size>(bits);
+}
+
+template <std::size_t N>
+std::bitset<N - 1> get_bits(const char (&s)[N])
+{
+    return std::bitset<N - 1>(s);
+}
+
+
+}  // namespace floating_bit_helper
+
+#endif  // GKO_CORE_TEST_BASE_FLOATING_BIT_HELPER_HPP_
diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp
index 51d3e60ce40..39c47c49e15 100644
--- a/core/test/base/half.cpp
+++ b/core/test/base/half.cpp
@@ -2,88 +2,22 @@
 //
 // SPDX-License-Identifier: BSD-3-Clause
 
-#include <bitset>
-#include <cstring>
-#include <string>
-
 #include <gtest/gtest.h>
 
 #include <ginkgo/core/base/half.hpp>
 
+#include "core/test/base/floating_bit_helper.hpp"
+
 
-template <std::size_t N>
-struct floating_impl;
-
-template <>
-struct floating_impl<16> {
-    using type = gko::half;
-};
-
-template <>
-struct floating_impl<32> {
-    using type = float;
-};
-
-template <>
-struct floating_impl<64> {
-    using type = double;
-};
-
-template <std::size_t N>
-using floating = typename floating_impl<N>::type;
-
-
-class ExtendedFloatTestBase : public ::testing::Test {
-protected:
-    using half = gko::half;
-
-    static constexpr auto byte_size = gko::detail::byte_size;
-
-    template <std::size_t N>
-    static floating<N - 1> create_from_bits(const char (&s)[N])
-    {
-        auto bits = std::bitset<N - 1>(s).to_ullong();
-        // We cast to the same size of integer type first.
-        // Otherwise, the first memory chunk is different when we use
-        // reinterpret_cast or memcpy to get the smaller type out of unsigned
-        // long long.
-        using bits_type =
-            typename gko::detail::float_traits<floating<N - 1>>::bits_type;
-        auto bits_val = static_cast<bits_type>(bits);
-        floating<N - 1> result;
-        static_assert(sizeof(floating<N - 1>) == sizeof(bits_type),
-                      "the type should have the same size as its bits_type");
-        std::memcpy(&result, &bits_val, sizeof(bits_type));
-        return result;
-    }
-
-    template <typename T>
-    static std::bitset<sizeof(T) * byte_size> get_bits(T val)
-    {
-        using bits_type = typename gko::detail::float_traits<T>::bits_type;
-        bits_type bits;
-        static_assert(sizeof(T) == sizeof(bits_type),
-                      "the type should have the same size as its bits_type");
-        std::memcpy(&bits, &val, sizeof(T));
-        return std::bitset<sizeof(T) * byte_size>(bits);
-    }
-
-    template <std::size_t N>
-    static std::bitset<N - 1> get_bits(const char (&s)[N])
-    {
-        return std::bitset<N - 1>(s);
-    }
-};
-
-
-class FloatToHalf : public ExtendedFloatTestBase {};
+using half = gko::half;
+using namespace floating_bit_helper;
 
 
 // clang-format does terrible formatting of string literal concatenation
 // clang-format off
 
 
-TEST_F(FloatToHalf, ConvertsOne)
+TEST(FloatToHalf, ConvertsOne)
 {
     half x = create_from_bits("0" "01111111" "00000000000000000000000");
 
@@ -91,7 +25,7 @@ TEST_F(FloatToHalf, ConvertsOne)
 }
 
 
-TEST_F(FloatToHalf, ConvertsZero)
+TEST(FloatToHalf, ConvertsZero)
 {
     half x = create_from_bits("0" "00000000" "00000000000000000000000");
 
@@ -99,7 +33,7 @@ TEST_F(FloatToHalf, ConvertsZero)
 }
 
 
-TEST_F(FloatToHalf, ConvertsInf)
+TEST(FloatToHalf, ConvertsInf)
 {
     half x = create_from_bits("0" "11111111" "00000000000000000000000");
 
@@ -107,7 +41,7 @@ TEST_F(FloatToHalf, ConvertsInf)
 }
 
 
-TEST_F(FloatToHalf, ConvertsNegInf)
+TEST(FloatToHalf, ConvertsNegInf)
 {
     half x = create_from_bits("1" "11111111" "00000000000000000000000");
 
@@ -115,7 +49,7 @@ TEST_F(FloatToHalf, ConvertsNegInf)
 }
 
 
-TEST_F(FloatToHalf, ConvertsNan)
+TEST(FloatToHalf, ConvertsNan)
 {
     half x = create_from_bits("0" "11111111" "00000000000000000000001");
 
@@ -128,7 +62,7 @@ TEST_F(FloatToHalf, ConvertsNan)
 }
 
 
-TEST_F(FloatToHalf, ConvertsNegNan)
+TEST(FloatToHalf, ConvertsNegNan)
 {
     half x = create_from_bits("1" "11111111" "00010000000000000000000");
 
@@ -141,7 +75,7 @@ TEST_F(FloatToHalf, ConvertsNegNan)
 }
 
 
-TEST_F(FloatToHalf, FlushesToZero)
+TEST(FloatToHalf, FlushesToZero)
 {
     half x = create_from_bits("0" "00000111" "00010001000100000001000");
 
@@ -149,7 +83,7 @@ TEST_F(FloatToHalf, FlushesToZero)
 }
 
 
-TEST_F(FloatToHalf, FlushesToNegZero)
+TEST(FloatToHalf, FlushesToNegZero)
 {
     half x = create_from_bits("1" "00000010" "00010001000100000001000");
 
@@ -157,7 +91,7 @@ TEST_F(FloatToHalf, FlushesToNegZero)
 }
 
 
-TEST_F(FloatToHalf, FlushesToInf)
+TEST(FloatToHalf, FlushesToInf)
 {
     half x = create_from_bits("0" "10100000" "10010000000000010000100");
 
@@ -165,7 +99,7 @@ TEST_F(FloatToHalf, FlushesToInf)
 }
 
 
-TEST_F(FloatToHalf, FlushesToNegInf)
+TEST(FloatToHalf, FlushesToNegInf)
 {
     half x = create_from_bits("1" "11000000" "10010000000000010000100");
 
@@ -173,7 +107,7 @@ TEST_F(FloatToHalf, FlushesToNegInf)
 }
 
 
-TEST_F(FloatToHalf, TruncatesSmallNumber)
+TEST(FloatToHalf, TruncatesSmallNumber)
 {
     half x = create_from_bits("0" "01110001" "10010000000000010000100");
 
@@ -181,7 +115,7 @@ TEST_F(FloatToHalf, TruncatesSmallNumber)
 }
 
 
-TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven)
+TEST(FloatToHalf, TruncatesLargeNumberRoundToEven)
 {
     half neg_x = create_from_bits("1" "10001110" "10010011111000010000100");
     half neg_x2 = create_from_bits("1" "10001110" "10010011101000010000100");
@@ -199,16 +133,7 @@ TEST_F(FloatToHalf, TruncatesLargeNumberRoundToEven)
 }
 
 
-// clang-format on
-
-
-class HalfToFloat : public ExtendedFloatTestBase {};
-
-
-// clang-format off
-
-
-TEST_F(HalfToFloat, ConvertsOne)
+TEST(HalfToFloat, ConvertsOne)
 {
     float x = create_from_bits("0" "01111" "0000000000");
 
@@ -216,7 +141,7 @@ TEST_F(HalfToFloat, ConvertsOne)
 }
 
 
-TEST_F(HalfToFloat, ConvertsZero)
+TEST(HalfToFloat, ConvertsZero)
 {
     float x = create_from_bits("0" "00000" "0000000000");
 
@@ -224,7 +149,7 @@ TEST_F(HalfToFloat, ConvertsZero)
 }
 
 
-TEST_F(HalfToFloat, ConvertsInf)
+TEST(HalfToFloat, ConvertsInf)
 {
     float x = create_from_bits("0" "11111" "0000000000");
 
@@ -232,7 +157,7 @@ TEST_F(HalfToFloat, ConvertsInf)
 }
 
 
-TEST_F(HalfToFloat, ConvertsNegInf)
+TEST(HalfToFloat, ConvertsNegInf)
 {
     float x = create_from_bits("1" "11111" "0000000000");
 
@@ -240,7 +165,7 @@ TEST_F(HalfToFloat, ConvertsNegInf)
 }
 
 
-TEST_F(HalfToFloat, ConvertsNan)
+TEST(HalfToFloat, ConvertsNan)
 {
     float x = create_from_bits("0" "11111" "0001001000");
 
@@ -253,7 +178,7 @@ TEST_F(HalfToFloat, ConvertsNan)
 }
 
 
-TEST_F(HalfToFloat, ConvertsNegNan)
+TEST(HalfToFloat, ConvertsNegNan)
 {
     float x = create_from_bits("1" "11111" "0000000001");
 
@@ -266,7 +191,7 @@ TEST_F(HalfToFloat, ConvertsNegNan)
 }
 
 
-TEST_F(HalfToFloat, ExtendsSmallNumber)
+TEST(HalfToFloat, ExtendsSmallNumber)
 {
     float x = create_from_bits("0" "00001" "1000010001");
 
@@ -274,7 +199,7 @@ TEST_F(HalfToFloat, ExtendsSmallNumber)
 }
 
 
-TEST_F(HalfToFloat, ExtendsLargeNumber)
+TEST(HalfToFloat, ExtendsLargeNumber)
 {
     float x = create_from_bits("1" "11110" "1001001111");
 
diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp
index fb5761c51fb..b559ad2cfc5 100644
--- a/include/ginkgo/core/base/half.hpp
+++ b/include/ginkgo/core/base/half.hpp
@@ -334,11 +334,14 @@ class half {
         data_ = result.data_;                                      \
         return *this;                                              \
     }
+
     HALF_OPERATOR(+, +=)
     HALF_OPERATOR(-, -=)
     HALF_OPERATOR(*, *=)
     HALF_OPERATOR(/, /=)
 
+#undef HALF_OPERATOR
+
     // Do operation with different type
     // If it is floating point, using floating point as type.
     // If it is integer, using half as type
@@ -373,6 +376,8 @@ class half {
     HALF_FRIEND_OPERATOR(*, *=)
     HALF_FRIEND_OPERATOR(/, /=)
 
+#undef HALF_FRIEND_OPERATOR
+
     // the negative
     half operator-() const
     {
@@ -588,6 +593,8 @@ class complex<gko::half> {
     COMPLEX_HALF_OPERATOR(*, *=)
     COMPLEX_HALF_OPERATOR(/, /=)
 
+#undef COMPLEX_HALF_OPERATOR
+
 private:
     value_type real_;
     value_type imag_;

From 03eb0227d3f167d33c5f28dd38a01b1b50a15545 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Thu, 24 Oct 2024 12:14:54 +0200
Subject: [PATCH 10/13] use memcpy not std::memcpy in hip

---
 hip/components/memory.hip.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hip/components/memory.hip.hpp b/hip/components/memory.hip.hpp
index d8238c11795..8a98ee822b8 100644
--- a/hip/components/memory.hip.hpp
+++ b/hip/components/memory.hip.hpp
@@ -99,7 +99,7 @@ __device__ __forceinline__ ValueType load_generic(const ValueType* ptr)
     auto cast_value = HIP_ATOMIC_LOAD(reinterpret_cast<const atomic_type*>(ptr),
                                       memorder, scope);
     ValueType result{};
-    std::memcpy(&result, &cast_value, sizeof(ValueType));
+    memcpy(&result, &cast_value, sizeof(ValueType));
     return result;
 }
 
@@ -122,7 +122,7 @@ __device__ __forceinline__ void store_generic(ValueType* ptr, ValueType value)
     static_assert(sizeof(atomic_type) == sizeof(ValueType), "invalid map");
     static_assert(alignof(atomic_type) == alignof(ValueType), "invalid map");
     atomic_type cast_value{};
-    std::memcpy(&cast_value, &value, sizeof(ValueType));
+    memcpy(&cast_value, &value, sizeof(ValueType));
     HIP_ATOMIC_STORE(reinterpret_cast<atomic_type*>(ptr), cast_value, memorder,
                      scope);
 }

From 1f0f6195dcc2ee47d6c1c1a3c826aec1d40e71ef Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Mon, 18 Nov 2024 11:10:13 +0100
Subject: [PATCH 11/13] add alignment

---
 core/test/base/half.cpp           | 8 ++++++++
 include/ginkgo/core/base/half.hpp | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp
index 39c47c49e15..7fcd0ffa70f 100644
--- a/core/test/base/half.cpp
+++ b/core/test/base/half.cpp
@@ -2,6 +2,8 @@
 //
 // SPDX-License-Identifier: BSD-3-Clause
 
+#include <cstdint>
+
 #include <gtest/gtest.h>
 
 #include <ginkgo/core/base/half.hpp>
@@ -13,6 +15,12 @@ using half = gko::half;
 using namespace floating_bit_helper;
 
 
+TEST(Half, SizeAndAlign)
+{
+    ASSERT_EQ(sizeof(half), sizeof(std::uint16_t));
+    ASSERT_EQ(alignof(half), alignof(std::uint16_t));
+}
+
 // clang-format does terrible formatting of string literal concatenation
 // clang-format off
 
diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp
index b559ad2cfc5..bd04d2da832 100644
--- a/include/ginkgo/core/base/half.hpp
+++ b/include/ginkgo/core/base/half.hpp
@@ -283,7 +283,7 @@ struct precision_converter<SourceType, ResultType, false> {
  * For now the only features are reduced storage compared to single precision
  * and conversions from and to single precision floating point type.
  */
-class half {
+class alignas(std::uint16_t) half {
 public:
     // create half value from the bits directly.
     static constexpr half create_from_bits(std::uint16_t bits) noexcept

From acbae4a56d3f49e866c6d04c619b7c1629dc3e23 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Mon, 18 Nov 2024 13:45:44 +0100
Subject: [PATCH 12/13] delete the sycl half test as we do not enable it
 directly

---
 core/test/base/half.cpp | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/core/test/base/half.cpp b/core/test/base/half.cpp
index 7fcd0ffa70f..82732c62d16 100644
--- a/core/test/base/half.cpp
+++ b/core/test/base/half.cpp
@@ -61,12 +61,7 @@ TEST(FloatToHalf, ConvertsNan)
 {
     half x = create_from_bits("0" "11111111" "00000000000000000000001");
 
-    #if defined(SYCL_LANGUAGE_VERSION) 
-    // Sycl put the 1000000000, but ours put mask
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1000000000"));
-    #else
     ASSERT_EQ(get_bits(x), get_bits("0" "11111" "1111111111"));
-    #endif
 }
 
 
@@ -74,12 +69,7 @@ TEST(FloatToHalf, ConvertsNegNan)
 {
     half x = create_from_bits("1" "11111111" "00010000000000000000000");
 
-    #if defined(SYCL_LANGUAGE_VERSION)
-    // Sycl put the 1000000000, but ours put mask
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1000000000"));
-    #else
     ASSERT_EQ(get_bits(x), get_bits("1" "11111" "1111111111"));
-    #endif
 }
 
 
@@ -177,12 +167,7 @@ TEST(HalfToFloat, ConvertsNan)
 {
     float x = create_from_bits("0" "11111" "0001001000");
 
-    #if defined(SYCL_LANGUAGE_VERSION) 
-    // sycl keeps significand
-    ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "00010010000000000000000"));
-    #else
     ASSERT_EQ(get_bits(x), get_bits("0" "11111111" "11111111111111111111111"));
-    #endif
 }
 
 
@@ -190,12 +175,7 @@ TEST(HalfToFloat, ConvertsNegNan)
 {
     float x = create_from_bits("1" "11111" "0000000001");
 
-    #if defined(SYCL_LANGUAGE_VERSION) 
-    // sycl keeps significand
-    ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "00000000010000000000000"));
-    #else
     ASSERT_EQ(get_bits(x), get_bits("1" "11111111" "11111111111111111111111"));
-    #endif
 }
 
 

From b5afcacb81808782226d8560de176f39b4828bf0 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Sat, 30 Nov 2024 00:25:05 +0100
Subject: [PATCH 13/13] use reference for half when it is possible

---
 include/ginkgo/core/base/half.hpp | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/include/ginkgo/core/base/half.hpp b/include/ginkgo/core/base/half.hpp
index bd04d2da832..b6d11dd7c64 100644
--- a/include/ginkgo/core/base/half.hpp
+++ b/include/ginkgo/core/base/half.hpp
@@ -286,7 +286,7 @@ struct precision_converter<SourceType, ResultType, false> {
 class alignas(std::uint16_t) half {
 public:
     // create half value from the bits directly.
-    static constexpr half create_from_bits(std::uint16_t bits) noexcept
+    static constexpr half create_from_bits(const std::uint16_t& bits) noexcept
     {
         half result;
         result.data_ = bits;
@@ -299,13 +299,13 @@ class alignas(std::uint16_t) half {
     constexpr half() noexcept : data_(0){};
 
     template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
-    half(const T val) : data_(0)
+    half(const T& val) : data_(0)
     {
         this->float2half(static_cast<float>(val));
     }
 
     template <typename V>
-    half& operator=(const V val)
+    half& operator=(const V& val)
     {
         this->float2half(static_cast<float>(val));
         return *this;
@@ -323,7 +323,7 @@ class alignas(std::uint16_t) half {
     // operation will cast it to float and then do float operation such that it
     // becomes float in the end.
 #define HALF_OPERATOR(_op, _opeq)                                  \
-    friend half operator _op(const half lhf, const half rhf)       \
+    friend half operator _op(const half& lhf, const half& rhf)     \
     {                                                              \
         return static_cast<half>(static_cast<float>(lhf)           \
                                      _op static_cast<float>(rhf)); \
@@ -350,7 +350,7 @@ class alignas(std::uint16_t) half {
     friend std::enable_if_t<                                               \
         !std::is_same<T, half>::value && std::is_scalar<T>::value,         \
         std::conditional_t<std::is_floating_point<T>::value, T, half>>     \
-    operator _op(const half hf, const T val)                               \
+    operator _op(const half& hf, const T& val)                             \
     {                                                                      \
         using type =                                                       \
             std::conditional_t<std::is_floating_point<T>::value, T, half>; \
@@ -362,7 +362,7 @@ class alignas(std::uint16_t) half {
     friend std::enable_if_t<                                               \
         !std::is_same<T, half>::value && std::is_scalar<T>::value,         \
         std::conditional_t<std::is_floating_point<T>::value, T, half>>     \
-    operator _op(const T val, const half hf)                               \
+    operator _op(const T& val, const half& hf)                             \
     {                                                                      \
         using type =                                                       \
             std::conditional_t<std::is_floating_point<T>::value, T, half>; \
@@ -389,7 +389,7 @@ class alignas(std::uint16_t) half {
     using f16_traits = detail::float_traits<half>;
     using f32_traits = detail::float_traits<float>;
 
-    void float2half(float val) noexcept
+    void float2half(const float& val) noexcept
     {
         std::uint32_t bit_val(0);
         std::memcpy(&bit_val, &val, sizeof(float));
@@ -576,16 +576,12 @@ class complex<gko::half> {
         return *this;
     }
 
-// It's for MacOS.
-// TODO: check whether mac compiler always use complex version even when real
-// half
-#define COMPLEX_HALF_OPERATOR(_op, _opeq)                                \
-    friend complex<gko::half> operator _op(const complex<gko::half> lhf, \
-                                           const complex<gko::half> rhf) \
-    {                                                                    \
-        auto a = lhf;                                                    \
-        a _opeq rhf;                                                     \
-        return a;                                                        \
+#define COMPLEX_HALF_OPERATOR(_op, _opeq)                               \
+    friend complex operator _op(const complex& lhf, const complex& rhf) \
+    {                                                                   \
+        auto a = lhf;                                                   \
+        a _opeq rhf;                                                    \
+        return a;                                                       \
     }
 
     COMPLEX_HALF_OPERATOR(+, +=)