From 6a58f1c09ea0e19a953976f8cfaa96d5e5bf62cf Mon Sep 17 00:00:00 2001
From: Daniel Arndt
Date: Thu, 11 Jul 2024 09:05:35 -0400
Subject: [PATCH] Unify alignPtrTo implementation

---
 common/src/KokkosKernels_Utils.hpp | 37 ++++++------------------------
 1 file changed, 7 insertions(+), 30 deletions(-)

diff --git a/common/src/KokkosKernels_Utils.hpp b/common/src/KokkosKernels_Utils.hpp
index 89aeabb823..92419424b6 100644
--- a/common/src/KokkosKernels_Utils.hpp
+++ b/common/src/KokkosKernels_Utils.hpp
@@ -1527,41 +1527,18 @@ struct array_sum_reduce {
   }
 };
 
-/* Several alternatives were considered for SYCL, including
-
-unsigned int f1(unsigned int i, unsigned int align)
-{
-  return ((i + align - 1) / align * align);
-}
-
-unsigned int f2(unsigned int i, unsigned int align)
-{
-  return (i + align - 1) & (-align);
-}
-
-f1 should be equivalent to the below, but it produces incorrect results on SYCL
-f2 is how GCC does std::align, but it also produces incorrect results on SYCL
-possibly alignof(T) is not a power-of-2 on SYCL? Or a compiler error.
-*/
-#if defined(KOKKOS_ENABLE_SYCL)
 template <typename T, typename InPtr>
-KOKKOS_INLINE_FUNCTION T *alignPtrTo(InPtr p) {
-  std::uintptr_t ptrVal = reinterpret_cast<std::uintptr_t>(p);
-  while (ptrVal % alignof(T)) {
-    ++ptrVal;
-  }
-  return reinterpret_cast<T *>(ptrVal);
-}
-#else
-template <typename T, typename InPtr>
-KOKKOS_INLINE_FUNCTION T *alignPtrTo(InPtr p) {
+KOKKOS_INLINE_FUNCTION T *alignPtrTo(InPtr *p) {
   // ugly but computationally free and the "right" way to do this in C++
-  std::uintptr_t ptrVal = reinterpret_cast<std::uintptr_t>(p);
+  const std::uintptr_t ptrVal = reinterpret_cast<std::uintptr_t>(p);
   // ptrVal + (align - 1) lands inside the next valid aligned scalar_t,
   // and the mask produces the start of that scalar_t.
-  return reinterpret_cast<T *>((ptrVal + alignof(T) - 1) & (~(alignof(T) - 1)));
+  const std::uintptr_t ptrValNew =
+      (ptrVal + alignof(T) - 1) & (~(alignof(T) - 1));
+  return reinterpret_cast<T *>(
+      reinterpret_cast<char *>(const_cast<std::remove_cv_t<InPtr> *>(p)) +
+      (ptrValNew - ptrVal));
 }
-#endif
 
 }  // namespace Impl
 }  // namespace KokkosKernels