Skip to content

Commit

Permalink
add HIP support
Browse files Browse the repository at this point in the history
- define the function attribute macros (e.g. LLAMA_FORCE_INLINE, LLAMA_ACC, LLAMA_HOST_ACC) for HIP
- add unroll support for HIP
- fix missing function attributes (LLAMA_FN_HOST_ACC_INLINE) on friend function declarations
  • Loading branch information
psychocoderHPC authored and bernhardmgruber committed Dec 9, 2022
1 parent e1c2426 commit 63594a0
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
7 changes: 5 additions & 2 deletions include/llama/RecordRef.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,9 +742,12 @@ namespace llama
// to find subsequent elements. This is not a great design for now and the SIMD load/store functions should
// probably take iterators to records.
template<typename T, typename Simd, typename RecordCoord>
friend void internal::loadSimdRecord(const T& srcRef, Simd& dstSimd, RecordCoord rc);
friend LLAMA_FN_HOST_ACC_INLINE void internal::loadSimdRecord(const T& srcRef, Simd& dstSimd, RecordCoord rc);
template<typename Simd, typename T, typename RecordCoord>
friend void internal::storeSimdRecord(const Simd& srcSimd, T&& dstRef, RecordCoord rc);
friend LLAMA_FN_HOST_ACC_INLINE void internal::storeSimdRecord(
const Simd& srcSimd,
T&& dstRef,
RecordCoord rc);
};

// swap for heterogeneous RecordRef
Expand Down
9 changes: 5 additions & 4 deletions include/llama/macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#endif

#ifndef LLAMA_FORCE_INLINE
# if defined(__NVCC__)
# if defined(__NVCC__) || defined(__HIP__)
# define LLAMA_FORCE_INLINE __forceinline__
# elif defined(__GNUC__) || defined(__clang__)
# define LLAMA_FORCE_INLINE inline __attribute__((always_inline))
Expand All @@ -52,7 +52,8 @@
#endif

#ifndef LLAMA_UNROLL
# if defined(__NVCC__) || defined(__NVCOMPILER) || defined(__clang__) || defined(__INTEL_LLVM_COMPILER)
# if defined(__HIP__) || defined(__NVCC__) || defined(__NVCOMPILER) || defined(__clang__) \
|| defined(__INTEL_LLVM_COMPILER)
# define LLAMA_UNROLL(...) LLAMA_PRAGMA(unroll __VA_ARGS__)
# elif defined(__GNUG__)
# define LLAMA_UNROLL(...) LLAMA_PRAGMA(GCC unroll __VA_ARGS__)
Expand All @@ -68,7 +69,7 @@
#endif

#ifndef LLAMA_ACC
# if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# if defined(__HIP__) || defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# define LLAMA_ACC __device__
# elif defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
# define LLAMA_ACC
Expand All @@ -79,7 +80,7 @@
#endif

#ifndef LLAMA_HOST_ACC
# if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# if defined(__HIP__) || defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# define LLAMA_HOST_ACC __host__ __device__
# elif defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
# define LLAMA_HOST_ACC
Expand Down

0 comments on commit 63594a0

Please sign in to comment.