Skip to content

Commit

Permalink
add HIP support
Browse files Browse the repository at this point in the history
- define the function attribute macros (e.g. LLAMA_FORCE_INLINE, LLAMA_ACC, LLAMA_HOST_ACC) for HIP
- add unroll support for HIP
- fix missing function attributes (LLAMA_FN_HOST_ACC_INLINE) on friend function declarations
  • Loading branch information
psychocoderHPC authored and bernhardmgruber committed Dec 9, 2022
1 parent e1c2426 commit 63594a0
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
7 changes: 5 additions & 2 deletions include/llama/RecordRef.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,9 +742,12 @@ namespace llama
// to find subsequent elements. This is not a great design for now and the SIMD load/store functions should
// probably take iterators to records.
template<typename T, typename Simd, typename RecordCoord>
friend void internal::loadSimdRecord(const T& srcRef, Simd& dstSimd, RecordCoord rc);
friend LLAMA_FN_HOST_ACC_INLINE void internal::loadSimdRecord(const T& srcRef, Simd& dstSimd, RecordCoord rc);
template<typename Simd, typename T, typename RecordCoord>
friend void internal::storeSimdRecord(const Simd& srcSimd, T&& dstRef, RecordCoord rc);
friend LLAMA_FN_HOST_ACC_INLINE void internal::storeSimdRecord(
const Simd& srcSimd,
T&& dstRef,
RecordCoord rc);
};

// swap for heterogeneous RecordRef
Expand Down
9 changes: 5 additions & 4 deletions include/llama/macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#endif

#ifndef LLAMA_FORCE_INLINE
# if defined(__NVCC__)
# if defined(__NVCC__) || defined(__HIP__)
# define LLAMA_FORCE_INLINE __forceinline__
# elif defined(__GNUC__) || defined(__clang__)
# define LLAMA_FORCE_INLINE inline __attribute__((always_inline))
Expand All @@ -52,7 +52,8 @@
#endif

#ifndef LLAMA_UNROLL
# if defined(__NVCC__) || defined(__NVCOMPILER) || defined(__clang__) || defined(__INTEL_LLVM_COMPILER)
# if defined(__HIP__) || defined(__NVCC__) || defined(__NVCOMPILER) || defined(__clang__) \
|| defined(__INTEL_LLVM_COMPILER)
# define LLAMA_UNROLL(...) LLAMA_PRAGMA(unroll __VA_ARGS__)
# elif defined(__GNUG__)
# define LLAMA_UNROLL(...) LLAMA_PRAGMA(GCC unroll __VA_ARGS__)
Expand All @@ -68,7 +69,7 @@
#endif

#ifndef LLAMA_ACC
# if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# if defined(__HIP__) || defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# define LLAMA_ACC __device__
# elif defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
# define LLAMA_ACC
Expand All @@ -79,7 +80,7 @@
#endif

#ifndef LLAMA_HOST_ACC
# if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# if defined(__HIP__) || defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
# define LLAMA_HOST_ACC __host__ __device__
# elif defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
# define LLAMA_HOST_ACC
Expand Down

0 comments on commit 63594a0

Please sign in to comment.