Skip to content

Commit

Permalink
Merge pull request #1846 from emankov/HIPIFY
Browse files Browse the repository at this point in the history
[HIPIFY][Device][#1796][tests][fix] Fix device test failures on CUDA 11.8.0, 12.0.x, and 12.1.x
  • Loading branch information
emankov authored Jan 28, 2025
2 parents cb2c328 + 27f88e1 commit ac6ce7e
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 65 deletions.
20 changes: 10 additions & 10 deletions docs/reference/tables/CUDA_Device_API_supported_by_HIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
|`__bfloat162ushort_rn`|11.0| | | | | | | | | |
|`__bfloat162ushort_ru`|11.0| | | | | | | | | |
|`__bfloat162ushort_rz`|11.0| | | | | | | | | |
|`__bfloat16_as_short`|11.0| | | |`__bfloat16_as_short`|5.7.0| | | | |
|`__bfloat16_as_ushort`|11.0| | | |`__bfloat16_as_ushort`|5.7.0| | | | |
|`__bfloat16_as_short`|11.0| |12.2| |`__bfloat16_as_short`|5.7.0| | | | |
|`__bfloat16_as_ushort`|11.0| |12.2| |`__bfloat16_as_ushort`|5.7.0| | | | |
|`__brev`| | | | |`__brev`|1.6.0| | | | |
|`__brevll`| | | | |`__brevll`|1.6.0| | | | |
|`__brkpt`| | | | | | | | | | |
Expand Down Expand Up @@ -224,7 +224,7 @@
|`__half2ushort_rz`| | | | |`__half2ushort_rz`|1.6.0| | | | |
|`__half_as_short`| | | | |`__half_as_short`|1.6.0| | | | |
|`__half_as_ushort`| | | | |`__half_as_ushort`|1.6.0| | | | |
|`__halves2bfloat162`|11.0| | | |`__halves2bfloat162`|5.7.0| | | | |
|`__halves2bfloat162`|11.0| |12.2| |`__halves2bfloat162`|5.7.0| | | | |
|`__halves2half2`| | | | |`__halves2half2`|1.6.0| | | | |
|`__hbeq2`| | | | |`__hbeq2`|1.6.0| | | | |
|`__hbequ2`| | | | |`__hbequ2`|1.9.0| | | | |
Expand Down Expand Up @@ -265,7 +265,7 @@
|`__hgtu2`| | | | |`__hgtu2`|1.9.0| | | | |
|`__hgtu2_mask`|12.0| | | | | | | | | |
|`__high2bfloat16`|11.0| | | |`__high2bfloat16`|5.7.0| | | | |
|`__high2bfloat162`|11.0| | | |`__high2bfloat162`|5.7.0| | | | |
|`__high2bfloat162`|11.0| |12.2| |`__high2bfloat162`|5.7.0| | | | |
|`__high2float`| | | | |`__high2float`|1.6.0| | | | |
|`__high2half`| | | | |`__high2half`|1.6.0| | | | |
|`__high2half2`| | | | |`__high2half2`|1.6.0| | | | |
Expand All @@ -288,11 +288,11 @@
|`__hltu2`| | | | |`__hltu2`|1.9.0| | | | |
|`__hltu2_mask`|12.0| | | | | | | | | |
|`__hmax`|11.0| | | |`__hmax`|5.5.0| | | | |
|`__hmax2`|11.0| | | |`__hmax2`|5.7.0| | | | |
|`__hmax2`|11.0| |12.2| |`__hmax2`|5.7.0| | | | |
|`__hmax2_nan`|11.0| | | | | | | | | |
|`__hmax_nan`|11.0| | | |`__hmax_nan`|5.5.0| | | | |
|`__hmin`|11.0| | | |`__hmin`|5.5.0| | | | |
|`__hmin2`|11.0| | | |`__hmin2`|5.7.0| | | | |
|`__hmin2`|11.0| |12.2| |`__hmin2`|5.7.0| | | | |
|`__hmin2_nan`|11.0| | | | | | | | | |
|`__hmin_nan`|11.0| | | |`__hmin_nan`|5.5.0| | | | |
|`__hmul`| | | | |`__hmul`|1.6.0| | | | |
Expand Down Expand Up @@ -361,8 +361,8 @@
|`__log2f`| | | | |`__log2f`|1.6.0| | | | |
|`__logf`| | | | |`__logf`|1.6.0| | | | |
|`__longlong_as_double`| | | | |`__longlong_as_double`|1.6.0| | | | |
|`__low2bfloat16`|11.0| | | |`__low2bfloat16`|5.7.0| | | | |
|`__low2bfloat162`|11.0| | | |`__low2bfloat162`|5.7.0| | | | |
|`__low2bfloat16`|11.0| |12.2| |`__low2bfloat16`|5.7.0| | | | |
|`__low2bfloat162`|11.0| |12.2| |`__low2bfloat162`|5.7.0| | | | |
|`__low2float`| | | | |`__low2float`|1.6.0| | | | |
|`__low2half`| | | | |`__low2half`|1.6.0| | | | |
|`__low2half2`| | | | |`__low2half2`|1.6.0| | | | |
Expand Down Expand Up @@ -411,7 +411,7 @@
|`__short2half_rn`| | | | |`__short2half_rn`|1.6.0| | | | |
|`__short2half_ru`| | | | |`__short2half_ru`|1.6.0| | | | |
|`__short2half_rz`| | | | |`__short2half_rz`|1.6.0| | | | |
|`__short_as_bfloat16`|11.0| | | |`__short_as_bfloat16`|5.7.0| | | | |
|`__short_as_bfloat16`|11.0| |12.2| |`__short_as_bfloat16`|5.7.0| | | | |
|`__short_as_half`| | | | |`__short_as_half`|1.9.0| | | | |
|`__signbit`| | | | | | | | | | |
|`__signbitf`| | | | | | | | | | |
Expand Down Expand Up @@ -475,7 +475,7 @@
|`__ushort2half_rn`| | | | |`__ushort2half_rn`|1.6.0| | | | |
|`__ushort2half_ru`| | | | |`__ushort2half_ru`|1.6.0| | | | |
|`__ushort2half_rz`| | | | |`__ushort2half_rz`|1.6.0| | | | |
|`__ushort_as_bfloat16`|11.0| | | |`__ushort_as_bfloat16`|5.7.0| | | | |
|`__ushort_as_bfloat16`|11.0| |12.2| |`__ushort_as_bfloat16`|5.7.0| | | | |
|`__ushort_as_half`| | | | |`__ushort_as_half`|1.6.0| | | | |
|`__vabs2`| | | | | | | | | | |
|`__vabs4`| | | | | | | | | | |
Expand Down
1 change: 1 addition & 0 deletions src/CUDA2HIP.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ extern const std::map<llvm::StringRef, hipAPIversions> HIP_CAFFE2_TYPE_NAME_VER_
extern const std::map<llvm::StringRef, hipAPIversions> HIP_CAFFE2_FUNCTION_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_TYPE_NAME_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP;
// Device function name -> cudaAPIChangedVersions record; defined in CUDA2HIP_Device_functions.cpp.
extern const std::map<llvm::StringRef, cudaAPIChangedVersions> CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_CUB_TYPE_NAME_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_CUB_FUNCTION_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_RTC_TYPE_NAME_VER_MAP;
Expand Down
13 changes: 13 additions & 0 deletions src/CUDA2HIP_Device_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1530,6 +1530,19 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP {
{"__float22bfloat162_rn", {HIP_5070, HIP_0, HIP_0 }},
};

// CUDA device functions that carry a "changed" CUDA version record.
// Every entry below records CUDA_122 as that version.
// NOTE(review): presumably this reflects a CUDA 12.2 change to these bf16
// intrinsics - confirm against the cudaAPIChangedVersions definition.
// Entry order is irrelevant to the resulting std::map (keys are unique).
const std::map<llvm::StringRef, cudaAPIChangedVersions> CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP {
  // bfloat16 <-> short/ushort "*_as_*" helpers
  {"__bfloat16_as_short",  {CUDA_122}},
  {"__bfloat16_as_ushort", {CUDA_122}},
  {"__short_as_bfloat16",  {CUDA_122}},
  {"__ushort_as_bfloat16", {CUDA_122}},
  // bfloat16 / bfloat162 half selection and packing
  {"__halves2bfloat162",   {CUDA_122}},
  {"__high2bfloat162",     {CUDA_122}},
  {"__low2bfloat16",       {CUDA_122}},
  {"__low2bfloat162",      {CUDA_122}},
  // bfloat162 min/max
  {"__hmax2",              {CUDA_122}},
  {"__hmin2",              {CUDA_122}},
};

const std::map<unsigned int, llvm::StringRef> CUDA_DEVICE_FUNCTION_API_SECTION_MAP {
{1, "Device Functions"},
{2, "Device Types"},
Expand Down
1 change: 1 addition & 0 deletions src/CUDA2HIP_Doc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,7 @@ namespace doc {
const typeMap &getTypes() const override { return CUDA_DEVICE_TYPE_NAME_MAP; }
const versionMap &getFunctionVersions() const override { return CUDA_DEVICE_FUNCTION_VER_MAP; }
const hipVersionMap &getHipFunctionVersions() const override { return HIP_DEVICE_FUNCTION_VER_MAP; }
// Returns CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP, the device functions that have a recorded "changed" CUDA version.
const cudaChangedVersionMap &getCudaChangedFunctionVersions() const override { return CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP; }
const versionMap &getTypeVersions() const override { return CUDA_DEVICE_TYPE_NAME_VER_MAP; }
const hipVersionMap &getHipTypeVersions() const override { return HIP_DEVICE_TYPE_NAME_VER_MAP; }
const string &getName() const override { return sCUDEVICE; }
Expand Down
50 changes: 0 additions & 50 deletions tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -69,56 +69,6 @@ int main() {
// CHECK: f2 = __bfloat1622float2(bf162);
f2 = __bfloat1622float2(bf162);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmax2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmax2(bf162a, bf162b);
bf162 = __hmax2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmin2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmin2(bf162a, bf162b);
bf162 = __hmin2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __low2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a);
// CHECK: _bf16 = __low2bfloat16(bf162a);
_bf16 = __low2bfloat16(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __halves2bfloat162(const __nv_bfloat16 a, const __nv_bfloat16 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b);
// CHECK: bf162 = __halves2bfloat162(bf16a, bf16b);
bf162 = __halves2bfloat162(bf16a, bf16b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __low2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __low2bfloat162(bf162a);
bf162 = __low2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __high2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __high2bfloat162(bf162a);
bf162 = __high2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ short int __bfloat16_as_short(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h);
// CHECK: shi = __bfloat16_as_short(_bf16);
shi = __bfloat16_as_short(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ unsigned short int __bfloat16_as_ushort(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h);
// CHECK: ushi = __bfloat16_as_ushort(_bf16);
ushi = __bfloat16_as_ushort(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __short_as_bfloat16(const short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a);
// CHECK: _bf16 = __short_as_bfloat16(shi);
_bf16 = __short_as_bfloat16(shi);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __ushort_as_bfloat16(const unsigned short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a);
// CHECK: _bf16 = __ushort_as_bfloat16(ushi);
_bf16 = __ushort_as_bfloat16(ushi);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __float22bfloat162_rn(const float2 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __float22bfloat162_rn(const float2 a);
// CHECK: bf162 = __float22bfloat162_rn(f2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ int main() {
double dx = 0.0f;
float fa = 0.0f;
float fx = 0.0f;
short int shi = 0;
unsigned short int ushi = 0;
double2 d2 = { 0.0f, 0.0f };
float2 f2 = { 0.0f, 0.0f };
__half_raw hrx = { 0 };
Expand All @@ -23,18 +25,25 @@ int main() {
#if CUDA_VERSION >= 11000
// CHECK: __hip_bfloat16 bf16 = { 0 };
__nv_bfloat16 bf16 = { 0 };
// CHECK: __hip_bfloat16 _bf16 = { 0.0f };
// CHECK-NEXT: __hip_bfloat16 bf16a = { 0.0f };
// CHECK-NEXT: __hip_bfloat16 bf16b = { 0.0f };
__nv_bfloat16 _bf16 = { 0.0f };
__nv_bfloat16 bf16a = { 0.0f };
__nv_bfloat16 bf16b = { 0.0f };

// CHECK: __hip_bfloat162 bf162 = { 0, 0 };
// CHECK-NEXT: __hip_bfloat162 bf162a = { 0, 0 };
// CHECK-NEXT: __hip_bfloat162 bf162b = { 0, 0 };
__nv_bfloat162 bf162 = { 0, 0 };
__nv_bfloat162 bf162a = { 0, 0 };
__nv_bfloat162 bf162b = { 0, 0 };

#if CUDA_VERSION < 11080 || CUDA_VERSION >= 12000
#if CUDA_VERSION < 11080 || CUDA_VERSION >= 12020
// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __bfloat162bfloat162(const __nv_bfloat16 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a);
// CHECK: bf162 = __bfloat162bfloat162(bf16);
bf162 = __bfloat162bfloat162(bf16);
// CHECK: bf162 = __bfloat162bfloat162(_bf16);
bf162 = __bfloat162bfloat162(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __lows2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b);
Expand All @@ -48,8 +57,58 @@ int main() {

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __high2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a);
// CHECK: bf16 = __high2bfloat16(bf162a);
bf16 = __high2bfloat16(bf162a);
// CHECK: _bf16 = __high2bfloat16(bf162a);
_bf16 = __high2bfloat16(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmax2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmax2(bf162a, bf162b);
bf162 = __hmax2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmin2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmin2(bf162a, bf162b);
bf162 = __hmin2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __low2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a);
// CHECK: _bf16 = __low2bfloat16(bf162a);
_bf16 = __low2bfloat16(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __halves2bfloat162(const __nv_bfloat16 a, const __nv_bfloat16 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b);
// CHECK: bf162 = __halves2bfloat162(bf16a, bf16b);
bf162 = __halves2bfloat162(bf16a, bf16b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __low2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __low2bfloat162(bf162a);
bf162 = __low2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __high2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __high2bfloat162(bf162a);
bf162 = __high2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ short int __bfloat16_as_short(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h);
// CHECK: shi = __bfloat16_as_short(_bf16);
shi = __bfloat16_as_short(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ unsigned short int __bfloat16_as_ushort(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h);
// CHECK: ushi = __bfloat16_as_ushort(_bf16);
ushi = __bfloat16_as_ushort(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __short_as_bfloat16(const short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a);
// CHECK: _bf16 = __short_as_bfloat16(shi);
_bf16 = __short_as_bfloat16(shi);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __ushort_as_bfloat16(const unsigned short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a);
// CHECK: _bf16 = __ushort_as_bfloat16(ushi);
_bf16 = __ushort_as_bfloat16(ushi);
#endif

#endif
Expand Down

0 comments on commit ac6ce7e

Please sign in to comment.