Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HIPIFY][Device][#1796][tests][fix] Fix device test failures on CUDA 11.8.0, 12.0.x, and 12.1.x #1846

Merged
merged 1 commit into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions docs/reference/tables/CUDA_Device_API_supported_by_HIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
|`__bfloat162ushort_rn`|11.0| | | | | | | | | |
|`__bfloat162ushort_ru`|11.0| | | | | | | | | |
|`__bfloat162ushort_rz`|11.0| | | | | | | | | |
|`__bfloat16_as_short`|11.0| | | |`__bfloat16_as_short`|5.7.0| | | | |
|`__bfloat16_as_ushort`|11.0| | | |`__bfloat16_as_ushort`|5.7.0| | | | |
|`__bfloat16_as_short`|11.0| |12.2| |`__bfloat16_as_short`|5.7.0| | | | |
|`__bfloat16_as_ushort`|11.0| |12.2| |`__bfloat16_as_ushort`|5.7.0| | | | |
|`__brev`| | | | |`__brev`|1.6.0| | | | |
|`__brevll`| | | | |`__brevll`|1.6.0| | | | |
|`__brkpt`| | | | | | | | | | |
Expand Down Expand Up @@ -224,7 +224,7 @@
|`__half2ushort_rz`| | | | |`__half2ushort_rz`|1.6.0| | | | |
|`__half_as_short`| | | | |`__half_as_short`|1.6.0| | | | |
|`__half_as_ushort`| | | | |`__half_as_ushort`|1.6.0| | | | |
|`__halves2bfloat162`|11.0| | | |`__halves2bfloat162`|5.7.0| | | | |
|`__halves2bfloat162`|11.0| |12.2| |`__halves2bfloat162`|5.7.0| | | | |
|`__halves2half2`| | | | |`__halves2half2`|1.6.0| | | | |
|`__hbeq2`| | | | |`__hbeq2`|1.6.0| | | | |
|`__hbequ2`| | | | |`__hbequ2`|1.9.0| | | | |
Expand Down Expand Up @@ -265,7 +265,7 @@
|`__hgtu2`| | | | |`__hgtu2`|1.9.0| | | | |
|`__hgtu2_mask`|12.0| | | | | | | | | |
|`__high2bfloat16`|11.0| | | |`__high2bfloat16`|5.7.0| | | | |
|`__high2bfloat162`|11.0| | | |`__high2bfloat162`|5.7.0| | | | |
|`__high2bfloat162`|11.0| |12.2| |`__high2bfloat162`|5.7.0| | | | |
|`__high2float`| | | | |`__high2float`|1.6.0| | | | |
|`__high2half`| | | | |`__high2half`|1.6.0| | | | |
|`__high2half2`| | | | |`__high2half2`|1.6.0| | | | |
Expand All @@ -288,11 +288,11 @@
|`__hltu2`| | | | |`__hltu2`|1.9.0| | | | |
|`__hltu2_mask`|12.0| | | | | | | | | |
|`__hmax`|11.0| | | |`__hmax`|5.5.0| | | | |
|`__hmax2`|11.0| | | |`__hmax2`|5.7.0| | | | |
|`__hmax2`|11.0| |12.2| |`__hmax2`|5.7.0| | | | |
|`__hmax2_nan`|11.0| | | | | | | | | |
|`__hmax_nan`|11.0| | | |`__hmax_nan`|5.5.0| | | | |
|`__hmin`|11.0| | | |`__hmin`|5.5.0| | | | |
|`__hmin2`|11.0| | | |`__hmin2`|5.7.0| | | | |
|`__hmin2`|11.0| |12.2| |`__hmin2`|5.7.0| | | | |
|`__hmin2_nan`|11.0| | | | | | | | | |
|`__hmin_nan`|11.0| | | |`__hmin_nan`|5.5.0| | | | |
|`__hmul`| | | | |`__hmul`|1.6.0| | | | |
Expand Down Expand Up @@ -361,8 +361,8 @@
|`__log2f`| | | | |`__log2f`|1.6.0| | | | |
|`__logf`| | | | |`__logf`|1.6.0| | | | |
|`__longlong_as_double`| | | | |`__longlong_as_double`|1.6.0| | | | |
|`__low2bfloat16`|11.0| | | |`__low2bfloat16`|5.7.0| | | | |
|`__low2bfloat162`|11.0| | | |`__low2bfloat162`|5.7.0| | | | |
|`__low2bfloat16`|11.0| |12.2| |`__low2bfloat16`|5.7.0| | | | |
|`__low2bfloat162`|11.0| |12.2| |`__low2bfloat162`|5.7.0| | | | |
|`__low2float`| | | | |`__low2float`|1.6.0| | | | |
|`__low2half`| | | | |`__low2half`|1.6.0| | | | |
|`__low2half2`| | | | |`__low2half2`|1.6.0| | | | |
Expand Down Expand Up @@ -411,7 +411,7 @@
|`__short2half_rn`| | | | |`__short2half_rn`|1.6.0| | | | |
|`__short2half_ru`| | | | |`__short2half_ru`|1.6.0| | | | |
|`__short2half_rz`| | | | |`__short2half_rz`|1.6.0| | | | |
|`__short_as_bfloat16`|11.0| | | |`__short_as_bfloat16`|5.7.0| | | | |
|`__short_as_bfloat16`|11.0| |12.2| |`__short_as_bfloat16`|5.7.0| | | | |
|`__short_as_half`| | | | |`__short_as_half`|1.9.0| | | | |
|`__signbit`| | | | | | | | | | |
|`__signbitf`| | | | | | | | | | |
Expand Down Expand Up @@ -475,7 +475,7 @@
|`__ushort2half_rn`| | | | |`__ushort2half_rn`|1.6.0| | | | |
|`__ushort2half_ru`| | | | |`__ushort2half_ru`|1.6.0| | | | |
|`__ushort2half_rz`| | | | |`__ushort2half_rz`|1.6.0| | | | |
|`__ushort_as_bfloat16`|11.0| | | |`__ushort_as_bfloat16`|5.7.0| | | | |
|`__ushort_as_bfloat16`|11.0| |12.2| |`__ushort_as_bfloat16`|5.7.0| | | | |
|`__ushort_as_half`| | | | |`__ushort_as_half`|1.6.0| | | | |
|`__vabs2`| | | | | | | | | | |
|`__vabs4`| | | | | | | | | | |
Expand Down
1 change: 1 addition & 0 deletions src/CUDA2HIP.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ extern const std::map<llvm::StringRef, hipAPIversions> HIP_CAFFE2_TYPE_NAME_VER_
extern const std::map<llvm::StringRef, hipAPIversions> HIP_CAFFE2_FUNCTION_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_TYPE_NAME_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP;
extern const std::map<llvm::StringRef, cudaAPIChangedVersions> CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_CUB_TYPE_NAME_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_CUB_FUNCTION_VER_MAP;
extern const std::map<llvm::StringRef, hipAPIversions> HIP_RTC_TYPE_NAME_VER_MAP;
Expand Down
13 changes: 13 additions & 0 deletions src/CUDA2HIP_Device_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1530,6 +1530,19 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP {
{"__float22bfloat162_rn", {HIP_5070, HIP_0, HIP_0 }},
};

// CUDA device functions that have a "changed" CUDA version recorded.
// Every entry currently carries CUDA_122. Keys are listed alphabetically;
// std::map orders by key, so listing order has no effect on lookups.
// NOTE(review): presumably CUDA_122 is the release in which these bfloat16
// helpers changed on the CUDA side (the docs table shows 12.2 for the same
// ten functions) -- confirm against the CUDA headers.
const std::map<llvm::StringRef, cudaAPIChangedVersions> CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP {
  {"__bfloat16_as_short",  {CUDA_122}},
  {"__bfloat16_as_ushort", {CUDA_122}},
  {"__halves2bfloat162",   {CUDA_122}},
  {"__high2bfloat162",     {CUDA_122}},
  {"__hmax2",              {CUDA_122}},
  {"__hmin2",              {CUDA_122}},
  {"__low2bfloat16",       {CUDA_122}},
  {"__low2bfloat162",      {CUDA_122}},
  {"__short_as_bfloat16",  {CUDA_122}},
  {"__ushort_as_bfloat16", {CUDA_122}},
};

const std::map<unsigned int, llvm::StringRef> CUDA_DEVICE_FUNCTION_API_SECTION_MAP {
{1, "Device Functions"},
{2, "Device Types"},
Expand Down
1 change: 1 addition & 0 deletions src/CUDA2HIP_Doc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,7 @@ namespace doc {
// Supplies CUDA_DEVICE_TYPE_NAME_MAP as the type map.
const typeMap &getTypes() const override {
  return CUDA_DEVICE_TYPE_NAME_MAP;
}
// Supplies CUDA_DEVICE_FUNCTION_VER_MAP as the function version map.
const versionMap &getFunctionVersions() const override {
  return CUDA_DEVICE_FUNCTION_VER_MAP;
}
// Supplies HIP_DEVICE_FUNCTION_VER_MAP as the HIP function version map.
const hipVersionMap &getHipFunctionVersions() const override {
  return HIP_DEVICE_FUNCTION_VER_MAP;
}
// Supplies CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP as the map of "changed"
// CUDA versions for functions.
const cudaChangedVersionMap &getCudaChangedFunctionVersions() const override {
  return CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP;
}
// Supplies CUDA_DEVICE_TYPE_NAME_VER_MAP as the type version map.
const versionMap &getTypeVersions() const override {
  return CUDA_DEVICE_TYPE_NAME_VER_MAP;
}
// Supplies HIP_DEVICE_TYPE_NAME_VER_MAP as the HIP type version map.
const hipVersionMap &getHipTypeVersions() const override {
  return HIP_DEVICE_TYPE_NAME_VER_MAP;
}
// Supplies the sCUDEVICE string as the name.
const string &getName() const override {
  return sCUDEVICE;
}
Expand Down
50 changes: 0 additions & 50 deletions tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -69,56 +69,6 @@ int main() {
// CHECK: f2 = __bfloat1622float2(bf162);
f2 = __bfloat1622float2(bf162);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmax2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmax2(bf162a, bf162b);
bf162 = __hmax2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmin2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmin2(bf162a, bf162b);
bf162 = __hmin2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __low2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a);
// CHECK: _bf16 = __low2bfloat16(bf162a);
_bf16 = __low2bfloat16(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __halves2bfloat162(const __nv_bfloat16 a, const __nv_bfloat16 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b);
// CHECK: bf162 = __halves2bfloat162(bf16a, bf16b);
bf162 = __halves2bfloat162(bf16a, bf16b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __low2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __low2bfloat162(bf162a);
bf162 = __low2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __high2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __high2bfloat162(bf162a);
bf162 = __high2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ short int __bfloat16_as_short(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h);
// CHECK: shi = __bfloat16_as_short(_bf16);
shi = __bfloat16_as_short(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ unsigned short int __bfloat16_as_ushort(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h);
// CHECK: ushi = __bfloat16_as_ushort(_bf16);
ushi = __bfloat16_as_ushort(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __short_as_bfloat16(const short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a);
// CHECK: _bf16 = __short_as_bfloat16(shi);
_bf16 = __short_as_bfloat16(shi);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __ushort_as_bfloat16(const unsigned short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a);
// CHECK: _bf16 = __ushort_as_bfloat16(ushi);
_bf16 = __ushort_as_bfloat16(ushi);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __float22bfloat162_rn(const float2 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __float22bfloat162_rn(const float2 a);
// CHECK: bf162 = __float22bfloat162_rn(f2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ int main() {
double dx = 0.0f;
float fa = 0.0f;
float fx = 0.0f;
short int shi = 0;
unsigned short int ushi = 0;
double2 d2 = { 0.0f, 0.0f };
float2 f2 = { 0.0f, 0.0f };
__half_raw hrx = { 0 };
Expand All @@ -23,18 +25,25 @@ int main() {
#if CUDA_VERSION >= 11000
// CHECK: __hip_bfloat16 bf16 = { 0 };
__nv_bfloat16 bf16 = { 0 };
// CHECK: __hip_bfloat16 _bf16 = { 0.0f };
// CHECK-NEXT: __hip_bfloat16 bf16a = { 0.0f };
// CHECK-NEXT: __hip_bfloat16 bf16b = { 0.0f };
__nv_bfloat16 _bf16 = { 0.0f };
__nv_bfloat16 bf16a = { 0.0f };
__nv_bfloat16 bf16b = { 0.0f };

// CHECK: __hip_bfloat162 bf162 = { 0, 0 };
// CHECK-NEXT: __hip_bfloat162 bf162a = { 0, 0 };
// CHECK-NEXT: __hip_bfloat162 bf162b = { 0, 0 };
__nv_bfloat162 bf162 = { 0, 0 };
__nv_bfloat162 bf162a = { 0, 0 };
__nv_bfloat162 bf162b = { 0, 0 };

#if CUDA_VERSION < 11080 || CUDA_VERSION >= 12000
#if CUDA_VERSION < 11080 || CUDA_VERSION >= 12020
// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __bfloat162bfloat162(const __nv_bfloat16 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a);
// CHECK: bf162 = __bfloat162bfloat162(bf16);
bf162 = __bfloat162bfloat162(bf16);
// CHECK: bf162 = __bfloat162bfloat162(_bf16);
bf162 = __bfloat162bfloat162(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __lows2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b);
Expand All @@ -48,8 +57,58 @@ int main() {

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __high2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a);
// CHECK: bf16 = __high2bfloat16(bf162a);
bf16 = __high2bfloat16(bf162a);
// CHECK: _bf16 = __high2bfloat16(bf162a);
_bf16 = __high2bfloat16(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmax2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmax2(bf162a, bf162b);
bf162 = __hmax2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmin2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmin2(bf162a, bf162b);
bf162 = __hmin2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __low2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a);
// CHECK: _bf16 = __low2bfloat16(bf162a);
_bf16 = __low2bfloat16(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __halves2bfloat162(const __nv_bfloat16 a, const __nv_bfloat16 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b);
// CHECK: bf162 = __halves2bfloat162(bf16a, bf16b);
bf162 = __halves2bfloat162(bf16a, bf16b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __low2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __low2bfloat162(bf162a);
bf162 = __low2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __high2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __high2bfloat162(bf162a);
bf162 = __high2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ short int __bfloat16_as_short(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h);
// CHECK: shi = __bfloat16_as_short(_bf16);
shi = __bfloat16_as_short(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ unsigned short int __bfloat16_as_ushort(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h);
// CHECK: ushi = __bfloat16_as_ushort(_bf16);
ushi = __bfloat16_as_ushort(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __short_as_bfloat16(const short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a);
// CHECK: _bf16 = __short_as_bfloat16(shi);
_bf16 = __short_as_bfloat16(shi);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __ushort_as_bfloat16(const unsigned short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a);
// CHECK: _bf16 = __ushort_as_bfloat16(ushi);
_bf16 = __ushort_as_bfloat16(ushi);
#endif

#endif
Expand Down