diff --git a/docs/reference/tables/CUDA_Device_API_supported_by_HIP.md b/docs/reference/tables/CUDA_Device_API_supported_by_HIP.md index 27aa483b..0f871305 100644 --- a/docs/reference/tables/CUDA_Device_API_supported_by_HIP.md +++ b/docs/reference/tables/CUDA_Device_API_supported_by_HIP.md @@ -47,8 +47,8 @@ |`__bfloat162ushort_rn`|11.0| | | | | | | | | | |`__bfloat162ushort_ru`|11.0| | | | | | | | | | |`__bfloat162ushort_rz`|11.0| | | | | | | | | | -|`__bfloat16_as_short`|11.0| | | |`__bfloat16_as_short`|5.7.0| | | | | -|`__bfloat16_as_ushort`|11.0| | | |`__bfloat16_as_ushort`|5.7.0| | | | | +|`__bfloat16_as_short`|11.0| |12.2| |`__bfloat16_as_short`|5.7.0| | | | | +|`__bfloat16_as_ushort`|11.0| |12.2| |`__bfloat16_as_ushort`|5.7.0| | | | | |`__brev`| | | | |`__brev`|1.6.0| | | | | |`__brevll`| | | | |`__brevll`|1.6.0| | | | | |`__brkpt`| | | | | | | | | | | @@ -224,7 +224,7 @@ |`__half2ushort_rz`| | | | |`__half2ushort_rz`|1.6.0| | | | | |`__half_as_short`| | | | |`__half_as_short`|1.6.0| | | | | |`__half_as_ushort`| | | | |`__half_as_ushort`|1.6.0| | | | | -|`__halves2bfloat162`|11.0| | | |`__halves2bfloat162`|5.7.0| | | | | +|`__halves2bfloat162`|11.0| |12.2| |`__halves2bfloat162`|5.7.0| | | | | |`__halves2half2`| | | | |`__halves2half2`|1.6.0| | | | | |`__hbeq2`| | | | |`__hbeq2`|1.6.0| | | | | |`__hbequ2`| | | | |`__hbequ2`|1.9.0| | | | | @@ -265,7 +265,7 @@ |`__hgtu2`| | | | |`__hgtu2`|1.9.0| | | | | |`__hgtu2_mask`|12.0| | | | | | | | | | |`__high2bfloat16`|11.0| | | |`__high2bfloat16`|5.7.0| | | | | -|`__high2bfloat162`|11.0| | | |`__high2bfloat162`|5.7.0| | | | | +|`__high2bfloat162`|11.0| |12.2| |`__high2bfloat162`|5.7.0| | | | | |`__high2float`| | | | |`__high2float`|1.6.0| | | | | |`__high2half`| | | | |`__high2half`|1.6.0| | | | | |`__high2half2`| | | | |`__high2half2`|1.6.0| | | | | @@ -288,11 +288,11 @@ |`__hltu2`| | | | |`__hltu2`|1.9.0| | | | | |`__hltu2_mask`|12.0| | | | | | | | | | |`__hmax`|11.0| | | |`__hmax`|5.5.0| | | | | -|`__hmax2`|11.0| | | |`__hmax2`|5.7.0| | | | | +|`__hmax2`|11.0| |12.2| |`__hmax2`|5.7.0| | | | | |`__hmax2_nan`|11.0| | | | | | | | | | |`__hmax_nan`|11.0| | | |`__hmax_nan`|5.5.0| | | | | |`__hmin`|11.0| | | |`__hmin`|5.5.0| | | | | -|`__hmin2`|11.0| | | |`__hmin2`|5.7.0| | | | | +|`__hmin2`|11.0| |12.2| |`__hmin2`|5.7.0| | | | | |`__hmin2_nan`|11.0| | | | | | | | | | |`__hmin_nan`|11.0| | | |`__hmin_nan`|5.5.0| | | | | |`__hmul`| | | | |`__hmul`|1.6.0| | | | | @@ -361,8 +361,8 @@ |`__log2f`| | | | |`__log2f`|1.6.0| | | | | |`__logf`| | | | |`__logf`|1.6.0| | | | | |`__longlong_as_double`| | | | |`__longlong_as_double`|1.6.0| | | | | -|`__low2bfloat16`|11.0| | | |`__low2bfloat16`|5.7.0| | | | | -|`__low2bfloat162`|11.0| | | |`__low2bfloat162`|5.7.0| | | | | +|`__low2bfloat16`|11.0| |12.2| |`__low2bfloat16`|5.7.0| | | | | +|`__low2bfloat162`|11.0| |12.2| |`__low2bfloat162`|5.7.0| | | | | |`__low2float`| | | | |`__low2float`|1.6.0| | | | | |`__low2half`| | | | |`__low2half`|1.6.0| | | | | |`__low2half2`| | | | |`__low2half2`|1.6.0| | | | | @@ -411,7 +411,7 @@ |`__short2half_rn`| | | | |`__short2half_rn`|1.6.0| | | | | |`__short2half_ru`| | | | |`__short2half_ru`|1.6.0| | | | | |`__short2half_rz`| | | | |`__short2half_rz`|1.6.0| | | | | -|`__short_as_bfloat16`|11.0| | | |`__short_as_bfloat16`|5.7.0| | | | | +|`__short_as_bfloat16`|11.0| |12.2| |`__short_as_bfloat16`|5.7.0| | | | | |`__short_as_half`| | | | |`__short_as_half`|1.9.0| | | | | |`__signbit`| | | | | | | | | | | |`__signbitf`| | | | | | | | | | | @@ -475,7 +475,7 @@ |`__ushort2half_rn`| | | | |`__ushort2half_rn`|1.6.0| | | | | |`__ushort2half_ru`| | | | |`__ushort2half_ru`|1.6.0| | | | | |`__ushort2half_rz`| | | | |`__ushort2half_rz`|1.6.0| | | | | -|`__ushort_as_bfloat16`|11.0| | | |`__ushort_as_bfloat16`|5.7.0| | | | | +|`__ushort_as_bfloat16`|11.0| |12.2| |`__ushort_as_bfloat16`|5.7.0| | | | | |`__ushort_as_half`| | | | |`__ushort_as_half`|1.6.0| | | | | |`__vabs2`| | | | | | | | | | | |`__vabs4`| | | | | | | | | | | diff --git a/src/CUDA2HIP.h b/src/CUDA2HIP.h index bee5e8bf..7efe2462 100644 --- a/src/CUDA2HIP.h +++ b/src/CUDA2HIP.h @@ -161,6 +161,7 @@ extern const std::map HIP_CAFFE2_TYPE_NAME_VER_ extern const std::map HIP_CAFFE2_FUNCTION_VER_MAP; extern const std::map HIP_DEVICE_TYPE_NAME_VER_MAP; extern const std::map HIP_DEVICE_FUNCTION_VER_MAP; +extern const std::map CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP; extern const std::map HIP_CUB_TYPE_NAME_VER_MAP; extern const std::map HIP_CUB_FUNCTION_VER_MAP; extern const std::map HIP_RTC_TYPE_NAME_VER_MAP; diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index 2742f405..394bcea1 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -1530,6 +1530,19 @@ const std::map HIP_DEVICE_FUNCTION_VER_MAP { {"__float22bfloat162_rn", {HIP_5070, HIP_0, HIP_0 }}, }; +const std::map CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP { + {"__hmax2", {CUDA_122}}, + {"__hmin2", {CUDA_122}}, + {"__low2bfloat16", {CUDA_122}}, + {"__halves2bfloat162", {CUDA_122}}, + {"__low2bfloat162", {CUDA_122}}, + {"__high2bfloat162", {CUDA_122}}, + {"__bfloat16_as_short", {CUDA_122}}, + {"__bfloat16_as_ushort", {CUDA_122}}, + {"__short_as_bfloat16", {CUDA_122}}, + {"__ushort_as_bfloat16", {CUDA_122}}, +}; + const std::map CUDA_DEVICE_FUNCTION_API_SECTION_MAP { {1, "Device Functions"}, {2, "Device Types"}, diff --git a/src/CUDA2HIP_Doc.cpp b/src/CUDA2HIP_Doc.cpp index 9b72b4d0..f61f513d 100644 --- a/src/CUDA2HIP_Doc.cpp +++ b/src/CUDA2HIP_Doc.cpp @@ -801,6 +801,7 @@ namespace doc { const typeMap &getTypes() const override { return CUDA_DEVICE_TYPE_NAME_MAP; } const versionMap &getFunctionVersions() const override { return CUDA_DEVICE_FUNCTION_VER_MAP; } const hipVersionMap &getHipFunctionVersions() const override { return HIP_DEVICE_FUNCTION_VER_MAP; } + const cudaChangedVersionMap &getCudaChangedFunctionVersions() const override { return CUDA_DEVICE_FUNCTION_CHANGED_VER_MAP; } const versionMap &getTypeVersions() const override { return CUDA_DEVICE_TYPE_NAME_VER_MAP; } const hipVersionMap &getHipTypeVersions() const override { return HIP_DEVICE_TYPE_NAME_VER_MAP; } const string &getName() const override { return sCUDEVICE; } diff --git a/tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu b/tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu index 12afdd7b..671b85a4 100644 --- a/tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu +++ b/tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu @@ -69,56 +69,6 @@ int main() { // CHECK: f2 = __bfloat1622float2(bf162); f2 = __bfloat1622float2(bf162); - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmax2(const __nv_bfloat162 a, const __nv_bfloat162 b); - // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b); - // CHECK: bf162 = __hmax2(bf162a, bf162b); - bf162 = __hmax2(bf162a, bf162b); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmin2(const __nv_bfloat162 a, const __nv_bfloat162 b); - // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b); - // CHECK: bf162 = __hmin2(bf162a, bf162b); - bf162 = __hmin2(bf162a, bf162b); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __low2bfloat16(const __nv_bfloat162 a); - // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a); - // CHECK: _bf16 = __low2bfloat16(bf162a); - _bf16 = __low2bfloat16(bf162a); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __halves2bfloat162(const __nv_bfloat16 a, const __nv_bfloat16 b); - // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b); - // CHECK: bf162 = __halves2bfloat162(bf16a, bf16b); - bf162 = __halves2bfloat162(bf16a, bf16b); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __low2bfloat162(const __nv_bfloat162 a); - // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a); - // CHECK: bf162 = __low2bfloat162(bf162a); - bf162 = __low2bfloat162(bf162a); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __high2bfloat162(const __nv_bfloat162 a); - // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a); - // CHECK: bf162 = __high2bfloat162(bf162a); - bf162 = __high2bfloat162(bf162a); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ short int __bfloat16_as_short(const __nv_bfloat16 h); - // HIP: __BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h); - // CHECK: shi = __bfloat16_as_short(_bf16); - shi = __bfloat16_as_short(_bf16); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ unsigned short int __bfloat16_as_ushort(const __nv_bfloat16 h); - // HIP: __BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h); - // CHECK: ushi = __bfloat16_as_ushort(_bf16); - ushi = __bfloat16_as_ushort(_bf16); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __short_as_bfloat16(const short int i); - // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a); - // CHECK: _bf16 = __short_as_bfloat16(shi); - _bf16 = __short_as_bfloat16(shi); - - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __ushort_as_bfloat16(const unsigned short int i); - // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a); - // CHECK: _bf16 = __ushort_as_bfloat16(ushi); - _bf16 = __ushort_as_bfloat16(ushi); - // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __float22bfloat162_rn(const float2 a); // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __float22bfloat162_rn(const float2 a); // CHECK: bf162 = __float22bfloat162_rn(f2); diff --git a/tests/unit_tests/synthetic/libraries/cudevice2hipdevice_before_11080_after_12011.cu b/tests/unit_tests/synthetic/libraries/cudevice2hipdevice_before_11080_after_12011.cu index fd0ae08c..fb1b962b 100644 --- a/tests/unit_tests/synthetic/libraries/cudevice2hipdevice_before_11080_after_12011.cu +++ b/tests/unit_tests/synthetic/libraries/cudevice2hipdevice_before_11080_after_12011.cu @@ -15,6 +15,8 @@ int main() { double dx = 0.0f; float fa = 0.0f; float fx = 0.0f; + short int shi = 0; + unsigned short int ushi = 0; double2 d2 = { 0.0f, 0.0f }; float2 f2 = { 0.0f, 0.0f }; __half_raw hrx = { 0 }; @@ -23,6 +25,13 @@ int main() { #if CUDA_VERSION >= 11000 // CHECK: __hip_bfloat16 bf16 = { 0 }; __nv_bfloat16 bf16 = { 0 }; + // CHECK: __hip_bfloat16 _bf16 = { 0.0f }; + // CHECK-NEXT: __hip_bfloat16 bf16a = { 0.0f }; + // CHECK-NEXT: __hip_bfloat16 bf16b = { 0.0f }; + __nv_bfloat16 _bf16 = { 0.0f }; + __nv_bfloat16 bf16a = { 0.0f }; + __nv_bfloat16 bf16b = { 0.0f }; + // CHECK: __hip_bfloat162 bf162 = { 0, 0 }; // CHECK-NEXT: __hip_bfloat162 bf162a = { 0, 0 }; // CHECK-NEXT: __hip_bfloat162 bf162b = { 0, 0 }; @@ -30,11 +39,11 @@ int main() { __nv_bfloat162 bf162a = { 0, 0 }; __nv_bfloat162 bf162b = { 0, 0 }; -#if CUDA_VERSION < 11080 || CUDA_VERSION >= 12000 +#if CUDA_VERSION < 11080 || CUDA_VERSION >= 12020 // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __bfloat162bfloat162(const __nv_bfloat16 a); // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a); - // CHECK: bf162 = __bfloat162bfloat162(bf16); - bf162 = __bfloat162bfloat162(bf16); + // CHECK: bf162 = __bfloat162bfloat162(_bf16); + bf162 = __bfloat162bfloat162(_bf16); // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __lows2bfloat162(const __nv_bfloat162 a, const __nv_bfloat162 b); // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b); @@ -48,8 +57,58 @@ int main() { // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __high2bfloat16(const __nv_bfloat162 a); // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a); - // CHECK: bf16 = __high2bfloat16(bf162a); - bf16 = __high2bfloat16(bf162a); + // CHECK: _bf16 = __high2bfloat16(bf162a); + _bf16 = __high2bfloat16(bf162a); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmax2(const __nv_bfloat162 a, const __nv_bfloat162 b); + // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b); + // CHECK: bf162 = __hmax2(bf162a, bf162b); + bf162 = __hmax2(bf162a, bf162b); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmin2(const __nv_bfloat162 a, const __nv_bfloat162 b); + // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b); + // CHECK: bf162 = __hmin2(bf162a, bf162b); + bf162 = __hmin2(bf162a, bf162b); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __low2bfloat16(const __nv_bfloat162 a); + // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a); + // CHECK: _bf16 = __low2bfloat16(bf162a); + _bf16 = __low2bfloat16(bf162a); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __halves2bfloat162(const __nv_bfloat16 a, const __nv_bfloat16 b); + // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b); + // CHECK: bf162 = __halves2bfloat162(bf16a, bf16b); + bf162 = __halves2bfloat162(bf16a, bf16b); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __low2bfloat162(const __nv_bfloat162 a); + // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a); + // CHECK: bf162 = __low2bfloat162(bf162a); + bf162 = __low2bfloat162(bf162a); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __high2bfloat162(const __nv_bfloat162 a); + // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a); + // CHECK: bf162 = __high2bfloat162(bf162a); + bf162 = __high2bfloat162(bf162a); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ short int __bfloat16_as_short(const __nv_bfloat16 h); + // HIP: __BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h); + // CHECK: shi = __bfloat16_as_short(_bf16); + shi = __bfloat16_as_short(_bf16); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ unsigned short int __bfloat16_as_ushort(const __nv_bfloat16 h); + // HIP: __BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h); + // CHECK: ushi = __bfloat16_as_ushort(_bf16); + ushi = __bfloat16_as_ushort(_bf16); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __short_as_bfloat16(const short int i); + // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a); + // CHECK: _bf16 = __short_as_bfloat16(shi); + _bf16 = __short_as_bfloat16(shi); + + // CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __ushort_as_bfloat16(const unsigned short int i); + // HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a); + // CHECK: _bf16 = __ushort_as_bfloat16(ushi); + _bf16 = __ushort_as_bfloat16(ushi); #endif #endif