Skip to content

Commit

Permalink
Merge pull request #1831 from emankov/HIPIFY
Browse files Browse the repository at this point in the history
[HIPIFY][#1769][fp16][feature] Support for `fp16` math - Part 4 - final
  • Loading branch information
emankov authored Jan 15, 2025
2 parents 0fca9a7 + 337f53e commit 793bfa5
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 26 deletions.
13 changes: 9 additions & 4 deletions bin/hipify-perl
Original file line number Diff line number Diff line change
Expand Up @@ -6237,6 +6237,7 @@ sub simpleSubstitutions {
subst("__bfloat162bfloat162", "__bfloat162bfloat162", "device_function");
subst("__bfloat162float", "__bfloat162float", "device_function");
subst("__bfloat16_as_short", "__bfloat16_as_short", "device_function");
subst("__bfloat16_as_ushort", "__bfloat16_as_ushort", "device_function");
subst("__brev", "__brev", "device_function");
subst("__brevll", "__brevll", "device_function");
subst("__byte_perm", "__byte_perm", "device_function");
Expand Down Expand Up @@ -6280,6 +6281,7 @@ sub simpleSubstitutions {
subst("__fdividef", "__fdividef", "device_function");
subst("__ffs", "__ffs", "device_function");
subst("__ffsll", "__ffsll", "device_function");
subst("__float22bfloat162_rn", "__float22bfloat162_rn", "device_function");
subst("__float22half2_rn", "__float22half2_rn", "device_function");
subst("__float2bfloat16", "__float2bfloat16", "device_function");
subst("__float2half", "__float2half", "device_function");
Expand Down Expand Up @@ -6495,6 +6497,7 @@ sub simpleSubstitutions {
subst("__short2half_rn", "__short2half_rn", "device_function");
subst("__short2half_ru", "__short2half_ru", "device_function");
subst("__short2half_rz", "__short2half_rz", "device_function");
subst("__short_as_bfloat16", "__short_as_bfloat16", "device_function");
subst("__short_as_half", "__short_as_half", "device_function");
subst("__sincosf", "__sincosf", "device_function");
subst("__sinf", "__sinf", "device_function");
Expand Down Expand Up @@ -6538,6 +6541,7 @@ sub simpleSubstitutions {
subst("__ushort2half_rn", "__ushort2half_rn", "device_function");
subst("__ushort2half_ru", "__ushort2half_ru", "device_function");
subst("__ushort2half_rz", "__ushort2half_rz", "device_function");
subst("__ushort_as_bfloat16", "__ushort_as_bfloat16", "device_function");
subst("__ushort_as_half", "__ushort_as_half", "device_function");
subst("abs", "abs", "device_function");
subst("acos", "acos", "device_function");
Expand Down Expand Up @@ -6786,6 +6790,7 @@ sub simpleSubstitutions {
subst("__nv_fp8x4_e4m3", "__hip_fp8x4_e4m3_fnuz", "device_type");
subst("__nv_fp8x4_storage_t", "__hip_fp8x4_storage_t", "device_type");
subst("__nv_saturation_t", "__hip_saturation_t", "device_type");
subst("nv_bfloat16", "hip_bfloat16", "device_type");
subst("caffe2\/core\/common_cudnn.h", "caffe2\/core\/hip\/common_miopen.h", "include");
subst("caffe2\/operators\/spatial_batch_norm_op.h", "caffe2\/operators\/hip\/spatial_batch_norm_op_miopen.hip", "include");
subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include");
Expand Down Expand Up @@ -9279,6 +9284,7 @@ sub transformHostFunctions {
"acos",
"abs",
"__ushort_as_half",
"__ushort_as_bfloat16",
"__ushort2half_rz",
"__ushort2half_ru",
"__ushort2half_rn",
Expand Down Expand Up @@ -9322,6 +9328,7 @@ sub transformHostFunctions {
"__sinf",
"__sincosf",
"__short_as_half",
"__short_as_bfloat16",
"__short2half_rz",
"__short2half_ru",
"__short2half_rn",
Expand Down Expand Up @@ -9537,6 +9544,7 @@ sub transformHostFunctions {
"__float2half",
"__float2bfloat16",
"__float22half2_rn",
"__float22bfloat162_rn",
"__ffsll",
"__ffs",
"__fdividef",
Expand Down Expand Up @@ -9580,6 +9588,7 @@ sub transformHostFunctions {
"__byte_perm",
"__brevll",
"__brev",
"__bfloat16_as_ushort",
"__bfloat16_as_short",
"__bfloat162float",
"__bfloat162bfloat162",
Expand Down Expand Up @@ -9713,7 +9722,6 @@ sub countSupportedDeviceFunctions {
"__vabsdiffs2",
"__vabs4",
"__vabs2",
"__ushort_as_bfloat16",
"__ushort2bfloat16_rz",
"__ushort2bfloat16_ru",
"__ushort2bfloat16_rn",
Expand All @@ -9734,7 +9742,6 @@ sub countSupportedDeviceFunctions {
"__signbitl",
"__signbitf",
"__signbit",
"__short_as_bfloat16",
"__short2bfloat16_rz",
"__short2bfloat16_ru",
"__short2bfloat16_rn",
Expand Down Expand Up @@ -9809,7 +9816,6 @@ sub countSupportedDeviceFunctions {
"__float2bfloat16_rn",
"__float2bfloat16_rd",
"__float2bfloat162_rn",
"__float22bfloat162_rn",
"__finitel",
"__finitef",
"__finite",
Expand Down Expand Up @@ -9839,7 +9845,6 @@ sub countSupportedDeviceFunctions {
"__dadd_ru",
"__dadd_rd",
"__brkpt",
"__bfloat16_as_ushort",
"__bfloat162ushort_rz",
"__bfloat162ushort_ru",
"__bfloat162ushort_rn",
Expand Down
10 changes: 5 additions & 5 deletions docs/tables/CUDA_Device_API_supported_by_HIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
|`__bfloat162ushort_ru`|11.0| | | | | | | | | |
|`__bfloat162ushort_rz`|11.0| | | | | | | | | |
|`__bfloat16_as_short`|11.0| | | |`__bfloat16_as_short`|5.7.0| | | | |
|`__bfloat16_as_ushort`|11.0| | | | | | | | | |
|`__bfloat16_as_ushort`|11.0| | | |`__bfloat16_as_ushort`|5.7.0| | | | |
|`__brev`| | | | |`__brev`|1.6.0| | | | |
|`__brevll`| | | | |`__brevll`|1.6.0| | | | |
|`__brkpt`| | | | | | | | | | |
Expand Down Expand Up @@ -121,7 +121,7 @@
|`__finite`| | | | | | | | | | |
|`__finitef`| | | | | | | | | | |
|`__finitel`| | | | | | | | | | |
|`__float22bfloat162_rn`|11.0| | | | | | | | | |
|`__float22bfloat162_rn`|11.0| | | |`__float22bfloat162_rn`|5.7.0| | | | |
|`__float22half2_rn`| | | | |`__float22half2_rn`|1.6.0| | | | |
|`__float2bfloat16`|11.0| | | |`__float2bfloat16`|5.7.0| | | | |
|`__float2bfloat162_rn`|11.0| | | | | | | | | |
Expand Down Expand Up @@ -411,7 +411,7 @@
|`__short2half_rn`| | | | |`__short2half_rn`|1.6.0| | | | |
|`__short2half_ru`| | | | |`__short2half_ru`|1.6.0| | | | |
|`__short2half_rz`| | | | |`__short2half_rz`|1.6.0| | | | |
|`__short_as_bfloat16`|11.0| | | | | | | | | |
|`__short_as_bfloat16`|11.0| | | |`__short_as_bfloat16`|5.7.0| | | | |
|`__short_as_half`| | | | |`__short_as_half`|1.9.0| | | | |
|`__signbit`| | | | | | | | | | |
|`__signbitf`| | | | | | | | | | |
Expand Down Expand Up @@ -475,7 +475,7 @@
|`__ushort2half_rn`| | | | |`__ushort2half_rn`|1.6.0| | | | |
|`__ushort2half_ru`| | | | |`__ushort2half_ru`|1.6.0| | | | |
|`__ushort2half_rz`| | | | |`__ushort2half_rz`|1.6.0| | | | |
|`__ushort_as_bfloat16`|11.0| | | | | | | | | |
|`__ushort_as_bfloat16`|11.0| | | |`__ushort_as_bfloat16`|5.7.0| | | | |
|`__ushort_as_half`| | | | |`__ushort_as_half`|1.6.0| | | | |
|`__vabs2`| | | | | | | | | | |
|`__vabs4`| | | | | | | | | | |
Expand Down Expand Up @@ -836,6 +836,6 @@
|`__nv_fp8x4_e5m2`|11.8| | | |`__hip_fp8x4_e5m2_fnuz`|6.2.0| | | | |
|`__nv_fp8x4_storage_t`|11.8| | | |`__hip_fp8x4_storage_t`|6.2.0| | | | |
|`__nv_saturation_t`|11.8| | | |`__hip_saturation_t`|6.2.0| | | | |
|`nv_bfloat16`|11.0| | | | | | | | | |
|`nv_bfloat16`|11.0| | | |`hip_bfloat16`|3.5.0| | | | |
|`nv_bfloat162`|11.0| | | | | | | | | |

12 changes: 8 additions & 4 deletions src/CUDA2HIP_Device_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,10 +781,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__short_as_bfloat16", {"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ushort_as_bfloat16", {"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__float22bfloat162_rn", {"__float22bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__short_as_bfloat16", {"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__ushort_as_bfloat16", {"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__float22bfloat162_rn", {"__float22bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__bfloat162char_rz", {"__bfloat162char_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162uchar_rz", {"__bfloat162uchar_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"make_bfloat162", {"make_bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
Expand Down Expand Up @@ -1524,6 +1524,10 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP {
{"__low2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__high2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__bfloat16_as_short", {HIP_5070, HIP_0, HIP_0 }},
{"__bfloat16_as_ushort", {HIP_5070, HIP_0, HIP_0 }},
{"__short_as_bfloat16", {HIP_5070, HIP_0, HIP_0 }},
{"__ushort_as_bfloat16", {HIP_5070, HIP_0, HIP_0 }},
{"__float22bfloat162_rn", {HIP_5070, HIP_0, HIP_0 }},
};

const std::map<unsigned int, llvm::StringRef> CUDA_DEVICE_FUNCTION_API_SECTION_MAP {
Expand Down
3 changes: 2 additions & 1 deletion src/CUDA2HIP_Device_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_TYPE_NAME_MAP {
{"__half2_raw", {"__half2_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
// Bfloat16 Precision Device types
{"__nv_bfloat16", {"__hip_bfloat16", "rocblas_bfloat16", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"nv_bfloat16", {"hip_bfloat16", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
{"nv_bfloat16", {"hip_bfloat16", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"__nv_bfloat16_raw", {"__hip_bfloat16_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"__nv_bfloat162", {"__hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
{"nv_bfloat162", {"hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
Expand Down Expand Up @@ -102,6 +102,7 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_TYPE_NAME_VER_MAP {
{"__hip_bfloat16_raw", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_bfloat162_raw", {HIP_6020, HIP_0, HIP_0 }},
{"__hip_bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"hip_bfloat16", {HIP_3050, HIP_0, HIP_0 }},

{"rocblas_half", {HIP_1050, HIP_0, HIP_0 }},
{"rocblas_bfloat16", {HIP_3050, HIP_0, HIP_0 }},
Expand Down
48 changes: 36 additions & 12 deletions tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ int main() {
float fa = 0.0f;
float fx = 0.0f;
short int shi = 0;
unsigned short int ushi = 0;
double2 d2 = { 0.0f, 0.0f };
float2 f2 = { 0.0f, 0.0f };
__half hx = { 0.0f };
Expand All @@ -25,13 +26,16 @@ int main() {
__half2_raw h2rx = { 0, 0 };

#if CUDA_VERSION >= 11000
// CHECK: __hip_bfloat16 bf16 = { 0.0f };
// CHECK: __hip_bfloat16 _bf16 = { 0.0f };
// CHECK-NEXT: __hip_bfloat16 bf16a = { 0.0f };
// CHECK-NEXT: __hip_bfloat16 bf16b = { 0.0f };
__nv_bfloat16 bf16 = { 0.0f };
__nv_bfloat16 _bf16 = { 0.0f };
__nv_bfloat16 bf16a = { 0.0f };
__nv_bfloat16 bf16b = { 0.0f };

// CHECK: hip_bfloat16 bf16 = { 0 };
nv_bfloat16 bf16 = { 0 };

// CHECK: __hip_bfloat16_raw bf16r = { 0 };
__nv_bfloat16_raw bf16r = { 0 };

Expand All @@ -47,18 +51,18 @@ int main() {

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __double2bfloat16(const double a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __double2bfloat16(const double a)
// CHECK: bf16 = __double2bfloat16(da);
bf16 = __double2bfloat16(da);
// CHECK: _bf16 = __double2bfloat16(da);
_bf16 = __double2bfloat16(da);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __float2bfloat16(const float a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __float2bfloat16(float f);
// CHECK: bf16 = __float2bfloat16(fa);
bf16 = __float2bfloat16(fa);
// CHECK: _bf16 = __float2bfloat16(fa);
_bf16 = __float2bfloat16(fa);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ float __bfloat162float(const __nv_bfloat16 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ float __bfloat162float(__hip_bfloat16 a);
// CHECK: bf16 = __bfloat162float(fa);
bf16 = __bfloat162float(fa);
// CHECK: _bf16 = __bfloat162float(fa);
_bf16 = __bfloat162float(fa);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ float2 __bfloat1622float2(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ float2 __bfloat1622float2(const __hip_bfloat162 a);
Expand All @@ -77,8 +81,8 @@ int main() {

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __low2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a);
// CHECK: bf16 = __low2bfloat16(bf162a);
bf16 = __low2bfloat16(bf162a);
// CHECK: _bf16 = __low2bfloat16(bf162a);
_bf16 = __low2bfloat16(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __halves2bfloat162(const __nv_bfloat16 a, const __nv_bfloat16 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b);
Expand All @@ -97,8 +101,28 @@ int main() {

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ short int __bfloat16_as_short(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h);
// CHECK: shi = __bfloat16_as_short(bf16);
shi = __bfloat16_as_short(bf16);
// CHECK: shi = __bfloat16_as_short(_bf16);
shi = __bfloat16_as_short(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ unsigned short int __bfloat16_as_ushort(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h);
// CHECK: ushi = __bfloat16_as_ushort(_bf16);
ushi = __bfloat16_as_ushort(_bf16);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __short_as_bfloat16(const short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a);
// CHECK: _bf16 = __short_as_bfloat16(shi);
_bf16 = __short_as_bfloat16(shi);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __ushort_as_bfloat16(const unsigned short int i);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a);
// CHECK: _bf16 = __ushort_as_bfloat16(ushi);
_bf16 = __ushort_as_bfloat16(ushi);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __float22bfloat162_rn(const float2 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __float22bfloat162_rn(const float2 a);
// CHECK: bf162 = __float22bfloat162_rn(f2);
bf162 = __float22bfloat162_rn(f2);
#endif

#if CUDA_VERSION >= 11080
Expand Down

0 comments on commit 793bfa5

Please sign in to comment.