Skip to content

Commit

Permalink
Merge pull request #1828 from emankov/HIPIFY
Browse files Browse the repository at this point in the history
[HIPIFY][#1769][fp16][feature] Support for `fp16` math - Part 3 - Functions
  • Loading branch information
emankov authored Jan 14, 2025
2 parents 329574c + 054151c commit 1f8e03e
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 27 deletions.
24 changes: 16 additions & 8 deletions bin/hipify-perl
Original file line number Diff line number Diff line change
Expand Up @@ -6236,6 +6236,7 @@ sub simpleSubstitutions {
subst("__bfloat1622float2", "__bfloat1622float2", "device_function");
subst("__bfloat162bfloat162", "__bfloat162bfloat162", "device_function");
subst("__bfloat162float", "__bfloat162float", "device_function");
subst("__bfloat16_as_short", "__bfloat16_as_short", "device_function");
subst("__brev", "__brev", "device_function");
subst("__brevll", "__brevll", "device_function");
subst("__byte_perm", "__byte_perm", "device_function");
Expand Down Expand Up @@ -6353,6 +6354,7 @@ sub simpleSubstitutions {
subst("__half2ushort_rz", "__half2ushort_rz", "device_function");
subst("__half_as_short", "__half_as_short", "device_function");
subst("__half_as_ushort", "__half_as_ushort", "device_function");
subst("__halves2bfloat162", "__halves2bfloat162", "device_function");
subst("__halves2half2", "__halves2half2", "device_function");
subst("__hbeq2", "__hbeq2", "device_function");
subst("__hbequ2", "__hbequ2", "device_function");
Expand Down Expand Up @@ -6384,6 +6386,7 @@ sub simpleSubstitutions {
subst("__hgtu", "__hgtu", "device_function");
subst("__hgtu2", "__hgtu2", "device_function");
subst("__high2bfloat16", "__high2bfloat16", "device_function");
subst("__high2bfloat162", "__high2bfloat162", "device_function");
subst("__high2float", "__high2float", "device_function");
subst("__high2half", "__high2half", "device_function");
subst("__high2half2", "__high2half2", "device_function");
Expand All @@ -6402,8 +6405,10 @@ sub simpleSubstitutions {
subst("__hltu", "__hltu", "device_function");
subst("__hltu2", "__hltu2", "device_function");
subst("__hmax", "__hmax", "device_function");
subst("__hmax2", "__hmax2", "device_function");
subst("__hmax_nan", "__hmax_nan", "device_function");
subst("__hmin", "__hmin", "device_function");
subst("__hmin2", "__hmin2", "device_function");
subst("__hmin_nan", "__hmin_nan", "device_function");
subst("__hmul", "__hmul", "device_function");
subst("__hmul2", "__hmul2", "device_function");
Expand Down Expand Up @@ -6449,6 +6454,8 @@ sub simpleSubstitutions {
subst("__log2f", "__log2f", "device_function");
subst("__logf", "__logf", "device_function");
subst("__longlong_as_double", "__longlong_as_double", "device_function");
subst("__low2bfloat16", "__low2bfloat16", "device_function");
subst("__low2bfloat162", "__low2bfloat162", "device_function");
subst("__low2float", "__low2float", "device_function");
subst("__low2half", "__low2half", "device_function");
subst("__low2half2", "__low2half2", "device_function");
Expand Down Expand Up @@ -6687,6 +6694,7 @@ sub simpleSubstitutions {
subst("lrintf", "lrintf", "device_function");
subst("lround", "lround", "device_function");
subst("lroundf", "lroundf", "device_function");
subst("make_half2", "make_half2", "device_function");
subst("max", "max", "device_function");
subst("min", "min", "device_function");
subst("modf", "modf", "device_function");
Expand Down Expand Up @@ -9114,6 +9122,7 @@ sub transformHostFunctions {
"modf",
"min",
"max",
"make_half2",
"lroundf",
"lround",
"lrintf",
Expand Down Expand Up @@ -9352,6 +9361,8 @@ sub transformHostFunctions {
"__low2half2",
"__low2half",
"__low2float",
"__low2bfloat162",
"__low2bfloat16",
"__longlong_as_double",
"__logf",
"__log2f",
Expand Down Expand Up @@ -9397,8 +9408,10 @@ sub transformHostFunctions {
"__hmul2",
"__hmul",
"__hmin_nan",
"__hmin2",
"__hmin",
"__hmax_nan",
"__hmax2",
"__hmax",
"__hltu2",
"__hltu",
Expand All @@ -9417,6 +9430,7 @@ sub transformHostFunctions {
"__high2half2",
"__high2half",
"__high2float",
"__high2bfloat162",
"__high2bfloat16",
"__hgtu2",
"__hgtu",
Expand Down Expand Up @@ -9448,6 +9462,7 @@ sub transformHostFunctions {
"__hbequ2",
"__hbeq2",
"__halves2half2",
"__halves2bfloat162",
"__half_as_ushort",
"__half_as_short",
"__half2ushort_rz",
Expand Down Expand Up @@ -9565,6 +9580,7 @@ sub transformHostFunctions {
"__byte_perm",
"__brevll",
"__brev",
"__bfloat16_as_short",
"__bfloat162float",
"__bfloat162bfloat162",
"__bfloat1622float2",
Expand Down Expand Up @@ -9606,7 +9622,6 @@ sub countSupportedDeviceFunctions {
"mulhi",
"mul64hi",
"mul24",
"make_half2",
"make_bfloat162",
"llmin",
"llmax",
Expand Down Expand Up @@ -9729,8 +9744,6 @@ sub countSupportedDeviceFunctions {
"__pm2",
"__pm1",
"__pm0",
"__low2bfloat162",
"__low2bfloat16",
"__ll2bfloat16_rz",
"__ll2bfloat16_ru",
"__ll2bfloat16_rn",
Expand All @@ -9754,14 +9767,11 @@ sub countSupportedDeviceFunctions {
"__hmul_rn",
"__hmul2_rn",
"__hmin2_nan",
"__hmin2",
"__hmax2_nan",
"__hmax2",
"__hltu2_mask",
"__hlt2_mask",
"__hleu2_mask",
"__hle2_mask",
"__high2bfloat162",
"__hgtu2_mask",
"__hgt2_mask",
"__hgeu2_mask",
Expand All @@ -9771,7 +9781,6 @@ sub countSupportedDeviceFunctions {
"__hequ2_mask",
"__heq2_mask",
"__hcmadd",
"__halves2bfloat162",
"__half2uchar_rz",
"__half2char_rz",
"__hadd_rn",
Expand Down Expand Up @@ -9831,7 +9840,6 @@ sub countSupportedDeviceFunctions {
"__dadd_rd",
"__brkpt",
"__bfloat16_as_ushort",
"__bfloat16_as_short",
"__bfloat162ushort_rz",
"__bfloat162ushort_ru",
"__bfloat162ushort_rn",
Expand Down
16 changes: 8 additions & 8 deletions docs/tables/CUDA_Device_API_supported_by_HIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
|`__bfloat162ushort_rn`|11.0| | | | | | | | | |
|`__bfloat162ushort_ru`|11.0| | | | | | | | | |
|`__bfloat162ushort_rz`|11.0| | | | | | | | | |
|`__bfloat16_as_short`|11.0| | | | | | | | | |
|`__bfloat16_as_short`|11.0| | | |`__bfloat16_as_short`|5.7.0| | | | |
|`__bfloat16_as_ushort`|11.0| | | | | | | | | |
|`__brev`| | | | |`__brev`|1.6.0| | | | |
|`__brevll`| | | | |`__brevll`|1.6.0| | | | |
Expand Down Expand Up @@ -224,7 +224,7 @@
|`__half2ushort_rz`| | | | |`__half2ushort_rz`|1.6.0| | | | |
|`__half_as_short`| | | | |`__half_as_short`|1.6.0| | | | |
|`__half_as_ushort`| | | | |`__half_as_ushort`|1.6.0| | | | |
|`__halves2bfloat162`|11.0| | | | | | | | | |
|`__halves2bfloat162`|11.0| | | |`__halves2bfloat162`|5.7.0| | | | |
|`__halves2half2`| | | | |`__halves2half2`|1.6.0| | | | |
|`__hbeq2`| | | | |`__hbeq2`|1.6.0| | | | |
|`__hbequ2`| | | | |`__hbequ2`|1.9.0| | | | |
Expand Down Expand Up @@ -265,7 +265,7 @@
|`__hgtu2`| | | | |`__hgtu2`|1.9.0| | | | |
|`__hgtu2_mask`|12.0| | | | | | | | | |
|`__high2bfloat16`|11.0| | | |`__high2bfloat16`|5.7.0| | | | |
|`__high2bfloat162`|11.0| | | | | | | | | |
|`__high2bfloat162`|11.0| | | |`__high2bfloat162`|5.7.0| | | | |
|`__high2float`| | | | |`__high2float`|1.6.0| | | | |
|`__high2half`| | | | |`__high2half`|1.6.0| | | | |
|`__high2half2`| | | | |`__high2half2`|1.6.0| | | | |
Expand All @@ -288,11 +288,11 @@
|`__hltu2`| | | | |`__hltu2`|1.9.0| | | | |
|`__hltu2_mask`|12.0| | | | | | | | | |
|`__hmax`|11.0| | | |`__hmax`|5.5.0| | | | |
|`__hmax2`|11.0| | | | | | | | | |
|`__hmax2`|11.0| | | |`__hmax2`|5.7.0| | | | |
|`__hmax2_nan`|11.0| | | | | | | | | |
|`__hmax_nan`|11.0| | | |`__hmax_nan`|5.5.0| | | | |
|`__hmin`|11.0| | | |`__hmin`|5.5.0| | | | |
|`__hmin2`|11.0| | | | | | | | | |
|`__hmin2`|11.0| | | |`__hmin2`|5.7.0| | | | |
|`__hmin2_nan`|11.0| | | | | | | | | |
|`__hmin_nan`|11.0| | | |`__hmin_nan`|5.5.0| | | | |
|`__hmul`| | | | |`__hmul`|1.6.0| | | | |
Expand Down Expand Up @@ -361,8 +361,8 @@
|`__log2f`| | | | |`__log2f`|1.6.0| | | | |
|`__logf`| | | | |`__logf`|1.6.0| | | | |
|`__longlong_as_double`| | | | |`__longlong_as_double`|1.6.0| | | | |
|`__low2bfloat16`|11.0| | | | | | | | | |
|`__low2bfloat162`|11.0| | | | | | | | | |
|`__low2bfloat16`|11.0| | | |`__low2bfloat16`|5.7.0| | | | |
|`__low2bfloat162`|11.0| | | |`__low2bfloat162`|5.7.0| | | | |
|`__low2float`| | | | |`__low2float`|1.6.0| | | | |
|`__low2half`| | | | |`__low2half`|1.6.0| | | | |
|`__low2half2`| | | | |`__low2half2`|1.6.0| | | | |
Expand Down Expand Up @@ -723,7 +723,7 @@
|`lround`| | | | |`lround`|1.6.0| | | | |
|`lroundf`| | | | |`lroundf`|1.6.0| | | | |
|`make_bfloat162`|12.2| | | | | | | | | |
|`make_half2`|12.2| | | | | | | | | |
|`make_half2`|12.2| | | |`make_half2`|4.5.0| | | | |
|`max`| | | | |`max`|1.6.0| | | | |
|`min`| | | | |`min`|1.6.0| | | | |
|`modf`| | | | |`modf`|1.9.0| | | | |
Expand Down
24 changes: 16 additions & 8 deletions src/CUDA2HIP_Device_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,11 +687,11 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"__double2half", {"__double2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__hmax", {"__hmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__hmax_nan", {"__hmax_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__hmax2", {"__hmax2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__hmax2", {"__hmax2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__hmax2_nan", {"__hmax2_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__hmin", {"__hmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__hmin_nan", {"__hmin_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__hmin2", {"__hmin2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__hmin2", {"__hmin2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__hmin2_nan", {"__hmin2_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__stwb", {"__stwb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__stcg", {"__stcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
Expand All @@ -710,7 +710,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"__hgeu2_mask", {"__hgeu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__hltu2_mask", {"__hltu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__hgtu2_mask", {"__hgtu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"make_half2", {"make_half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"make_half2", {"make_half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__half2char_rz", {"__half2char_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__half2uchar_rz", {"__half2uchar_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// bfp16 functions
Expand Down Expand Up @@ -776,11 +776,11 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"__lows2bfloat162", {"__lows2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__highs2bfloat162", {"__highs2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__short_as_bfloat16", {"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ushort_as_bfloat16", {"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
Expand Down Expand Up @@ -1516,6 +1516,14 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP {
{"__lows2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__highs2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__high2bfloat16", {HIP_5070, HIP_0, HIP_0 }},
{"__hmin2", {HIP_5070, HIP_0, HIP_0 }},
{"__hmax2", {HIP_5070, HIP_0, HIP_0 }},
{"make_half2", {HIP_4050, HIP_0, HIP_0 }},
{"__low2bfloat16", {HIP_5070, HIP_0, HIP_0 }},
{"__halves2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__low2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__high2bfloat162", {HIP_5070, HIP_0, HIP_0 }},
{"__bfloat16_as_short", {HIP_5070, HIP_0, HIP_0 }},
};

const std::map<unsigned int, llvm::StringRef> CUDA_DEVICE_FUNCTION_API_SECTION_MAP {
Expand Down
56 changes: 53 additions & 3 deletions tests/unit_tests/synthetic/libraries/cudevice2hipdevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,22 @@ int main() {
double dx = 0.0f;
float fa = 0.0f;
float fx = 0.0f;
short int shi = 0;
double2 d2 = { 0.0f, 0.0f };
float2 f2 = { 0.0f, 0.0f };
__half hx = { 0.0f };
__half hy = { 0.0f };
__half2 h2 = { 0.0f, 0.0f };
__half_raw hrx = { 0 };
__half2_raw h2rx = { 0, 0 };

#if CUDA_VERSION >= 11000
// CHECK: __hip_bfloat16 bf16 = { 0 };
__nv_bfloat16 bf16 = { 0 };
// CHECK: __hip_bfloat16 bf16 = { 0.0f };
// CHECK-NEXT: __hip_bfloat16 bf16a = { 0.0f };
// CHECK-NEXT: __hip_bfloat16 bf16b = { 0.0f };
__nv_bfloat16 bf16 = { 0.0f };
__nv_bfloat16 bf16a = { 0.0f };
__nv_bfloat16 bf16b = { 0.0f };

// CHECK: __hip_bfloat16_raw bf16r = { 0 };
__nv_bfloat16_raw bf16r = { 0 };
Expand Down Expand Up @@ -56,6 +64,41 @@ int main() {
// HIP: __BF16_HOST_DEVICE_STATIC__ float2 __bfloat1622float2(const __hip_bfloat162 a);
// CHECK: f2 = __bfloat1622float2(bf162);
f2 = __bfloat1622float2(bf162);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmax2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmax2(bf162a, bf162b);
bf162 = __hmax2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __hmin2(const __nv_bfloat162 a, const __nv_bfloat162 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b);
// CHECK: bf162 = __hmin2(bf162a, bf162b);
bf162 = __hmin2(bf162a, bf162b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat16 __low2bfloat16(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a);
// CHECK: bf16 = __low2bfloat16(bf162a);
bf16 = __low2bfloat16(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __halves2bfloat162(const __nv_bfloat16 a, const __nv_bfloat16 b);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b);
// CHECK: bf162 = __halves2bfloat162(bf16a, bf16b);
bf162 = __halves2bfloat162(bf16a, bf16b);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __low2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __low2bfloat162(bf162a);
bf162 = __low2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ __nv_bfloat162 __high2bfloat162(const __nv_bfloat162 a);
// HIP: __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a);
// CHECK: bf162 = __high2bfloat162(bf162a);
bf162 = __high2bfloat162(bf162a);

// CUDA: __CUDA_HOSTDEVICE_BF16_DECL__ short int __bfloat16_as_short(const __nv_bfloat16 h);
// HIP: __BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h);
// CHECK: shi = __bfloat16_as_short(bf16);
shi = __bfloat16_as_short(bf16);
#endif

#if CUDA_VERSION >= 11080
Expand Down Expand Up @@ -146,10 +189,17 @@ int main() {
hrx = __nv_cvt_fp8_to_halfraw(fp8_storage_t, fp8_interpretation_t);

// CUDA: __CUDA_HOSTDEVICE_FP8_DECL__ __half2_raw __nv_cvt_fp8x2_to_halfraw2(const __nv_fp8x2_storage_t x, const __nv_fp8_interpretation_t fp8_interpretation);
// HIP: __FP8_HOST_DEVICE_STATIC__ __half2_raw __hip_cvt_fp8x2_to_halfraw2(const __hip_fp8x2_storage_t x, const __hip_fp8_interpretation_t type);
// HIP: __FP8_HOST_DEVICE_STATIC__ __half2_raw __hip_cvt_fp8x2_to_halfraw2(const __hip_fp8x2_storage_t x, const __hip_fp8_interpretation_t type);
// CHECK: h2rx = __hip_cvt_fp8x2_to_halfraw2(fp8x2_storage_t, fp8_interpretation_t);
h2rx = __nv_cvt_fp8x2_to_halfraw2(fp8x2_storage_t, fp8_interpretation_t);
#endif

#if CUDA_VERSION >= 12020
// CUDA: __CUDA_HOSTDEVICE_FP16_DECL__ __half2 make_half2(const __half x, const __half y);
// HIP: __HOST_DEVICE__ __half2 make_half2(__half x, __half y);
// CHECK: h2 = make_half2(hx, hy);
h2 = make_half2(hx, hy);
#endif

return 0;
}

0 comments on commit 1f8e03e

Please sign in to comment.