Skip to content

Commit

Permalink
Merge pull request #1748 from emankov/HIPIFY
Browse files Browse the repository at this point in the history
[HIPIFY][SWDEV-493184][6.2.0][device][fix] Added missing support for device intrinsics and built-ins that appeared in HIP `6.2.0`
  • Loading branch information
emankov authored Nov 11, 2024
2 parents 45a91ee + b2265ff commit fbd55d4
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 13 deletions.
16 changes: 11 additions & 5 deletions bin/hipify-perl
Original file line number Diff line number Diff line change
Expand Up @@ -8644,8 +8644,12 @@ sub countSupportedDeviceFunctions {
"__short2half_ru",
"__short2half_rn",
"__short2half_rd",
"__shfl_xor_sync",
"__shfl_xor",
"__shfl_up_sync",
"__shfl_up",
"__shfl_sync",
"__shfl_down_sync",
"__shfl_down",
"__shfl",
"__saturatef",
Expand All @@ -8657,6 +8661,8 @@ sub countSupportedDeviceFunctions {
"__mulhi",
"__mul64hi",
"__mul24",
"__match_any_sync",
"__match_all_sync",
"__lows2half2",
"__lowhigh2highlow",
"__low2half2",
Expand Down Expand Up @@ -8871,11 +8877,15 @@ sub countSupportedDeviceFunctions {
"__byte_perm",
"__brevll",
"__brev",
"__ballot_sync",
"__ballot",
"__assertfail",
"__assert_fail",
"__any_sync",
"__any",
"__all"
"__all_sync",
"__all",
"__activemask"
)
{
# match device functions from the list, except those that have a namespace prefix (e.g. somenamespace::umin(...));
Expand Down Expand Up @@ -9022,10 +9032,6 @@ sub warnUnsupportedDeviceFunctions {
"__short2bfloat16_ru",
"__short2bfloat16_rn",
"__short2bfloat16_rd",
"__shfl_xor_sync",
"__shfl_up_sync",
"__shfl_sync",
"__shfl_down_sync",
"__prof_trigger",
"__pm3",
"__pm2",
Expand Down
14 changes: 10 additions & 4 deletions docs/tables/CUDA_Device_API_supported_by_HIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
|**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|
|:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|
|`_Pow_int`| | | | | | | | | | |
|`__activemask`|9.0| | | |`__activemask`|6.2.0| | | | |
|`__all`| | | | |`__all`|1.6.0| | | | |
|`__all_sync`|9.0| | | |`__all_sync`|6.2.0| | | | |
|`__any`| | | | |`__any`|1.6.0| | | | |
|`__any_sync`|9.0| | | |`__any_sync`|6.2.0| | | | |
|`__assert_fail`| | | | |`__assert_fail`|1.9.0| | | | |
|`__assertfail`| | | | |`__assertfail`|1.9.0| | | | |
|`__ballot`| | | | |`__ballot`|1.6.0| | | | |
|`__ballot_sync`|9.0| | | |`__ballot_sync`|6.2.0| | | | |
|`__bfloat1622float2`|11.0| | | | | | | | | |
|`__bfloat162bfloat162`|11.0| | | | | | | | | |
|`__bfloat162char_rz`|12.2| | | | | | | | | |
Expand Down Expand Up @@ -361,6 +365,8 @@
|`__lowhigh2highlow`| | | | |`__lowhigh2highlow`|1.6.0| | | | |
|`__lows2bfloat162`|11.0| | | | | | | | | |
|`__lows2half2`| | | | |`__lows2half2`|1.6.0| | | | |
|`__match_all_sync`|9.0| | | |`__match_all_sync`|6.2.0| | | | |
|`__match_any_sync`|9.0| | | |`__match_any_sync`|6.2.0| | | | |
|`__mul24`| | | | |`__mul24`|1.6.0| | | | |
|`__mul64hi`| | | | |`__mul64hi`|1.6.0| | | | |
|`__mulhi`| | | | |`__mulhi`|1.6.0| | | | |
Expand All @@ -387,12 +393,12 @@
|`__saturatef`| | | | |`__saturatef`|1.6.0| | | | |
|`__shfl`|7.5|9.0| | |`__shfl`|1.6.0| | | | |
|`__shfl_down`|7.5|9.0| | |`__shfl_down`|1.6.0| | | | |
|`__shfl_down_sync`| | | | | | | | | | |
|`__shfl_sync`| | | | | | | | | | |
|`__shfl_down_sync`|9.0| | | |`__shfl_down_sync`|6.2.0| | | | |
|`__shfl_sync`|9.0| | | |`__shfl_sync`|6.2.0| | | | |
|`__shfl_up`|7.5|9.0| | |`__shfl_up`|1.6.0| | | | |
|`__shfl_up_sync`| | | | | | | | | | |
|`__shfl_up_sync`|9.0| | | |`__shfl_up_sync`|6.2.0| | | | |
|`__shfl_xor`|7.5|9.0| | |`__shfl_xor`|1.6.0| | | | |
|`__shfl_xor_sync`| | | | | | | | | | |
|`__shfl_xor_sync`|9.0| | | |`__shfl_xor_sync`|6.2.0| | | | |
|`__short2bfloat16_rd`|11.0| | | | | | | | | |
|`__short2bfloat16_rn`|11.0| | | | | | | | | |
|`__short2bfloat16_ru`|11.0| | | | | | | | | |
Expand Down
36 changes: 32 additions & 4 deletions src/CUDA2HIP_Device_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,13 +672,13 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__shfl_sync", {"__shfl_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
{"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__shfl_up_sync", {"__shfl_up_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
{"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__shfl_down_sync", {"__shfl_down_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
{"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__shfl_xor_sync", {"__shfl_xor_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
{"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
Expand Down Expand Up @@ -829,6 +829,14 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"__nv_cvt_bfloat16raw2_to_fp8x2", {"__hip_cvt_bfloat16raw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__nv_cvt_fp8_to_halfraw", {"__hip_cvt_fp8_to_halfraw", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__nv_cvt_fp8x2_to_halfraw2", {"__hip_cvt_fp8x2_to_halfraw2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// intrinsics
{"__all_sync", {"__all_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__any_sync", {"__any_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__ballot_sync", {"__ballot_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__activemask", {"__activemask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
// built-ins
{"__match_any_sync", {"__match_any_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"__match_all_sync", {"__match_all_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
};

const std::map<llvm::StringRef, cudaAPIversions> CUDA_DEVICE_FUNCTION_VER_MAP {
Expand Down Expand Up @@ -959,6 +967,16 @@ const std::map<llvm::StringRef, cudaAPIversions> CUDA_DEVICE_FUNCTION_VER_MAP {
{"make_half2", {CUDA_122, CUDA_0, CUDA_0 }},
{"__half2char_rz", {CUDA_122, CUDA_0, CUDA_0 }},
{"__half2uchar_rz", {CUDA_122, CUDA_0, CUDA_0 }},
{"__all_sync", {CUDA_90, CUDA_0, CUDA_0 }},
{"__any_sync", {CUDA_90, CUDA_0, CUDA_0 }},
{"__ballot_sync", {CUDA_90, CUDA_0, CUDA_0 }},
{"__activemask", {CUDA_90, CUDA_0, CUDA_0 }},
{"__match_any_sync", {CUDA_90, CUDA_0, CUDA_0 }},
{"__match_all_sync", {CUDA_90, CUDA_0, CUDA_0 }},
{"__shfl_sync", {CUDA_90, CUDA_0, CUDA_0 }},
{"__shfl_up_sync", {CUDA_90, CUDA_0, CUDA_0 }},
{"__shfl_down_sync", {CUDA_90, CUDA_0, CUDA_0 }},
{"__shfl_xor_sync", {CUDA_90, CUDA_0, CUDA_0 }},
};

const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP {
Expand Down Expand Up @@ -1470,6 +1488,16 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP {
{"__hmax_nan", {HIP_5050, HIP_0, HIP_0 }},
{"__hmin", {HIP_5050, HIP_0, HIP_0 }},
{"__hmin_nan", {HIP_5050, HIP_0, HIP_0 }},
{"__all_sync", {HIP_6020, HIP_0, HIP_0 }},
{"__any_sync", {HIP_6020, HIP_0, HIP_0 }},
{"__ballot_sync", {HIP_6020, HIP_0, HIP_0 }},
{"__activemask", {HIP_6020, HIP_0, HIP_0 }},
{"__match_any_sync", {HIP_6020, HIP_0, HIP_0 }},
{"__match_all_sync", {HIP_6020, HIP_0, HIP_0 }},
{"__shfl_sync", {HIP_6020, HIP_0, HIP_0 }},
{"__shfl_up_sync", {HIP_6020, HIP_0, HIP_0 }},
{"__shfl_down_sync", {HIP_6020, HIP_0, HIP_0 }},
{"__shfl_xor_sync", {HIP_6020, HIP_0, HIP_0 }},
};

const std::map<unsigned int, llvm::StringRef> CUDA_DEVICE_FUNCTION_API_SECTION_MAP {
Expand Down

0 comments on commit fbd55d4

Please sign in to comment.