From 032e2c24ac21ae67f3dbcdd4ba87cfc85e7ac28c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 10 Dec 2024 17:16:55 +0100 Subject: [PATCH] [HIPIFY][SWDEV-501192][RT][6.4.0] Added experimental support for `hipStreamBatchMemOp` API + Introduced `ROCm 6.4.0` in `HIPIFY` tools + Updated synthetic tests, the regenerated `hipify-perl`, and `Driver` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 13 +++++++++++-- .../CUDA_Driver_API_functions_supported_by_HIP.md | 10 +++++----- src/CUDA2HIP_Driver_API_functions.cpp | 5 +++-- src/CUDA2HIP_Driver_API_types.cpp | 8 +++++--- src/Statistics.cpp | 1 + src/Statistics.h | 3 ++- tests/unit_tests/synthetic/driver_functions.cu | 14 ++++++++++++++ tests/unit_tests/synthetic/driver_typedefs.cu | 3 +++ tests/unit_tests/synthetic/driver_unions.cu | 7 +++++++ .../synthetic/runtime_functions_12000.cu | 2 +- 10 files changed, 52 insertions(+), 14 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index c6e048a9..1d65632a 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1398,7 +1398,11 @@ my %removed_funcs = ( ); my %experimental_funcs = ( - + "cuStreamBatchMemOp_v2" => "6.4.0", + "cuStreamBatchMemOp" => "6.4.0", + "CUstreamBatchMemOpParams_v1" => "6.4.0", + "CUstreamBatchMemOpParams_union" => "6.4.0", + "CUstreamBatchMemOpParams" => "6.4.0" ); $print_stats = 1 if $examine; @@ -1536,6 +1540,11 @@ sub subst { } sub experimentalSubstitutions { + subst("cuStreamBatchMemOp", "hipStreamBatchMemOp", "stream_memory"); + subst("cuStreamBatchMemOp_v2", "hipStreamBatchMemOp", "stream_memory"); + subst("CUstreamBatchMemOpParams", "hipStreamBatchMemOpParams", "type"); + subst("CUstreamBatchMemOpParams_union", "hipStreamBatchMemOpParams_union", "type"); + subst("CUstreamBatchMemOpParams_v1", "hipStreamBatchMemOpParams", "type"); } sub rocSubstitutions { @@ -14190,7 +14199,7 @@ if ($help) { print STDERR "$USAGE\n"; } if ($version) { - print STDERR "HIP version 6.3.0\n"; + print STDERR "HIP version 6.4.0\n"; } while (@ARGV) { $fileName=shift (@ARGV); diff --git a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md index 2df6936e..4107fb85 100644 --- a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md @@ -1375,9 +1375,9 @@ |`CUstreamAttrValue`|11.0| | | | | | | | | | |`CUstreamAttrValue_union`|11.0| | | | | | | | | | |`CUstreamAttrValue_v1`|11.3| | | | | | | | | | -|`CUstreamBatchMemOpParams`|8.0| | | | | | | | | | -|`CUstreamBatchMemOpParams_union`|8.0| | | | | | | | | | -|`CUstreamBatchMemOpParams_v1`|11.3| | | | | | | | | | +|`CUstreamBatchMemOpParams`|8.0| | | |`hipStreamBatchMemOpParams`|6.4.0| | | |6.4.0| +|`CUstreamBatchMemOpParams_union`|8.0| | | |`hipStreamBatchMemOpParams_union`|6.4.0| | | |6.4.0| +|`CUstreamBatchMemOpParams_v1`|11.3| | | |`hipStreamBatchMemOpParams`|6.4.0| | | |6.4.0| |`CUstreamBatchMemOpType`|8.0| | | | | | | | | | |`CUstreamBatchMemOpType_enum`|8.0| | | | | | | | | | |`CUstreamCallback`| | | | |`hipStreamCallback_t`|1.6.0| | | | | @@ -1837,8 +1837,8 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| -|`cuStreamBatchMemOp`|8.0| | | | | | | | | | -|`cuStreamBatchMemOp_v2`|11.7| | | | | | | | | | +|`cuStreamBatchMemOp`|8.0| | | |`hipStreamBatchMemOp`|6.4.0| | | |6.4.0| +|`cuStreamBatchMemOp_v2`|11.7| | | |`hipStreamBatchMemOp`|6.4.0| | | |6.4.0| |`cuStreamWaitValue32`|8.0| | | |`hipStreamWaitValue32`|4.2.0| | | | | |`cuStreamWaitValue32_v2`|11.7| | | |`hipStreamWaitValue32`|4.2.0| | | | | |`cuStreamWaitValue64`|9.0| | | |`hipStreamWaitValue64`|4.2.0| | | | | diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 56a7326a..b482e20e 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -556,8 +556,8 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 21. Stream Memory Operations // no analogues - {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", "", CONV_STREAM_MEMORY, API_DRIVER, SEC::STREAM_MEMORY, HIP_UNSUPPORTED}}, - {"cuStreamBatchMemOp_v2", {"hipStreamBatchMemOp", "", CONV_STREAM_MEMORY, API_DRIVER, SEC::STREAM_MEMORY, HIP_UNSUPPORTED}}, + {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", "", CONV_STREAM_MEMORY, API_DRIVER, SEC::STREAM_MEMORY, HIP_EXPERIMENTAL}}, + {"cuStreamBatchMemOp_v2", {"hipStreamBatchMemOp", "", CONV_STREAM_MEMORY, API_DRIVER, SEC::STREAM_MEMORY, HIP_EXPERIMENTAL}}, // CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags); // hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags, uint32_t mask __dparm(0xFFFFFFFF)); {"cuStreamWaitValue32", {"hipStreamWaitValue32", "", CONV_STREAM_MEMORY, API_DRIVER, SEC::STREAM_MEMORY}}, @@ -1665,6 +1665,7 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hipDrvGraphMemcpyNodeSetParams", {HIP_6030, HIP_0, HIP_0, }}, {"hipDrvGraphExecMemcpyNodeSetParams", {HIP_6030, HIP_0, HIP_0, }}, {"hipDrvGraphExecMemsetNodeSetParams", {HIP_6030, HIP_0, HIP_0, }}, + {"hipStreamBatchMemOp", {HIP_6040, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_DRIVER_FUNCTION_CHANGED_VER_MAP { diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index ac0f0dde..7277c671 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -475,9 +475,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // 2. Unions - {"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - {"CUstreamBatchMemOpParams_v1", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - {"CUstreamBatchMemOpParams_union", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + {"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_EXPERIMENTAL}}, + {"CUstreamBatchMemOpParams_v1", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_EXPERIMENTAL}}, + {"CUstreamBatchMemOpParams_union", {"hipStreamBatchMemOpParams_union", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_EXPERIMENTAL}}, // cudaKernelNodeAttrValue // NOTE: Starting from CUDA 11.8 CUlaunchAttributeValue is used instead of CUkernelNodeAttrValue: @@ -4304,4 +4304,6 @@ const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { {"HIP_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT", {HIP_6020, HIP_0, HIP_0, }}, {"HIP_MEMSET_NODE_PARAMS", {HIP_6010, HIP_0, HIP_0 }}, {"hipStreamLegacy", {HIP_6020, HIP_0, HIP_0, }}, + {"hipStreamBatchMemOpParams_union", {HIP_6040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipStreamBatchMemOpParams", {HIP_6040, HIP_0, HIP_0, HIP_LATEST}}, }; diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 20c5f4c2..f94a7ca2 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -641,6 +641,7 @@ std::string Statistics::getHipVersion(const hipVersions &ver) { case HIP_6011: return "6.1.1"; case HIP_6020: return "6.2.0"; case HIP_6030: return "6.3.0"; + case HIP_6040: return "6.4.0"; } return ""; } diff --git a/src/Statistics.h b/src/Statistics.h index 02f6bda7..095e21cf 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -391,7 +391,8 @@ enum hipVersions { HIP_6011 = 6011, HIP_6020 = 6020, HIP_6030 = 6030, - HIP_LATEST = HIP_6030, + HIP_6040 = 6040, + HIP_LATEST = HIP_6040, }; struct cudaAPIversions { diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu index 94c50d47..4fb6ea60 100644 --- a/tests/unit_tests/synthetic/driver_functions.cu +++ b/tests/unit_tests/synthetic/driver_functions.cu @@ -23,6 +23,7 @@ int main() { unsigned int flags = 0; unsigned int flags_2 = 0; + unsigned int icount = 0; uint64_t flags_64 = 0; int dim = 0; int count = 0; @@ -1081,6 +1082,14 @@ int main() { // HIP: hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice); // CHECK: result = hipDeviceGetP2PAttribute(value, deviceP2PAttribute, iBlockSize, iBlockSize_2); result = cuDeviceGetP2PAttribute(value, deviceP2PAttribute, iBlockSize, iBlockSize_2); + + // CHECK: hipStreamBatchMemOpParams streamBatchMemOpParams; + CUstreamBatchMemOpParams streamBatchMemOpParams; + + // CUDA: CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams *paramArray, unsigned int flags); + // HIP: hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, hipStreamBatchMemOpParams* paramArray, unsigned int flags); + // CHECK: result = hipStreamBatchMemOp(stream, icount, &streamBatchMemOpParams, flags); + result = cuStreamBatchMemOp(stream, icount, &streamBatchMemOpParams, flags); #endif #if CUDA_VERSION >= 9000 @@ -1863,6 +1872,11 @@ int main() { // HIP: hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags, uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF)); // CHECK: result = hipStreamWriteValue64(stream, deviceptr, u_value, flags); result = cuStreamWriteValue64_v2(stream, deviceptr, u_value, flags); + + // CUDA: CUresult CUDAAPI cuStreamBatchMemOp_v2(CUstream stream, unsigned int count, CUstreamBatchMemOpParams *paramArray, unsigned int flags); + // HIP: hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, hipStreamBatchMemOpParams* paramArray, unsigned int flags); + // CHECK: result = hipStreamBatchMemOp(stream, icount, &streamBatchMemOpParams, flags); + result = cuStreamBatchMemOp_v2(stream, icount, &streamBatchMemOpParams, flags); #endif #if CUDA_VERSION >= 12000 diff --git a/tests/unit_tests/synthetic/driver_typedefs.cu b/tests/unit_tests/synthetic/driver_typedefs.cu index 69aa0975..f2496620 100644 --- a/tests/unit_tests/synthetic/driver_typedefs.cu +++ b/tests/unit_tests/synthetic/driver_typedefs.cu @@ -60,6 +60,9 @@ int main() { // CHECK: HIP_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS_v1; CUDA_MEMSET_NODE_PARAMS_v1 MEMSET_NODE_PARAMS_v1; + + // CHECK: hipStreamBatchMemOpParams streamBatchMemOpParams_v1; + CUstreamBatchMemOpParams_v1 streamBatchMemOpParams_v1; #endif return 0; diff --git a/tests/unit_tests/synthetic/driver_unions.cu b/tests/unit_tests/synthetic/driver_unions.cu index 894939a5..116f67b3 100644 --- a/tests/unit_tests/synthetic/driver_unions.cu +++ b/tests/unit_tests/synthetic/driver_unions.cu @@ -7,6 +7,13 @@ int main() { printf("10. CUDA Driver API Unions synthetic test\n"); +#if CUDA_VERSION >= 8000 + // CHECK: hipStreamBatchMemOpParams streamBatchMemOpParams; + // CHECK-NEXT: hipStreamBatchMemOpParams_union streamBatchMemOpParams_union; + CUstreamBatchMemOpParams streamBatchMemOpParams; + CUstreamBatchMemOpParams_union streamBatchMemOpParams_union; +#endif + #if CUDA_VERSION >= 11000 // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; CUkernelNodeAttrValue kernelNodeAttrValue; diff --git a/tests/unit_tests/synthetic/runtime_functions_12000.cu b/tests/unit_tests/synthetic/runtime_functions_12000.cu index d2e646ae..9d9f6e27 100644 --- a/tests/unit_tests/synthetic/runtime_functions_12000.cu +++ b/tests/unit_tests/synthetic/runtime_functions_12000.cu @@ -46,7 +46,7 @@ int main() { // TODO: detect cudaGetDriverEntryPoint signature and report warning/error for old (before CUDA 12.0) signature // HIP: hipError_t hipGetProcAddress(const char* symbol, void** pfn, int hipVersion, uint64_t flags, hipDriverProcAddressQueryResult* symbolStatus); // TODO: add an explicit static_cast for ull - // CHECK: result = hipGetProcAddress(symbol.c_str(), &pfn, 603, ull, &driverProcAddressQueryResult); + // CHECK: result = hipGetProcAddress(symbol.c_str(), &pfn, 604, ull, &driverProcAddressQueryResult); result = cudaGetDriverEntryPoint(symbol.c_str(), &pfn, ull, &driverProcAddressQueryResult); #endif