From efd880fa9573e618be1906933b8bb46d1f40cf9f Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 3 Feb 2024 13:40:11 +0000 Subject: [PATCH] [HIPIFY][BLAS][6.1][sync] Sync with `hipBLAS` and `rocBLAS` - Step 8 - NRM2 64bit + Updated `BLAS` synthetic tests, the regenerated hipify-perl, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 32 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 16 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 24 +++++++++----- .../synthetic/libraries/cublas2hipblas_v2.cu | 28 ++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 ++++++++++++++++ 7 files changed, 112 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 5e4ceed1..4f9b5e66 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1909,7 +1909,9 @@ sub rocSubstitutions { subst("cublasDger", "rocblas_dger", "library"); subst("cublasDger_v2", "rocblas_dger", "library"); subst("cublasDnrm2", "rocblas_dnrm2", "library"); + subst("cublasDnrm2_64", "rocblas_dnrm2_64", "library"); subst("cublasDnrm2_v2", "rocblas_dnrm2", "library"); + subst("cublasDnrm2_v2_64", "rocblas_dnrm2_64", "library"); subst("cublasDotEx", "rocblas_dot_ex", "library"); subst("cublasDotcEx", "rocblas_dotc_ex", "library"); subst("cublasDrot", "rocblas_drot", "library"); @@ -1967,7 +1969,9 @@ sub rocSubstitutions { subst("cublasDzasum_v2", "rocblas_dzasum", "library"); subst("cublasDzasum_v2_64", "rocblas_dzasum_64", "library"); subst("cublasDznrm2", "rocblas_dznrm2", "library"); + subst("cublasDznrm2_64", "rocblas_dznrm2_64", "library"); subst("cublasDznrm2_v2", "rocblas_dznrm2", "library"); + subst("cublasDznrm2_v2_64", "rocblas_dznrm2_64", "library"); subst("cublasGemmBatchedEx", "rocblas_gemm_batched_ex", "library"); subst("cublasGemmEx", "rocblas_gemm_ex", "library"); subst("cublasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", "library"); @@ -2038,7 +2042,9 @@ sub rocSubstitutions { subst("cublasScasum_v2", "rocblas_scasum", "library"); subst("cublasScasum_v2_64", "rocblas_scasum_64", "library"); subst("cublasScnrm2", "rocblas_scnrm2", "library"); + subst("cublasScnrm2_64", "rocblas_scnrm2_64", "library"); subst("cublasScnrm2_v2", "rocblas_scnrm2", "library"); + subst("cublasScnrm2_v2_64", "rocblas_scnrm2_64", "library"); subst("cublasScopy", "rocblas_scopy", "library"); subst("cublasScopy_64", "rocblas_scopy_64", "library"); subst("cublasScopy_v2", "rocblas_scopy", "library"); @@ -2070,7 +2076,9 @@ sub rocSubstitutions { subst("cublasSger", "rocblas_sger", "library"); subst("cublasSger_v2", "rocblas_sger", "library"); subst("cublasSnrm2", "rocblas_snrm2", "library"); + subst("cublasSnrm2_64", "rocblas_snrm2_64", "library"); subst("cublasSnrm2_v2", "rocblas_snrm2", "library"); + subst("cublasSnrm2_v2_64", "rocblas_snrm2_64", "library"); subst("cublasSrot", "rocblas_srot", "library"); subst("cublasSrot_v2", "rocblas_srot", "library"); subst("cublasSrotg", "rocblas_srotg", "library"); @@ -3889,7 +3897,9 @@ sub simpleSubstitutions { subst("cublasDgetriBatched", "hipblasDgetriBatched", "library"); subst("cublasDgetrsBatched", "hipblasDgetrsBatched", "library"); subst("cublasDnrm2", "hipblasDnrm2", "library"); + subst("cublasDnrm2_64", "hipblasDnrm2_64", "library"); subst("cublasDnrm2_v2", "hipblasDnrm2", "library"); + subst("cublasDnrm2_v2_64", "hipblasDnrm2_64", "library"); subst("cublasDotEx", "hipblasDotEx_v2", "library"); subst("cublasDotcEx", "hipblasDotcEx_v2", "library"); subst("cublasDrot", "hipblasDrot", "library"); @@ -3947,7 +3957,9 @@ sub simpleSubstitutions { subst("cublasDzasum_v2", "hipblasDzasum_v2", "library"); subst("cublasDzasum_v2_64", "hipblasDzasum_v2_64", "library"); subst("cublasDznrm2", "hipblasDznrm2_v2", "library"); + subst("cublasDznrm2_64", "hipblasDznrm2_v2_64", "library"); subst("cublasDznrm2_v2", "hipblasDznrm2_v2", "library"); + subst("cublasDznrm2_v2_64", "hipblasDznrm2_v2_64", "library"); subst("cublasGemmBatchedEx", "hipblasGemmBatchedEx_v2", "library"); subst("cublasGemmEx", "hipblasGemmEx_v2", "library"); subst("cublasGemmStridedBatchedEx", "hipblasGemmStridedBatchedEx_v2", "library"); @@ -4012,7 +4024,9 @@ sub simpleSubstitutions { subst("cublasScasum_v2", "hipblasScasum_v2", "library"); subst("cublasScasum_v2_64", "hipblasScasum_v2_64", "library"); subst("cublasScnrm2", "hipblasScnrm2_v2", "library"); + subst("cublasScnrm2_64", "hipblasScnrm2_v2_64", "library"); subst("cublasScnrm2_v2", "hipblasScnrm2_v2", "library"); + subst("cublasScnrm2_v2_64", "hipblasScnrm2_v2_64", "library"); subst("cublasScopy", "hipblasScopy", "library"); subst("cublasScopy_64", "hipblasScopy_64", "library"); subst("cublasScopy_v2", "hipblasScopy", "library"); @@ -4049,7 +4063,9 @@ sub simpleSubstitutions { subst("cublasSgetriBatched", "hipblasSgetriBatched", "library"); subst("cublasSgetrsBatched", "hipblasSgetrsBatched", "library"); subst("cublasSnrm2", "hipblasSnrm2", "library"); + subst("cublasSnrm2_64", "hipblasSnrm2_64", "library"); subst("cublasSnrm2_v2", "hipblasSnrm2", "library"); + subst("cublasSnrm2_v2_64", "hipblasSnrm2_64", "library"); subst("cublasSrot", "hipblasSrot", "library"); subst("cublasSrot_v2", "hipblasSrot", "library"); subst("cublasSrotg", "hipblasSrotg", "library"); @@ -10896,8 +10912,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSrotm_64", "cublasSrot_v2_64", "cublasSrot_64", - "cublasSnrm2_v2_64", - "cublasSnrm2_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSger_v2_64", @@ -10925,8 +10939,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSetLoggerCallback", "cublasSetKernelStream", "cublasSdgmm_64", - "cublasScnrm2_v2_64", - "cublasScnrm2_64", "cublasScalEx_64", "cublasRotmgEx", "cublasRotmEx_64", @@ -10970,8 +10982,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGemmEx_64", "cublasGemmBatchedEx_64", "cublasFree", - "cublasDznrm2_v2_64", - "cublasDznrm2_64", "cublasDtrttp", "cublasDtrsv_v2_64", "cublasDtrsv_64", @@ -11022,8 +11032,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDrot_64", "cublasDotcEx_64", "cublasDotEx_64", - "cublasDnrm2_v2_64", - "cublasDnrm2_64", "cublasDmatinvBatched", "cublasDger_v2_64", "cublasDger_64", @@ -11361,8 +11369,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSrotm_64", "cublasSrot_v2_64", "cublasSrot_64", - "cublasSnrm2_v2_64", - "cublasSnrm2_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSgetrsBatched", @@ -11395,8 +11401,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSetLoggerCallback", "cublasSetKernelStream", "cublasSdgmm_64", - "cublasScnrm2_v2_64", - "cublasScnrm2_64", "cublasScalEx_64", "cublasRotmgEx", "cublasRotmEx_64", @@ -11434,8 +11438,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGemmEx_64", "cublasGemmBatchedEx_64", "cublasFree", - "cublasDznrm2_v2_64", - "cublasDznrm2_64", "cublasDtrttp", "cublasDtrsv_v2_64", "cublasDtrsv_64", @@ -11486,8 +11488,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDrot_64", "cublasDotcEx_64", "cublasDotEx_64", - "cublasDnrm2_v2_64", - "cublasDnrm2_64", "cublasDmatinvBatched", "cublasDgetrsBatched", "cublasDgetriBatched", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 77014c9b..885eb053 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -258,9 +258,9 @@ |`cublasDdot_v2`| | | | |`hipblasDdot`|3.0.0| | | | | |`cublasDdot_v2_64`|12.0| | | |`hipblasDdot_64`|6.1.0| | | | | |`cublasDnrm2`| | | | |`hipblasDnrm2`|1.8.2| | | | | -|`cublasDnrm2_64`|12.0| | | | | | | | | | +|`cublasDnrm2_64`|12.0| | | |`hipblasDnrm2_64`|6.1.0| | | | | |`cublasDnrm2_v2`| | | | |`hipblasDnrm2`|1.8.2| | | | | -|`cublasDnrm2_v2_64`|12.0| | | | | | | | | | +|`cublasDnrm2_v2_64`|12.0| | | |`hipblasDnrm2_64`|6.1.0| | | | | |`cublasDrot`| | | | |`hipblasDrot`|3.0.0| | | | | |`cublasDrot_64`|12.0| | | | | | | | | | |`cublasDrot_v2`| | | | |`hipblasDrot`|3.0.0| | | | | @@ -286,9 +286,9 @@ |`cublasDzasum_v2`| | | | |`hipblasDzasum_v2`|6.0.0| | | | | |`cublasDzasum_v2_64`|12.0| | | |`hipblasDzasum_v2_64`|6.1.0| | | | | |`cublasDznrm2`| | | | |`hipblasDznrm2_v2`|6.0.0| | | | | -|`cublasDznrm2_64`|12.0| | | | | | | | | | +|`cublasDznrm2_64`|12.0| | | |`hipblasDznrm2_v2_64`|6.1.0| | | | | |`cublasDznrm2_v2`| | | | |`hipblasDznrm2_v2`|6.0.0| | | | | -|`cublasDznrm2_v2_64`|12.0| | | | | | | | | | +|`cublasDznrm2_v2_64`|12.0| | | |`hipblasDznrm2_v2_64`|6.1.0| | | | | |`cublasIcamax`| | | | |`hipblasIcamax_v2`|6.0.0| | | | | |`cublasIcamax_64`|12.0| | | |`hipblasIcamax_v2_64`|6.1.0| | | | | |`cublasIcamax_v2`| | | | |`hipblasIcamax_v2`|6.0.0| | | | | @@ -336,9 +336,9 @@ |`cublasScasum_v2`| | | | |`hipblasScasum_v2`|6.0.0| | | | | |`cublasScasum_v2_64`|12.0| | | |`hipblasScasum_v2_64`|6.1.0| | | | | |`cublasScnrm2`| | | | |`hipblasScnrm2_v2`|6.0.0| | | | | -|`cublasScnrm2_64`|12.0| | | | | | | | | | +|`cublasScnrm2_64`|12.0| | | |`hipblasScnrm2_v2_64`|6.1.0| | | | | |`cublasScnrm2_v2`| | | | |`hipblasScnrm2_v2`|6.0.0| | | | | -|`cublasScnrm2_v2_64`|12.0| | | | | | | | | | +|`cublasScnrm2_v2_64`|12.0| | | |`hipblasScnrm2_v2_64`|6.1.0| | | | | |`cublasScopy`| | | | |`hipblasScopy`|1.8.2| | | | | |`cublasScopy_64`|12.0| | | |`hipblasScopy_64`|6.1.0| | | | | |`cublasScopy_v2`| | | | |`hipblasScopy`|1.8.2| | | | | @@ -348,9 +348,9 @@ |`cublasSdot_v2`| | | | |`hipblasSdot`|3.0.0| | | | | |`cublasSdot_v2_64`|12.0| | | |`hipblasSdot_64`|6.1.0| | | | | |`cublasSnrm2`| | | | |`hipblasSnrm2`|1.8.2| | | | | -|`cublasSnrm2_64`|12.0| | | | | | | | | | +|`cublasSnrm2_64`|12.0| | | |`hipblasSnrm2_64`|6.1.0| | | | | |`cublasSnrm2_v2`| | | | |`hipblasSnrm2`|1.8.2| | | | | -|`cublasSnrm2_v2_64`|12.0| | | | | | | | | | +|`cublasSnrm2_v2_64`|12.0| | | |`hipblasSnrm2_64`|6.1.0| | | | | |`cublasSrot`| | | | |`hipblasSrot`|3.0.0| | | | | |`cublasSrot_64`|12.0| | | | | | | | | | |`cublasSrot_v2`| | | | |`hipblasSrot`|3.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index c7632dd8..d4783aaf 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -258,9 +258,9 @@ |`cublasDdot_v2`| | | | |`hipblasDdot`|3.0.0| | | | |`rocblas_ddot`|1.5.0| | | | | |`cublasDdot_v2_64`|12.0| | | |`hipblasDdot_64`|6.1.0| | | | |`rocblas_ddot_64`|6.1.0| | | | | |`cublasDnrm2`| | | | |`hipblasDnrm2`|1.8.2| | | | |`rocblas_dnrm2`|1.5.0| | | | | -|`cublasDnrm2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDnrm2_64`|12.0| | | |`hipblasDnrm2_64`|6.1.0| | | | |`rocblas_dnrm2_64`|6.1.0| | | | | |`cublasDnrm2_v2`| | | | |`hipblasDnrm2`|1.8.2| | | | |`rocblas_dnrm2`|1.5.0| | | | | -|`cublasDnrm2_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDnrm2_v2_64`|12.0| | | |`hipblasDnrm2_64`|6.1.0| | | | |`rocblas_dnrm2_64`|6.1.0| | | | | |`cublasDrot`| | | | |`hipblasDrot`|3.0.0| | | | |`rocblas_drot`|3.5.0| | | | | |`cublasDrot_64`|12.0| | | | | | | | | | | | | | | | |`cublasDrot_v2`| | | | |`hipblasDrot`|3.0.0| | | | |`rocblas_drot`|3.5.0| | | | | @@ -286,9 +286,9 @@ |`cublasDzasum_v2`| | | | |`hipblasDzasum_v2`|6.0.0| | | | |`rocblas_dzasum`|1.5.0| | | | | |`cublasDzasum_v2_64`|12.0| | | |`hipblasDzasum_v2_64`|6.1.0| | | | |`rocblas_dzasum_64`|6.1.0| | | | | |`cublasDznrm2`| | | | |`hipblasDznrm2_v2`|6.0.0| | | | |`rocblas_dznrm2`|1.5.0| | | | | -|`cublasDznrm2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDznrm2_64`|12.0| | | |`hipblasDznrm2_v2_64`|6.1.0| | | | |`rocblas_dznrm2_64`|6.1.0| | | | | |`cublasDznrm2_v2`| | | | |`hipblasDznrm2_v2`|6.0.0| | | | |`rocblas_dznrm2`|1.5.0| | | | | -|`cublasDznrm2_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDznrm2_v2_64`|12.0| | | |`hipblasDznrm2_v2_64`|6.1.0| | | | |`rocblas_dznrm2_64`|6.1.0| | | | | |`cublasIcamax`| | | | |`hipblasIcamax_v2`|6.0.0| | | | |`rocblas_icamax`|3.5.0| | | | | |`cublasIcamax_64`|12.0| | | |`hipblasIcamax_v2_64`|6.1.0| | | | |`rocblas_icamax_64`|6.1.0| | | | | |`cublasIcamax_v2`| | | | |`hipblasIcamax_v2`|6.0.0| | | | |`rocblas_icamax`|3.5.0| | | | | @@ -336,9 +336,9 @@ |`cublasScasum_v2`| | | | |`hipblasScasum_v2`|6.0.0| | | | |`rocblas_scasum`|1.5.0| | | | | |`cublasScasum_v2_64`|12.0| | | |`hipblasScasum_v2_64`|6.1.0| | | | |`rocblas_scasum_64`|6.1.0| | | | | |`cublasScnrm2`| | | | |`hipblasScnrm2_v2`|6.0.0| | | | |`rocblas_scnrm2`|1.5.0| | | | | -|`cublasScnrm2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasScnrm2_64`|12.0| | | |`hipblasScnrm2_v2_64`|6.1.0| | | | |`rocblas_scnrm2_64`|6.1.0| | | | | |`cublasScnrm2_v2`| | | | |`hipblasScnrm2_v2`|6.0.0| | | | |`rocblas_scnrm2`|1.5.0| | | | | -|`cublasScnrm2_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasScnrm2_v2_64`|12.0| | | |`hipblasScnrm2_v2_64`|6.1.0| | | | |`rocblas_scnrm2_64`|6.1.0| | | | | |`cublasScopy`| | | | |`hipblasScopy`|1.8.2| | | | |`rocblas_scopy`|1.5.0| | | | | |`cublasScopy_64`|12.0| | | |`hipblasScopy_64`|6.1.0| | | | |`rocblas_scopy_64`|6.1.0| | | | | |`cublasScopy_v2`| | | | |`hipblasScopy`|1.8.2| | | | |`rocblas_scopy`|1.5.0| | | | | @@ -348,9 +348,9 @@ |`cublasSdot_v2`| | | | |`hipblasSdot`|3.0.0| | | | |`rocblas_sdot`|1.5.0| | | | | |`cublasSdot_v2_64`|12.0| | | |`hipblasSdot_64`|6.1.0| | | | |`rocblas_sdot_64`|6.1.0| | | | | |`cublasSnrm2`| | | | |`hipblasSnrm2`|1.8.2| | | | |`rocblas_snrm2`|1.5.0| | | | | -|`cublasSnrm2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSnrm2_64`|12.0| | | |`hipblasSnrm2_64`|6.1.0| | | | |`rocblas_snrm2_64`|6.1.0| | | | | |`cublasSnrm2_v2`| | | | |`hipblasSnrm2`|1.8.2| | | | |`rocblas_snrm2`|1.5.0| | | | | -|`cublasSnrm2_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSnrm2_v2_64`|12.0| | | |`hipblasSnrm2_64`|6.1.0| | | | |`rocblas_snrm2_64`|6.1.0| | | | | |`cublasSrot`| | | | |`hipblasSrot`|3.0.0| | | | |`rocblas_srot`|3.5.0| | | | | |`cublasSrot_64`|12.0| | | | | | | | | | | | | | | | |`cublasSrot_v2`| | | | |`hipblasSrot`|3.0.0| | | | |`rocblas_srot`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 0e388c5f..095662ea 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -258,9 +258,9 @@ |`cublasDdot_v2`| | | | |`rocblas_ddot`|1.5.0| | | | | |`cublasDdot_v2_64`|12.0| | | |`rocblas_ddot_64`|6.1.0| | | | | |`cublasDnrm2`| | | | |`rocblas_dnrm2`|1.5.0| | | | | -|`cublasDnrm2_64`|12.0| | | | | | | | | | +|`cublasDnrm2_64`|12.0| | | |`rocblas_dnrm2_64`|6.1.0| | | | | |`cublasDnrm2_v2`| | | | |`rocblas_dnrm2`|1.5.0| | | | | -|`cublasDnrm2_v2_64`|12.0| | | | | | | | | | +|`cublasDnrm2_v2_64`|12.0| | | |`rocblas_dnrm2_64`|6.1.0| | | | | |`cublasDrot`| | | | |`rocblas_drot`|3.5.0| | | | | |`cublasDrot_64`|12.0| | | | | | | | | | |`cublasDrot_v2`| | | | |`rocblas_drot`|3.5.0| | | | | @@ -286,9 +286,9 @@ |`cublasDzasum_v2`| | | | |`rocblas_dzasum`|1.5.0| | | | | |`cublasDzasum_v2_64`|12.0| | | |`rocblas_dzasum_64`|6.1.0| | | | | |`cublasDznrm2`| | | | |`rocblas_dznrm2`|1.5.0| | | | | -|`cublasDznrm2_64`|12.0| | | | | | | | | | +|`cublasDznrm2_64`|12.0| | | |`rocblas_dznrm2_64`|6.1.0| | | | | |`cublasDznrm2_v2`| | | | |`rocblas_dznrm2`|1.5.0| | | | | -|`cublasDznrm2_v2_64`|12.0| | | | | | | | | | +|`cublasDznrm2_v2_64`|12.0| | | |`rocblas_dznrm2_64`|6.1.0| | | | | |`cublasIcamax`| | | | |`rocblas_icamax`|3.5.0| | | | | |`cublasIcamax_64`|12.0| | | |`rocblas_icamax_64`|6.1.0| | | | | |`cublasIcamax_v2`| | | | |`rocblas_icamax`|3.5.0| | | | | @@ -336,9 +336,9 @@ |`cublasScasum_v2`| | | | |`rocblas_scasum`|1.5.0| | | | | |`cublasScasum_v2_64`|12.0| | | |`rocblas_scasum_64`|6.1.0| | | | | |`cublasScnrm2`| | | | |`rocblas_scnrm2`|1.5.0| | | | | -|`cublasScnrm2_64`|12.0| | | | | | | | | | +|`cublasScnrm2_64`|12.0| | | |`rocblas_scnrm2_64`|6.1.0| | | | | |`cublasScnrm2_v2`| | | | |`rocblas_scnrm2`|1.5.0| | | | | -|`cublasScnrm2_v2_64`|12.0| | | | | | | | | | +|`cublasScnrm2_v2_64`|12.0| | | |`rocblas_scnrm2_64`|6.1.0| | | | | |`cublasScopy`| | | | |`rocblas_scopy`|1.5.0| | | | | |`cublasScopy_64`|12.0| | | |`rocblas_scopy_64`|6.1.0| | | | | |`cublasScopy_v2`| | | | |`rocblas_scopy`|1.5.0| | | | | @@ -348,9 +348,9 @@ |`cublasSdot_v2`| | | | |`rocblas_sdot`|1.5.0| | | | | |`cublasSdot_v2_64`|12.0| | | |`rocblas_sdot_64`|6.1.0| | | | | |`cublasSnrm2`| | | | |`rocblas_snrm2`|1.5.0| | | | | -|`cublasSnrm2_64`|12.0| | | | | | | | | | +|`cublasSnrm2_64`|12.0| | | |`rocblas_snrm2_64`|6.1.0| | | | | |`cublasSnrm2_v2`| | | | |`rocblas_snrm2`|1.5.0| | | | | -|`cublasSnrm2_v2_64`|12.0| | | | | | | | | | +|`cublasSnrm2_v2_64`|12.0| | | |`rocblas_snrm2_64`|6.1.0| | | | | |`cublasSrot`| | | | |`rocblas_srot`|3.5.0| | | | | |`cublasSrot_64`|12.0| | | | | | | | | | |`cublasSrot_v2`| | | | |`rocblas_srot`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index eb84efb2..4e540107 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -88,13 +88,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // NRM2 // NRM2 functions' signatures differ from _v2 ones, hipblas and rocblas NRM2 functions have mapping to NRM2_v2 functions only {"cublasSnrm2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSnrm2_64", {"hipblasSnrm2_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasSnrm2_64", {"hipblasSnrm2_64", "rocblas_snrm2_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasDnrm2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDnrm2_64", {"hipblasDnrm2_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasDnrm2_64", {"hipblasDnrm2_64", "rocblas_dnrm2_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasScnrm2", {"hipblasScnrm2_v2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasScnrm2_64", {"hipblasScnrm2_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasScnrm2_64", {"hipblasScnrm2_v2_64", "rocblas_scnrm2_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasDznrm2", {"hipblasDznrm2_v2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDznrm2_64", {"hipblasDznrm2_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasDznrm2_64", {"hipblasDznrm2_v2_64", "rocblas_dznrm2_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasNrm2Ex", {"hipblasNrm2Ex_v2", "rocblas_nrm2_ex", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasNrm2Ex_64", {"hipblasNrm2Ex_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, @@ -920,13 +920,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // NRM2 {"cublasSnrm2_v2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasSnrm2_v2_64", {"hipblasSnrm2_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasSnrm2_v2_64", {"hipblasSnrm2_64", "rocblas_snrm2_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasDnrm2_v2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDnrm2_v2_64", {"hipblasDnrm2_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasDnrm2_v2_64", {"hipblasDnrm2_64", "rocblas_dnrm2_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasScnrm2_v2", {"hipblasScnrm2_v2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasScnrm2_v2_64", {"hipblasScnrm2_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasScnrm2_v2_64", {"hipblasScnrm2_v2_64", "rocblas_scnrm2_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasDznrm2_v2", {"hipblasDznrm2_v2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDznrm2_v2_64", {"hipblasDznrm2_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasDznrm2_v2_64", {"hipblasDznrm2_v2_64", "rocblas_dznrm2_64", CONV_LIB_FUNC, API_BLAS, 5}}, // DOT {"cublasDotEx", {"hipblasDotEx_v2", "rocblas_dot_ex", CONV_LIB_FUNC, API_BLAS, 8}}, @@ -1897,6 +1897,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasCdotu_v2_64", {HIP_6010, HIP_0, HIP_0, }}, {"hipblasZdotc_v2_64", {HIP_6010, HIP_0, HIP_0, }}, {"hipblasZdotu_v2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasSnrm2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasDnrm2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasScnrm2_v2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasDznrm2_v2_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2159,6 +2163,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_cdotu_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_zdotc_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_zdotu_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_snrm2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_dnrm2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_scnrm2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_dznrm2_64", {HIP_6010, HIP_0, HIP_0, }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 27bf4e9f..e0525e40 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2020,6 +2020,34 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZdotu_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexresult); blasStatus = cublasZdotu_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexresult); blasStatus = cublasZdotu_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2_64(cublasHandle_t handle, int64_t n, const float* x, int64_t incx, float* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2_64(hipblasHandle_t handle, int64_t n, const float* x, int64_t incx, float* result); + // CHECK: blasStatus = hipblasSnrm2_64(blasHandle, n_64, &fx, incx_64, &fresult); + // CHECK-NEXT: blasStatus = hipblasSnrm2_64(blasHandle, n_64, &fx, incx_64, &fresult); + blasStatus = cublasSnrm2_64(blasHandle, n_64, &fx, incx_64, &fresult); + blasStatus = cublasSnrm2_v2_64(blasHandle, n_64, &fx, incx_64, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDnrm2_v2_64(cublasHandle_t handle, int64_t n, const double* x, int64_t incx, double* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDnrm2_64(hipblasHandle_t handle, int64_t n, const double* x, int64_t incx, double* result); + // CHECK: blasStatus = hipblasDnrm2_64(blasHandle, n_64, &dx, incx_64, &dresult); + // CHECK-NEXT: blasStatus = hipblasDnrm2_64(blasHandle, n_64, &dx, incx_64, &dresult); + blasStatus = cublasDnrm2_64(blasHandle, n_64, &dx, incx_64, &dresult); + blasStatus = cublasDnrm2_v2_64(blasHandle, n_64, &dx, incx_64, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2_64(cublasHandle_t handle, int64_t n, const cuComplex* x, int64_t incx, float* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2_v2_64(hipblasHandle_t handle, int64_t n, const hipComplex* x, int64_t incx, float* result); + // CHECK: blasStatus = hipblasScnrm2_v2_64(blasHandle, n_64, &complexx, incx_64, &fresult); + // CHECK-NEXT: blasStatus = hipblasScnrm2_v2_64(blasHandle, n_64, &complexx, incx_64, &fresult); + blasStatus = cublasScnrm2_64(blasHandle, n_64, &complexx, incx_64, &fresult); + blasStatus = cublasScnrm2_v2_64(blasHandle, n_64, &complexx, incx_64, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDznrm2_v2_64(cublasHandle_t handle, int64_t n, const cuDoubleComplex* x, int64_t incx, double* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2_v2_64(hipblasHandle_t handle, int64_t n, const hipDoubleComplex* x, int64_t incx, double* result); + // CHECK: blasStatus = hipblasDznrm2_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); + // CHECK-NEXT: blasStatus = hipblasDznrm2_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); + blasStatus = cublasDznrm2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); + blasStatus = cublasDznrm2_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 2a070bf9..540f4f23 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -2105,6 +2105,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zdotu_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexresult); blasStatus = cublasZdotu_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexresult); blasStatus = cublasZdotu_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2_64(cublasHandle_t handle, int64_t n, const float* x, int64_t incx, float* result); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_snrm2_64(rocblas_handle handle, int64_t n, const float* x, int64_t incx, float* result); + // CHECK: blasStatus = rocblas_snrm2_64(blasHandle, n_64, &fx, incx_64, &fresult); + // CHECK-NEXT: blasStatus = rocblas_snrm2_64(blasHandle, n_64, &fx, incx_64, &fresult); + blasStatus = cublasSnrm2_64(blasHandle, n_64, &fx, incx_64, &fresult); + blasStatus = cublasSnrm2_v2_64(blasHandle, n_64, &fx, incx_64, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDnrm2_v2_64(cublasHandle_t handle, int64_t n, const double* x, int64_t incx, double* result); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dnrm2_64(rocblas_handle handle, int64_t n, const double* x, int64_t incx, double* result); + // CHECK: blasStatus = rocblas_dnrm2_64(blasHandle, n_64, &dx, incx_64, &dresult); + // CHECK-NEXT: blasStatus = rocblas_dnrm2_64(blasHandle, n_64, &dx, incx_64, &dresult); + blasStatus = cublasDnrm2_64(blasHandle, n_64, &dx, incx_64, &dresult); + blasStatus = cublasDnrm2_v2_64(blasHandle, n_64, &dx, incx_64, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2_64(cublasHandle_t handle, int64_t n, const cuComplex* x, int64_t incx, float* result); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scnrm2_64(rocblas_handle handle, int64_t n, const rocblas_float_complex* x, int64_t incx, float* result); + // CHECK: blasStatus = rocblas_scnrm2_64(blasHandle, n_64, &complexx, incx_64, &fresult); + // CHECK-NEXT: blasStatus = rocblas_scnrm2_64(blasHandle, n_64, &complexx, incx_64, &fresult); + blasStatus = cublasScnrm2_64(blasHandle, n_64, &complexx, incx_64, &fresult); + blasStatus = cublasScnrm2_v2_64(blasHandle, n_64, &complexx, incx_64, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDznrm2_v2_64(cublasHandle_t handle, int64_t n, const cuDoubleComplex* x, int64_t incx, double* result); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dznrm2_64(rocblas_handle handle, int64_t n, const rocblas_double_complex* x, int64_t incx, double* result); + // CHECK: blasStatus = rocblas_dznrm2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); + // CHECK-NEXT: blasStatus = rocblas_dznrm2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); + blasStatus = cublasDznrm2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); + blasStatus = cublasDznrm2_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); #endif return 0;