From 021408a3769a449ab521c09aa2fa21c3e746289b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 13 Oct 2024 15:18:35 +0100 Subject: [PATCH] [HIPIFY][rocBLAS] 64-bit functions support - Step 18 + `rocblas_(s|d|c|z)ger(c|u)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 24 +++++------ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 24 +++++------ docs/tables/CUBLAS_API_supported_by_ROC.md | 24 +++++------ src/CUDA2HIP_BLAS_API_functions.cpp | 30 +++++++------ .../synthetic/libraries/cublas2rocblas_v2.cu | 42 +++++++++++++++++++ 5 files changed, 96 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index d0707f69..14d8a14d 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1605,9 +1605,13 @@ sub rocSubstitutions { subst("cublasCgemv_v2", "rocblas_cgemv", "library"); subst("cublasCgemv_v2_64", "rocblas_cgemv_64", "library"); subst("cublasCgerc", "rocblas_cgerc", "library"); + subst("cublasCgerc_64", "rocblas_cgerc_64", "library"); subst("cublasCgerc_v2", "rocblas_cgerc", "library"); + subst("cublasCgerc_v2_64", "rocblas_cgerc_64", "library"); subst("cublasCgeru", "rocblas_cgeru", "library"); + subst("cublasCgeru_64", "rocblas_cgeru_64", "library"); subst("cublasCgeru_v2", "rocblas_cgeru", "library"); + subst("cublasCgeru_v2_64", "rocblas_cgeru_64", "library"); subst("cublasChbmv", "rocblas_chbmv", "library"); subst("cublasChbmv_64", "rocblas_chbmv_64", "library"); subst("cublasChbmv_v2", "rocblas_chbmv", "library"); @@ -1752,7 +1756,9 @@ sub rocSubstitutions { subst("cublasDgemv_v2", "rocblas_dgemv", "library"); subst("cublasDgemv_v2_64", "rocblas_dgemv_64", "library"); subst("cublasDger", "rocblas_dger", "library"); + subst("cublasDger_64", "rocblas_dger_64", "library"); subst("cublasDger_v2", "rocblas_dger", "library"); + subst("cublasDger_v2_64", "rocblas_dger_64", "library"); subst("cublasDnrm2", "rocblas_dnrm2", "library"); subst("cublasDnrm2_64", "rocblas_dnrm2_64", "library"); subst("cublasDnrm2_v2", "rocblas_dnrm2", "library"); @@ -1970,7 +1976,9 @@ sub rocSubstitutions { subst("cublasSgemv_v2", "rocblas_sgemv", "library"); subst("cublasSgemv_v2_64", "rocblas_sgemv_64", "library"); subst("cublasSger", "rocblas_sger", "library"); + subst("cublasSger_64", "rocblas_sger_64", "library"); subst("cublasSger_v2", "rocblas_sger", "library"); + subst("cublasSger_v2_64", "rocblas_sger_64", "library"); subst("cublasSnrm2", "rocblas_snrm2", "library"); subst("cublasSnrm2_64", "rocblas_snrm2_64", "library"); subst("cublasSnrm2_v2", "rocblas_snrm2", "library"); @@ -2110,9 +2118,13 @@ sub rocSubstitutions { subst("cublasZgemv_v2", "rocblas_zgemv", "library"); subst("cublasZgemv_v2_64", "rocblas_zgemv_64", "library"); subst("cublasZgerc", "rocblas_zgerc", "library"); + subst("cublasZgerc_64", "rocblas_zgerc_64", "library"); subst("cublasZgerc_v2", "rocblas_zgerc", "library"); + subst("cublasZgerc_v2_64", "rocblas_zgerc_64", "library"); subst("cublasZgeru", "rocblas_zgeru", "library"); + subst("cublasZgeru_64", "rocblas_zgeru_64", "library"); subst("cublasZgeru_v2", "rocblas_zgeru", "library"); + subst("cublasZgeru_v2_64", "rocblas_zgeru_64", "library"); subst("cublasZhbmv", "rocblas_zhbmv", "library"); subst("cublasZhbmv_64", "rocblas_zhbmv_64", "library"); subst("cublasZhbmv_v2", "rocblas_zhbmv", "library"); @@ -12687,10 +12699,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgetrsBatched", "cublasZgetriBatched", "cublasZgetrfBatched", - "cublasZgeru_v2_64", - "cublasZgeru_64", - "cublasZgerc_v2_64", - "cublasZgerc_64", "cublasZgeqrfBatched", "cublasZgemm_v2_64", "cublasZgemm_64", @@ -12724,8 +12732,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgetrsBatched", "cublasSgetriBatched", "cublasSgetrfBatched", - "cublasSger_v2_64", - "cublasSger_64", "cublasSgeqrfBatched", "cublasSgemm_v2_64", "cublasSgemm_64", @@ -12858,8 +12864,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgetrsBatched", "cublasDgetriBatched", "cublasDgetrfBatched", - "cublasDger_v2_64", - "cublasDger_64", "cublasDgeqrfBatched", "cublasDgemm_v2_64", "cublasDgemm_64", @@ -12905,10 +12909,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgetrsBatched", "cublasCgetriBatched", "cublasCgetrfBatched", - "cublasCgeru_v2_64", - "cublasCgeru_64", - "cublasCgerc_v2_64", - "cublasCgerc_64", "cublasCgeqrfBatched", "cublasCgemm_v2_64", "cublasCgemm_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 05d3caaa..b24a6bb1 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -731,13 +731,13 @@ |`cublasCgemv_v2`| | | | |`hipblasCgemv_v2`|6.0.0| | | | |`rocblas_cgemv`|1.5.0| | | | | |`cublasCgemv_v2_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | | |`rocblas_cgemv_64`|6.2.0| | | | | |`cublasCgerc`| | | | |`hipblasCgerc_v2`|6.0.0| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgerc_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | | |`rocblas_cgerc_64`|6.2.0| | | | | |`cublasCgerc_v2`| | | | |`hipblasCgerc_v2`|6.0.0| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_v2_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgerc_v2_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | | |`rocblas_cgerc_64`|6.2.0| | | | | |`cublasCgeru`| | | | |`hipblasCgeru_v2`|6.0.0| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgeru_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | |`rocblas_cgeru_64`|6.2.0| | | | | |`cublasCgeru_v2`| | | | |`hipblasCgeru_v2`|6.0.0| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_v2_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgeru_v2_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | |`rocblas_cgeru_64`|6.2.0| | | | | |`cublasChbmv`| | | | |`hipblasChbmv_v2`|6.0.0| | | | |`rocblas_chbmv`|3.5.0| | | | | |`cublasChbmv_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | | |`rocblas_chbmv_64`|6.2.0| | | | | |`cublasChbmv_v2`| | | | |`hipblasChbmv_v2`|6.0.0| | | | |`rocblas_chbmv`|3.5.0| | | | | @@ -811,9 +811,9 @@ |`cublasDgemv_v2`| | | | |`hipblasDgemv`|1.8.2| | | | |`rocblas_dgemv`|1.5.0| | | | | |`cublasDgemv_v2_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | | |`rocblas_dgemv_64`|6.2.0| | | | | |`cublasDger`| | | | |`hipblasDger`|1.8.2| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | | | | | | | | +|`cublasDger_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | |`rocblas_dger_64`|6.2.0| | | | | |`cublasDger_v2`| | | | |`hipblasDger`|1.8.2| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_v2_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | | | | | | | | +|`cublasDger_v2_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | |`rocblas_dger_64`|6.2.0| | | | | |`cublasDsbmv`| | | | |`hipblasDsbmv`|3.5.0| | | | |`rocblas_dsbmv`|3.5.0| | | | | |`cublasDsbmv_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | | |`rocblas_dsbmv_64`|6.2.0| | | | | |`cublasDsbmv_v2`| | | | |`hipblasDsbmv`|3.5.0| | | | |`rocblas_dsbmv`|3.5.0| | | | | @@ -875,9 +875,9 @@ |`cublasSgemv_v2`| | | | |`hipblasSgemv`|1.8.2| | | | |`rocblas_sgemv`|1.5.0| | | | | |`cublasSgemv_v2_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | | |`rocblas_sgemv_64`|6.2.0| | | | | |`cublasSger`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | | | | | | | | +|`cublasSger_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | |`rocblas_sger_64`|6.2.0| | | | | |`cublasSger_v2`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_v2_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | | | | | | | | +|`cublasSger_v2_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | |`rocblas_sger_64`|6.2.0| | | | | |`cublasSsbmv`| | | | |`hipblasSsbmv`|3.5.0| | | | |`rocblas_ssbmv`|3.5.0| | | | | |`cublasSsbmv_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | | |`rocblas_ssbmv_64`|6.2.0| | | | | |`cublasSsbmv_v2`| | | | |`hipblasSsbmv`|3.5.0| | | | |`rocblas_ssbmv`|3.5.0| | | | | @@ -939,13 +939,13 @@ |`cublasZgemv_v2`| | | | |`hipblasZgemv_v2`|6.0.0| | | | |`rocblas_zgemv`|1.5.0| | | | | |`cublasZgemv_v2_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | | |`rocblas_zgemv_64`|6.2.0| | | | | |`cublasZgerc`| | | | |`hipblasZgerc_v2`|6.0.0| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgerc_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | | |`rocblas_zgerc_64`|6.2.0| | | | | |`cublasZgerc_v2`| | | | |`hipblasZgerc_v2`|6.0.0| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_v2_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgerc_v2_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | | |`rocblas_zgerc_64`|6.2.0| | | | | |`cublasZgeru`| | | | |`hipblasZgeru_v2`|6.0.0| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgeru_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | |`rocblas_zgeru_64`|6.2.0| | | | | |`cublasZgeru_v2`| | | | |`hipblasZgeru_v2`|6.0.0| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_v2_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgeru_v2_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | |`rocblas_zgeru_64`|6.2.0| | | | | |`cublasZhbmv`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | |`rocblas_zhbmv`|3.5.0| | | | | |`cublasZhbmv_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | | |`rocblas_zhbmv_64`|6.2.0| | | | | |`cublasZhbmv_v2`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | |`rocblas_zhbmv`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 06eac47c..96e2612f 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -731,13 +731,13 @@ |`cublasCgemv_v2`| | | | |`rocblas_cgemv`|1.5.0| | | | | |`cublasCgemv_v2_64`|12.0| | | |`rocblas_cgemv_64`|6.2.0| | | | | |`cublasCgerc`| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_64`|12.0| | | | | | | | | | +|`cublasCgerc_64`|12.0| | | |`rocblas_cgerc_64`|6.2.0| | | | | |`cublasCgerc_v2`| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_v2_64`|12.0| | | | | | | | | | +|`cublasCgerc_v2_64`|12.0| | | |`rocblas_cgerc_64`|6.2.0| | | | | |`cublasCgeru`| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_64`|12.0| | | | | | | | | | +|`cublasCgeru_64`|12.0| | | |`rocblas_cgeru_64`|6.2.0| | | | | |`cublasCgeru_v2`| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_v2_64`|12.0| | | | | | | | | | +|`cublasCgeru_v2_64`|12.0| | | |`rocblas_cgeru_64`|6.2.0| | | | | |`cublasChbmv`| | | | |`rocblas_chbmv`|3.5.0| | | | | |`cublasChbmv_64`|12.0| | | |`rocblas_chbmv_64`|6.2.0| | | | | |`cublasChbmv_v2`| | | | |`rocblas_chbmv`|3.5.0| | | | | @@ -811,9 +811,9 @@ |`cublasDgemv_v2`| | | | |`rocblas_dgemv`|1.5.0| | | | | |`cublasDgemv_v2_64`|12.0| | | |`rocblas_dgemv_64`|6.2.0| | | | | |`cublasDger`| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_64`|12.0| | | | | | | | | | +|`cublasDger_64`|12.0| | | |`rocblas_dger_64`|6.2.0| | | | | |`cublasDger_v2`| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_v2_64`|12.0| | | | | | | | | | +|`cublasDger_v2_64`|12.0| | | |`rocblas_dger_64`|6.2.0| | | | | |`cublasDsbmv`| | | | |`rocblas_dsbmv`|3.5.0| | | | | |`cublasDsbmv_64`|12.0| | | |`rocblas_dsbmv_64`|6.2.0| | | | | |`cublasDsbmv_v2`| | | | |`rocblas_dsbmv`|3.5.0| | | | | @@ -875,9 +875,9 @@ |`cublasSgemv_v2`| | | | |`rocblas_sgemv`|1.5.0| | | | | |`cublasSgemv_v2_64`|12.0| | | |`rocblas_sgemv_64`|6.2.0| | | | | |`cublasSger`| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_64`|12.0| | | | | | | | | | +|`cublasSger_64`|12.0| | | |`rocblas_sger_64`|6.2.0| | | | | |`cublasSger_v2`| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_v2_64`|12.0| | | | | | | | | | +|`cublasSger_v2_64`|12.0| | | |`rocblas_sger_64`|6.2.0| | | | | |`cublasSsbmv`| | | | |`rocblas_ssbmv`|3.5.0| | | | | |`cublasSsbmv_64`|12.0| | | |`rocblas_ssbmv_64`|6.2.0| | | | | |`cublasSsbmv_v2`| | | | |`rocblas_ssbmv`|3.5.0| | | | | @@ -939,13 +939,13 @@ |`cublasZgemv_v2`| | | | |`rocblas_zgemv`|1.5.0| | | | | |`cublasZgemv_v2_64`|12.0| | | |`rocblas_zgemv_64`|6.2.0| | | | | |`cublasZgerc`| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_64`|12.0| | | | | | | | | | +|`cublasZgerc_64`|12.0| | | |`rocblas_zgerc_64`|6.2.0| | | | | |`cublasZgerc_v2`| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_v2_64`|12.0| | | | | | | | | | +|`cublasZgerc_v2_64`|12.0| | | |`rocblas_zgerc_64`|6.2.0| | | | | |`cublasZgeru`| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_64`|12.0| | | | | | | | | | +|`cublasZgeru_64`|12.0| | | |`rocblas_zgeru_64`|6.2.0| | | | | |`cublasZgeru_v2`| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_v2_64`|12.0| | | | | | | | | | +|`cublasZgeru_v2_64`|12.0| | | |`rocblas_zgeru_64`|6.2.0| | | | | |`cublasZhbmv`| | | | |`rocblas_zhbmv`|3.5.0| | | | | |`cublasZhbmv_64`|12.0| | | |`rocblas_zhbmv_64`|6.2.0| | | | | |`cublasZhbmv_v2`| | | | |`rocblas_zhbmv`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 8ea0d61e..c86c7e1a 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -336,17 +336,17 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GER {"cublasSger", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSger_64", {"hipblasSger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSger_64", {"hipblasSger_64", "rocblas_sger_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDger", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDger_64", {"hipblasDger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDger_64", {"hipblasDger_64", "rocblas_dger_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgeru", {"hipblasCgeru_v2", "rocblas_cgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgeru_64", {"hipblasCgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgeru_64", {"hipblasCgeru_v2_64", "rocblas_cgeru_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgerc", {"hipblasCgerc_v2", "rocblas_cgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgerc_64", {"hipblasCgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgerc_64", {"hipblasCgerc_v2_64", "rocblas_cgerc_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgeru", {"hipblasZgeru_v2", "rocblas_zgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgeru_64", {"hipblasZgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgeru_64", {"hipblasZgeru_v2_64", "rocblas_zgeru_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgerc", {"hipblasZgerc_v2", "rocblas_zgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgerc_64", {"hipblasZgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgerc_64", {"hipblasZgerc_v2_64", "rocblas_zgerc_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SYR/HER {"cublasSsyr", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, @@ -754,17 +754,17 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GER {"cublasSger_v2", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSger_v2_64", {"hipblasSger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSger_v2_64", {"hipblasSger_64", "rocblas_sger_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDger_v2", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDger_v2_64", {"hipblasDger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDger_v2_64", {"hipblasDger_64", "rocblas_dger_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgeru_v2", {"hipblasCgeru_v2", "rocblas_cgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgeru_v2_64", {"hipblasCgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgeru_v2_64", {"hipblasCgeru_v2_64", "rocblas_cgeru_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgerc_v2", {"hipblasCgerc_v2", "rocblas_cgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgerc_v2_64", {"hipblasCgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgerc_v2_64", {"hipblasCgerc_v2_64", "rocblas_cgerc_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgeru_v2", {"hipblasZgeru_v2", "rocblas_zgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgeru_v2_64", {"hipblasZgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgeru_v2_64", {"hipblasZgeru_v2_64", "rocblas_zgeru_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgerc_v2", {"hipblasZgerc_v2", "rocblas_zgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgerc_v2_64", {"hipblasZgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgerc_v2_64", {"hipblasZgerc_v2_64", "rocblas_zgerc_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SYR/HER {"cublasSsyr_v2", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, @@ -2397,6 +2397,12 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtpsv_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ctpsv_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ztpsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_sger_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dger_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_cgeru_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zgeru_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_cgerc_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zgerc_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 2b2f8881..9c9a08ee 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -2937,6 +2937,48 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_ztpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); blasStatus = cublasZtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); blasStatus = cublasZtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sger_64(rocblas_handle handle, int64_t m, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); + // CHECK: blasStatus = rocblas_sger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + // CHECK-NEXT: blasStatus = rocblas_sger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + blasStatus = cublasSger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + blasStatus = cublasSger_v2_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dger_64(rocblas_handle handle, int64_t m, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* A, int64_t lda); + // CHECK: blasStatus = rocblas_dger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_dger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDger_v2_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgeru_64(rocblas_handle handle, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* y, int64_t incy, rocblas_float_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_cgeru_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_cgeru_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgeru_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgeru_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgerc_64(rocblas_handle handle, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* y, int64_t incy, rocblas_float_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_cgerc_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_cgerc_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgerc_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgerc_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgeru_64(rocblas_handle handle, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* y, int64_t incy, rocblas_double_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_zgeru_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_zgeru_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgeru_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgeru_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgerc_64(rocblas_handle handle, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* y, int64_t incy, rocblas_double_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_zgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_zgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgerc_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); #endif return 0;