From 388569f070bf0873bfa8b4f7dead28e4cdc8cc93 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 21 Oct 2023 17:49:29 +0200 Subject: [PATCH] [HIPIFY][6.0.0][#1078][BLAS] Support for ROCm HIP 6.0.0 - Step 16 - Functions - ABI break + Support for `hipblas(S|D|C|Z)trmm` and `rocblas_(s|d|c|z)trmm` (former `rocblas_(s|d|c|z)trmm_outofplace`), which breaks ABI + Updated synthetic tests, the regenerated hipify-perl, and docs --- bin/hipify-perl | 16 ++++---- docs/tables/CUBLAS_API_supported_by_HIP.md | 14 +++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 14 +++---- docs/tables/CUBLAS_API_supported_by_ROC.md | 14 +++---- src/CUDA2HIP_BLAS_API_functions.cpp | 37 ++++++++++++------- .../synthetic/libraries/cublas2hipblas.cu | 24 ++++++++++++ .../synthetic/libraries/cublas2hipblas_v2.cu | 23 ++++++++++++ .../synthetic/libraries/cublas2rocblas.cu | 12 +++--- .../synthetic/libraries/cublas2rocblas_v2.cu | 20 +++++----- 9 files changed, 116 insertions(+), 58 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index fe0d0971..34dc96b9 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1363,8 +1363,8 @@ sub rocSubstitutions { subst("cublasCtpmv_v2", "rocblas_ctpmv", "library"); subst("cublasCtpsv", "rocblas_ctpsv", "library"); subst("cublasCtpsv_v2", "rocblas_ctpsv", "library"); - subst("cublasCtrmm", "rocblas_ctrmm_outofplace", "library"); - subst("cublasCtrmm_v2", "rocblas_ctrmm_outofplace", "library"); + subst("cublasCtrmm", "rocblas_ctrmm", "library"); + subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); subst("cublasCtrmv", "rocblas_ctrmv", "library"); subst("cublasCtrmv_v2", "rocblas_ctrmv", "library"); subst("cublasCtrsm", "rocblas_ctrsm", "library"); @@ -1439,8 +1439,8 @@ sub rocSubstitutions { subst("cublasDtpmv_v2", "rocblas_dtpmv", "library"); subst("cublasDtpsv", "rocblas_dtpsv", "library"); subst("cublasDtpsv_v2", "rocblas_dtpsv", "library"); - subst("cublasDtrmm", "rocblas_dtrmm_outofplace", "library"); - subst("cublasDtrmm_v2", 
"rocblas_dtrmm_outofplace", "library"); + subst("cublasDtrmm", "rocblas_dtrmm", "library"); + subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); subst("cublasDtrmv", "rocblas_dtrmv", "library"); subst("cublasDtrmv_v2", "rocblas_dtrmv", "library"); subst("cublasDtrsm", "rocblas_dtrsm", "library"); @@ -1566,6 +1566,7 @@ sub rocSubstitutions { subst("cublasStpmv_v2", "rocblas_stpmv", "library"); subst("cublasStpsv", "rocblas_stpsv", "library"); subst("cublasStpsv_v2", "rocblas_stpsv", "library"); + subst("cublasStrmm", "rocblas_strmm", "library"); subst("cublasStrmm_v2", "rocblas_strmm", "library"); subst("cublasStrmv", "rocblas_strmv", "library"); subst("cublasStrmv_v2", "rocblas_strmv", "library"); @@ -1652,8 +1653,8 @@ sub rocSubstitutions { subst("cublasZtpmv_v2", "rocblas_ztpmv", "library"); subst("cublasZtpsv", "rocblas_ztpsv", "library"); subst("cublasZtpsv_v2", "rocblas_ztpsv", "library"); - subst("cublasZtrmm", "rocblas_ztrmm_outofplace", "library"); - subst("cublasZtrmm_v2", "rocblas_ztrmm_outofplace", "library"); + subst("cublasZtrmm", "rocblas_ztrmm", "library"); + subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); subst("cublasZtrmv", "rocblas_ztrmv", "library"); subst("cublasZtrmv_v2", "rocblas_ztrmv", "library"); subst("cublasZtrsm", "rocblas_ztrsm", "library"); @@ -3229,6 +3230,7 @@ sub simpleSubstitutions { subst("cublasStpmv_v2", "hipblasStpmv", "library"); subst("cublasStpsv", "hipblasStpsv", "library"); subst("cublasStpsv_v2", "hipblasStpsv", "library"); + subst("cublasStrmm", "hipblasStrmm", "library"); subst("cublasStrmm_v2", "hipblasStrmm", "library"); subst("cublasStrmv", "hipblasStrmv", "library"); subst("cublasStrmv_v2", "hipblasStrmv", "library"); @@ -9444,7 +9446,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasStrmv_64", "cublasStrmm_v2_64", "cublasStrmm_64", - "cublasStrmm", "cublasStpttr", "cublasStpsv_v2_64", "cublasStpsv_64", @@ -9974,7 +9975,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasStrmv_64", "cublasStrmm_v2_64", 
"cublasStrmm_64", - "cublasStrmm", "cublasStpttr", "cublasStpsv_v2_64", "cublasStpsv_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 8435427a..9c94aeb6 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -755,9 +755,9 @@ |`cublasCsyrk_v2_64`|12.0| | | | | | | | | |`cublasCsyrkx`| | | |`hipblasCsyrkx_v2`|6.0.0| | | |6.0.0| |`cublasCsyrkx_64`|12.0| | | | | | | | | -|`cublasCtrmm`| | | |`hipblasCtrmm`|3.5.0|5.6.0| | | | +|`cublasCtrmm`| | | |`hipblasCtrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasCtrmm_64`|12.0| | | | | | | | | -|`cublasCtrmm_v2`| | | |`hipblasCtrmm`|3.5.0|5.6.0| | | | +|`cublasCtrmm_v2`| | | |`hipblasCtrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasCtrmm_v2_64`|12.0| | | | | | | | | |`cublasCtrsm`| | | |`hipblasCtrsm`|3.5.0| | | | | |`cublasCtrsm_64`|12.0| | | | | | | | | @@ -789,9 +789,9 @@ |`cublasDsyrk_v2_64`|12.0| | | | | | | | | |`cublasDsyrkx`| | | |`hipblasDsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | | | | | | | -|`cublasDtrmm`| | | |`hipblasDtrmm`|3.2.0|5.6.0| | | | +|`cublasDtrmm`| | | |`hipblasDtrmm`|3.2.0| |6.0.0| |6.0.0| |`cublasDtrmm_64`|12.0| | | | | | | | | -|`cublasDtrmm_v2`| | | |`hipblasDtrmm`|3.2.0|5.6.0| | | | +|`cublasDtrmm_v2`| | | |`hipblasDtrmm`|3.2.0| |6.0.0| |6.0.0| |`cublasDtrmm_v2_64`|12.0| | | | | | | | | |`cublasDtrsm`| | | |`hipblasDtrsm`|1.8.2| | | | | |`cublasDtrsm_64`|12.0| | | | | | | | | @@ -837,7 +837,7 @@ |`cublasSsyrk_v2_64`|12.0| | | | | | | | | |`cublasSsyrkx`| | | |`hipblasSsyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | | | | | | | -|`cublasStrmm`| | | | | | | | | | +|`cublasStrmm`| | | |`hipblasStrmm`|3.2.0| |6.0.0| |6.0.0| |`cublasStrmm_64`|12.0| | | | | | | | | |`cublasStrmm_v2`| | | |`hipblasStrmm`|3.2.0| |6.0.0| |6.0.0| |`cublasStrmm_v2_64`|12.0| | | | | | | | | @@ -895,9 +895,9 @@ |`cublasZsyrk_v2_64`|12.0| | | | | | | | | |`cublasZsyrkx`| | | |`hipblasZsyrkx_v2`|6.0.0| | | |6.0.0| 
|`cublasZsyrkx_64`|12.0| | | | | | | | | -|`cublasZtrmm`| | | |`hipblasZtrmm`|3.5.0|5.6.0| | | | +|`cublasZtrmm`| | | |`hipblasZtrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasZtrmm_64`|12.0| | | | | | | | | -|`cublasZtrmm_v2`| | | |`hipblasZtrmm`|3.5.0|5.6.0| | | | +|`cublasZtrmm_v2`| | | |`hipblasZtrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasZtrmm_v2_64`|12.0| | | | | | | | | |`cublasZtrsm`| | | |`hipblasZtrsm`|3.5.0| | | | | |`cublasZtrsm_64`|12.0| | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 3b8e705c..8df18b7e 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -755,9 +755,9 @@ |`cublasCsyrk_v2_64`|12.0| | | | | | | | | | | | | | | |`cublasCsyrkx`| | | |`hipblasCsyrkx_v2`|6.0.0| | | |6.0.0|`rocblas_csyrkx`|3.5.0| | | | | |`cublasCsyrkx_64`|12.0| | | | | | | | | | | | | | | -|`cublasCtrmm`| | | |`hipblasCtrmm`|3.5.0|5.6.0| | | |`rocblas_ctrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasCtrmm`| | | |`hipblasCtrmm`|3.5.0| |6.0.0| |6.0.0|`rocblas_ctrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasCtrmm_64`|12.0| | | | | | | | | | | | | | | -|`cublasCtrmm_v2`| | | |`hipblasCtrmm`|3.5.0|5.6.0| | | |`rocblas_ctrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasCtrmm_v2`| | | |`hipblasCtrmm`|3.5.0| |6.0.0| |6.0.0|`rocblas_ctrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasCtrmm_v2_64`|12.0| | | | | | | | | | | | | | | |`cublasCtrsm`| | | |`hipblasCtrsm`|3.5.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | |`cublasCtrsm_64`|12.0| | | | | | | | | | | | | | | @@ -789,9 +789,9 @@ |`cublasDsyrk_v2_64`|12.0| | | | | | | | | | | | | | | |`cublasDsyrkx`| | | |`hipblasDsyrkx`|3.5.0| | | | |`rocblas_dsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | | | | | | | | | | | | | -|`cublasDtrmm`| | | |`hipblasDtrmm`|3.2.0|5.6.0| | | |`rocblas_dtrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasDtrmm`| | | |`hipblasDtrmm`|3.2.0| |6.0.0| |6.0.0|`rocblas_dtrmm`|3.5.0| |6.0.0| |6.0.0| 
|`cublasDtrmm_64`|12.0| | | | | | | | | | | | | | | -|`cublasDtrmm_v2`| | | |`hipblasDtrmm`|3.2.0|5.6.0| | | |`rocblas_dtrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasDtrmm_v2`| | | |`hipblasDtrmm`|3.2.0| |6.0.0| |6.0.0|`rocblas_dtrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasDtrmm_v2_64`|12.0| | | | | | | | | | | | | | | |`cublasDtrsm`| | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | |`cublasDtrsm_64`|12.0| | | | | | | | | | | | | | | @@ -837,7 +837,7 @@ |`cublasSsyrk_v2_64`|12.0| | | | | | | | | | | | | | | |`cublasSsyrkx`| | | |`hipblasSsyrkx`|3.5.0| | | | |`rocblas_ssyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | | | | | | | | | | | | | -|`cublasStrmm`| | | | | | | | | | | | | | | | +|`cublasStrmm`| | | |`hipblasStrmm`|3.2.0| |6.0.0| |6.0.0|`rocblas_strmm`|3.5.0| |6.0.0| |6.0.0| |`cublasStrmm_64`|12.0| | | | | | | | | | | | | | | |`cublasStrmm_v2`| | | |`hipblasStrmm`|3.2.0| |6.0.0| |6.0.0|`rocblas_strmm`|3.5.0| |6.0.0| |6.0.0| |`cublasStrmm_v2_64`|12.0| | | | | | | | | | | | | | | @@ -895,9 +895,9 @@ |`cublasZsyrk_v2_64`|12.0| | | | | | | | | | | | | | | |`cublasZsyrkx`| | | |`hipblasZsyrkx_v2`|6.0.0| | | |6.0.0|`rocblas_zsyrkx`|3.5.0| | | | | |`cublasZsyrkx_64`|12.0| | | | | | | | | | | | | | | -|`cublasZtrmm`| | | |`hipblasZtrmm`|3.5.0|5.6.0| | | |`rocblas_ztrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasZtrmm`| | | |`hipblasZtrmm`|3.5.0| |6.0.0| |6.0.0|`rocblas_ztrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasZtrmm_64`|12.0| | | | | | | | | | | | | | | -|`cublasZtrmm_v2`| | | |`hipblasZtrmm`|3.5.0|5.6.0| | | |`rocblas_ztrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasZtrmm_v2`| | | |`hipblasZtrmm`|3.5.0| |6.0.0| |6.0.0|`rocblas_ztrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasZtrmm_v2_64`|12.0| | | | | | | | | | | | | | | |`cublasZtrsm`| | | |`hipblasZtrsm`|3.5.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | |`cublasZtrsm_64`|12.0| | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index d46e9771..40395be0 
100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -755,9 +755,9 @@ |`cublasCsyrk_v2_64`|12.0| | | | | | | | | |`cublasCsyrkx`| | | |`rocblas_csyrkx`|3.5.0| | | | | |`cublasCsyrkx_64`|12.0| | | | | | | | | -|`cublasCtrmm`| | | |`rocblas_ctrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasCtrmm`| | | |`rocblas_ctrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasCtrmm_64`|12.0| | | | | | | | | -|`cublasCtrmm_v2`| | | |`rocblas_ctrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasCtrmm_v2`| | | |`rocblas_ctrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasCtrmm_v2_64`|12.0| | | | | | | | | |`cublasCtrsm`| | | |`rocblas_ctrsm`|3.5.0| | | | | |`cublasCtrsm_64`|12.0| | | | | | | | | @@ -789,9 +789,9 @@ |`cublasDsyrk_v2_64`|12.0| | | | | | | | | |`cublasDsyrkx`| | | |`rocblas_dsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | | | | | | | -|`cublasDtrmm`| | | |`rocblas_dtrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasDtrmm`| | | |`rocblas_dtrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasDtrmm_64`|12.0| | | | | | | | | -|`cublasDtrmm_v2`| | | |`rocblas_dtrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasDtrmm_v2`| | | |`rocblas_dtrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasDtrmm_v2_64`|12.0| | | | | | | | | |`cublasDtrsm`| | | |`rocblas_dtrsm`|1.5.0| | | | | |`cublasDtrsm_64`|12.0| | | | | | | | | @@ -837,7 +837,7 @@ |`cublasSsyrk_v2_64`|12.0| | | | | | | | | |`cublasSsyrkx`| | | |`rocblas_ssyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | | | | | | | -|`cublasStrmm`| | | | | | | | | | +|`cublasStrmm`| | | |`rocblas_strmm`|3.5.0| |6.0.0| |6.0.0| |`cublasStrmm_64`|12.0| | | | | | | | | |`cublasStrmm_v2`| | | |`rocblas_strmm`|3.5.0| |6.0.0| |6.0.0| |`cublasStrmm_v2_64`|12.0| | | | | | | | | @@ -895,9 +895,9 @@ |`cublasZsyrk_v2_64`|12.0| | | | | | | | | |`cublasZsyrkx`| | | |`rocblas_zsyrkx`|3.5.0| | | | | |`cublasZsyrkx_64`|12.0| | | | | | | | | -|`cublasZtrmm`| | | |`rocblas_ztrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasZtrmm`| | | |`rocblas_ztrmm`|3.5.0| |6.0.0| |6.0.0| 
|`cublasZtrmm_64`|12.0| | | | | | | | | -|`cublasZtrmm_v2`| | | |`rocblas_ztrmm_outofplace`|5.0.0|5.6.0| | | | +|`cublasZtrmm_v2`| | | |`rocblas_ztrmm`|3.5.0| |6.0.0| |6.0.0| |`cublasZtrmm_v2_64`|12.0| | | | | | | | | |`cublasZtrsm`| | | |`rocblas_ztrsm`|3.5.0| | | | | |`cublasZtrsm_64`|12.0| | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index c9c1d464..0c73b64c 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -542,13 +542,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZtrsm_64", {"hipblasZtrsm_64", "", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED}}, // TRMM - {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED | ROC_DEPRECATED}}, + {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, {"cublasStrmm_64", {"hipblasStrmm_64", "", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED}}, - {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_DEPRECATED | ROC_DEPRECATED}}, + {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, {"cublasDtrmm_64", {"hipblasDtrmm_64", "", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED}}, - {"cublasCtrmm", {"hipblasCtrmm", "rocblas_ctrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_DEPRECATED | ROC_DEPRECATED}}, + {"cublasCtrmm", {"hipblasCtrmm", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, {"cublasCtrmm_64", {"hipblasCtrmm_64", "", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED}}, - {"cublasZtrmm", {"hipblasZtrmm", "rocblas_ztrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_DEPRECATED | ROC_DEPRECATED}}, + {"cublasZtrmm", {"hipblasZtrmm", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, {"cublasZtrmm_64", {"hipblasZtrmm_64", "", CONV_LIB_FUNC, API_BLAS, 7, 
UNSUPPORTED}}, // ------------------------ CUBLAS BLAS - like extension (cublas_api.h) @@ -911,11 +911,11 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMM {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7}}, {"cublasStrmm_v2_64", {"hipblasStrmm_64", "", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED}}, - {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_DEPRECATED | ROC_DEPRECATED}}, + {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, 7}}, {"cublasDtrmm_v2_64", {"hipblasDtrmm_64", "", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED}}, - {"cublasCtrmm_v2", {"hipblasCtrmm", "rocblas_ctrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_DEPRECATED | ROC_DEPRECATED}}, + {"cublasCtrmm_v2", {"hipblasCtrmm", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, 7}}, {"cublasCtrmm_v2_64", {"hipblasCtrmm_64", "", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED}}, - {"cublasZtrmm_v2", {"hipblasZtrmm", "rocblas_ztrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_DEPRECATED | ROC_DEPRECATED}}, + {"cublasZtrmm_v2", {"hipblasZtrmm", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, 7}}, {"cublasZtrmm_v2_64", {"hipblasZtrmm_64", "", CONV_LIB_FUNC, API_BLAS, 7, UNSUPPORTED}}, // NRM2 @@ -1706,9 +1706,9 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasCtrsm", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasZtrsm", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStrmm", {HIP_3020, HIP_0, HIP_0, HIP_LATEST}}, - {"hipblasDtrmm", {HIP_3020, HIP_5060, HIP_0 }}, - {"hipblasCtrmm", {HIP_3050, HIP_5060, HIP_0 }}, - {"hipblasZtrmm", {HIP_3050, HIP_5060, HIP_0 }}, + {"hipblasDtrmm", {HIP_3020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtrmm", {HIP_3050, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtrmm", {HIP_3050, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasSgeam", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDgeam", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasCgeam", {HIP_3060, HIP_0, HIP_0 }}, @@ -1994,10 +1994,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { 
{"rocblas_dsyrkx", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_csyrkx", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_zsyrkx", {HIP_3050, HIP_0, HIP_0 }}, - {"rocblas_strmm_outofplace", {HIP_5000, HIP_5060, HIP_0 }}, - {"rocblas_dtrmm_outofplace", {HIP_5000, HIP_5060, HIP_0 }}, - {"rocblas_ctrmm_outofplace", {HIP_5000, HIP_5060, HIP_0 }}, - {"rocblas_ztrmm_outofplace", {HIP_5000, HIP_5060, HIP_0 }}, + {"rocblas_strmm_outofplace", {HIP_5000, HIP_5060, HIP_6000}}, + {"rocblas_dtrmm_outofplace", {HIP_5000, HIP_5060, HIP_6000}}, + {"rocblas_ctrmm_outofplace", {HIP_5000, HIP_5060, HIP_6000}}, + {"rocblas_ztrmm_outofplace", {HIP_5000, HIP_5060, HIP_6000}}, {"rocblas_strsm", {HIP_1050, HIP_0, HIP_0 }}, {"rocblas_dtrsm", {HIP_1050, HIP_0, HIP_0 }}, {"rocblas_ctrsm", {HIP_3050, HIP_0, HIP_0 }}, @@ -2066,11 +2066,20 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_cgemv_strided_batched", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_zgemv_strided_batched", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_strmm", {HIP_3050, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dtrmm", {HIP_3050, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ctrmm", {HIP_3050, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ztrmm", {HIP_3050, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { {"hipblasStrmm", {HIP_6000}}, + {"hipblasDtrmm", {HIP_6000}}, + {"hipblasCtrmm", {HIP_6000}}, + {"hipblasZtrmm", {HIP_6000}}, {"rocblas_strmm", {HIP_6000}}, + {"rocblas_dtrmm", {HIP_6000}}, + {"rocblas_ctrmm", {HIP_6000}}, + {"rocblas_ztrmm", {HIP_6000}}, }; const std::map CUDA_BLAS_API_SECTION_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 1d11a97e..54cb9ff0 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -1417,6 +1417,30 @@ int main() { // CHECK: blasStatus = hipblasZgelsBatched(blasHandle, blasOperation, m, n, nrhs, dcomplexAarray, lda, dcomplexCarray, 
ldc, &info, &deviceInfo, batchCount); blasStatus = cublasZgelsBatched(blasHandle, blasOperation, m, n, nrhs, dcomplexAarray, lda, dcomplexCarray, ldc, &info, &deviceInfo, batchCount); + // NOTE: void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag, int m, int n, float alpha, const float* A, int lda, float* B, int ldb); is not supported by HIP + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, float* C, int ldc); + // CHECK: blasStatus = hipblasStrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); + blasStatus = cublasStrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); + + // NOTE: void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag, int m, int n, double alpha, const double* A, int lda, double* B, int ldb); is not supported by HIP + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, double* C, int ldc); + // HIP: hipblasStatus_t hipblasDtrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, double* C, int ldc); + // CHECK: blasStatus = 
hipblasDtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + blasStatus = cublasDtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + + // NOTE: void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag, int m, int n, cuComplex alpha, const cuComplex* A, int lda, cuComplex* B, int ldb); is not supported by HIP + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, hipblasComplex* C, int ldc); + // CHECK: blasStatus = hipblasCtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); + blasStatus = cublasCtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); + + // NOTE: void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb); is not supported by HIP + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmm(hipblasHandle_t 
handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* C, int ldc); + // CHECK: blasStatus = hipblasZtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + blasStatus = cublasZtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 870ffc6e..55ad68ed 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -1573,8 +1573,31 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, float* C, int ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, float* C, int ldc); // CHECK: blasStatus = hipblasStrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasStrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); + blasStatus = cublasStrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); 
blasStatus = cublasStrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, double* C, int ldc); + // HIP: hipblasStatus_t hipblasDtrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, double* C, int ldc); + // CHECK: blasStatus = hipblasDtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + blasStatus = cublasDtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + blasStatus = cublasDtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, hipblasComplex* C, int ldc); + // CHECK: blasStatus = hipblasCtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, 
&complexB, ldb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); + blasStatus = cublasCtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); + blasStatus = cublasCtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* C, int ldc); + // CHECK: blasStatus = hipblasZtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + blasStatus = cublasZtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + blasStatus = cublasZtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; diff --git 
a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu index 1d33f30f..3a8034bb 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu @@ -1450,22 +1450,22 @@ int main() { // TODO: #1281 // NOTE: void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag, int m, int n, double alpha, const double* A, int lda, double* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, double* C, int ldc); - // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmm_outofplace(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, double* C, rocblas_int ldc); - // CHECK: blasStatus = rocblas_dtrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, double* C, rocblas_int ldc); + // CHECK: blasStatus = rocblas_dtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); blasStatus = cublasDtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); // TODO: #1281 // NOTE: void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag, int m, int n, cuComplex alpha, const 
cuComplex* A, int lda, cuComplex* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex* C, int ldc); - // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmm_outofplace(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, rocblas_float_complex* C, rocblas_int ldc); - // CHECK: blasStatus = rocblas_ctrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, rocblas_float_complex* C, rocblas_int ldc); + // CHECK: blasStatus = rocblas_ctrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); blasStatus = cublasCtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); // TODO: #1281 // NOTE: void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, 
cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); - // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmm_outofplace(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, rocblas_double_complex* C, rocblas_int ldc); - // CHECK: blasStatus = rocblas_ztrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, rocblas_double_complex* C, rocblas_int ldc); + // CHECK: blasStatus = rocblas_ztrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); blasStatus = cublasZtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); // TODO: #1281 diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index dfb1de90..fa01ad57 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -1594,29 +1594,31 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, 
int n, const float* alpha, const float* A, int lda, const float* B, int ldb, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_strmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); + // CHECK-NEXT: blasStatus = rocblas_strmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); + blasStatus = cublasStrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); blasStatus = cublasStrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, double* C, int ldc); - // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmm_outofplace(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, double* C, rocblas_int ldc); - // CHECK: blasStatus = rocblas_dtrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); - // CHECK-NEXT: blasStatus = rocblas_dtrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmm(rocblas_handle handle, rocblas_side side, 
rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, double* C, rocblas_int ldc); + // CHECK: blasStatus = rocblas_dtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); + // CHECK-NEXT: blasStatus = rocblas_dtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); blasStatus = cublasDtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); blasStatus = cublasDtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex* C, int ldc); - // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmm_outofplace(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, rocblas_float_complex* C, rocblas_int ldc); - // CHECK: blasStatus = rocblas_ctrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); - // CHECK-NEXT: blasStatus = rocblas_ctrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, 
rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, rocblas_float_complex* C, rocblas_int ldc); + // CHECK: blasStatus = rocblas_ctrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); + // CHECK-NEXT: blasStatus = rocblas_ctrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); blasStatus = cublasCtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); blasStatus = cublasCtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); - // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmm_outofplace(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, rocblas_double_complex* C, rocblas_int ldc); - // CHECK: blasStatus = rocblas_ztrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); - // CHECK-NEXT: blasStatus = rocblas_ztrmm_outofplace(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_ztrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, rocblas_double_complex* C, rocblas_int ldc); + // CHECK: blasStatus = rocblas_ztrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = rocblas_ztrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); blasStatus = cublasZtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); blasStatus = cublasZtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc);