From 8719384bf26d7aa38847aa95d8a6eac976bd57e4 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 13 Oct 2024 18:02:57 +0100 Subject: [PATCH] [HIPIFY][rocBLAS] 64-bit functions support - Step 19 + `rocblas_(s|d|c|z)trsm_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 20 ++++++++----- .../synthetic/libraries/cublas2rocblas_v2.cu | 29 +++++++++++++++++++ 5 files changed, 65 insertions(+), 32 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 14d8a14d..c97c5533 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1714,7 +1714,9 @@ sub rocSubstitutions { subst("cublasCtrmv_v2_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrsm", "rocblas_ctrsm", "library"); subst("cublasCtrsmBatched", "rocblas_ctrsm_batched", "library"); + subst("cublasCtrsm_64", "rocblas_ctrsm_64", "library"); subst("cublasCtrsm_v2", "rocblas_ctrsm", "library"); + subst("cublasCtrsm_v2_64", "rocblas_ctrsm_64", "library"); subst("cublasCtrsv", "rocblas_ctrsv", "library"); subst("cublasCtrsv_64", "rocblas_ctrsv_64", "library"); subst("cublasCtrsv_v2", "rocblas_ctrsv", "library"); @@ -1846,7 +1848,9 @@ sub rocSubstitutions { subst("cublasDtrmv_v2_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrsm", "rocblas_dtrsm", "library"); subst("cublasDtrsmBatched", "rocblas_dtrsm_batched", "library"); + subst("cublasDtrsm_64", "rocblas_dtrsm_64", "library"); subst("cublasDtrsm_v2", "rocblas_dtrsm", "library"); + subst("cublasDtrsm_v2_64", "rocblas_dtrsm_64", "library"); subst("cublasDtrsv", "rocblas_dtrsv", "library"); subst("cublasDtrsv_64", "rocblas_dtrsv_64", "library"); subst("cublasDtrsv_v2", "rocblas_dtrsv", "library"); @@ -2062,7 +2066,9 @@ sub rocSubstitutions { subst("cublasStrmv_v2_64", "rocblas_strmv_64", "library"); 
subst("cublasStrsm", "rocblas_strsm", "library"); subst("cublasStrsmBatched", "rocblas_strsm_batched", "library"); + subst("cublasStrsm_64", "rocblas_strsm_64", "library"); subst("cublasStrsm_v2", "rocblas_strsm", "library"); + subst("cublasStrsm_v2_64", "rocblas_strsm_64", "library"); subst("cublasStrsv", "rocblas_strsv", "library"); subst("cublasStrsv_64", "rocblas_strsv_64", "library"); subst("cublasStrsv_v2", "rocblas_strsv", "library"); @@ -2217,7 +2223,9 @@ sub rocSubstitutions { subst("cublasZtrmv_v2_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrsm", "rocblas_ztrsm", "library"); subst("cublasZtrsmBatched", "rocblas_ztrsm_batched", "library"); + subst("cublasZtrsm_64", "rocblas_ztrsm_64", "library"); subst("cublasZtrsm_v2", "rocblas_ztrsm", "library"); + subst("cublasZtrsm_v2_64", "rocblas_ztrsm_64", "library"); subst("cublasZtrsv", "rocblas_ztrsv", "library"); subst("cublasZtrsv_64", "rocblas_ztrsv_64", "library"); subst("cublasZtrsv_v2", "rocblas_ztrsv", "library"); @@ -12675,8 +12683,6 @@ sub warnRocOnlyUnsupportedFunctions { my $k = 0; foreach $func ( "cublasZtrttp", - "cublasZtrsm_v2_64", - "cublasZtrsm_64", "cublasZtrsmBatched_64", "cublasZtrmm_v2_64", "cublasZtrmm_64", @@ -12714,8 +12720,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrsm_v2_64", - "cublasStrsm_64", "cublasStrsmBatched_64", "cublasStrmm_v2_64", "cublasStrmm_64", @@ -12847,8 +12851,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrsm_v2_64", - "cublasDtrsm_64", "cublasDtrsmBatched_64", "cublasDtrmm_v2_64", "cublasDtrmm_64", @@ -12875,8 +12877,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgeam_64", "cublasDdgmm_64", "cublasCtrttp", - "cublasCtrsm_v2_64", - "cublasCtrsm_64", "cublasCtrsmBatched_64", "cublasCtrmm_v2_64", "cublasCtrmm_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 
b24a6bb1..adc47104 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1068,9 +1068,9 @@ |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | |`cublasCtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasCtrsm`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrsm_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasDgemm`| | | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemmBatched`| | | | |`hipblasDgemmBatched`|1.8.2| | | | |`rocblas_dgemm_batched`|3.5.0| | | | | |`cublasDgemmBatched_64`|12.0| | | | | | | | | | | | | | | | @@ -1104,9 +1104,9 @@ |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | |`cublasDtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasDtrsm`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrsm_64`|12.0| | | | | | | | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | | | | | | | |`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | | | | | | | |`cublasHSHgemvBatched`|11.6| | | | | | | | | |`rocblas_hshgemv_batched`|6.0.0| | | | | @@ -1156,9 +1156,9 @@ |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | |`cublasStrmm_v2_64`|12.0| | | | | | | | | | | 
| | | | | |`cublasStrsm`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrsm_64`|12.0| | | | | | | | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrsm_v2_64`|12.0| | | | | | | | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasTSSgemvBatched`|11.6| | | | | | | | | |`rocblas_tssgemv_batched`|6.0.0| | | | | |`cublasTSSgemvBatched_64`|12.0| | | | | | | | | |`rocblas_tssgemv_batched_64`|6.2.0| | | | | |`cublasTSSgemvStridedBatched`|11.6| | | | | | | | | |`rocblas_tssgemv_strided_batched`|6.0.0| | | | | @@ -1214,9 +1214,9 @@ |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | |`cublasZtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasZtrsm`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrsm_64`|12.0| | | | | | | | | |`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_ztrsm_64`|6.2.0| | | | | ## **8. 
BLAS-like Extension** diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 96e2612f..1ad86206 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1068,9 +1068,9 @@ |`cublasCtrmm_v2`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | |`cublasCtrmm_v2_64`|12.0| | | | | | | | | | |`cublasCtrsm`| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_64`|12.0| | | | | | | | | | +|`cublasCtrsm_64`|12.0| | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasCtrsm_v2_64`|12.0| | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasDgemm`| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemmBatched`| | | | |`rocblas_dgemm_batched`|3.5.0| | | | | |`cublasDgemmBatched_64`|12.0| | | | | | | | | | @@ -1104,9 +1104,9 @@ |`cublasDtrmm_v2`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | |`cublasDtrmm_v2_64`|12.0| | | | | | | | | | |`cublasDtrsm`| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_64`|12.0| | | | | | | | | | +|`cublasDtrsm_64`|12.0| | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasDtrsm_v2_64`|12.0| | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | |`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | |`cublasHSHgemvBatched`|11.6| | | |`rocblas_hshgemv_batched`|6.0.0| | | | | @@ -1156,9 +1156,9 @@ |`cublasStrmm_v2`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | |`cublasStrmm_v2_64`|12.0| | | | | | | | | | |`cublasStrsm`| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_64`|12.0| | | | | | | | | | +|`cublasStrsm_64`|12.0| | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_v2_64`|12.0| | | | | | | | | | +|`cublasStrsm_v2_64`|12.0| | | |`rocblas_strsm_64`|6.2.0| | | | | 
|`cublasTSSgemvBatched`|11.6| | | |`rocblas_tssgemv_batched`|6.0.0| | | | | |`cublasTSSgemvBatched_64`|12.0| | | |`rocblas_tssgemv_batched_64`|6.2.0| | | | | |`cublasTSSgemvStridedBatched`|11.6| | | |`rocblas_tssgemv_strided_batched`|6.0.0| | | | | @@ -1214,9 +1214,9 @@ |`cublasZtrmm_v2`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | |`cublasZtrmm_v2_64`|12.0| | | | | | | | | | |`cublasZtrsm`| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_64`|12.0| | | | | | | | | | +|`cublasZtrsm_64`|12.0| | | |`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasZtrsm_v2_64`|12.0| | | |`rocblas_ztrsm_64`|6.2.0| | | | | ## **8. BLAS-like Extension** diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index c86c7e1a..1dee8c87 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -541,13 +541,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRSM {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrsm_64", {"hipblasStrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasStrsm_64", {"hipblasStrsm_64", "rocblas_strsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrsm_64", {"hipblasDtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDtrsm_64", {"hipblasDtrsm_64", "rocblas_dtrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasCtrsm", {"hipblasCtrsm_v2", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrsm_64", {"hipblasCtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCtrsm_64", {"hipblasCtrsm_64", "rocblas_ctrsm_64", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasZtrsm", {"hipblasZtrsm_v2", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrsm_64", {"hipblasZtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZtrsm_64", {"hipblasZtrsm_64", "rocblas_ztrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, // TRMM {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -908,13 +908,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRSM {"cublasStrsm_v2", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasStrsm_v2_64", {"hipblasStrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasStrsm_v2_64", {"hipblasStrsm_64", "rocblas_strsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasDtrsm_v2", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDtrsm_v2_64", {"hipblasDtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDtrsm_v2_64", {"hipblasDtrsm_64", "rocblas_dtrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasCtrsm_v2", {"hipblasCtrsm_v2", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCtrsm_v2_64", {"hipblasCtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCtrsm_v2_64", {"hipblasCtrsm_64", "rocblas_ctrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasZtrsm_v2", {"hipblasZtrsm_v2", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZtrsm_v2_64", {"hipblasZtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZtrsm_v2_64", {"hipblasZtrsm_64", "rocblas_ztrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, // TRMM {"cublasStrmm_v2", {"hipblasStrmm", 
"rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -2403,6 +2403,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_zgeru_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_cgerc_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_zgerc_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_strsm_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtrsm_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctrsm_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztrsm_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 9c9a08ee..3ef155d6 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -157,6 +157,7 @@ int main() { int num = 0; int lda = 0; int64_t lda_64 = 0; + int64_t ldb_64 = 0; int ldb = 0; int ldc = 0; int res = 0; @@ -2979,6 +2980,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); blasStatus = cublasZgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); blasStatus = cublasZgerc_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, float* B, int64_t ldb); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, float* B, int64_t ldb); + // CHECK: blasStatus = rocblas_strsm_64(blasHandle, 
blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + // CHECK-NEXT: blasStatus = rocblas_strsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + blasStatus = cublasStrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + blasStatus = cublasStrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, double* B, int64_t ldb); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, double* B, int64_t ldb); + // CHECK: blasStatus = rocblas_dtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + // CHECK-NEXT: blasStatus = rocblas_dtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + blasStatus = cublasDtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + blasStatus = cublasDtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, cuComplex* B, int64_t ldb); + //
ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, rocblas_float_complex* B, int64_t ldb); + // CHECK: blasStatus = rocblas_ctrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + // CHECK-NEXT: blasStatus = rocblas_ctrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + blasStatus = cublasCtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + blasStatus = cublasCtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* B, int64_t ldb); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, rocblas_double_complex* B, int64_t ldb); + // CHECK: blasStatus = rocblas_ztrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + // CHECK-NEXT: blasStatus = rocblas_ztrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + blasStatus = 
cublasZtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + blasStatus = cublasZtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); #endif return 0;