From b057f72b7fa0505b29f3e7d05273e7ca0dd35e08 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 8 Oct 2024 21:00:50 +0100 Subject: [PATCH] [HIPIFY][rocBLAS] 64-bit functions support - Step 12 + `rocblas_(s|d|c|z)trmv_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++------ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++------ docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++------ src/CUDA2HIP_BLAS_API_functions.cpp | 20 +++++++------ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 +++++++++++++++++++ 5 files changed, 64 insertions(+), 32 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 55759290..fe958346 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1662,7 +1662,9 @@ sub rocSubstitutions { subst("cublasCtrmm", "rocblas_ctrmm", "library"); subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); subst("cublasCtrmv", "rocblas_ctrmv", "library"); + subst("cublasCtrmv_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrmv_v2", "rocblas_ctrmv", "library"); + subst("cublasCtrmv_v2_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrsm", "rocblas_ctrsm", "library"); subst("cublasCtrsmBatched", "rocblas_ctrsm_batched", "library"); subst("cublasCtrsm_v2", "rocblas_ctrsm", "library"); @@ -1780,7 +1782,9 @@ sub rocSubstitutions { subst("cublasDtrmm", "rocblas_dtrmm", "library"); subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); subst("cublasDtrmv", "rocblas_dtrmv", "library"); + subst("cublasDtrmv_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrmv_v2", "rocblas_dtrmv", "library"); + subst("cublasDtrmv_v2_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrsm", "rocblas_dtrsm", "library"); subst("cublasDtrsmBatched", "rocblas_dtrsm_batched", "library"); subst("cublasDtrsm_v2", "rocblas_dtrsm", "library"); @@ -1982,7 +1986,9 @@ sub rocSubstitutions { subst("cublasStrmm", "rocblas_strmm", "library"); subst("cublasStrmm_v2", "rocblas_strmm", "library"); subst("cublasStrmv", "rocblas_strmv", "library"); + subst("cublasStrmv_64", "rocblas_strmv_64", "library"); subst("cublasStrmv_v2", "rocblas_strmv", "library"); + subst("cublasStrmv_v2_64", "rocblas_strmv_64", "library"); subst("cublasStrsm", "rocblas_strsm", "library"); subst("cublasStrsmBatched", "rocblas_strsm_batched", "library"); subst("cublasStrsm_v2", "rocblas_strsm", "library"); @@ -2121,7 +2127,9 @@ sub rocSubstitutions { subst("cublasZtrmm", "rocblas_ztrmm", "library"); subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); subst("cublasZtrmv", "rocblas_ztrmv", "library"); + subst("cublasZtrmv_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrmv_v2", "rocblas_ztrmv", "library"); + subst("cublasZtrmv_v2_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrsm", "rocblas_ztrsm", "library"); subst("cublasZtrsmBatched", "rocblas_ztrsm_batched", "library"); subst("cublasZtrsm_v2", "rocblas_ztrsm", "library"); @@ -12596,8 +12604,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZtrsm_v2_64", "cublasZtrsm_64", "cublasZtrsmBatched_64", - "cublasZtrmv_v2_64", - "cublasZtrmv_64", "cublasZtrmm_v2_64", "cublasZtrmm_64", "cublasZtpttr", @@ -12651,8 +12657,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasStrsm_v2_64", "cublasStrsm_64", "cublasStrsmBatched_64", - "cublasStrmv_v2_64", - "cublasStrmv_64", "cublasStrmm_v2_64", "cublasStrmm_64", "cublasStpttr", @@ -12798,8 +12802,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDtrsm_v2_64", "cublasDtrsm_64", "cublasDtrsmBatched_64", - "cublasDtrmv_v2_64", - "cublasDtrmv_64", "cublasDtrmm_v2_64", "cublasDtrmm_64", "cublasDtpttr", @@ -12840,8 +12842,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCtrsm_v2_64", "cublasCtrsm_64", "cublasCtrsmBatched_64", - "cublasCtrmv_v2_64", - "cublasCtrmv_64", "cublasCtrmm_v2_64", "cublasCtrmm_64", "cublasCtpttr", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 34b6750e..9e87047e 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -795,9 +795,9 @@ |`cublasCtpsv_v2`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | |`rocblas_ctpsv`|3.5.0| | | | | |`cublasCtpsv_v2_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasCtrmv`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtrmv_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrmv_v2`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_v2_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtrmv_v2_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrsv`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | |`rocblas_ctrsv`|3.5.0| | | | | |`cublasCtrsv_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasCtrsv_v2`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | |`rocblas_ctrsv`|3.5.0| | | | | @@ -859,9 +859,9 @@ |`cublasDtpsv_v2`| | | | |`hipblasDtpsv`|3.5.0| | | | |`rocblas_dtpsv`|3.5.0| | | | | |`cublasDtpsv_v2_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | | | | | | | | | |`cublasDtrmv`| | | | |`hipblasDtrmv`|3.5.0| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | | | | | | | | +|`cublasDtrmv_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrmv_v2`| | | | |`hipblasDtrmv`|3.5.0| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_v2_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | | | | | | | | +|`cublasDtrmv_v2_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrsv`| | | | |`hipblasDtrsv`|3.0.0| | | | |`rocblas_dtrsv`|3.5.0| | | | | |`cublasDtrsv_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | | | | | | | | | |`cublasDtrsv_v2`| | | | |`hipblasDtrsv`|3.0.0| | | | |`rocblas_dtrsv`|3.5.0| | | | | @@ -923,9 +923,9 @@ |`cublasStpsv_v2`| | | | |`hipblasStpsv`|3.5.0| | | | |`rocblas_stpsv`|3.5.0| | | | | |`cublasStpsv_v2_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | | | | | | | | | |`cublasStrmv`| | | | |`hipblasStrmv`|3.5.0| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | | | | | | | | +|`cublasStrmv_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrmv_v2`| | | | |`hipblasStrmv`|3.5.0| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_v2_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | | | | | | | | +|`cublasStrmv_v2_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrsv`| | | | |`hipblasStrsv`|3.0.0| | | | |`rocblas_strsv`|3.5.0| | | | | |`cublasStrsv_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | | | | | | | | | |`cublasStrsv_v2`| | | | |`hipblasStrsv`|3.0.0| | | | |`rocblas_strsv`|3.5.0| | | | | @@ -1003,9 +1003,9 @@ |`cublasZtpsv_v2`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | |`rocblas_ztpsv`|3.5.0| | | | | |`cublasZtpsv_v2_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasZtrmv`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtrmv_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrmv_v2`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_v2_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtrmv_v2_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrsv`| | | | |`hipblasZtrsv_v2`|6.0.0| | | | |`rocblas_ztrsv`|3.5.0| | | | | |`cublasZtrsv_64`|12.0| | | |`hipblasZtrsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasZtrsv_v2`| | | | |`hipblasZtrsv_v2`|6.0.0| | | | |`rocblas_ztrsv`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 4821308c..d1d61766 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -795,9 +795,9 @@ |`cublasCtpsv_v2`| | | | |`rocblas_ctpsv`|3.5.0| | | | | |`cublasCtpsv_v2_64`|12.0| | | | | | | | | | |`cublasCtrmv`| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_64`|12.0| | | | | | | | | | +|`cublasCtrmv_64`|12.0| | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrmv_v2`| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasCtrmv_v2_64`|12.0| | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrsv`| | | | |`rocblas_ctrsv`|3.5.0| | | | | |`cublasCtrsv_64`|12.0| | | | | | | | | | |`cublasCtrsv_v2`| | | | |`rocblas_ctrsv`|3.5.0| | | | | @@ -859,9 +859,9 @@ |`cublasDtpsv_v2`| | | | |`rocblas_dtpsv`|3.5.0| | | | | |`cublasDtpsv_v2_64`|12.0| | | | | | | | | | |`cublasDtrmv`| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_64`|12.0| | | | | | | | | | +|`cublasDtrmv_64`|12.0| | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrmv_v2`| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasDtrmv_v2_64`|12.0| | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrsv`| | | | |`rocblas_dtrsv`|3.5.0| | | | | |`cublasDtrsv_64`|12.0| | | | | | | | | | |`cublasDtrsv_v2`| | | | |`rocblas_dtrsv`|3.5.0| | | | | @@ -923,9 +923,9 @@ |`cublasStpsv_v2`| | | | |`rocblas_stpsv`|3.5.0| | | | | |`cublasStpsv_v2_64`|12.0| | | | | | | | | | |`cublasStrmv`| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_64`|12.0| | | | | | | | | | +|`cublasStrmv_64`|12.0| | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrmv_v2`| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_v2_64`|12.0| | | | | | | | | | +|`cublasStrmv_v2_64`|12.0| | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrsv`| | | | |`rocblas_strsv`|3.5.0| | | | | |`cublasStrsv_64`|12.0| | | | | | | | | | |`cublasStrsv_v2`| | | | |`rocblas_strsv`|3.5.0| | | | | @@ -1003,9 +1003,9 @@ |`cublasZtpsv_v2`| | | | |`rocblas_ztpsv`|3.5.0| | | | | |`cublasZtpsv_v2_64`|12.0| | | | | | | | | | |`cublasZtrmv`| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_64`|12.0| | | | | | | | | | +|`cublasZtrmv_64`|12.0| | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrmv_v2`| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasZtrmv_v2_64`|12.0| | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrsv`| | | | |`rocblas_ztrsv`|3.5.0| | | | | |`cublasZtrsv_64`|12.0| | | | | | | | | | |`cublasZtrsv_v2`| | | | |`rocblas_ztrsv`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 054ef412..08ef4be5 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -242,13 +242,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMV {"cublasStrmv", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrmv_64", {"hipblasStrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStrmv_64", {"hipblasStrmv_64", "rocblas_strmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtrmv", {"hipblasDtrmv", "rocblas_dtrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrmv_64", {"hipblasDtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtrmv_64", {"hipblasDtrmv_64", "rocblas_dtrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtrmv", {"hipblasCtrmv_v2", "rocblas_ctrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrmv_64", {"hipblasCtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtrmv_64", {"hipblasCtrmv_v2_64", "rocblas_ctrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtrmv", {"hipblasZtrmv_v2", "rocblas_ztrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrmv_64", {"hipblasZtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtrmv_64", {"hipblasZtrmv_v2_64", "rocblas_ztrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TBMV {"cublasStbmv", {"hipblasStbmv", "rocblas_stbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, @@ -660,13 +660,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMV {"cublasStrmv_v2", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStrmv_v2_64", {"hipblasStrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStrmv_v2_64", {"hipblasStrmv_64", "rocblas_strmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtrmv_v2", {"hipblasDtrmv", "rocblas_dtrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtrmv_v2_64", {"hipblasDtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtrmv_v2_64", {"hipblasDtrmv_64", "rocblas_dtrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtrmv_v2", {"hipblasCtrmv_v2", "rocblas_ctrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtrmv_v2_64", {"hipblasCtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtrmv_v2_64", {"hipblasCtrmv_v2_64", "rocblas_ctrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtrmv_v2", {"hipblasZtrmv_v2", "rocblas_ztrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtrmv_v2_64", {"hipblasZtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtrmv_v2_64", {"hipblasZtrmv_v2_64", "rocblas_ztrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TBMV {"cublasStbmv_v2", {"hipblasStbmv", "rocblas_stbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, @@ -2373,6 +2373,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dspr2_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_chpr2_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_zhpr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_strmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtrmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctrmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztrmv_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 3ae5a1cc..383a4e4b 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -2769,6 +2769,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zhpr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); blasStatus = cublasZhpr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); blasStatus = cublasZhpr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* A, int64_t lda, float* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const float* A, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = rocblas_strmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_strmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* A, int64_t lda, double* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const double* A, int64_t lda, double* x, int64_t incx); + // CHECK: blasStatus = rocblas_dtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_dtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_float_complex* A, int64_t lda, rocblas_float_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ctrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ctrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_double_complex* A, int64_t lda, rocblas_double_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ztrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ztrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); #endif return 0;