From cbab1a8ca9db26e291f7ae6312090bf0cb6da9a8 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 13 Oct 2024 14:54:29 +0100 Subject: [PATCH 01/51] [HIPIFY][rocBLAS] 64-bit functions support - Step 17 + `rocblas_(s|d|c|z)tpsv_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++------ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++------ docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++------ src/CUDA2HIP_BLAS_API_functions.cpp | 20 +++++++------ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 +++++++++++++++++++ 5 files changed, 64 insertions(+), 32 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 74249276..d0707f69 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1699,7 +1699,9 @@ sub rocSubstitutions { subst("cublasCtpmv_v2", "rocblas_ctpmv", "library"); subst("cublasCtpmv_v2_64", "rocblas_ctpmv_64", "library"); subst("cublasCtpsv", "rocblas_ctpsv", "library"); + subst("cublasCtpsv_64", "rocblas_ctpsv_64", "library"); subst("cublasCtpsv_v2", "rocblas_ctpsv", "library"); + subst("cublasCtpsv_v2_64", "rocblas_ctpsv_64", "library"); subst("cublasCtrmm", "rocblas_ctrmm", "library"); subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); subst("cublasCtrmv", "rocblas_ctrmv", "library"); @@ -1827,7 +1829,9 @@ sub rocSubstitutions { subst("cublasDtpmv_v2", "rocblas_dtpmv", "library"); subst("cublasDtpmv_v2_64", "rocblas_dtpmv_64", "library"); subst("cublasDtpsv", "rocblas_dtpsv", "library"); + subst("cublasDtpsv_64", "rocblas_dtpsv_64", "library"); subst("cublasDtpsv_v2", "rocblas_dtpsv", "library"); + subst("cublasDtpsv_v2_64", "rocblas_dtpsv_64", "library"); subst("cublasDtrmm", "rocblas_dtrmm", "library"); subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); subst("cublasDtrmv", "rocblas_dtrmv", "library"); @@ -2039,7 +2043,9 @@ sub rocSubstitutions { subst("cublasStpmv_v2", "rocblas_stpmv", "library"); subst("cublasStpmv_v2_64", 
"rocblas_stpmv_64", "library"); subst("cublasStpsv", "rocblas_stpsv", "library"); + subst("cublasStpsv_64", "rocblas_stpsv_64", "library"); subst("cublasStpsv_v2", "rocblas_stpsv", "library"); + subst("cublasStpsv_v2_64", "rocblas_stpsv_64", "library"); subst("cublasStrmm", "rocblas_strmm", "library"); subst("cublasStrmm_v2", "rocblas_strmm", "library"); subst("cublasStrmv", "rocblas_strmv", "library"); @@ -2188,7 +2194,9 @@ sub rocSubstitutions { subst("cublasZtpmv_v2", "rocblas_ztpmv", "library"); subst("cublasZtpmv_v2_64", "rocblas_ztpmv_64", "library"); subst("cublasZtpsv", "rocblas_ztpsv", "library"); + subst("cublasZtpsv_64", "rocblas_ztpsv_64", "library"); subst("cublasZtpsv_v2", "rocblas_ztpsv", "library"); + subst("cublasZtpsv_v2_64", "rocblas_ztpsv_64", "library"); subst("cublasZtrmm", "rocblas_ztrmm", "library"); subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); subst("cublasZtrmv", "rocblas_ztrmv", "library"); @@ -12661,8 +12669,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZtrmm_v2_64", "cublasZtrmm_64", "cublasZtpttr", - "cublasZtpsv_v2_64", - "cublasZtpsv_64", "cublasZsyrkx_64", "cublasZsyrk_v2_64", "cublasZsyrk_64", @@ -12706,8 +12712,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasStrmm_v2_64", "cublasStrmm_64", "cublasStpttr", - "cublasStpsv_v2_64", - "cublasStpsv_64", "cublasSsyrkx_64", "cublasSsyrk_v2_64", "cublasSsyrk_64", @@ -12843,8 +12847,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDtrmm_v2_64", "cublasDtrmm_64", "cublasDtpttr", - "cublasDtpsv_v2_64", - "cublasDtpsv_64", "cublasDsyrkx_64", "cublasDsyrk_v2_64", "cublasDsyrk_64", @@ -12875,8 +12877,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCtrmm_v2_64", "cublasCtrmm_64", "cublasCtpttr", - "cublasCtpsv_v2_64", - "cublasCtpsv_64", "cublasCsyrkx_64", "cublasCsyrk_v2_64", "cublasCsyrk_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 8ad6be72..05d3caaa 100644 --- 
a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -791,9 +791,9 @@ |`cublasCtpmv_v2`| | | | |`hipblasCtpmv_v2`|6.0.0| | | | |`rocblas_ctpmv`|3.5.0| | | | | |`cublasCtpmv_v2_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | | |`rocblas_ctpmv_64`|6.2.0| | | | | |`cublasCtpsv`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | |`rocblas_ctpsv`|3.5.0| | | | | -|`cublasCtpsv_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtpsv_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | | |`rocblas_ctpsv_64`|6.2.0| | | | | |`cublasCtpsv_v2`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | |`rocblas_ctpsv`|3.5.0| | | | | -|`cublasCtpsv_v2_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtpsv_v2_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | | |`rocblas_ctpsv_64`|6.2.0| | | | | |`cublasCtrmv`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | |`rocblas_ctrmv`|3.5.0| | | | | |`cublasCtrmv_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrmv_v2`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | |`rocblas_ctrmv`|3.5.0| | | | | @@ -855,9 +855,9 @@ |`cublasDtpmv_v2`| | | | |`hipblasDtpmv`|3.5.0| | | | |`rocblas_dtpmv`|3.5.0| | | | | |`cublasDtpmv_v2_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | | |`rocblas_dtpmv_64`|6.2.0| | | | | |`cublasDtpsv`| | | | |`hipblasDtpsv`|3.5.0| | | | |`rocblas_dtpsv`|3.5.0| | | | | -|`cublasDtpsv_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | | | | | | | | | +|`cublasDtpsv_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | | |`rocblas_dtpsv_64`|6.2.0| | | | | |`cublasDtpsv_v2`| | | | |`hipblasDtpsv`|3.5.0| | | | |`rocblas_dtpsv`|3.5.0| | | | | -|`cublasDtpsv_v2_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | | | | | | | | | +|`cublasDtpsv_v2_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | | |`rocblas_dtpsv_64`|6.2.0| | | | | |`cublasDtrmv`| | | | |`hipblasDtrmv`|3.5.0| | | | |`rocblas_dtrmv`|3.5.0| | | | | |`cublasDtrmv_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | 
|`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrmv_v2`| | | | |`hipblasDtrmv`|3.5.0| | | | |`rocblas_dtrmv`|3.5.0| | | | | @@ -919,9 +919,9 @@ |`cublasStpmv_v2`| | | | |`hipblasStpmv`|3.5.0| | | | |`rocblas_stpmv`|3.5.0| | | | | |`cublasStpmv_v2_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | | |`rocblas_stpmv_64`|6.2.0| | | | | |`cublasStpsv`| | | | |`hipblasStpsv`|3.5.0| | | | |`rocblas_stpsv`|3.5.0| | | | | -|`cublasStpsv_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | | | | | | | | | +|`cublasStpsv_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | | |`rocblas_stpsv_64`|6.2.0| | | | | |`cublasStpsv_v2`| | | | |`hipblasStpsv`|3.5.0| | | | |`rocblas_stpsv`|3.5.0| | | | | -|`cublasStpsv_v2_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | | | | | | | | | +|`cublasStpsv_v2_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | | |`rocblas_stpsv_64`|6.2.0| | | | | |`cublasStrmv`| | | | |`hipblasStrmv`|3.5.0| | | | |`rocblas_strmv`|3.5.0| | | | | |`cublasStrmv_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrmv_v2`| | | | |`hipblasStrmv`|3.5.0| | | | |`rocblas_strmv`|3.5.0| | | | | @@ -999,9 +999,9 @@ |`cublasZtpmv_v2`| | | | |`hipblasZtpmv_v2`|6.0.0| | | | |`rocblas_ztpmv`|3.5.0| | | | | |`cublasZtpmv_v2_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | | |`rocblas_ztpmv_64`|6.2.0| | | | | |`cublasZtpsv`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | |`rocblas_ztpsv`|3.5.0| | | | | -|`cublasZtpsv_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtpsv_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | | |`rocblas_ztpsv_64`|6.2.0| | | | | |`cublasZtpsv_v2`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | |`rocblas_ztpsv`|3.5.0| | | | | -|`cublasZtpsv_v2_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtpsv_v2_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | | |`rocblas_ztpsv_64`|6.2.0| | | | | |`cublasZtrmv`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | |`rocblas_ztrmv`|3.5.0| | | | | |`cublasZtrmv_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | 
|`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrmv_v2`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | |`rocblas_ztrmv`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 6266492b..06eac47c 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -791,9 +791,9 @@ |`cublasCtpmv_v2`| | | | |`rocblas_ctpmv`|3.5.0| | | | | |`cublasCtpmv_v2_64`|12.0| | | |`rocblas_ctpmv_64`|6.2.0| | | | | |`cublasCtpsv`| | | | |`rocblas_ctpsv`|3.5.0| | | | | -|`cublasCtpsv_64`|12.0| | | | | | | | | | +|`cublasCtpsv_64`|12.0| | | |`rocblas_ctpsv_64`|6.2.0| | | | | |`cublasCtpsv_v2`| | | | |`rocblas_ctpsv`|3.5.0| | | | | -|`cublasCtpsv_v2_64`|12.0| | | | | | | | | | +|`cublasCtpsv_v2_64`|12.0| | | |`rocblas_ctpsv_64`|6.2.0| | | | | |`cublasCtrmv`| | | | |`rocblas_ctrmv`|3.5.0| | | | | |`cublasCtrmv_64`|12.0| | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrmv_v2`| | | | |`rocblas_ctrmv`|3.5.0| | | | | @@ -855,9 +855,9 @@ |`cublasDtpmv_v2`| | | | |`rocblas_dtpmv`|3.5.0| | | | | |`cublasDtpmv_v2_64`|12.0| | | |`rocblas_dtpmv_64`|6.2.0| | | | | |`cublasDtpsv`| | | | |`rocblas_dtpsv`|3.5.0| | | | | -|`cublasDtpsv_64`|12.0| | | | | | | | | | +|`cublasDtpsv_64`|12.0| | | |`rocblas_dtpsv_64`|6.2.0| | | | | |`cublasDtpsv_v2`| | | | |`rocblas_dtpsv`|3.5.0| | | | | -|`cublasDtpsv_v2_64`|12.0| | | | | | | | | | +|`cublasDtpsv_v2_64`|12.0| | | |`rocblas_dtpsv_64`|6.2.0| | | | | |`cublasDtrmv`| | | | |`rocblas_dtrmv`|3.5.0| | | | | |`cublasDtrmv_64`|12.0| | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrmv_v2`| | | | |`rocblas_dtrmv`|3.5.0| | | | | @@ -919,9 +919,9 @@ |`cublasStpmv_v2`| | | | |`rocblas_stpmv`|3.5.0| | | | | |`cublasStpmv_v2_64`|12.0| | | |`rocblas_stpmv_64`|6.2.0| | | | | |`cublasStpsv`| | | | |`rocblas_stpsv`|3.5.0| | | | | -|`cublasStpsv_64`|12.0| | | | | | | | | | +|`cublasStpsv_64`|12.0| | | |`rocblas_stpsv_64`|6.2.0| | | | | |`cublasStpsv_v2`| | | | 
|`rocblas_stpsv`|3.5.0| | | | | -|`cublasStpsv_v2_64`|12.0| | | | | | | | | | +|`cublasStpsv_v2_64`|12.0| | | |`rocblas_stpsv_64`|6.2.0| | | | | |`cublasStrmv`| | | | |`rocblas_strmv`|3.5.0| | | | | |`cublasStrmv_64`|12.0| | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrmv_v2`| | | | |`rocblas_strmv`|3.5.0| | | | | @@ -999,9 +999,9 @@ |`cublasZtpmv_v2`| | | | |`rocblas_ztpmv`|3.5.0| | | | | |`cublasZtpmv_v2_64`|12.0| | | |`rocblas_ztpmv_64`|6.2.0| | | | | |`cublasZtpsv`| | | | |`rocblas_ztpsv`|3.5.0| | | | | -|`cublasZtpsv_64`|12.0| | | | | | | | | | +|`cublasZtpsv_64`|12.0| | | |`rocblas_ztpsv_64`|6.2.0| | | | | |`cublasZtpsv_v2`| | | | |`rocblas_ztpsv`|3.5.0| | | | | -|`cublasZtpsv_v2_64`|12.0| | | | | | | | | | +|`cublasZtpsv_v2_64`|12.0| | | |`rocblas_ztpsv_64`|6.2.0| | | | | |`cublasZtrmv`| | | | |`rocblas_ztrmv`|3.5.0| | | | | |`cublasZtrmv_64`|12.0| | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrmv_v2`| | | | |`rocblas_ztrmv`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 1729e718..8ea0d61e 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -282,13 +282,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TPSV {"cublasStpsv", {"hipblasStpsv", "rocblas_stpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStpsv_64", {"hipblasStpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStpsv_64", {"hipblasStpsv_64", "rocblas_stpsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtpsv", {"hipblasDtpsv", "rocblas_dtpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtpsv_64", {"hipblasDtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtpsv_64", {"hipblasDtpsv_64", "rocblas_dtpsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtpsv", {"hipblasCtpsv_v2", "rocblas_ctpsv", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtpsv_64", {"hipblasCtpsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtpsv_64", {"hipblasCtpsv_v2_64", "rocblas_ctpsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtpsv", {"hipblasZtpsv_v2", "rocblas_ztpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtpsv_64", {"hipblasZtpsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtpsv_64", {"hipblasZtpsv_v2_64", "rocblas_ztpsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TBSV {"cublasStbsv", {"hipblasStbsv", "rocblas_stbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, @@ -700,13 +700,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TPSV {"cublasStpsv_v2", {"hipblasStpsv", "rocblas_stpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStpsv_v2_64", {"hipblasStpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStpsv_v2_64", {"hipblasStpsv_64", "rocblas_stpsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtpsv_v2", {"hipblasDtpsv", "rocblas_dtpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtpsv_v2_64", {"hipblasDtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtpsv_v2_64", {"hipblasDtpsv_64", "rocblas_dtpsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtpsv_v2", {"hipblasCtpsv_v2", "rocblas_ctpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtpsv_v2_64", {"hipblasCtpsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtpsv_v2_64", {"hipblasCtpsv_v2_64", "rocblas_ctpsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtpsv_v2", {"hipblasZtpsv_v2", "rocblas_ztpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtpsv_v2_64", {"hipblasZtpsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + 
{"cublasZtpsv_v2_64", {"hipblasZtpsv_v2_64", "rocblas_ztpsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TBSV {"cublasStbsv_v2", {"hipblasStbsv", "rocblas_stbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, @@ -2393,6 +2393,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtrsv_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ctrsv_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ztrsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_stpsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtpsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctpsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztpsv_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index cc4fe1dd..2b2f8881 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -2909,6 +2909,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_ztrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); blasStatus = cublasZtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); blasStatus = cublasZtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* AP, float* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stpsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const float* AP, float* x, int64_t incx); + // CHECK: blasStatus = rocblas_stpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + // CHECK-NEXT: blasStatus = 
rocblas_stpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + blasStatus = cublasStpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + blasStatus = cublasStpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* AP, double* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtpsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const double* AP, double* x, int64_t incx); + // CHECK: blasStatus = rocblas_dtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_dtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + blasStatus = cublasDtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + blasStatus = cublasDtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* AP, cuComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctpsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_float_complex* AP, rocblas_float_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ctpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ctpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + blasStatus = cublasCtpsv_64(blasHandle, 
blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + blasStatus = cublasCtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* AP, cuDoubleComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztpsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_double_complex* AP, rocblas_double_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ztpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ztpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + blasStatus = cublasZtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + blasStatus = cublasZtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); #endif return 0; From 021408a3769a449ab521c09aa2fa21c3e746289b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 13 Oct 2024 15:18:35 +0100 Subject: [PATCH 02/51] [HIPIFY][rocBLAS] 64-bit functions support - Step 18 + `rocblas_(s|d|c|z)ger(c|u)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 24 +++++------ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 24 +++++------ docs/tables/CUBLAS_API_supported_by_ROC.md | 24 +++++------ src/CUDA2HIP_BLAS_API_functions.cpp | 30 +++++++------ .../synthetic/libraries/cublas2rocblas_v2.cu | 42 +++++++++++++++++++ 5 files changed, 96 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index d0707f69..14d8a14d 100755 --- 
a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1605,9 +1605,13 @@ sub rocSubstitutions { subst("cublasCgemv_v2", "rocblas_cgemv", "library"); subst("cublasCgemv_v2_64", "rocblas_cgemv_64", "library"); subst("cublasCgerc", "rocblas_cgerc", "library"); + subst("cublasCgerc_64", "rocblas_cgerc_64", "library"); subst("cublasCgerc_v2", "rocblas_cgerc", "library"); + subst("cublasCgerc_v2_64", "rocblas_cgerc_64", "library"); subst("cublasCgeru", "rocblas_cgeru", "library"); + subst("cublasCgeru_64", "rocblas_cgeru_64", "library"); subst("cublasCgeru_v2", "rocblas_cgeru", "library"); + subst("cublasCgeru_v2_64", "rocblas_cgeru_64", "library"); subst("cublasChbmv", "rocblas_chbmv", "library"); subst("cublasChbmv_64", "rocblas_chbmv_64", "library"); subst("cublasChbmv_v2", "rocblas_chbmv", "library"); @@ -1752,7 +1756,9 @@ sub rocSubstitutions { subst("cublasDgemv_v2", "rocblas_dgemv", "library"); subst("cublasDgemv_v2_64", "rocblas_dgemv_64", "library"); subst("cublasDger", "rocblas_dger", "library"); + subst("cublasDger_64", "rocblas_dger_64", "library"); subst("cublasDger_v2", "rocblas_dger", "library"); + subst("cublasDger_v2_64", "rocblas_dger_64", "library"); subst("cublasDnrm2", "rocblas_dnrm2", "library"); subst("cublasDnrm2_64", "rocblas_dnrm2_64", "library"); subst("cublasDnrm2_v2", "rocblas_dnrm2", "library"); @@ -1970,7 +1976,9 @@ sub rocSubstitutions { subst("cublasSgemv_v2", "rocblas_sgemv", "library"); subst("cublasSgemv_v2_64", "rocblas_sgemv_64", "library"); subst("cublasSger", "rocblas_sger", "library"); + subst("cublasSger_64", "rocblas_sger_64", "library"); subst("cublasSger_v2", "rocblas_sger", "library"); + subst("cublasSger_v2_64", "rocblas_sger_64", "library"); subst("cublasSnrm2", "rocblas_snrm2", "library"); subst("cublasSnrm2_64", "rocblas_snrm2_64", "library"); subst("cublasSnrm2_v2", "rocblas_snrm2", "library"); @@ -2110,9 +2118,13 @@ sub rocSubstitutions { subst("cublasZgemv_v2", "rocblas_zgemv", "library"); subst("cublasZgemv_v2_64", 
"rocblas_zgemv_64", "library"); subst("cublasZgerc", "rocblas_zgerc", "library"); + subst("cublasZgerc_64", "rocblas_zgerc_64", "library"); subst("cublasZgerc_v2", "rocblas_zgerc", "library"); + subst("cublasZgerc_v2_64", "rocblas_zgerc_64", "library"); subst("cublasZgeru", "rocblas_zgeru", "library"); + subst("cublasZgeru_64", "rocblas_zgeru_64", "library"); subst("cublasZgeru_v2", "rocblas_zgeru", "library"); + subst("cublasZgeru_v2_64", "rocblas_zgeru_64", "library"); subst("cublasZhbmv", "rocblas_zhbmv", "library"); subst("cublasZhbmv_64", "rocblas_zhbmv_64", "library"); subst("cublasZhbmv_v2", "rocblas_zhbmv", "library"); @@ -12687,10 +12699,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgetrsBatched", "cublasZgetriBatched", "cublasZgetrfBatched", - "cublasZgeru_v2_64", - "cublasZgeru_64", - "cublasZgerc_v2_64", - "cublasZgerc_64", "cublasZgeqrfBatched", "cublasZgemm_v2_64", "cublasZgemm_64", @@ -12724,8 +12732,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgetrsBatched", "cublasSgetriBatched", "cublasSgetrfBatched", - "cublasSger_v2_64", - "cublasSger_64", "cublasSgeqrfBatched", "cublasSgemm_v2_64", "cublasSgemm_64", @@ -12858,8 +12864,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgetrsBatched", "cublasDgetriBatched", "cublasDgetrfBatched", - "cublasDger_v2_64", - "cublasDger_64", "cublasDgeqrfBatched", "cublasDgemm_v2_64", "cublasDgemm_64", @@ -12905,10 +12909,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgetrsBatched", "cublasCgetriBatched", "cublasCgetrfBatched", - "cublasCgeru_v2_64", - "cublasCgeru_64", - "cublasCgerc_v2_64", - "cublasCgerc_64", "cublasCgeqrfBatched", "cublasCgemm_v2_64", "cublasCgemm_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 05d3caaa..b24a6bb1 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -731,13 +731,13 @@ |`cublasCgemv_v2`| | | | |`hipblasCgemv_v2`|6.0.0| 
| | | |`rocblas_cgemv`|1.5.0| | | | | |`cublasCgemv_v2_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | | |`rocblas_cgemv_64`|6.2.0| | | | | |`cublasCgerc`| | | | |`hipblasCgerc_v2`|6.0.0| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgerc_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | | |`rocblas_cgerc_64`|6.2.0| | | | | |`cublasCgerc_v2`| | | | |`hipblasCgerc_v2`|6.0.0| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_v2_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgerc_v2_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | | |`rocblas_cgerc_64`|6.2.0| | | | | |`cublasCgeru`| | | | |`hipblasCgeru_v2`|6.0.0| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgeru_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | |`rocblas_cgeru_64`|6.2.0| | | | | |`cublasCgeru_v2`| | | | |`hipblasCgeru_v2`|6.0.0| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_v2_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgeru_v2_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | |`rocblas_cgeru_64`|6.2.0| | | | | |`cublasChbmv`| | | | |`hipblasChbmv_v2`|6.0.0| | | | |`rocblas_chbmv`|3.5.0| | | | | |`cublasChbmv_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | | |`rocblas_chbmv_64`|6.2.0| | | | | |`cublasChbmv_v2`| | | | |`hipblasChbmv_v2`|6.0.0| | | | |`rocblas_chbmv`|3.5.0| | | | | @@ -811,9 +811,9 @@ |`cublasDgemv_v2`| | | | |`hipblasDgemv`|1.8.2| | | | |`rocblas_dgemv`|1.5.0| | | | | |`cublasDgemv_v2_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | | |`rocblas_dgemv_64`|6.2.0| | | | | |`cublasDger`| | | | |`hipblasDger`|1.8.2| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | | | | | | | | +|`cublasDger_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | |`rocblas_dger_64`|6.2.0| | | | | |`cublasDger_v2`| | | | |`hipblasDger`|1.8.2| | | | 
|`rocblas_dger`|1.5.0| | | | | -|`cublasDger_v2_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | | | | | | | | +|`cublasDger_v2_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | |`rocblas_dger_64`|6.2.0| | | | | |`cublasDsbmv`| | | | |`hipblasDsbmv`|3.5.0| | | | |`rocblas_dsbmv`|3.5.0| | | | | |`cublasDsbmv_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | | |`rocblas_dsbmv_64`|6.2.0| | | | | |`cublasDsbmv_v2`| | | | |`hipblasDsbmv`|3.5.0| | | | |`rocblas_dsbmv`|3.5.0| | | | | @@ -875,9 +875,9 @@ |`cublasSgemv_v2`| | | | |`hipblasSgemv`|1.8.2| | | | |`rocblas_sgemv`|1.5.0| | | | | |`cublasSgemv_v2_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | | |`rocblas_sgemv_64`|6.2.0| | | | | |`cublasSger`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | | | | | | | | +|`cublasSger_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | |`rocblas_sger_64`|6.2.0| | | | | |`cublasSger_v2`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_v2_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | | | | | | | | +|`cublasSger_v2_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | |`rocblas_sger_64`|6.2.0| | | | | |`cublasSsbmv`| | | | |`hipblasSsbmv`|3.5.0| | | | |`rocblas_ssbmv`|3.5.0| | | | | |`cublasSsbmv_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | | |`rocblas_ssbmv_64`|6.2.0| | | | | |`cublasSsbmv_v2`| | | | |`hipblasSsbmv`|3.5.0| | | | |`rocblas_ssbmv`|3.5.0| | | | | @@ -939,13 +939,13 @@ |`cublasZgemv_v2`| | | | |`hipblasZgemv_v2`|6.0.0| | | | |`rocblas_zgemv`|1.5.0| | | | | |`cublasZgemv_v2_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | | |`rocblas_zgemv_64`|6.2.0| | | | | |`cublasZgerc`| | | | |`hipblasZgerc_v2`|6.0.0| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgerc_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | | |`rocblas_zgerc_64`|6.2.0| | | | | |`cublasZgerc_v2`| | | | |`hipblasZgerc_v2`|6.0.0| | | | |`rocblas_zgerc`|3.5.0| | | | | 
-|`cublasZgerc_v2_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgerc_v2_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | | |`rocblas_zgerc_64`|6.2.0| | | | | |`cublasZgeru`| | | | |`hipblasZgeru_v2`|6.0.0| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgeru_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | |`rocblas_zgeru_64`|6.2.0| | | | | |`cublasZgeru_v2`| | | | |`hipblasZgeru_v2`|6.0.0| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_v2_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgeru_v2_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | |`rocblas_zgeru_64`|6.2.0| | | | | |`cublasZhbmv`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | |`rocblas_zhbmv`|3.5.0| | | | | |`cublasZhbmv_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | | |`rocblas_zhbmv_64`|6.2.0| | | | | |`cublasZhbmv_v2`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | |`rocblas_zhbmv`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 06eac47c..96e2612f 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -731,13 +731,13 @@ |`cublasCgemv_v2`| | | | |`rocblas_cgemv`|1.5.0| | | | | |`cublasCgemv_v2_64`|12.0| | | |`rocblas_cgemv_64`|6.2.0| | | | | |`cublasCgerc`| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_64`|12.0| | | | | | | | | | +|`cublasCgerc_64`|12.0| | | |`rocblas_cgerc_64`|6.2.0| | | | | |`cublasCgerc_v2`| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_v2_64`|12.0| | | | | | | | | | +|`cublasCgerc_v2_64`|12.0| | | |`rocblas_cgerc_64`|6.2.0| | | | | |`cublasCgeru`| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_64`|12.0| | | | | | | | | | +|`cublasCgeru_64`|12.0| | | |`rocblas_cgeru_64`|6.2.0| | | | | |`cublasCgeru_v2`| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_v2_64`|12.0| | | | | | | | | | +|`cublasCgeru_v2_64`|12.0| | | 
|`rocblas_cgeru_64`|6.2.0| | | | | |`cublasChbmv`| | | | |`rocblas_chbmv`|3.5.0| | | | | |`cublasChbmv_64`|12.0| | | |`rocblas_chbmv_64`|6.2.0| | | | | |`cublasChbmv_v2`| | | | |`rocblas_chbmv`|3.5.0| | | | | @@ -811,9 +811,9 @@ |`cublasDgemv_v2`| | | | |`rocblas_dgemv`|1.5.0| | | | | |`cublasDgemv_v2_64`|12.0| | | |`rocblas_dgemv_64`|6.2.0| | | | | |`cublasDger`| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_64`|12.0| | | | | | | | | | +|`cublasDger_64`|12.0| | | |`rocblas_dger_64`|6.2.0| | | | | |`cublasDger_v2`| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_v2_64`|12.0| | | | | | | | | | +|`cublasDger_v2_64`|12.0| | | |`rocblas_dger_64`|6.2.0| | | | | |`cublasDsbmv`| | | | |`rocblas_dsbmv`|3.5.0| | | | | |`cublasDsbmv_64`|12.0| | | |`rocblas_dsbmv_64`|6.2.0| | | | | |`cublasDsbmv_v2`| | | | |`rocblas_dsbmv`|3.5.0| | | | | @@ -875,9 +875,9 @@ |`cublasSgemv_v2`| | | | |`rocblas_sgemv`|1.5.0| | | | | |`cublasSgemv_v2_64`|12.0| | | |`rocblas_sgemv_64`|6.2.0| | | | | |`cublasSger`| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_64`|12.0| | | | | | | | | | +|`cublasSger_64`|12.0| | | |`rocblas_sger_64`|6.2.0| | | | | |`cublasSger_v2`| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_v2_64`|12.0| | | | | | | | | | +|`cublasSger_v2_64`|12.0| | | |`rocblas_sger_64`|6.2.0| | | | | |`cublasSsbmv`| | | | |`rocblas_ssbmv`|3.5.0| | | | | |`cublasSsbmv_64`|12.0| | | |`rocblas_ssbmv_64`|6.2.0| | | | | |`cublasSsbmv_v2`| | | | |`rocblas_ssbmv`|3.5.0| | | | | @@ -939,13 +939,13 @@ |`cublasZgemv_v2`| | | | |`rocblas_zgemv`|1.5.0| | | | | |`cublasZgemv_v2_64`|12.0| | | |`rocblas_zgemv_64`|6.2.0| | | | | |`cublasZgerc`| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_64`|12.0| | | | | | | | | | +|`cublasZgerc_64`|12.0| | | |`rocblas_zgerc_64`|6.2.0| | | | | |`cublasZgerc_v2`| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_v2_64`|12.0| | | | | | | | | | +|`cublasZgerc_v2_64`|12.0| | | |`rocblas_zgerc_64`|6.2.0| | | | | |`cublasZgeru`| | | | 
|`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_64`|12.0| | | | | | | | | | +|`cublasZgeru_64`|12.0| | | |`rocblas_zgeru_64`|6.2.0| | | | | |`cublasZgeru_v2`| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_v2_64`|12.0| | | | | | | | | | +|`cublasZgeru_v2_64`|12.0| | | |`rocblas_zgeru_64`|6.2.0| | | | | |`cublasZhbmv`| | | | |`rocblas_zhbmv`|3.5.0| | | | | |`cublasZhbmv_64`|12.0| | | |`rocblas_zhbmv_64`|6.2.0| | | | | |`cublasZhbmv_v2`| | | | |`rocblas_zhbmv`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 8ea0d61e..c86c7e1a 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -336,17 +336,17 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GER {"cublasSger", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSger_64", {"hipblasSger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSger_64", {"hipblasSger_64", "rocblas_sger_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDger", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDger_64", {"hipblasDger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDger_64", {"hipblasDger_64", "rocblas_dger_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgeru", {"hipblasCgeru_v2", "rocblas_cgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgeru_64", {"hipblasCgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgeru_64", {"hipblasCgeru_v2_64", "rocblas_cgeru_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgerc", {"hipblasCgerc_v2", "rocblas_cgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgerc_64", {"hipblasCgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + 
{"cublasCgerc_64", {"hipblasCgerc_v2_64", "rocblas_cgerc_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgeru", {"hipblasZgeru_v2", "rocblas_zgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgeru_64", {"hipblasZgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgeru_64", {"hipblasZgeru_v2_64", "rocblas_zgeru_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgerc", {"hipblasZgerc_v2", "rocblas_zgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgerc_64", {"hipblasZgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgerc_64", {"hipblasZgerc_v2_64", "rocblas_zgerc_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SYR/HER {"cublasSsyr", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, @@ -754,17 +754,17 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GER {"cublasSger_v2", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSger_v2_64", {"hipblasSger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSger_v2_64", {"hipblasSger_64", "rocblas_sger_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDger_v2", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDger_v2_64", {"hipblasDger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDger_v2_64", {"hipblasDger_64", "rocblas_dger_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgeru_v2", {"hipblasCgeru_v2", "rocblas_cgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgeru_v2_64", {"hipblasCgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgeru_v2_64", {"hipblasCgeru_v2_64", "rocblas_cgeru_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgerc_v2", {"hipblasCgerc_v2", "rocblas_cgerc", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgerc_v2_64", {"hipblasCgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgerc_v2_64", {"hipblasCgerc_v2_64", "rocblas_cgerc_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgeru_v2", {"hipblasZgeru_v2", "rocblas_zgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgeru_v2_64", {"hipblasZgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgeru_v2_64", {"hipblasZgeru_v2_64", "rocblas_zgeru_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgerc_v2", {"hipblasZgerc_v2", "rocblas_zgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgerc_v2_64", {"hipblasZgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgerc_v2_64", {"hipblasZgerc_v2_64", "rocblas_zgerc_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SYR/HER {"cublasSsyr_v2", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, @@ -2397,6 +2397,12 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtpsv_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ctpsv_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ztpsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_sger_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dger_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_cgeru_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zgeru_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_cgerc_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zgerc_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 2b2f8881..9c9a08ee 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -2937,6 +2937,48 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_ztpsv_64(blasHandle, 
blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); blasStatus = cublasZtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); blasStatus = cublasZtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sger_64(rocblas_handle handle, int64_t m, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); + // CHECK: blasStatus = rocblas_sger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + // CHECK-NEXT: blasStatus = rocblas_sger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + blasStatus = cublasSger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + blasStatus = cublasSger_v2_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dger_64(rocblas_handle handle, int64_t m, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* A, int64_t lda); + // CHECK: blasStatus = rocblas_dger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_dger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDger_v2_64(blasHandle, m_64, 
n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgeru_64(rocblas_handle handle, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* y, int64_t incy, rocblas_float_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_cgeru_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_cgeru_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgeru_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgeru_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgerc_64(rocblas_handle handle, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* y, int64_t incy, rocblas_float_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_cgerc_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_cgerc_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgerc_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + 
blasStatus = cublasCgerc_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgeru_64(rocblas_handle handle, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* y, int64_t incy, rocblas_double_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_zgeru_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_zgeru_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgeru_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgeru_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgerc_64(rocblas_handle handle, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* y, int64_t incy, rocblas_double_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_zgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_zgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, 
incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgerc_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); #endif return 0; From 8719384bf26d7aa38847aa95d8a6eac976bd57e4 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 13 Oct 2024 18:02:57 +0100 Subject: [PATCH 03/51] [HIPIFY][rocBLAS] 64-bit functions support - Step 19 + `rocblas_(s|d|c|z)trsm_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 20 ++++++++----- .../synthetic/libraries/cublas2rocblas_v2.cu | 29 +++++++++++++++++++ 5 files changed, 65 insertions(+), 32 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 14d8a14d..c97c5533 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1714,7 +1714,9 @@ sub rocSubstitutions { subst("cublasCtrmv_v2_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrsm", "rocblas_ctrsm", "library"); subst("cublasCtrsmBatched", "rocblas_ctrsm_batched", "library"); + subst("cublasCtrsm_64", "rocblas_ctrsm_64", "library"); subst("cublasCtrsm_v2", "rocblas_ctrsm", "library"); + subst("cublasCtrsm_v2_64", "rocblas_ctrsm_64", "library"); subst("cublasCtrsv", "rocblas_ctrsv", "library"); subst("cublasCtrsv_64", "rocblas_ctrsv_64", "library"); subst("cublasCtrsv_v2", "rocblas_ctrsv", "library"); @@ -1846,7 +1848,9 @@ sub rocSubstitutions { subst("cublasDtrmv_v2_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrsm", "rocblas_dtrsm", "library"); subst("cublasDtrsmBatched", "rocblas_dtrsm_batched", "library"); + subst("cublasDtrsm_64", "rocblas_dtrsm_64", "library"); subst("cublasDtrsm_v2", "rocblas_dtrsm", "library"); + 
subst("cublasDtrsm_v2_64", "rocblas_dtrsm_64", "library"); subst("cublasDtrsv", "rocblas_dtrsv", "library"); subst("cublasDtrsv_64", "rocblas_dtrsv_64", "library"); subst("cublasDtrsv_v2", "rocblas_dtrsv", "library"); @@ -2062,7 +2066,9 @@ sub rocSubstitutions { subst("cublasStrmv_v2_64", "rocblas_strmv_64", "library"); subst("cublasStrsm", "rocblas_strsm", "library"); subst("cublasStrsmBatched", "rocblas_strsm_batched", "library"); + subst("cublasStrsm_64", "rocblas_strsm_64", "library"); subst("cublasStrsm_v2", "rocblas_strsm", "library"); + subst("cublasStrsm_v2_64", "rocblas_strsm_64", "library"); subst("cublasStrsv", "rocblas_strsv", "library"); subst("cublasStrsv_64", "rocblas_strsv_64", "library"); subst("cublasStrsv_v2", "rocblas_strsv", "library"); @@ -2217,7 +2223,9 @@ sub rocSubstitutions { subst("cublasZtrmv_v2_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrsm", "rocblas_ztrsm", "library"); subst("cublasZtrsmBatched", "rocblas_ztrsm_batched", "library"); + subst("cublasZtrsm_64", "rocblas_ztrsm_64", "library"); subst("cublasZtrsm_v2", "rocblas_ztrsm", "library"); + subst("cublasZtrsm_v2_64", "rocblas_ztrsm_64", "library"); subst("cublasZtrsv", "rocblas_ztrsv", "library"); subst("cublasZtrsv_64", "rocblas_ztrsv_64", "library"); subst("cublasZtrsv_v2", "rocblas_ztrsv", "library"); @@ -12675,8 +12683,6 @@ sub warnRocOnlyUnsupportedFunctions { my $k = 0; foreach $func ( "cublasZtrttp", - "cublasZtrsm_v2_64", - "cublasZtrsm_64", "cublasZtrsmBatched_64", "cublasZtrmm_v2_64", "cublasZtrmm_64", @@ -12714,8 +12720,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrsm_v2_64", - "cublasStrsm_64", "cublasStrsmBatched_64", "cublasStrmm_v2_64", "cublasStrmm_64", @@ -12847,8 +12851,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrsm_v2_64", - "cublasDtrsm_64", "cublasDtrsmBatched_64", "cublasDtrmm_v2_64", "cublasDtrmm_64", @@ -12875,8 +12877,6 @@ 
sub warnRocOnlyUnsupportedFunctions { "cublasDgeam_64", "cublasDdgmm_64", "cublasCtrttp", - "cublasCtrsm_v2_64", - "cublasCtrsm_64", "cublasCtrsmBatched_64", "cublasCtrmm_v2_64", "cublasCtrmm_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index b24a6bb1..adc47104 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1068,9 +1068,9 @@ |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | |`cublasCtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasCtrsm`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrsm_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasDgemm`| | | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemmBatched`| | | | |`hipblasDgemmBatched`|1.8.2| | | | |`rocblas_dgemm_batched`|3.5.0| | | | | |`cublasDgemmBatched_64`|12.0| | | | | | | | | | | | | | | | @@ -1104,9 +1104,9 @@ |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | |`cublasDtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasDtrsm`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrsm_64`|12.0| | | | | | | | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | | | | | | | 
|`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | | | | | | | |`cublasHSHgemvBatched`|11.6| | | | | | | | | |`rocblas_hshgemv_batched`|6.0.0| | | | | @@ -1156,9 +1156,9 @@ |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | |`cublasStrmm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasStrsm`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrsm_64`|12.0| | | | | | | | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrsm_v2_64`|12.0| | | | | | | | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasTSSgemvBatched`|11.6| | | | | | | | | |`rocblas_tssgemv_batched`|6.0.0| | | | | |`cublasTSSgemvBatched_64`|12.0| | | | | | | | | |`rocblas_tssgemv_batched_64`|6.2.0| | | | | |`cublasTSSgemvStridedBatched`|11.6| | | | | | | | | |`rocblas_tssgemv_strided_batched`|6.0.0| | | | | @@ -1214,9 +1214,9 @@ |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | |`cublasZtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasZtrsm`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrsm_64`|12.0| | | | | | | | | |`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_ztrsm_64`|6.2.0| | | | | ## **8. 
BLAS-like Extension** diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 96e2612f..1ad86206 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1068,9 +1068,9 @@ |`cublasCtrmm_v2`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | |`cublasCtrmm_v2_64`|12.0| | | | | | | | | | |`cublasCtrsm`| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_64`|12.0| | | | | | | | | | +|`cublasCtrsm_64`|12.0| | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasCtrsm_v2_64`|12.0| | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasDgemm`| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemmBatched`| | | | |`rocblas_dgemm_batched`|3.5.0| | | | | |`cublasDgemmBatched_64`|12.0| | | | | | | | | | @@ -1104,9 +1104,9 @@ |`cublasDtrmm_v2`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | |`cublasDtrmm_v2_64`|12.0| | | | | | | | | | |`cublasDtrsm`| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_64`|12.0| | | | | | | | | | +|`cublasDtrsm_64`|12.0| | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasDtrsm_v2_64`|12.0| | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | |`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | |`cublasHSHgemvBatched`|11.6| | | |`rocblas_hshgemv_batched`|6.0.0| | | | | @@ -1156,9 +1156,9 @@ |`cublasStrmm_v2`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | |`cublasStrmm_v2_64`|12.0| | | | | | | | | | |`cublasStrsm`| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_64`|12.0| | | | | | | | | | +|`cublasStrsm_64`|12.0| | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_v2_64`|12.0| | | | | | | | | | +|`cublasStrsm_v2_64`|12.0| | | |`rocblas_strsm_64`|6.2.0| | | | | 
|`cublasTSSgemvBatched`|11.6| | | |`rocblas_tssgemv_batched`|6.0.0| | | | | |`cublasTSSgemvBatched_64`|12.0| | | |`rocblas_tssgemv_batched_64`|6.2.0| | | | | |`cublasTSSgemvStridedBatched`|11.6| | | |`rocblas_tssgemv_strided_batched`|6.0.0| | | | | @@ -1214,9 +1214,9 @@ |`cublasZtrmm_v2`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | |`cublasZtrmm_v2_64`|12.0| | | | | | | | | | |`cublasZtrsm`| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_64`|12.0| | | | | | | | | | +|`cublasZtrsm_64`|12.0| | | |`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasZtrsm_v2_64`|12.0| | | |`rocblas_ztrsm_64`|6.2.0| | | | | ## **8. BLAS-like Extension** diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index c86c7e1a..1dee8c87 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -541,13 +541,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRSM {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrsm_64", {"hipblasStrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasStrsm_64", {"hipblasStrsm_64", "rocblas_strsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrsm_64", {"hipblasDtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDtrsm_64", {"hipblasDtrsm_64", "rocblas_dtrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasCtrsm", {"hipblasCtrsm_v2", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrsm_64", {"hipblasCtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCtrsm_64", {"hipblasCtrsm_64", "rocblas_ctrsm_64", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasZtrsm", {"hipblasZtrsm_v2", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrsm_64", {"hipblasZtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZtrsm_64", {"hipblasZtrsm_64", "rocblas_ztrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, // TRMM {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -908,13 +908,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRSM {"cublasStrsm_v2", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasStrsm_v2_64", {"hipblasStrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasStrsm_v2_64", {"hipblasStrsm_64", "rocblas_strsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasDtrsm_v2", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDtrsm_v2_64", {"hipblasDtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDtrsm_v2_64", {"hipblasDtrsm_64", "rocblas_dtrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasCtrsm_v2", {"hipblasCtrsm_v2", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCtrsm_v2_64", {"hipblasCtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCtrsm_v2_64", {"hipblasCtrsm_64", "rocblas_ctrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasZtrsm_v2", {"hipblasZtrsm_v2", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZtrsm_v2_64", {"hipblasZtrsm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZtrsm_v2_64", {"hipblasZtrsm_64", "rocblas_ztrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, // TRMM {"cublasStrmm_v2", {"hipblasStrmm", 
"rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -2403,6 +2403,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_zgeru_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_cgerc_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_zgerc_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_strsm_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtrsm_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctrsm_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztrsm_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 9c9a08ee..3ef155d6 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -157,6 +157,7 @@ int main() { int num = 0; int lda = 0; int64_t lda_64 = 0; + int64_t ldb_64 = 0; int ldb = 0; int ldc = 0; int res = 0; @@ -2979,6 +2980,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); blasStatus = cublasZgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); blasStatus = cublasZgerc_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, float* B, int64_t ldb); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, float* B, int64_t ldb); + // CHECK: blasStatus = rocblas_strsm_64(blasHandle, 
blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + // CHECK-NEXT: blasStatus = rocblas_strsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + blasStatus = cublasStrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + blasStatus = cublasStrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, double* B, int64_t ldb); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, double* B, int64_t ldb); + // CHECK: blasStatus = rocblas_dtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + // CHECK-NEXT: blasStatus = rocblas_dtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + blasStatus = cublasDtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + blasStatus = cublasDtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, cuComplex* B, int64_t ldb); + // 
ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, rocblas_float_complex* B, int64_t ldb); + // CHECK: blasStatus = rocblas_ctrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + // CHECK-NEXT: blasStatus = rocblas_ctrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + blasStatus = cublasCtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + blasStatus = cublasCtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* B, int64_t ldb); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, rocblas_double_complex* B, int64_t ldb); + // CHECK: blasStatus = rocblas_ztrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + // CHECK-NEXT: blasStatus = rocblas_ztrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + blasStatus = 
cublasZtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + blasStatus = cublasZtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); #endif return 0; From dbe9fd1470d6d5776b9d39d4b688a8a8f73a53b3 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 14 Oct 2024 21:42:45 +0100 Subject: [PATCH 04/51] [HIPIFY][rocBLAS] 64-bit functions support - Step 20 + `rocblas_(s|d|c|z)trsm_batched_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 8 ++++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 8 ++++---- docs/tables/CUBLAS_API_supported_by_ROC.md | 8 ++++---- src/CUDA2HIP_BLAS_API_functions.cpp | 12 +++++++---- .../synthetic/libraries/cublas2rocblas_v2.cu | 20 +++++++++++++++++++ 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index c97c5533..0075ea16 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1714,6 +1714,7 @@ sub rocSubstitutions { subst("cublasCtrmv_v2_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrsm", "rocblas_ctrsm", "library"); subst("cublasCtrsmBatched", "rocblas_ctrsm_batched", "library"); + subst("cublasCtrsmBatched_64", "rocblas_ctrsm_batched_64", "library"); subst("cublasCtrsm_64", "rocblas_ctrsm_64", "library"); subst("cublasCtrsm_v2", "rocblas_ctrsm", "library"); subst("cublasCtrsm_v2_64", "rocblas_ctrsm_64", "library"); @@ -1848,6 +1849,7 @@ sub rocSubstitutions { subst("cublasDtrmv_v2_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrsm", "rocblas_dtrsm", "library"); subst("cublasDtrsmBatched", "rocblas_dtrsm_batched", "library"); + subst("cublasDtrsmBatched_64", "rocblas_dtrsm_batched_64", "library"); subst("cublasDtrsm_64", "rocblas_dtrsm_64", "library"); subst("cublasDtrsm_v2", "rocblas_dtrsm", "library"); 
subst("cublasDtrsm_v2_64", "rocblas_dtrsm_64", "library"); @@ -2066,6 +2068,7 @@ sub rocSubstitutions { subst("cublasStrmv_v2_64", "rocblas_strmv_64", "library"); subst("cublasStrsm", "rocblas_strsm", "library"); subst("cublasStrsmBatched", "rocblas_strsm_batched", "library"); + subst("cublasStrsmBatched_64", "rocblas_strsm_batched_64", "library"); subst("cublasStrsm_64", "rocblas_strsm_64", "library"); subst("cublasStrsm_v2", "rocblas_strsm", "library"); subst("cublasStrsm_v2_64", "rocblas_strsm_64", "library"); @@ -2223,6 +2226,7 @@ sub rocSubstitutions { subst("cublasZtrmv_v2_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrsm", "rocblas_ztrsm", "library"); subst("cublasZtrsmBatched", "rocblas_ztrsm_batched", "library"); + subst("cublasZtrsmBatched_64", "rocblas_ztrsm_batched_64", "library"); subst("cublasZtrsm_64", "rocblas_ztrsm_64", "library"); subst("cublasZtrsm_v2", "rocblas_ztrsm", "library"); subst("cublasZtrsm_v2_64", "rocblas_ztrsm_64", "library"); @@ -12683,7 +12687,6 @@ sub warnRocOnlyUnsupportedFunctions { my $k = 0; foreach $func ( "cublasZtrttp", - "cublasZtrsmBatched_64", "cublasZtrmm_v2_64", "cublasZtrmm_64", "cublasZtpttr", @@ -12720,7 +12723,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrsmBatched_64", "cublasStrmm_v2_64", "cublasStrmm_64", "cublasStpttr", @@ -12851,7 +12853,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrsmBatched_64", "cublasDtrmm_v2_64", "cublasDtrmm_64", "cublasDtpttr", @@ -12877,7 +12878,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgeam_64", "cublasDdgmm_64", "cublasCtrttp", - "cublasCtrsmBatched_64", "cublasCtrmm_v2_64", "cublasCtrmm_64", "cublasCtpttr", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index adc47104..5fdf8baf 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ 
b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1250,7 +1250,7 @@ |`cublasCsyrkEx_64`|12.0| | | | | | | | | | | | | | | | |`cublasCtpttr`| | | | | | | | | | | | | | | | | |`cublasCtrsmBatched`| | | | |`hipblasCtrsmBatched_v2`|6.0.0| | | | |`rocblas_ctrsm_batched`|3.5.0| | | | | -|`cublasCtrsmBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrsmBatched_64`|12.0| | | | | | | | | |`rocblas_ctrsm_batched_64`|6.2.0| | | | | |`cublasCtrttp`| | | | | | | | | | | | | | | | | |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | |`rocblas_ddgmm`|3.5.0| | | | | |`cublasDdgmm_64`|12.0| | | | | | | | | | | | | | | | @@ -1268,7 +1268,7 @@ |`cublasDotcEx_64`|12.0| | | |`hipblasDotcEx_v2_64`|6.2.0| | | | |`rocblas_dotc_ex_64`|6.1.0| | | | | |`cublasDtpttr`| | | | | | | | | | | | | | | | | |`cublasDtrsmBatched`| | | | |`hipblasDtrsmBatched`|3.2.0| | | | |`rocblas_dtrsm_batched`|3.5.0| | | | | -|`cublasDtrsmBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrsmBatched_64`|12.0| | | | | | | | | |`rocblas_dtrsm_batched_64`|6.2.0| | | | | |`cublasDtrttp`| | | | | | | | | | | | | | | | | |`cublasGemmBatchedEx`|9.1| | | |`hipblasGemmBatchedEx_v2`|6.0.0| | | | |`rocblas_gemm_batched_ex`|3.5.0| | | | | |`cublasGemmBatchedEx_64`|12.0| | | | | | | | | | | | | | | | @@ -1302,7 +1302,7 @@ |`cublasSmatinvBatched`| | | | | | | | | | | | | | | | | |`cublasStpttr`| | | | | | | | | | | | | | | | | |`cublasStrsmBatched`| | | | |`hipblasStrsmBatched`|3.2.0| | | | |`rocblas_strsm_batched`|3.5.0| | | | | -|`cublasStrsmBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrsmBatched_64`|12.0| | | | | | | | | |`rocblas_strsm_batched_64`|6.2.0| | | | | |`cublasStrttp`| | | | | | | | | | | | | | | | | |`cublasSwapEx`|10.1| | | | | | | | | | | | | | | | |`cublasSwapEx_64`|12.0| | | | | | | | | | | | | | | | @@ -1319,7 +1319,7 @@ |`cublasZmatinvBatched`| | | | | | | | | | | | | | | | | |`cublasZtpttr`| | | | | | | | | | | | | | | | | |`cublasZtrsmBatched`| | | | 
|`hipblasZtrsmBatched_v2`|6.0.0| | | | |`rocblas_ztrsm_batched`|3.5.0| | | | | -|`cublasZtrsmBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrsmBatched_64`|12.0| | | | | | | | | |`rocblas_ztrsm_batched_64`|6.2.0| | | | | |`cublasZtrttp`| | | | | | | | | | | | | | | | | ## **9. BLASLt Function Reference** diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 1ad86206..3b98b6bc 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1250,7 +1250,7 @@ |`cublasCsyrkEx_64`|12.0| | | | | | | | | | |`cublasCtpttr`| | | | | | | | | | | |`cublasCtrsmBatched`| | | | |`rocblas_ctrsm_batched`|3.5.0| | | | | -|`cublasCtrsmBatched_64`|12.0| | | | | | | | | | +|`cublasCtrsmBatched_64`|12.0| | | |`rocblas_ctrsm_batched_64`|6.2.0| | | | | |`cublasCtrttp`| | | | | | | | | | | |`cublasDdgmm`| | | | |`rocblas_ddgmm`|3.5.0| | | | | |`cublasDdgmm_64`|12.0| | | | | | | | | | @@ -1268,7 +1268,7 @@ |`cublasDotcEx_64`|12.0| | | |`rocblas_dotc_ex_64`|6.1.0| | | | | |`cublasDtpttr`| | | | | | | | | | | |`cublasDtrsmBatched`| | | | |`rocblas_dtrsm_batched`|3.5.0| | | | | -|`cublasDtrsmBatched_64`|12.0| | | | | | | | | | +|`cublasDtrsmBatched_64`|12.0| | | |`rocblas_dtrsm_batched_64`|6.2.0| | | | | |`cublasDtrttp`| | | | | | | | | | | |`cublasGemmBatchedEx`|9.1| | | |`rocblas_gemm_batched_ex`|3.5.0| | | | | |`cublasGemmBatchedEx_64`|12.0| | | | | | | | | | @@ -1302,7 +1302,7 @@ |`cublasSmatinvBatched`| | | | | | | | | | | |`cublasStpttr`| | | | | | | | | | | |`cublasStrsmBatched`| | | | |`rocblas_strsm_batched`|3.5.0| | | | | -|`cublasStrsmBatched_64`|12.0| | | | | | | | | | +|`cublasStrsmBatched_64`|12.0| | | |`rocblas_strsm_batched_64`|6.2.0| | | | | |`cublasStrttp`| | | | | | | | | | | |`cublasSwapEx`|10.1| | | | | | | | | | |`cublasSwapEx_64`|12.0| | | | | | | | | | @@ -1319,7 +1319,7 @@ |`cublasZmatinvBatched`| | | | | | | | | | | |`cublasZtpttr`| | | | | | | | | | | 
|`cublasZtrsmBatched`| | | | |`rocblas_ztrsm_batched`|3.5.0| | | | | -|`cublasZtrsmBatched_64`|12.0| | | | | | | | | | +|`cublasZtrsmBatched_64`|12.0| | | |`rocblas_ztrsm_batched_64`|6.2.0| | | | | |`cublasZtrttp`| | | | | | | | | | | ## **9. BLASLt Function Reference** diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 1dee8c87..f1a096ba 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -590,13 +590,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRSM - Batched Triangular Solver {"cublasStrsmBatched", {"hipblasStrsmBatched", "rocblas_strsm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasStrsmBatched_64", {"hipblasStrsmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasStrsmBatched_64", {"hipblasStrsmBatched_64", "rocblas_strsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, {"cublasDtrsmBatched", {"hipblasDtrsmBatched", "rocblas_dtrsm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasDtrsmBatched_64", {"hipblasDtrsmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasDtrsmBatched_64", {"hipblasDtrsmBatched_64", "rocblas_dtrsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, {"cublasCtrsmBatched", {"hipblasCtrsmBatched_v2", "rocblas_ctrsm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasCtrsmBatched_64", {"hipblasCtrsmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasCtrsmBatched_64", {"hipblasCtrsmBatched_64", "rocblas_ctrsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, {"cublasZtrsmBatched", {"hipblasZtrsmBatched_v2", "rocblas_ztrsm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasZtrsmBatched_64", {"hipblasZtrsmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasZtrsmBatched_64", {"hipblasZtrsmBatched_64", "rocblas_ztrsm_batched_64", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, // MATINV - Batched {"cublasSmatinvBatched", {"hipblasSmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, @@ -2407,6 +2407,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtrsm_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ctrsm_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ztrsm_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_strsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 3ef155d6..0b975ea5 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3008,6 +3008,26 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_ztrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); blasStatus = cublasZtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); blasStatus = cublasZtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* const A[], int64_t lda, float* const B[], int64_t ldb, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, 
rocblas_diagonal diag, int64_t m, int64_t n, const float* alpha, const float* const A[], int64_t lda, float* const B[], int64_t ldb, int64_t batch_count); + // CHECK: blasStatus = rocblas_strsm_batched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, fAarray_const, lda_64, fBarray, ldb_64, batchCount_64); + blasStatus = cublasStrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, fAarray_const, lda_64, fBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* const A[], int64_t lda, double* const B[], int64_t ldb, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const double* alpha, const double* const A[], int64_t lda, double* const B[], int64_t ldb, int64_t batch_count); + // CHECK: blasStatus = rocblas_dtrsm_batched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, dAarray_const, lda_64, dBarray, ldb_64, batchCount_64); + blasStatus = cublasDtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, dAarray_const, lda_64, dBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* const A[], int64_t lda, cuComplex* const B[], int64_t ldb, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, 
rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], int64_t lda, rocblas_float_complex* const B[], int64_t ldb, int64_t batch_count); + // CHECK: blasStatus = rocblas_ctrsm_batched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, complexAarray_const, lda_64, complexBarray, ldb_64, batchCount_64); + blasStatus = cublasCtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, complexAarray_const, lda_64, complexBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int64_t lda, cuDoubleComplex* const B[], int64_t ldb, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], int64_t lda, rocblas_double_complex* const B[], int64_t ldb, int64_t batch_count); + // CHECK: blasStatus = rocblas_ztrsm_batched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); + blasStatus = cublasZtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); #endif return 0; From 6e809177837801c0074350c13f1f5c4aae0bd14e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 15 Oct 2024 16:24:30 +0100 Subject: [PATCH 05/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 1 + 
`rocblas_(s|d|c|z|h)gemm_64` and `hipblas(|D|C|Z|H)gemm_(v2_)?64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 36 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 18 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 18 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 18 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 28 ++++++++++----- .../synthetic/libraries/cublas2hipblas_v2.cu | 33 +++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 33 +++++++++++++++++ 7 files changed, 130 insertions(+), 54 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 0075ea16..d1c8f4d5 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1595,7 +1595,9 @@ sub rocSubstitutions { subst("cublasCgemm", "rocblas_cgemm", "library"); subst("cublasCgemmBatched", "rocblas_cgemm_batched", "library"); subst("cublasCgemmStridedBatched", "rocblas_cgemm_strided_batched", "library"); + subst("cublasCgemm_64", "rocblas_cgemm_64", "library"); subst("cublasCgemm_v2", "rocblas_cgemm", "library"); + subst("cublasCgemm_v2_64", "rocblas_cgemm_64", "library"); subst("cublasCgemv", "rocblas_cgemv", "library"); subst("cublasCgemvBatched", "rocblas_cgemv_batched", "library"); subst("cublasCgemvBatched_64", "rocblas_cgemv_batched_64", "library"); @@ -1749,7 +1751,9 @@ sub rocSubstitutions { subst("cublasDgemm", "rocblas_dgemm", "library"); subst("cublasDgemmBatched", "rocblas_dgemm_batched", "library"); subst("cublasDgemmStridedBatched", "rocblas_dgemm_strided_batched", "library"); + subst("cublasDgemm_64", "rocblas_dgemm_64", "library"); subst("cublasDgemm_v2", "rocblas_dgemm", "library"); + subst("cublasDgemm_v2_64", "rocblas_dgemm_64", "library"); subst("cublasDgemv", "rocblas_dgemv", "library"); subst("cublasDgemvBatched", "rocblas_dgemv_batched", "library"); subst("cublasDgemvBatched_64", "rocblas_dgemv_batched_64", "library"); @@ -1890,6 +1894,7 @@ sub rocSubstitutions { 
subst("cublasHgemm", "rocblas_hgemm", "library"); subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); + subst("cublasHgemm_64", "rocblas_hgemm_64", "library"); subst("cublasIcamax", "rocblas_icamax", "library"); subst("cublasIcamax_64", "rocblas_icamax_64", "library"); subst("cublasIcamax_v2", "rocblas_icamax", "library"); @@ -1972,7 +1977,9 @@ sub rocSubstitutions { subst("cublasSgemm", "rocblas_sgemm", "library"); subst("cublasSgemmBatched", "rocblas_sgemm_batched", "library"); subst("cublasSgemmStridedBatched", "rocblas_sgemm_strided_batched", "library"); + subst("cublasSgemm_64", "rocblas_sgemm_64", "library"); subst("cublasSgemm_v2", "rocblas_sgemm", "library"); + subst("cublasSgemm_v2_64", "rocblas_sgemm_64", "library"); subst("cublasSgemv", "rocblas_sgemv", "library"); subst("cublasSgemvBatched", "rocblas_sgemv_batched", "library"); subst("cublasSgemvBatched_64", "rocblas_sgemv_batched_64", "library"); @@ -2117,7 +2124,9 @@ sub rocSubstitutions { subst("cublasZgemm", "rocblas_zgemm", "library"); subst("cublasZgemmBatched", "rocblas_zgemm_batched", "library"); subst("cublasZgemmStridedBatched", "rocblas_zgemm_strided_batched", "library"); + subst("cublasZgemm_64", "rocblas_zgemm_64", "library"); subst("cublasZgemm_v2", "rocblas_zgemm", "library"); + subst("cublasZgemm_v2_64", "rocblas_zgemm_64", "library"); subst("cublasZgemv", "rocblas_zgemv", "library"); subst("cublasZgemvBatched", "rocblas_zgemv_batched", "library"); subst("cublasZgemvBatched_64", "rocblas_zgemv_batched_64", "library"); @@ -4317,7 +4326,9 @@ sub simpleSubstitutions { subst("cublasCgemm", "hipblasCgemm_v2", "library"); subst("cublasCgemmBatched", "hipblasCgemmBatched_v2", "library"); subst("cublasCgemmStridedBatched", "hipblasCgemmStridedBatched_v2", "library"); + subst("cublasCgemm_64", "hipblasCgemm_v2_64", "library"); subst("cublasCgemm_v2", "hipblasCgemm_v2", "library"); + 
subst("cublasCgemm_v2_64", "hipblasCgemm_v2_64", "library"); subst("cublasCgemv", "hipblasCgemv_v2", "library"); subst("cublasCgemvBatched", "hipblasCgemvBatched_v2", "library"); subst("cublasCgemvBatched_64", "hipblasCgemvBatched_v2_64", "library"); @@ -4473,7 +4484,9 @@ sub simpleSubstitutions { subst("cublasDgemm", "hipblasDgemm", "library"); subst("cublasDgemmBatched", "hipblasDgemmBatched", "library"); subst("cublasDgemmStridedBatched", "hipblasDgemmStridedBatched", "library"); + subst("cublasDgemm_64", "hipblasDgemm_64", "library"); subst("cublasDgemm_v2", "hipblasDgemm", "library"); + subst("cublasDgemm_v2_64", "hipblasDgemm_64", "library"); subst("cublasDgemv", "hipblasDgemv", "library"); subst("cublasDgemvBatched", "hipblasDgemvBatched", "library"); subst("cublasDgemvBatched_64", "hipblasDgemvBatched_64", "library"); @@ -4606,6 +4619,7 @@ sub simpleSubstitutions { subst("cublasHgemm", "hipblasHgemm", "library"); subst("cublasHgemmBatched", "hipblasHgemmBatched", "library"); subst("cublasHgemmStridedBatched", "hipblasHgemmStridedBatched", "library"); + subst("cublasHgemm_64", "hipblasHgemm_64", "library"); subst("cublasIcamax", "hipblasIcamax_v2", "library"); subst("cublasIcamax_64", "hipblasIcamax_v2_64", "library"); subst("cublasIcamax_v2", "hipblasIcamax_v2", "library"); @@ -4709,7 +4723,9 @@ sub simpleSubstitutions { subst("cublasSgemm", "hipblasSgemm", "library"); subst("cublasSgemmBatched", "hipblasSgemmBatched", "library"); subst("cublasSgemmStridedBatched", "hipblasSgemmStridedBatched", "library"); + subst("cublasSgemm_64", "hipblasSgemm_64", "library"); subst("cublasSgemm_v2", "hipblasSgemm", "library"); + subst("cublasSgemm_v2_64", "hipblasSgemm_64", "library"); subst("cublasSgemv", "hipblasSgemv", "library"); subst("cublasSgemvBatched", "hipblasSgemvBatched", "library"); subst("cublasSgemvBatched_64", "hipblasSgemvBatched_64", "library"); @@ -4848,7 +4864,9 @@ sub simpleSubstitutions { subst("cublasZgemm", "hipblasZgemm_v2", "library"); 
subst("cublasZgemmBatched", "hipblasZgemmBatched_v2", "library"); subst("cublasZgemmStridedBatched", "hipblasZgemmStridedBatched_v2", "library"); + subst("cublasZgemm_64", "hipblasZgemm_v2_64", "library"); subst("cublasZgemm_v2", "hipblasZgemm_v2", "library"); + subst("cublasZgemm_v2_64", "hipblasZgemm_v2_64", "library"); subst("cublasZgemv", "hipblasZgemv_v2", "library"); subst("cublasZgemvBatched", "hipblasZgemvBatched_v2", "library"); subst("cublasZgemvBatched_64", "hipblasZgemvBatched_v2_64", "library"); @@ -12181,8 +12199,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZher2k_64", "cublasZhemm_v2_64", "cublasZhemm_64", - "cublasZgemm_v2_64", - "cublasZgemm_64", "cublasZgemmStridedBatched_64", "cublasZgemmBatched_64", "cublasZgemm3m_64", @@ -12217,8 +12233,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSsymm_64", "cublasSmatinvBatched", "cublasShutdown", - "cublasSgemm_v2_64", - "cublasSgemm_64", "cublasSgemmStridedBatched_64", "cublasSgemmGroupedBatched_64", "cublasSgemmGroupedBatched", @@ -12282,7 +12296,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx_64", "cublasIamaxEx", - "cublasHgemm_64", "cublasHgemmStridedBatched_64", "cublasHgemmBatched_64", "cublasHSSgemvStridedBatched_64", @@ -12327,8 +12340,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDsymm_v2_64", "cublasDsymm_64", "cublasDmatinvBatched", - "cublasDgemm_v2_64", - "cublasDgemm_64", "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", @@ -12368,8 +12379,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCher2k_64", "cublasChemm_v2_64", "cublasChemm_64", - "cublasCgemm_v2_64", - "cublasCgemm_64", "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", @@ -12709,8 +12718,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgetriBatched", "cublasZgetrfBatched", "cublasZgeqrfBatched", - "cublasZgemm_v2_64", - "cublasZgemm_64", "cublasZgemmStridedBatched_64", "cublasZgemmBatched_64", "cublasZgemm3m_64", @@ -12739,8 
+12746,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgetriBatched", "cublasSgetrfBatched", "cublasSgeqrfBatched", - "cublasSgemm_v2_64", - "cublasSgemm_64", "cublasSgemmStridedBatched_64", "cublasSgemmGroupedBatched_64", "cublasSgemmGroupedBatched", @@ -12831,7 +12836,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx_64", "cublasIamaxEx", - "cublasHgemm_64", "cublasHgemmStridedBatched_64", "cublasHgemmBatched_64", "cublasGetVersion_v2", @@ -12868,8 +12872,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgetriBatched", "cublasDgetrfBatched", "cublasDgeqrfBatched", - "cublasDgemm_v2_64", - "cublasDgemm_64", "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", @@ -12910,8 +12912,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgetriBatched", "cublasCgetrfBatched", "cublasCgeqrfBatched", - "cublasCgemm_v2_64", - "cublasCgemm_64", "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 2363b7e1..9ef081e8 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1028,9 +1028,9 @@ |`cublasCgemmBatched_64`|12.0| | | | | | | | | | |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasCgemm_64`|12.0| | | | | | | | | | +|`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | | -|`cublasCgemm_v2_64`|12.0| | | | | | | | | | +|`cublasCgemm_v2_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemvBatched`|11.6| | | |`hipblasCgemvBatched_v2`|6.0.0| | | | | |`cublasCgemvBatched_64`|12.0| | | |`hipblasCgemvBatched_v2_64`|6.2.0| | | | | |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | | @@ -1078,9 +1078,9 @@ 
|`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | | |`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasDgemm_64`|12.0| | | | | | | | | | +|`cublasDgemm_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | | -|`cublasDgemm_v2_64`|12.0| | | | | | | | | | +|`cublasDgemm_v2_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemvBatched`|11.6| | | |`hipblasDgemvBatched`|3.0.0| | | | | |`cublasDgemvBatched_64`|12.0| | | |`hipblasDgemvBatched_64`|6.2.0| | | | | |`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | | @@ -1122,7 +1122,7 @@ |`cublasHgemmBatched_64`|12.0| | | | | | | | | | |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasHgemm_64`|12.0| | | | | | | | | | +|`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | | |`cublasSgemmBatched_64`|12.0| | | | | | | | | | @@ -1130,9 +1130,9 @@ |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | | |`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasSgemm_64`|12.0| | | | | | | | | | +|`cublasSgemm_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | | -|`cublasSgemm_v2_64`|12.0| | | | | | | | | | +|`cublasSgemm_v2_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemvBatched`|11.6| | | |`hipblasSgemvBatched`|1.6.0| | | | | |`cublasSgemvBatched_64`|12.0| | | |`hipblasSgemvBatched_64`|6.2.0| | | | | |`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | | @@ -1174,9 +1174,9 @@ |`cublasZgemmBatched_64`|12.0| | | | | | | | | | 
|`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasZgemm_64`|12.0| | | | | | | | | | +|`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | | -|`cublasZgemm_v2_64`|12.0| | | | | | | | | | +|`cublasZgemm_v2_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemvBatched`|11.6| | | |`hipblasZgemvBatched_v2`|6.0.0| | | | | |`cublasZgemvBatched_64`|12.0| | | |`hipblasZgemvBatched_v2_64`|6.2.0| | | | | |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 5fdf8baf..067167e6 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1028,9 +1028,9 @@ |`cublasCgemmBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasCgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | |`rocblas_cgemm`|1.5.0| | | | | -|`cublasCgemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemm_v2_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemvBatched`|11.6| | | |`hipblasCgemvBatched_v2`|6.0.0| | | | |`rocblas_cgemv_batched`|3.5.0| | | | | |`cublasCgemvBatched_64`|12.0| | | |`hipblasCgemvBatched_v2_64`|6.2.0| | | | |`rocblas_cgemv_batched_64`|6.2.0| | | | | |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | 
| @@ -1078,9 +1078,9 @@ |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | |`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasDgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemm_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | -|`cublasDgemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemm_v2_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemvBatched`|11.6| | | |`hipblasDgemvBatched`|3.0.0| | | | |`rocblas_dgemv_batched`|3.5.0| | | | | |`cublasDgemvBatched_64`|12.0| | | |`hipblasDgemvBatched_64`|6.2.0| | | | |`rocblas_dgemv_batched_64`|6.2.0| | | | | |`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | |`rocblas_dgemv_strided_batched`|3.5.0| | | | | @@ -1122,7 +1122,7 @@ |`cublasHgemmBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasHgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | |`cublasSgemmBatched_64`|12.0| | | | | | | | | | | | | | | | @@ -1130,9 +1130,9 @@ |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | |`cublasSgemmStridedBatched_64`|12.0| 
| | | | | | | | | | | | | | | -|`cublasSgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemm_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | -|`cublasSgemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemm_v2_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemvBatched`|11.6| | | |`hipblasSgemvBatched`|1.6.0| | | | |`rocblas_sgemv_batched`|3.5.0| | | | | |`cublasSgemvBatched_64`|12.0| | | |`hipblasSgemvBatched_64`|6.2.0| | | | |`rocblas_sgemv_batched_64`|6.2.0| | | | | |`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | |`rocblas_sgemv_strided_batched`|3.5.0| | | | | @@ -1174,9 +1174,9 @@ |`cublasZgemmBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasZgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | |`rocblas_zgemm`|1.5.0| | | | | -|`cublasZgemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemm_v2_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemvBatched`|11.6| | | |`hipblasZgemvBatched_v2`|6.0.0| | | | |`rocblas_zgemv_batched`|3.5.0| | | | | |`cublasZgemvBatched_64`|12.0| | | |`hipblasZgemvBatched_v2_64`|6.2.0| | | | |`rocblas_zgemv_batched_64`|6.2.0| | | | | |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 3b98b6bc..1db1997e 100644 --- 
a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1028,9 +1028,9 @@ |`cublasCgemmBatched_64`|12.0| | | | | | | | | | |`cublasCgemmStridedBatched`|8.0| | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasCgemm_64`|12.0| | | | | | | | | | +|`cublasCgemm_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`rocblas_cgemm`|1.5.0| | | | | -|`cublasCgemm_v2_64`|12.0| | | | | | | | | | +|`cublasCgemm_v2_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemvBatched`|11.6| | | |`rocblas_cgemv_batched`|3.5.0| | | | | |`cublasCgemvBatched_64`|12.0| | | |`rocblas_cgemv_batched_64`|6.2.0| | | | | |`cublasCgemvStridedBatched`|11.6| | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | | @@ -1078,9 +1078,9 @@ |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | |`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasDgemm_64`|12.0| | | | | | | | | | +|`cublasDgemm_64`|12.0| | | |`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`rocblas_dgemm`|1.5.0| | | | | -|`cublasDgemm_v2_64`|12.0| | | | | | | | | | +|`cublasDgemm_v2_64`|12.0| | | |`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemvBatched`|11.6| | | |`rocblas_dgemv_batched`|3.5.0| | | | | |`cublasDgemvBatched_64`|12.0| | | |`rocblas_dgemv_batched_64`|6.2.0| | | | | |`cublasDgemvStridedBatched`|11.6| | | |`rocblas_dgemv_strided_batched`|3.5.0| | | | | @@ -1122,7 +1122,7 @@ |`cublasHgemmBatched_64`|12.0| | | | | | | | | | |`cublasHgemmStridedBatched`|8.0| | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasHgemm_64`|12.0| | | | | | | | | | +|`cublasHgemm_64`|12.0| | | |`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | 
|`rocblas_sgemm_batched`|3.5.0| | | | | |`cublasSgemmBatched_64`|12.0| | | | | | | | | | @@ -1130,9 +1130,9 @@ |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | |`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasSgemm_64`|12.0| | | | | | | | | | +|`cublasSgemm_64`|12.0| | | |`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`rocblas_sgemm`|1.5.0| | | | | -|`cublasSgemm_v2_64`|12.0| | | | | | | | | | +|`cublasSgemm_v2_64`|12.0| | | |`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemvBatched`|11.6| | | |`rocblas_sgemv_batched`|3.5.0| | | | | |`cublasSgemvBatched_64`|12.0| | | |`rocblas_sgemv_batched_64`|6.2.0| | | | | |`cublasSgemvStridedBatched`|11.6| | | |`rocblas_sgemv_strided_batched`|3.5.0| | | | | @@ -1174,9 +1174,9 @@ |`cublasZgemmBatched_64`|12.0| | | | | | | | | | |`cublasZgemmStridedBatched`|8.0| | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasZgemm_64`|12.0| | | | | | | | | | +|`cublasZgemm_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`rocblas_zgemm`|1.5.0| | | | | -|`cublasZgemm_v2_64`|12.0| | | | | | | | | | +|`cublasZgemm_v2_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemvBatched`|11.6| | | |`rocblas_zgemv_batched`|3.5.0| | | | | |`cublasZgemvBatched_64`|12.0| | | |`rocblas_zgemv_batched_64`|6.2.0| | | | | |`cublasZgemvStridedBatched`|11.6| | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index f1a096ba..5dfc5338 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -399,15 +399,15 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // Blas3 (v1) Routines // GEMM {"cublasSgemm", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - 
{"cublasSgemm_64", {"hipblasSgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSgemm_64", {"hipblasSgemm_64", "rocblas_sgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDgemm", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDgemm_64", {"hipblasDgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDgemm_64", {"hipblasDgemm_64", "rocblas_dgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm", {"hipblasCgemm_v2", "rocblas_cgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgemm_64", {"hipblasCgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemm_64", {"hipblasCgemm_v2_64", "rocblas_cgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZgemm", {"hipblasZgemm_v2", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgemm_64", {"hipblasZgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemm_64", {"hipblasZgemm_v2_64", "rocblas_zgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasHgemm", {"hipblasHgemm", "rocblas_hgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasHgemm_64", {"hipblasHgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasHgemm_64", {"hipblasHgemm_64", "rocblas_hgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // BATCH GEMM {"cublasSgemmBatched", {"hipblasSgemmBatched", "rocblas_sgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -817,17 +817,17 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // Blas3 (v2) Routines // GEMM {"cublasSgemm_v2", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSgemm_v2_64", {"hipblasSgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSgemm_v2_64", {"hipblasSgemm_64", 
"rocblas_sgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDgemm_v2", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDgemm_v2_64", {"hipblasDgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDgemm_v2_64", {"hipblasDgemm_64", "rocblas_dgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm_v2", {"hipblasCgemm_v2", "rocblas_cgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCgemm_v2_64", {"hipblasCgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemm_v2_64", {"hipblasCgemm_v2_64", "rocblas_cgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm3m", {"hipblasCgemm3m", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3m_64", {"hipblasCgemm3m_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3mEx", {"hipblasCgemm3mEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3mEx_64", {"hipblasCgemm3mEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasZgemm_v2", {"hipblasZgemm_v2", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZgemm_v2_64", {"hipblasZgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemm_v2_64", {"hipblasZgemm_v2_64", "rocblas_zgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZgemm3m", {"hipblasZgemm3m", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasZgemm3m_64", {"hipblasZgemm3m_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, @@ -2023,6 +2023,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasNrm2Ex_v2_64", {HIP_6020, HIP_0, HIP_0, }}, {"hipblasRotEx_v2_64", {HIP_6020, HIP_0, HIP_0, }}, {"hipblasScalEx_v2_64", {HIP_6020, HIP_0, HIP_0, }}, + {"hipblasHgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + 
{"hipblasDgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2411,6 +2416,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ctrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ztrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_hgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_sgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index ff3cdbff..d3fc1ad2 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2822,6 +2822,39 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScalEx_v2_64(hipblasHandle_t handle, int64_t n, const void* alpha, hipDataType alphaType, void* x, hipDataType xType, int64_t incx, hipDataType executionType); // CHECK: blasStatus = hipblasScalEx_v2_64(blasHandle, n_64, aptr, Atype, xptr, Xtype, incx_64, Executiontype); blasStatus = cublasScalEx_64(blasHandle, n_64, aptr, Atype, xptr, Xtype, incx_64, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t 
hipblasSgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, const float* BP, int64_t ldb, const float* beta, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_v2_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, const double* BP, int64_t ldb, const double* beta, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_v2_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, 
int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemm_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const hipComplex* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemm_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, 
&dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, const __half* B, int64_t ldb, const __half* beta, __half* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* AP, int64_t lda, const hipblasHalf* BP, int64_t ldb, const hipblasHalf* beta, hipblasHalf* CP, int64_t ldc); + // CHECK: blasStatus = hipblasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); + blasStatus = cublasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 0b975ea5..1bfd4810 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3028,6 +3028,39 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], int64_t lda, rocblas_double_complex* const B[], int64_t ldb, int64_t batch_count); // CHECK: blasStatus = rocblas_ztrsm_batched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, 
dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); blasStatus = cublasZtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_sgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = rocblas_sgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_v2_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, 
lda, &dB, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = rocblas_dgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_v2_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_cgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = rocblas_cgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // 
ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = rocblas_zgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, const __half* B, int64_t ldb, const __half* beta, __half* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* A, int64_t lda, const rocblas_half* B, int64_t ldb, const rocblas_half* beta, rocblas_half* C, int64_t ldc); + // CHECK: blasStatus = rocblas_hgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); + blasStatus = cublasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); #endif return 0; From 357859fbf14549589c633b5c4e783a3d538e1da5 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 16 Oct 2024 23:30:04 +0100 Subject: [PATCH 06/51] [HIPIFY][perl][fix] 
Fix warning reporting for unsupported APIs + [Reason] Only BLAS API were taken into account, even though we already have RAND, SPARSE, and DNN with `ROC` support + As a result, erroneous warnings were emitted by `hipify-perl` for APIs which are actually supported by `-roc` hipification + Updated the regenerated `hipify-perl` script --- bin/hipify-perl | 3100 ++++++++++++++++++++++++++--------------- src/CUDA2HIP_Perl.cpp | 4 +- 2 files changed, 2000 insertions(+), 1104 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index d1c8f4d5..489c722b 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -9110,157 +9110,6 @@ sub warnUnsupportedFunctions { "libraryPropertyType_t", "libraryPropertyType", "gesvdjInfo", - "cusparseZhybsv_solve", - "cusparseZhybsv_analysis", - "cusparseZhyb2dense", - "cusparseZhyb2csc", - "cusparseZgtsv_nopivot", - "cusparseZgtsvStridedBatch", - "cusparseZgtsv", - "cusparseZgebsr2gebsr_bufferSizeExt", - "cusparseZgebsr2gebsc_bufferSizeExt", - "cusparseZdense2hyb", - "cusparseZcsrsv_solve", - "cusparseZcsrsv_analysis", - "cusparseZcsrsm_solve", - "cusparseZcsrsm_analysis", - "cusparseZcsrmv_mp", - "cusparseZcsrilu0", - "cusparseZcsric0", - "cusparseZcsr2gebsr_bufferSizeExt", - "cusparseZcsc2hyb", - "cusparseZbsrsm2_bufferSizeExt", - "cusparseZbsrilu02_bufferSizeExt", - "cusparseZbsric02_bufferSizeExt", - "cusparseXgebsr2csr", - "cusparseSpVecDescr", - "cusparseSpSV_updateMatrix", - "cusparseSpSVUpdate_t", - "cusparseSpSM_updateMatrix", - "cusparseSpSMUpdate_t", - "cusparseSpMatSetNumBatches", - "cusparseSpMatGetNumBatches", - "cusparseSpMatDescr", - "cusparseSpMMOp_destroyPlan", - "cusparseSpMMOp_createPlan", - "cusparseSpMMOpPlan_t", - "cusparseSpMMOpPlan", - "cusparseSpMMOpAlg_t", - "cusparseSpMMOp", - "cusparseSpGEMM_getNumProducts", - "cusparseSpGEMM_estimateMemory", - "cusparseSolveAnalysisInfo_t", - "cusparseSolveAnalysisInfo", - "cusparseSideMode_t", - "cusparseShybsv_solve", - "cusparseShybsv_analysis", - "cusparseShyb2dense", - 
"cusparseShyb2csc", - "cusparseSgtsv_nopivot", - "cusparseSgtsvStridedBatch", - "cusparseSgtsv", - "cusparseSgebsr2gebsr_bufferSizeExt", - "cusparseSgebsr2gebsc_bufferSizeExt", - "cusparseSdense2hyb", - "cusparseScsrsv_solve", - "cusparseScsrsv_analysis", - "cusparseScsrsm_solve", - "cusparseScsrsm_analysis", - "cusparseScsrmv_mp", - "cusparseScsrilu0", - "cusparseScsric0", - "cusparseScsr2gebsr_bufferSizeExt", - "cusparseScsc2hyb", - "cusparseSbsrsm2_bufferSizeExt", - "cusparseSbsrilu02_bufferSizeExt", - "cusparseSbsric02_bufferSizeExt", - "cusparseMatDescr", - "cusparseLoggerSetMask", - "cusparseLoggerSetLevel", - "cusparseLoggerSetFile", - "cusparseLoggerSetCallback", - "cusparseLoggerOpenFile", - "cusparseLoggerForceDisable", - "cusparseLoggerCallback_t", - "cusparseHybMat", - "cusparseHpruneDense2csr_bufferSizeExt", - "cusparseHpruneDense2csrNnzByPercentage", - "cusparseHpruneDense2csrNnz", - "cusparseHpruneDense2csrByPercentage_bufferSizeExt", - "cusparseHpruneDense2csrByPercentage", - "cusparseHpruneDense2csr", - "cusparseHpruneCsr2csr_bufferSizeExt", - "cusparseHpruneCsr2csrNnzByPercentage", - "cusparseHpruneCsr2csrNnz", - "cusparseHpruneCsr2csrByPercentage_bufferSizeExt", - "cusparseHpruneCsr2csrByPercentage", - "cusparseHpruneCsr2csr", - "cusparseGetLevelInfo", - "cusparseDnVecDescr", - "cusparseDnMatDescr", - "cusparseDhybsv_solve", - "cusparseDhybsv_analysis", - "cusparseDhyb2dense", - "cusparseDhyb2csc", - "cusparseDgtsv_nopivot", - "cusparseDgtsvStridedBatch", - "cusparseDgtsv", - "cusparseDgebsr2gebsr_bufferSizeExt", - "cusparseDgebsr2gebsc_bufferSizeExt", - "cusparseDestroySolveAnalysisInfo", - "cusparseDdense2hyb", - "cusparseDcsrsv_solve", - "cusparseDcsrsv_analysis", - "cusparseDcsrsm_solve", - "cusparseDcsrsm_analysis", - "cusparseDcsrmv_mp", - "cusparseDcsrilu0", - "cusparseDcsric0", - "cusparseDcsr2gebsr_bufferSizeExt", - "cusparseDcsc2hyb", - "cusparseDbsrsm2_bufferSizeExt", - "cusparseDbsrilu02_bufferSizeExt", - 
"cusparseDbsric02_bufferSizeExt", - "cusparseCsrsv_solveEx", - "cusparseCsrsv_analysisEx", - "cusparseCsrmvEx_bufferSize", - "cusparseCsrmvEx", - "cusparseCsrilu0Ex", - "cusparseCsr2cscEx", - "cusparseCreateSolveAnalysisInfo", - "cusparseCreateSlicedEll", - "cusparseCreateConstSlicedEll", - "cusparseCreateConstBsr", - "cusparseCreateBsr", - "cusparseContext", - "cusparseConstrainedGeMM_bufferSize", - "cusparseConstrainedGeMM", - "cusparseColorInfo", - "cusparseColorAlg_t", - "cusparseChybsv_solve", - "cusparseChybsv_analysis", - "cusparseChyb2dense", - "cusparseChyb2csc", - "cusparseCgtsv_nopivot", - "cusparseCgtsvStridedBatch", - "cusparseCgtsv", - "cusparseCgebsr2gebsr_bufferSizeExt", - "cusparseCgebsr2gebsc_bufferSizeExt", - "cusparseCdense2hyb", - "cusparseCcsrsv_solve", - "cusparseCcsrsv_analysis", - "cusparseCcsrsm_solve", - "cusparseCcsrsm_analysis", - "cusparseCcsrmv_mp", - "cusparseCcsrilu0", - "cusparseCcsric0", - "cusparseCcsr2gebsr_bufferSizeExt", - "cusparseCcsc2hyb", - "cusparseCbsrsm2_bufferSizeExt", - "cusparseCbsrilu02_bufferSizeExt", - "cusparseCbsric02_bufferSizeExt", - "cusparseBsrSetStridedBatch", - "cusparseAlgMode_t", "cusolverStorevMode_t", "cusolverSpZcsrzfdHost", "cusolverSpZcsrqrsvBatched", @@ -9598,25 +9447,6 @@ sub warnUnsupportedFunctions { "cusolverDnCEgels_bufferSize", "cusolverDnCEgels", "cusolverDirectMode_t", - "curand_mtgp32_specific", - "curand_mtgp32_single_specific", - "curand_mtgp32_single", - "curand_Philox4x32_10", - "curandMethod_t", - "curandMethod", - "curandHistogramM2_t", - "curandHistogramM2_st", - "curandHistogramM2V_t", - "curandHistogramM2V_st", - "curandHistogramM2K_t", - "curandHistogramM2K_st", - "curandGetProperty", - "curandDistribution_t", - "curandDistribution_st", - "curandDistributionShift_t", - "curandDistributionShift_st", - "curandDistributionM2Shift_t", - "curandDistributionM2Shift_st", "cufftXtWorkAreaPolicy_t", "cufftXtWorkAreaPolicy", "cufftXtSubFormat_t", @@ -9654,268 +9484,6 @@ sub 
warnUnsupportedFunctions { "cufftCompatibility", "cufftBox3d_t", "cufftBox3d", - "cudnnWgradMode_t", - "cudnnTransformTensorEx", - "cudnnTransformTensor", - "cudnnTransformFilter", - "cudnnTensorTransformStruct", - "cudnnTensorTransformDescriptor_t", - "cudnnTensorStruct", - "cudnnSpatialTransformerStruct", - "cudnnSpatialTransformerDescriptor_t", - "cudnnSpatialTfSamplerForward", - "cudnnSpatialTfSamplerBackward", - "cudnnSpatialTfGridGeneratorForward", - "cudnnSpatialTfGridGeneratorBackward", - "cudnnSignalMode_t", - "cudnnSeverity_t", - "cudnnSetTensorTransformDescriptor", - "cudnnSetTensorNdDescriptorEx", - "cudnnSetSpatialTransformerNdDescriptor", - "cudnnSetSeqDataDescriptor", - "cudnnSetRNNProjectionLayers", - "cudnnSetRNNPaddingMode", - "cudnnSetRNNMatrixMathType", - "cudnnSetRNNDescriptor_v8", - "cudnnSetRNNDataDescriptor", - "cudnnSetRNNBiasMode", - "cudnnSetRNNAlgorithmDescriptor", - "cudnnSetFusedOpsVariantParamPackAttribute", - "cudnnSetFusedOpsConstParamPackAttribute", - "cudnnSetConvolutionReorderType", - "cudnnSetCallback", - "cudnnSetCTCLossDescriptor_v9", - "cudnnSetCTCLossDescriptor_v8", - "cudnnSetCTCLossDescriptorEx", - "cudnnSetCTCLossDescriptor", - "cudnnSetAttnDescriptor", - "cudnnSetAlgorithmPerformance", - "cudnnSetAlgorithmDescriptor", - "cudnnSetActivationDescriptorSwishBeta", - "cudnnSeqDataStruct", - "cudnnSeqDataDescriptor_t", - "cudnnSeqDataAxis_t", - "cudnnSaveAlgorithm", - "cudnnSamplerType_t", - "cudnnRuntimeTag_t", - "cudnnRngDistribution_t", - "cudnnRestoreDropoutDescriptor", - "cudnnRestoreAlgorithm", - "cudnnResampleMode_t", - "cudnnReorderType_t", - "cudnnReorderFilterAndBias", - "cudnnReduceTensorStruct", - "cudnnRNNStruct", - "cudnnRNNSetClip_v9", - "cudnnRNNSetClip_v8", - "cudnnRNNSetClip", - "cudnnRNNPaddingMode_t", - "cudnnRNNGetClip_v9", - "cudnnRNNGetClip_v8", - "cudnnRNNGetClip", - "cudnnRNNForwardTrainingEx", - "cudnnRNNForwardInferenceEx", - "cudnnRNNForward", - "cudnnRNNDataStruct", - "cudnnRNNDataLayout_t", - 
"cudnnRNNDataDescriptor_t", - "cudnnRNNClipMode_t", - "cudnnRNNBackwardWeights_v8", - "cudnnRNNBackwardWeightsEx", - "cudnnRNNBackwardData_v8", - "cudnnRNNBackwardDataEx", - "cudnnQueryRuntimeError", - "cudnnPoolingStruct", - "cudnnPointwiseMode_t", - "cudnnPersistentRNNPlan", - "cudnnPaddingMode_t", - "cudnnOpsVersionCheck", - "cudnnOpsTrainVersionCheck", - "cudnnOpsInferVersionCheck", - "cudnnOpTensorStruct", - "cudnnNormalizationForwardTraining", - "cudnnNormalizationForwardInference", - "cudnnNormalizationBackward", - "cudnnNormOps_t", - "cudnnNormMode_t", - "cudnnNormAlgo_t", - "cudnnMultiHeadAttnWeightKind_t", - "cudnnMultiHeadAttnForward", - "cudnnMultiHeadAttnBackwardWeights", - "cudnnMultiHeadAttnBackwardData", - "cudnnMakeFusedOpsPlan", - "cudnnLossNormalizationMode_t", - "cudnnLRNStruct", - "cudnnInitTransformDest", - "cudnnIm2Col", - "cudnnGraphVersionCheck", - "cudnnGetTensorTransformDescriptor", - "cudnnGetTensorSizeInBytes", - "cudnnGetSeqDataDescriptor", - "cudnnGetReductionIndicesSize", - "cudnnGetRNNWeightSpaceSize", - "cudnnGetRNNWeightParams", - "cudnnGetRNNTempSpaceSizes", - "cudnnGetRNNProjectionLayers", - "cudnnGetRNNPaddingMode", - "cudnnGetRNNMatrixMathType", - "cudnnGetRNNForwardTrainingAlgorithmMaxCount", - "cudnnGetRNNForwardInferenceAlgorithmMaxCount", - "cudnnGetRNNDescriptor_v8", - "cudnnGetRNNDescriptor_v6", - "cudnnGetRNNDataDescriptor", - "cudnnGetRNNBiasMode", - "cudnnGetRNNBackwardWeightsAlgorithmMaxCount", - "cudnnGetRNNBackwardDataAlgorithmMaxCount", - "cudnnGetProperty", - "cudnnGetPoolingNdForwardOutputDim", - "cudnnGetPoolingNdDescriptor", - "cudnnGetNormalizationTrainingReserveSpaceSize", - "cudnnGetNormalizationForwardTrainingWorkspaceSize", - "cudnnGetNormalizationBackwardWorkspaceSize", - "cudnnGetMultiHeadAttnWeights", - "cudnnGetMultiHeadAttnBuffers", - "cudnnGetMaxDeviceVersion", - "cudnnGetLastErrorString", - "cudnnGetFusedOpsVariantParamPackAttribute", - "cudnnGetFusedOpsConstParamPackAttribute", - 
"cudnnGetFoldedConvBackwardDataDescriptors", - "cudnnGetFilterSizeInBytes", - "cudnnGetDropoutDescriptor", - "cudnnGetCudartVersion", - "cudnnGetConvolutionReorderType", - "cudnnGetConvolutionNdForwardOutputDim", - "cudnnGetConvolutionNdDescriptor", - "cudnnGetConvolutionMathType", - "cudnnGetConvolutionGroupCount", - "cudnnGetConvolutionForwardAlgorithm_v7", - "cudnnGetConvolutionForwardAlgorithmMaxCount", - "cudnnGetConvolutionBackwardFilterAlgorithm_v7", - "cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", - "cudnnGetConvolutionBackwardDataAlgorithm_v7", - "cudnnGetConvolutionBackwardDataAlgorithmMaxCount", - "cudnnGetCallback", - "cudnnGetCTCLossWorkspaceSize_v8", - "cudnnGetCTCLossWorkspaceSize", - "cudnnGetCTCLossDescriptor_v9", - "cudnnGetCTCLossDescriptor_v8", - "cudnnGetCTCLossDescriptorEx", - "cudnnGetCTCLossDescriptor", - "cudnnGetBatchNormalizationTrainingExReserveSpaceSize", - "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", - "cudnnGetBatchNormalizationBackwardExWorkspaceSize", - "cudnnGetAttnDescriptor", - "cudnnGetAlgorithmSpaceSize", - "cudnnGetAlgorithmPerformance", - "cudnnGetAlgorithmDescriptor", - "cudnnGetActivationDescriptorSwishBeta", - "cudnnGenStatsMode_t", - "cudnnFusedOps_t", - "cudnnFusedOpsVariantParamStruct", - "cudnnFusedOpsVariantParamPack_t", - "cudnnFusedOpsVariantParamLabel_t", - "cudnnFusedOpsPointerPlaceHolder_t", - "cudnnFusedOpsPlan_t", - "cudnnFusedOpsPlanStruct", - "cudnnFusedOpsExecute", - "cudnnFusedOpsConstParamStruct", - "cudnnFusedOpsConstParamPack_t", - "cudnnFusedOpsConstParamLabel_t", - "cudnnFraction_t", - "cudnnFractionStruct", - "cudnnForwardMode_t", - "cudnnFoldingDirection_t", - "cudnnFindRNNForwardTrainingAlgorithmEx", - "cudnnFindRNNForwardInferenceAlgorithmEx", - "cudnnFindRNNBackwardWeightsAlgorithmEx", - "cudnnFindRNNBackwardDataAlgorithmEx", - "cudnnFilterStruct", - "cudnnErrQueryMode_t", - "cudnnDropoutStruct", - "cudnnDropoutGetReserveSpaceSize", - "cudnnDropoutForward", - 
"cudnnDropoutBackward", - "cudnnDivisiveNormalizationForward", - "cudnnDivisiveNormalizationBackward", - "cudnnDivNormMode_t", - "cudnnDeterminism_t", - "cudnnDestroyTensorTransformDescriptor", - "cudnnDestroySpatialTransformerDescriptor", - "cudnnDestroySeqDataDescriptor", - "cudnnDestroyRNNDataDescriptor", - "cudnnDestroyFusedOpsVariantParamPack", - "cudnnDestroyFusedOpsPlan", - "cudnnDestroyFusedOpsConstParamPack", - "cudnnDestroyCTCLossDescriptor", - "cudnnDestroyAttnDescriptor", - "cudnnDestroyAlgorithmPerformance", - "cudnnDestroyAlgorithmDescriptor", - "cudnnDeriveNormTensorDescriptor", - "cudnnDebug_t", - "cudnnDebugStruct", - "cudnnCreateTensorTransformDescriptor", - "cudnnCreateSpatialTransformerDescriptor", - "cudnnCreateSeqDataDescriptor", - "cudnnCreateRNNDataDescriptor", - "cudnnCreateFusedOpsVariantParamPack", - "cudnnCreateFusedOpsPlan", - "cudnnCreateFusedOpsConstParamPack", - "cudnnCreateCTCLossDescriptor", - "cudnnCreateAttnDescriptor", - "cudnnCreateAlgorithmPerformance", - "cudnnCreateAlgorithmDescriptor", - "cudnnCopyAlgorithmDescriptor", - "cudnnConvolutionStruct", - "cudnnConvolutionBiasActivationForward", - "cudnnContext", - "cudnnCnnTrainVersionCheck", - "cudnnCnnInferVersionCheck", - "cudnnCallback_t", - "cudnnCTCLoss_v8", - "cudnnCTCLossStruct", - "cudnnCTCLossDescriptor_t", - "cudnnCTCLossAlgo_t", - "cudnnCTCLoss", - "cudnnCTCGradMode_t", - "cudnnBuildRNNDynamic", - "cudnnBnFinalizeStatsMode_t", - "cudnnBatchNormalizationForwardTrainingEx", - "cudnnBatchNormalizationBackwardEx", - "cudnnBatchNormOps_t", - "cudnnBackendUpdateCudaGraph", - "cudnnBackendTensorReordering_t", - "cudnnBackendSetAttribute", - "cudnnBackendPopulateCudaGraph", - "cudnnBackendNumericalNote_t", - "cudnnBackendNormMode_t", - "cudnnBackendNormFwdPhase_t", - "cudnnBackendLayoutType_t", - "cudnnBackendKnobType_t", - "cudnnBackendInitialize", - "cudnnBackendHeurMode_t", - "cudnnBackendGetAttribute", - "cudnnBackendFinalize", - "cudnnBackendExecute", - 
"cudnnBackendDestroyDescriptor", - "cudnnBackendDescriptor_t", - "cudnnBackendDescriptorType_t", - "cudnnBackendCreateDescriptor", - "cudnnBackendBehaviorNote_t", - "cudnnBackendAttributeType_t", - "cudnnBackendAttributeName_t", - "cudnnAttnStruct", - "cudnnAttnQueryMap_t", - "cudnnAttnDescriptor_t", - "cudnnAlgorithm_t", - "cudnnAlgorithmUnionStruct", - "cudnnAlgorithmStruct", - "cudnnAlgorithmPerformance_t", - "cudnnAlgorithmPerformanceStruct", - "cudnnAlgorithmDescriptor_t", - "cudnnAdvVersionCheck", - "cudnnAdvTrainVersionCheck", - "cudnnAdvInferVersionCheck", - "cudnnActivationStruct", "cudaWGLGetDevice", "cudaVDPAUSetVDPAUDevice", "cudaVDPAUGetDevice", @@ -10630,8 +10198,6 @@ sub warnUnsupportedFunctions { "cuArrayGetSparseProperties", "cuArrayGetPlane", "cuArrayGetMemoryRequirements", - "csrsv2Info", - "csrsm2Info", "csrqrInfo_t", "csrqrInfoHost_t", "csrqrInfoHost", @@ -10660,7 +10226,6 @@ sub warnUnsupportedFunctions { "__nv_bfloat16_raw", "__nv_bfloat162_raw", "__nv_bfloat162", - "__curand_umul", "__NV_SATFINITE", "__NV_NOSAT", "__NV_E5M2", @@ -11287,28 +10852,6 @@ sub warnUnsupportedFunctions { "CU_AD_FORMAT_BC1_UNORM_SRGB", "CU_AD_FORMAT_BC1_UNORM", "CU_AD_FORMAT_AYUV", - "CUSPARSE_SPSV_UPDATE_GENERAL", - "CUSPARSE_SPSV_UPDATE_DIAGONAL", - "CUSPARSE_SPSM_UPDATE_GENERAL", - "CUSPARSE_SPSM_UPDATE_DIAGONAL", - "CUSPARSE_SPMV_SELL_ALG1", - "CUSPARSE_SPMM_OP_ALG_DEFAULT", - "CUSPARSE_SPMM_BSR_ALG1", - "CUSPARSE_SPMMA_PREPROCESS", - "CUSPARSE_SPMMA_ALG4", - "CUSPARSE_SPMMA_ALG3", - "CUSPARSE_SPMMA_ALG2", - "CUSPARSE_SPMMA_ALG1", - "CUSPARSE_SIDE_RIGHT", - "CUSPARSE_SIDE_LEFT", - "CUSPARSE_FORMAT_SLICED_ELLPACK", - "CUSPARSE_FORMAT_BSR", - "CUSPARSE_COLOR_ALG1", - "CUSPARSE_COLOR_ALG0", - "CUSPARSE_ALG_NAIVE", - "CUSPARSE_ALG_MERGE_PATH", - "CUSPARSE_ALG1", - "CUSPARSE_ALG0", "CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED", "CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE", "CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC", @@ -11352,20 +10895,6 @@ sub warnUnsupportedFunctions 
{ "CUSOLVER_C_16BF", "CUSOLVER_ALG_2", "CUSOLVERDN_POTRF", - "CURAND_REJECTION", - "CURAND_POISSON", - "CURAND_M2", - "CURAND_M1", - "CURAND_KNUTH", - "CURAND_ITR", - "CURAND_HITR", - "CURAND_FAST_REJECTION", - "CURAND_DISCRETE_GAUSS", - "CURAND_DEVICE_API", - "CURAND_DEFINITION", - "CURAND_CHOOSE_BEST", - "CURAND_BINARY_SEARCH", - "CURAND_3RD", "CUGLmap_flags_enum", "CUGLmap_flags", "CUFFT_XT_FORMAT_OUTPUT", @@ -11388,637 +10917,6 @@ sub warnUnsupportedFunctions { "CUFFT_COPY_DEVICE_TO_DEVICE", "CUFFT_COMPATIBILITY_FFTW_PADDING", "CUFFT_COMPATIBILITY_DEFAULT", - "CUDNN_ZERO_PAD", - "CUDNN_WGRAD_MODE_SET", - "CUDNN_WGRAD_MODE_ADD", - "CUDNN_TYPE_VOID_PTR", - "CUDNN_TYPE_TENSOR_REORDERING_MODE", - "CUDNN_TYPE_SIGNAL_MODE", - "CUDNN_TYPE_RNG_DISTRIBUTION", - "CUDNN_TYPE_RESAMPLE_MODE", - "CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", - "CUDNN_TYPE_POINTWISE_MODE", - "CUDNN_TYPE_PADDING_MODE", - "CUDNN_TYPE_NUMERICAL_NOTE", - "CUDNN_TYPE_NORM_MODE", - "CUDNN_TYPE_NORM_FWD_PHASE", - "CUDNN_TYPE_NAN_PROPOGATION", - "CUDNN_TYPE_LAYOUT_TYPE", - "CUDNN_TYPE_KNOB_TYPE", - "CUDNN_TYPE_INT64", - "CUDNN_TYPE_INT32", - "CUDNN_TYPE_HEUR_MODE", - "CUDNN_TYPE_HANDLE", - "CUDNN_TYPE_GENSTATS_MODE", - "CUDNN_TYPE_FRACTION", - "CUDNN_TYPE_FLOAT", - "CUDNN_TYPE_DOUBLE", - "CUDNN_TYPE_DATA_TYPE", - "CUDNN_TYPE_CONVOLUTION_MODE", - "CUDNN_TYPE_CHAR", - "CUDNN_TYPE_BOOLEAN", - "CUDNN_TYPE_BN_FINALIZE_STATS_MODE", - "CUDNN_TYPE_BEHAVIOR_NOTE", - "CUDNN_TYPE_BACKEND_DESCRIPTOR", - "CUDNN_TYPE_ATTRIB_NAME", - "CUDNN_TRANSFORM_UNFOLD", - "CUDNN_TRANSFORM_FOLD", - "CUDNN_TENSOR_REORDERING_NONE", - "CUDNN_TENSOR_REORDERING_INT8x32", - "CUDNN_TENSOR_REORDERING_F16x16", - "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", - "CUDNN_STATUS_VERSION_MISMATCH", - "CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", - "CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", - "CUDNN_STATUS_SPECIFIC_ERROR", - "CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", - "CUDNN_STATUS_RUNTIME_IN_PROGRESS", - "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", - 
"CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE", - "CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", - "CUDNN_STATUS_NOT_SUPPORTED_SHAPE", - "CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING", - "CUDNN_STATUS_NOT_SUPPORTED_PADDING", - "CUDNN_STATUS_NOT_SUPPORTED_LAYOUT", - "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER", - "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART", - "CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", - "CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", - "CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", - "CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", - "CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH", - "CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", - "CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM", - "CUDNN_STATUS_FULL_ERROR_CODE", - "CUDNN_STATUS_EXECUTION_FAILED_CURAND", - "CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER", - "CUDNN_STATUS_EXECUTION_FAILED_CUDART", - "CUDNN_STATUS_EXECUTION_FAILED_CUBLAS", - "CUDNN_STATUS_DEPRECATED", - "CUDNN_STATUS_CATEGORY", - "CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT", - "CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND", - "CUDNN_STATUS_BAD_PARAM_NULL_POINTER", - "CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED", - "CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER", - "CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", - "CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", - "CUDNN_SIGNAL_WAIT", - "CUDNN_SIGNAL_SET", - "CUDNN_SEV_WARNING_EN", - "CUDNN_SEV_WARNING", - "CUDNN_SEV_INFO_EN", - "CUDNN_SEV_INFO", - "CUDNN_SEV_FATAL", - "CUDNN_SEV_ERROR_EN", - "CUDNN_SEV_ERROR", - "CUDNN_SEQDATA_VECT_DIM", - "CUDNN_SEQDATA_TIME_DIM", - "CUDNN_SEQDATA_DIM_COUNT", - "CUDNN_SEQDATA_BEAM_DIM", - 
"CUDNN_SEQDATA_BATCH_DIM", - "CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", - "CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", - "CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", - "CUDNN_SCALAR_DOUBLE_BN_EPSILON", - "CUDNN_SAMPLER_BILINEAR", - "CUDNN_RNN_PADDED_IO_ENABLED", - "CUDNN_RNN_PADDED_IO_DISABLED", - "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", - "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", - "CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", - "CUDNN_RNN_CLIP_NONE", - "CUDNN_RNN_CLIP_MINMAX", - "CUDNN_RNN_ALGO_COUNT", - "CUDNN_RNG_DISTRIBUTION_UNIFORM", - "CUDNN_RNG_DISTRIBUTION_NORMAL", - "CUDNN_RNG_DISTRIBUTION_BERNOULLI", - "CUDNN_RMS_NORM", - "CUDNN_RESAMPLE_NEAREST", - "CUDNN_RESAMPLE_MAXPOOL", - "CUDNN_RESAMPLE_BILINEAR", - "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", - "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", - "CUDNN_RESAMPLE_AVGPOOL", - "CUDNN_PTR_ZDATA", - "CUDNN_PTR_YSUM", - "CUDNN_PTR_YSQSUM", - "CUDNN_PTR_YDATA", - "CUDNN_PTR_XDATA", - "CUDNN_PTR_WORKSPACE", - "CUDNN_PTR_WDATA", - "CUDNN_PTR_NULL", - "CUDNN_PTR_ELEM_ALIGNED", - "CUDNN_PTR_DZDATA", - "CUDNN_PTR_DYDATA", - "CUDNN_PTR_DXDATA", - "CUDNN_PTR_DWDATA", - "CUDNN_PTR_BN_Z_EQSCALE", - "CUDNN_PTR_BN_Z_EQBIAS", - "CUDNN_PTR_BN_SCALE", - "CUDNN_PTR_BN_SAVED_MEAN", - "CUDNN_PTR_BN_SAVED_INVSTD", - "CUDNN_PTR_BN_RUNNING_VAR", - "CUDNN_PTR_BN_RUNNING_MEAN", - "CUDNN_PTR_BN_EQSCALE", - "CUDNN_PTR_BN_EQBIAS", - "CUDNN_PTR_BN_DSCALE", - "CUDNN_PTR_BN_DBIAS", - "CUDNN_PTR_BN_BIAS", - "CUDNN_PTR_ACTIVATION_BITMASK", - "CUDNN_PTR_16B_ALIGNED", - "CUDNN_POINTWISE_TANH_FWD", - "CUDNN_POINTWISE_TANH_BWD", - "CUDNN_POINTWISE_TAN", - "CUDNN_POINTWISE_SWISH_FWD", - "CUDNN_POINTWISE_SWISH_BWD", - "CUDNN_POINTWISE_SUB", - "CUDNN_POINTWISE_SQRT", - "CUDNN_POINTWISE_SOFTPLUS_FWD", - "CUDNN_POINTWISE_SOFTPLUS_BWD", - "CUDNN_POINTWISE_SIN", - "CUDNN_POINTWISE_SIGMOID_FWD", - "CUDNN_POINTWISE_SIGMOID_BWD", - "CUDNN_POINTWISE_RSQRT", - "CUDNN_POINTWISE_RELU_FWD", - "CUDNN_POINTWISE_RELU_BWD", - "CUDNN_POINTWISE_RECIPROCAL", - 
"CUDNN_POINTWISE_POW", - "CUDNN_POINTWISE_NEG", - "CUDNN_POINTWISE_MUL", - "CUDNN_POINTWISE_MOD", - "CUDNN_POINTWISE_MIN", - "CUDNN_POINTWISE_MAX", - "CUDNN_POINTWISE_LOGICAL_OR", - "CUDNN_POINTWISE_LOGICAL_NOT", - "CUDNN_POINTWISE_LOGICAL_AND", - "CUDNN_POINTWISE_LOG", - "CUDNN_POINTWISE_IDENTITY", - "CUDNN_POINTWISE_GEN_INDEX", - "CUDNN_POINTWISE_GELU_FWD", - "CUDNN_POINTWISE_GELU_BWD", - "CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", - "CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", - "CUDNN_POINTWISE_FLOOR", - "CUDNN_POINTWISE_EXP", - "CUDNN_POINTWISE_ERF", - "CUDNN_POINTWISE_ELU_FWD", - "CUDNN_POINTWISE_ELU_BWD", - "CUDNN_POINTWISE_DIV", - "CUDNN_POINTWISE_COS", - "CUDNN_POINTWISE_CMP_NEQ", - "CUDNN_POINTWISE_CMP_LT", - "CUDNN_POINTWISE_CMP_LE", - "CUDNN_POINTWISE_CMP_GT", - "CUDNN_POINTWISE_CMP_GE", - "CUDNN_POINTWISE_CMP_EQ", - "CUDNN_POINTWISE_CEIL", - "CUDNN_POINTWISE_BINARY_SELECT", - "CUDNN_POINTWISE_ATAN2", - "CUDNN_POINTWISE_ADD_SQUARE", - "CUDNN_POINTWISE_ADD", - "CUDNN_POINTWISE_ABS", - "CUDNN_PARAM_ZDESC", - "CUDNN_PARAM_ZDATA_PLACEHOLDER", - "CUDNN_PARAM_YSUM_PLACEHOLDER", - "CUDNN_PARAM_YSTATS_DESC", - "CUDNN_PARAM_YSQSUM_PLACEHOLDER", - "CUDNN_PARAM_YDESC", - "CUDNN_PARAM_YDATA_PLACEHOLDER", - "CUDNN_PARAM_XDESC", - "CUDNN_PARAM_XDATA_PLACEHOLDER", - "CUDNN_PARAM_WDESC", - "CUDNN_PARAM_WDATA_PLACEHOLDER", - "CUDNN_PARAM_DZDESC", - "CUDNN_PARAM_DZDATA_PLACEHOLDER", - "CUDNN_PARAM_DYDESC", - "CUDNN_PARAM_DYDATA_PLACEHOLDER", - "CUDNN_PARAM_DXDESC", - "CUDNN_PARAM_DXDATA_PLACEHOLDER", - "CUDNN_PARAM_DWDESC", - "CUDNN_PARAM_DWDATA_PLACEHOLDER", - "CUDNN_PARAM_CONV_DESC", - "CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", - "CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_SCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", - "CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", - "CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", - "CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", - "CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", - 
"CUDNN_PARAM_BN_MODE", - "CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_EQSCALEBIAS_DESC", - "CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_BIAS_PLACEHOLDER", - "CUDNN_PARAM_ACTIVATION_DESC", - "CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", - "CUDNN_PARAM_ACTIVATION_BITMASK_DESC", - "CUDNN_OP_TENSOR_NOT", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", - "CUDNN_NUMERICAL_NOTE_WINOGRAD", - "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", - "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", - "CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP", - "CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION", - "CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", - "CUDNN_NUMERICAL_NOTE_FFT", - "CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", - "CUDNN_NO_REORDER", - "CUDNN_NORM_PER_CHANNEL", - "CUDNN_NORM_PER_ACTIVATION", - "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", - "CUDNN_NORM_OPS_NORM_ACTIVATION", - "CUDNN_NORM_OPS_NORM", - "CUDNN_NORM_FWD_TRAINING", - "CUDNN_NORM_FWD_INFERENCE", - "CUDNN_NORM_ALGO_STANDARD", - "CUDNN_NORM_ALGO_PERSIST", - "CUDNN_NON_DETERMINISTIC", - "CUDNN_NEG_INF_PAD", - "CUDNN_MH_ATTN_V_WEIGHTS", - "CUDNN_MH_ATTN_V_BIASES", - "CUDNN_MH_ATTN_Q_WEIGHTS", - "CUDNN_MH_ATTN_Q_BIASES", - "CUDNN_MH_ATTN_O_WEIGHTS", - "CUDNN_MH_ATTN_O_BIASES", - "CUDNN_MH_ATTN_K_WEIGHTS", - "CUDNN_MH_ATTN_K_BIASES", - "CUDNN_LRN_MIN_N", - "CUDNN_LRN_MIN_K", - "CUDNN_LRN_MIN_BETA", - "CUDNN_LRN_MAX_N", - "CUDNN_LOSS_NORMALIZATION_SOFTMAX", - "CUDNN_LOSS_NORMALIZATION_NONE", - "CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK", - "CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK", - "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", - "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", - "CUDNN_LAYOUT_TYPE_COUNT", - "CUDNN_LAYER_NORM", - "CUDNN_KNOB_TYPE_WORKSPACE", - "CUDNN_KNOB_TYPE_WINO_TILE", - "CUDNN_KNOB_TYPE_USE_TEX", - "CUDNN_KNOB_TYPE_TILE_SIZE", - "CUDNN_KNOB_TYPE_TILE_ROWS", - "CUDNN_KNOB_TYPE_TILE_COLS", - 
"CUDNN_KNOB_TYPE_TILE_CGA_N", - "CUDNN_KNOB_TYPE_TILE_CGA_M", - "CUDNN_KNOB_TYPE_TILE_CGA", - "CUDNN_KNOB_TYPE_TILEK", - "CUDNN_KNOB_TYPE_SWIZZLE", - "CUDNN_KNOB_TYPE_STAGES", - "CUDNN_KNOB_TYPE_SPLIT_RS", - "CUDNN_KNOB_TYPE_SPLIT_K_SLC", - "CUDNN_KNOB_TYPE_SPLIT_K_BUF", - "CUDNN_KNOB_TYPE_SPLIT_K", - "CUDNN_KNOB_TYPE_SPLIT_H", - "CUDNN_KNOB_TYPE_SPLIT_COLS", - "CUDNN_KNOB_TYPE_SPECFILT", - "CUDNN_KNOB_TYPE_SLICED", - "CUDNN_KNOB_TYPE_SINGLEBUFFER", - "CUDNN_KNOB_TYPE_REDUCTION_MODE", - "CUDNN_KNOB_TYPE_OCCUPANCY", - "CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK", - "CUDNN_KNOB_TYPE_MULTIPLY", - "CUDNN_KNOB_TYPE_LOAD_SIZE", - "CUDNN_KNOB_TYPE_LDGC", - "CUDNN_KNOB_TYPE_LDGB", - "CUDNN_KNOB_TYPE_LDGA", - "CUDNN_KNOB_TYPE_KERNEL_CFG", - "CUDNN_KNOB_TYPE_KBLOCK", - "CUDNN_KNOB_TYPE_IDX_MODE", - "CUDNN_KNOB_TYPE_EDGE", - "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", - "CUDNN_KNOB_TYPE_COUNTS", - "CUDNN_KNOB_TYPE_CHUNK_K", - "CUDNN_KNOB_TYPE_BLOCK_SIZE", - "CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD", - "CUDNN_INSTANCE_NORM", - "CUDNN_HEUR_MODE_INSTANT", - "CUDNN_HEUR_MODE_FALLBACK", - "CUDNN_HEUR_MODE_B", - "CUDNN_HEUR_MODE_A", - "CUDNN_HEUR_MODES_COUNT", - "CUDNN_GROUP_NORM", - "CUDNN_GENSTATS_SUM_SQSUM", - "CUDNN_FWD_MODE_TRAINING", - "CUDNN_FWD_MODE_INFERENCE", - "CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", - "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", - "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", - "CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", - "CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", - "CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", - "CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", - "CUDNN_FMA_MATH", - "CUDNN_ERRQUERY_RAWCODE", - "CUDNN_ERRQUERY_NONBLOCKING", - "CUDNN_ERRQUERY_BLOCKING", - "CUDNN_EDGE_VAL_PAD", - "CUDNN_DIVNORM_PRECOMPUTED_MEANS", - "CUDNN_DIM_MAX", - "CUDNN_DETERMINISTIC", - "CUDNN_DEFAULT_REORDER", - "CUDNN_DATA_UINT8x4", - "CUDNN_DATA_UINT8", - "CUDNN_DATA_INT8x32", - "CUDNN_DATA_INT64", - "CUDNN_DATA_FP8_E5M2", - "CUDNN_DATA_FP8_E4M3", - 
"CUDNN_DATA_FAST_FLOAT_FOR_FP8", - "CUDNN_DATA_BOOLEAN", - "CUDNN_DATA_BFLOAT16", - "CUDNN_CTC_ZERO_OOB_GRADIENTS", - "CUDNN_CTC_SKIP_OOB_GRADIENTS", - "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", - "CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", - "CUDNN_BN_FINALIZE_STATISTICS_TRAINING", - "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", - "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", - "CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", - "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", - "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", - "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", - "CUDNN_BATCH_NORM", - "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", - "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", - "CUDNN_BATCHNORM_OPS_BN", - "CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR", - "CUDNN_BACKEND_TENSOR_DESCRIPTOR", - "CUDNN_BACKEND_RNG_DESCRIPTOR", - "CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", - "CUDNN_BACKEND_REDUCTION_DESCRIPTOR", - "CUDNN_BACKEND_POINTWISE_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", - "CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR", - "CUDNN_BACKEND_MATMUL_DESCRIPTOR", - 
"CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR", - "CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR", - "CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR", - "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", - "CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR", - "CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR", - "CUDNN_BACKEND_ENGINE_DESCRIPTOR", - "CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR", - "CUDNN_BACKEND_ENGINECFG_DESCRIPTOR", - "CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR", - "CUDNN_ATTR_VARIANT_PACK_WORKSPACE", - "CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", - "CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES", - "CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS", - "CUDNN_ATTR_TENSOR_VECTOR_COUNT", - "CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION", - "CUDNN_ATTR_TENSOR_UNIQUE_ID", - "CUDNN_ATTR_TENSOR_STRIDES", - "CUDNN_ATTR_TENSOR_REORDERING_MODE", - "CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC", - "CUDNN_ATTR_TENSOR_IS_VIRTUAL", - "CUDNN_ATTR_TENSOR_IS_BY_VALUE", - "CUDNN_ATTR_TENSOR_DIMENSIONS", - "CUDNN_ATTR_TENSOR_DATA_TYPE", - "CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT", - "CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM", - "CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM", - "CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION", - "CUDNN_ATTR_RNG_NORMAL_DIST_MEAN", - "CUDNN_ATTR_RNG_DISTRIBUTION", - "CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY", - "CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", - "CUDNN_ATTR_RESAMPLE_STRIDES", - "CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", - "CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", - "CUDNN_ATTR_RESAMPLE_POST_PADDINGS", - "CUDNN_ATTR_RESAMPLE_PADDING_MODE", - "CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", - "CUDNN_ATTR_RESAMPLE_MODE", - "CUDNN_ATTR_RESAMPLE_COMP_TYPE", - "CUDNN_ATTR_REDUCTION_OPERATOR", - "CUDNN_ATTR_REDUCTION_COMP_TYPE", - "CUDNN_ATTR_POINTWISE_SWISH_BETA", - "CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA", - "CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP", - "CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE", - "CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP", - "CUDNN_ATTR_POINTWISE_NAN_PROPAGATION", - "CUDNN_ATTR_POINTWISE_MODE", - "CUDNN_ATTR_POINTWISE_MATH_PREC", - "CUDNN_ATTR_POINTWISE_ELU_ALPHA", - 
"CUDNN_ATTR_POINTWISE_AXIS", - "CUDNN_ATTR_OPERATION_SIGNAL_YDESC", - "CUDNN_ATTR_OPERATION_SIGNAL_XDESC", - "CUDNN_ATTR_OPERATION_SIGNAL_VALUE", - "CUDNN_ATTR_OPERATION_SIGNAL_MODE", - "CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", - "CUDNN_ATTR_OPERATION_RNG_YDESC", - "CUDNN_ATTR_OPERATION_RNG_SEED", - "CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC", - "CUDNN_ATTR_OPERATION_RNG_DESC", - "CUDNN_ATTR_OPERATION_RESHAPE_YDESC", - "CUDNN_ATTR_OPERATION_RESHAPE_XDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", - "CUDNN_ATTR_OPERATION_REDUCTION_YDESC", - "CUDNN_ATTR_OPERATION_REDUCTION_XDESC", - "CUDNN_ATTR_OPERATION_REDUCTION_DESC", - "CUDNN_ATTR_OPERATION_POINTWISE_YDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_XDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_TDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR", - "CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_BDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", - "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", - 
"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", - "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_MODE", - "CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", - "CUDNN_ATTR_OPERATION_NORM_BWD_MODE", - "CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT", - "CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_CDESC", - "CUDNN_ATTR_OPERATION_MATMUL_BDESC", - "CUDNN_ATTR_OPERATION_MATMUL_ADESC", - "CUDNN_ATTR_OPERATION_GENSTATS_XDESC", - "CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC", - "CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC", - "CUDNN_ATTR_OPERATION_GENSTATS_MODE", - "CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA", - 
"CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA", - "CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", - "CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", - "CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", - "CUDNN_ATTR_OPERATION_CONCAT_AXIS", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", - 
"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC", - "CUDNN_ATTR_OPERATIONGRAPH_OPS", - "CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", - "CUDNN_ATTR_OPERATIONGRAPH_HANDLE", - "CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT", - "CUDNN_ATTR_MATMUL_PADDING_VALUE", - "CUDNN_ATTR_MATMUL_COMP_TYPE", - "CUDNN_ATTR_LAYOUT_INFO_TYPES", - "CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID", - "CUDNN_ATTR_KNOB_INFO_TYPE", - "CUDNN_ATTR_KNOB_INFO_STRIDE", - "CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE", - "CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE", - "CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE", - "CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE", - "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", - "CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", - "CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", - "CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", - "CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", - "CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES", - "CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", - "CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", - "CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", - "CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", - "CUDNN_ATTR_EXECUTION_PLAN_HANDLE", - "CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG", - "CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", - "CUDNN_ATTR_ENGINE_SM_COUNT_TARGET", - "CUDNN_ATTR_ENGINE_OPERATION_GRAPH", - "CUDNN_ATTR_ENGINE_NUMERICAL_NOTE", - "CUDNN_ATTR_ENGINE_LAYOUT_INFO", - "CUDNN_ATTR_ENGINE_KNOB_INFO", - "CUDNN_ATTR_ENGINE_GLOBAL_INDEX", - "CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE", - "CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET", - "CUDNN_ATTR_ENGINEHEUR_RESULTS", - "CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH", - "CUDNN_ATTR_ENGINEHEUR_MODE", - "CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", - "CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", - 
"CUDNN_ATTR_ENGINECFG_KNOB_CHOICES", - "CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO", - "CUDNN_ATTR_ENGINECFG_ENGINE", - "CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS", - "CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS", - "CUDNN_ATTR_CONVOLUTION_POST_PADDINGS", - "CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES", - "CUDNN_ATTR_CONVOLUTION_DILATIONS", - "CUDNN_ATTR_CONVOLUTION_CONV_MODE", - "CUDNN_ATTR_CONVOLUTION_COMP_TYPE", - "CUDNN_ATTN_WKIND_COUNT", - "CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", - "CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", - "CUDNN_ATTN_ENABLE_PROJ_BIASES", - "CUDNN_ATTN_DISABLE_PROJ_BIASES", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS", @@ -12177,6 +11075,438 @@ sub warnHipOnlyUnsupportedFunctions { my $line_num = shift; my $k = 0; foreach $func ( + "cusparseZhybsv_solve", + "cusparseZhybsv_analysis", + "cusparseZhyb2dense", + "cusparseZhyb2csc", + "cusparseZgtsv_nopivot", + "cusparseZgtsvStridedBatch", + "cusparseZgtsv", + "cusparseZgebsr2gebsr_bufferSizeExt", + "cusparseZgebsr2gebsc_bufferSizeExt", + "cusparseZdense2hyb", + "cusparseZcsrsv_solve", + "cusparseZcsrsv_analysis", + "cusparseZcsrsm_solve", + "cusparseZcsrsm_analysis", + "cusparseZcsrmv_mp", + "cusparseZcsrilu0", + "cusparseZcsric0", + "cusparseZcsr2gebsr_bufferSizeExt", + "cusparseZcsc2hyb", + "cusparseZbsrsm2_bufferSizeExt", + "cusparseZbsrilu02_bufferSizeExt", + "cusparseZbsric02_bufferSizeExt", + "cusparseXgebsr2csr", + "cusparseSpVecDescr", + "cusparseSpSV_updateMatrix", + "cusparseSpSVUpdate_t", + "cusparseSpSM_updateMatrix", + "cusparseSpSMUpdate_t", + "cusparseSpMatSetNumBatches", + "cusparseSpMatGetNumBatches", + "cusparseSpMatDescr", + "cusparseSpMMOp_destroyPlan", + "cusparseSpMMOp_createPlan", + "cusparseSpMMOpPlan_t", + "cusparseSpMMOpPlan", + "cusparseSpMMOpAlg_t", + "cusparseSpMMOp", + "cusparseSpGEMM_getNumProducts", + "cusparseSpGEMM_estimateMemory", + "cusparseSolveAnalysisInfo_t", + "cusparseSolveAnalysisInfo", + "cusparseSideMode_t", + 
"cusparseShybsv_solve", + "cusparseShybsv_analysis", + "cusparseShyb2dense", + "cusparseShyb2csc", + "cusparseSgtsv_nopivot", + "cusparseSgtsvStridedBatch", + "cusparseSgtsv", + "cusparseSgebsr2gebsr_bufferSizeExt", + "cusparseSgebsr2gebsc_bufferSizeExt", + "cusparseSdense2hyb", + "cusparseScsrsv_solve", + "cusparseScsrsv_analysis", + "cusparseScsrsm_solve", + "cusparseScsrsm_analysis", + "cusparseScsrmv_mp", + "cusparseScsrilu0", + "cusparseScsric0", + "cusparseScsr2gebsr_bufferSizeExt", + "cusparseScsc2hyb", + "cusparseSbsrsm2_bufferSizeExt", + "cusparseSbsrilu02_bufferSizeExt", + "cusparseSbsric02_bufferSizeExt", + "cusparseMatDescr", + "cusparseLoggerSetMask", + "cusparseLoggerSetLevel", + "cusparseLoggerSetFile", + "cusparseLoggerSetCallback", + "cusparseLoggerOpenFile", + "cusparseLoggerForceDisable", + "cusparseLoggerCallback_t", + "cusparseHybMat", + "cusparseHpruneDense2csr_bufferSizeExt", + "cusparseHpruneDense2csrNnzByPercentage", + "cusparseHpruneDense2csrNnz", + "cusparseHpruneDense2csrByPercentage_bufferSizeExt", + "cusparseHpruneDense2csrByPercentage", + "cusparseHpruneDense2csr", + "cusparseHpruneCsr2csr_bufferSizeExt", + "cusparseHpruneCsr2csrNnzByPercentage", + "cusparseHpruneCsr2csrNnz", + "cusparseHpruneCsr2csrByPercentage_bufferSizeExt", + "cusparseHpruneCsr2csrByPercentage", + "cusparseHpruneCsr2csr", + "cusparseGetLevelInfo", + "cusparseDnVecDescr", + "cusparseDnMatDescr", + "cusparseDhybsv_solve", + "cusparseDhybsv_analysis", + "cusparseDhyb2dense", + "cusparseDhyb2csc", + "cusparseDgtsv_nopivot", + "cusparseDgtsvStridedBatch", + "cusparseDgtsv", + "cusparseDgebsr2gebsr_bufferSizeExt", + "cusparseDgebsr2gebsc_bufferSizeExt", + "cusparseDestroySolveAnalysisInfo", + "cusparseDdense2hyb", + "cusparseDcsrsv_solve", + "cusparseDcsrsv_analysis", + "cusparseDcsrsm_solve", + "cusparseDcsrsm_analysis", + "cusparseDcsrmv_mp", + "cusparseDcsrilu0", + "cusparseDcsric0", + "cusparseDcsr2gebsr_bufferSizeExt", + "cusparseDcsc2hyb", + 
"cusparseDbsrsm2_bufferSizeExt", + "cusparseDbsrilu02_bufferSizeExt", + "cusparseDbsric02_bufferSizeExt", + "cusparseCsrsv_solveEx", + "cusparseCsrsv_analysisEx", + "cusparseCsrmvEx_bufferSize", + "cusparseCsrmvEx", + "cusparseCsrilu0Ex", + "cusparseCsr2cscEx", + "cusparseCreateSolveAnalysisInfo", + "cusparseCreateSlicedEll", + "cusparseCreateConstSlicedEll", + "cusparseCreateConstBsr", + "cusparseCreateBsr", + "cusparseContext", + "cusparseConstrainedGeMM_bufferSize", + "cusparseConstrainedGeMM", + "cusparseColorInfo", + "cusparseColorAlg_t", + "cusparseChybsv_solve", + "cusparseChybsv_analysis", + "cusparseChyb2dense", + "cusparseChyb2csc", + "cusparseCgtsv_nopivot", + "cusparseCgtsvStridedBatch", + "cusparseCgtsv", + "cusparseCgebsr2gebsr_bufferSizeExt", + "cusparseCgebsr2gebsc_bufferSizeExt", + "cusparseCdense2hyb", + "cusparseCcsrsv_solve", + "cusparseCcsrsv_analysis", + "cusparseCcsrsm_solve", + "cusparseCcsrsm_analysis", + "cusparseCcsrmv_mp", + "cusparseCcsrilu0", + "cusparseCcsric0", + "cusparseCcsr2gebsr_bufferSizeExt", + "cusparseCcsc2hyb", + "cusparseCbsrsm2_bufferSizeExt", + "cusparseCbsrilu02_bufferSizeExt", + "cusparseCbsric02_bufferSizeExt", + "cusparseBsrSetStridedBatch", + "cusparseAlgMode_t", + "curand_mtgp32_specific", + "curand_mtgp32_single_specific", + "curand_mtgp32_single", + "curand_Philox4x32_10", + "curandMethod_t", + "curandMethod", + "curandHistogramM2_t", + "curandHistogramM2_st", + "curandHistogramM2V_t", + "curandHistogramM2V_st", + "curandHistogramM2K_t", + "curandHistogramM2K_st", + "curandGetProperty", + "curandDistribution_t", + "curandDistribution_st", + "curandDistributionShift_t", + "curandDistributionShift_st", + "curandDistributionM2Shift_t", + "curandDistributionM2Shift_st", + "cudnnWgradMode_t", + "cudnnTransformTensorEx", + "cudnnTransformTensor", + "cudnnTransformFilter", + "cudnnTensorTransformStruct", + "cudnnTensorTransformDescriptor_t", + "cudnnTensorStruct", + "cudnnSpatialTransformerStruct", + 
"cudnnSpatialTransformerDescriptor_t", + "cudnnSpatialTfSamplerForward", + "cudnnSpatialTfSamplerBackward", + "cudnnSpatialTfGridGeneratorForward", + "cudnnSpatialTfGridGeneratorBackward", + "cudnnSignalMode_t", + "cudnnSeverity_t", + "cudnnSetTensorTransformDescriptor", + "cudnnSetTensorNdDescriptorEx", + "cudnnSetSpatialTransformerNdDescriptor", + "cudnnSetSeqDataDescriptor", + "cudnnSetRNNProjectionLayers", + "cudnnSetRNNPaddingMode", + "cudnnSetRNNMatrixMathType", + "cudnnSetRNNDescriptor_v8", + "cudnnSetRNNDataDescriptor", + "cudnnSetRNNBiasMode", + "cudnnSetRNNAlgorithmDescriptor", + "cudnnSetFusedOpsVariantParamPackAttribute", + "cudnnSetFusedOpsConstParamPackAttribute", + "cudnnSetConvolutionReorderType", + "cudnnSetCallback", + "cudnnSetCTCLossDescriptor_v9", + "cudnnSetCTCLossDescriptor_v8", + "cudnnSetCTCLossDescriptorEx", + "cudnnSetCTCLossDescriptor", + "cudnnSetAttnDescriptor", + "cudnnSetAlgorithmPerformance", + "cudnnSetAlgorithmDescriptor", + "cudnnSetActivationDescriptorSwishBeta", + "cudnnSeqDataStruct", + "cudnnSeqDataDescriptor_t", + "cudnnSeqDataAxis_t", + "cudnnSaveAlgorithm", + "cudnnSamplerType_t", + "cudnnRuntimeTag_t", + "cudnnRngDistribution_t", + "cudnnRestoreDropoutDescriptor", + "cudnnRestoreAlgorithm", + "cudnnResampleMode_t", + "cudnnReorderType_t", + "cudnnReorderFilterAndBias", + "cudnnReduceTensorStruct", + "cudnnRNNStruct", + "cudnnRNNSetClip_v9", + "cudnnRNNSetClip_v8", + "cudnnRNNSetClip", + "cudnnRNNPaddingMode_t", + "cudnnRNNGetClip_v9", + "cudnnRNNGetClip_v8", + "cudnnRNNGetClip", + "cudnnRNNForwardTrainingEx", + "cudnnRNNForwardInferenceEx", + "cudnnRNNForward", + "cudnnRNNDataStruct", + "cudnnRNNDataLayout_t", + "cudnnRNNDataDescriptor_t", + "cudnnRNNClipMode_t", + "cudnnRNNBackwardWeights_v8", + "cudnnRNNBackwardWeightsEx", + "cudnnRNNBackwardData_v8", + "cudnnRNNBackwardDataEx", + "cudnnQueryRuntimeError", + "cudnnPoolingStruct", + "cudnnPointwiseMode_t", + "cudnnPersistentRNNPlan", + "cudnnPaddingMode_t", + 
"cudnnOpsVersionCheck", + "cudnnOpsTrainVersionCheck", + "cudnnOpsInferVersionCheck", + "cudnnOpTensorStruct", + "cudnnNormalizationForwardTraining", + "cudnnNormalizationForwardInference", + "cudnnNormalizationBackward", + "cudnnNormOps_t", + "cudnnNormMode_t", + "cudnnNormAlgo_t", + "cudnnMultiHeadAttnWeightKind_t", + "cudnnMultiHeadAttnForward", + "cudnnMultiHeadAttnBackwardWeights", + "cudnnMultiHeadAttnBackwardData", + "cudnnMakeFusedOpsPlan", + "cudnnLossNormalizationMode_t", + "cudnnLRNStruct", + "cudnnInitTransformDest", + "cudnnIm2Col", + "cudnnGraphVersionCheck", + "cudnnGetTensorTransformDescriptor", + "cudnnGetTensorSizeInBytes", + "cudnnGetSeqDataDescriptor", + "cudnnGetReductionIndicesSize", + "cudnnGetRNNWeightSpaceSize", + "cudnnGetRNNWeightParams", + "cudnnGetRNNTempSpaceSizes", + "cudnnGetRNNProjectionLayers", + "cudnnGetRNNPaddingMode", + "cudnnGetRNNMatrixMathType", + "cudnnGetRNNForwardTrainingAlgorithmMaxCount", + "cudnnGetRNNForwardInferenceAlgorithmMaxCount", + "cudnnGetRNNDescriptor_v8", + "cudnnGetRNNDescriptor_v6", + "cudnnGetRNNDataDescriptor", + "cudnnGetRNNBiasMode", + "cudnnGetRNNBackwardWeightsAlgorithmMaxCount", + "cudnnGetRNNBackwardDataAlgorithmMaxCount", + "cudnnGetProperty", + "cudnnGetPoolingNdForwardOutputDim", + "cudnnGetPoolingNdDescriptor", + "cudnnGetNormalizationTrainingReserveSpaceSize", + "cudnnGetNormalizationForwardTrainingWorkspaceSize", + "cudnnGetNormalizationBackwardWorkspaceSize", + "cudnnGetMultiHeadAttnWeights", + "cudnnGetMultiHeadAttnBuffers", + "cudnnGetMaxDeviceVersion", + "cudnnGetLastErrorString", + "cudnnGetFusedOpsVariantParamPackAttribute", + "cudnnGetFusedOpsConstParamPackAttribute", + "cudnnGetFoldedConvBackwardDataDescriptors", + "cudnnGetFilterSizeInBytes", + "cudnnGetDropoutDescriptor", + "cudnnGetCudartVersion", + "cudnnGetConvolutionReorderType", + "cudnnGetConvolutionNdForwardOutputDim", + "cudnnGetConvolutionNdDescriptor", + "cudnnGetConvolutionMathType", + "cudnnGetConvolutionGroupCount", + 
"cudnnGetConvolutionForwardAlgorithm_v7", + "cudnnGetConvolutionForwardAlgorithmMaxCount", + "cudnnGetConvolutionBackwardFilterAlgorithm_v7", + "cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", + "cudnnGetConvolutionBackwardDataAlgorithm_v7", + "cudnnGetConvolutionBackwardDataAlgorithmMaxCount", + "cudnnGetCallback", + "cudnnGetCTCLossWorkspaceSize_v8", + "cudnnGetCTCLossWorkspaceSize", + "cudnnGetCTCLossDescriptor_v9", + "cudnnGetCTCLossDescriptor_v8", + "cudnnGetCTCLossDescriptorEx", + "cudnnGetCTCLossDescriptor", + "cudnnGetBatchNormalizationTrainingExReserveSpaceSize", + "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", + "cudnnGetBatchNormalizationBackwardExWorkspaceSize", + "cudnnGetAttnDescriptor", + "cudnnGetAlgorithmSpaceSize", + "cudnnGetAlgorithmPerformance", + "cudnnGetAlgorithmDescriptor", + "cudnnGetActivationDescriptorSwishBeta", + "cudnnGenStatsMode_t", + "cudnnFusedOps_t", + "cudnnFusedOpsVariantParamStruct", + "cudnnFusedOpsVariantParamPack_t", + "cudnnFusedOpsVariantParamLabel_t", + "cudnnFusedOpsPointerPlaceHolder_t", + "cudnnFusedOpsPlan_t", + "cudnnFusedOpsPlanStruct", + "cudnnFusedOpsExecute", + "cudnnFusedOpsConstParamStruct", + "cudnnFusedOpsConstParamPack_t", + "cudnnFusedOpsConstParamLabel_t", + "cudnnFraction_t", + "cudnnFractionStruct", + "cudnnForwardMode_t", + "cudnnFoldingDirection_t", + "cudnnFindRNNForwardTrainingAlgorithmEx", + "cudnnFindRNNForwardInferenceAlgorithmEx", + "cudnnFindRNNBackwardWeightsAlgorithmEx", + "cudnnFindRNNBackwardDataAlgorithmEx", + "cudnnFilterStruct", + "cudnnErrQueryMode_t", + "cudnnDropoutStruct", + "cudnnDropoutGetReserveSpaceSize", + "cudnnDropoutForward", + "cudnnDropoutBackward", + "cudnnDivisiveNormalizationForward", + "cudnnDivisiveNormalizationBackward", + "cudnnDivNormMode_t", + "cudnnDeterminism_t", + "cudnnDestroyTensorTransformDescriptor", + "cudnnDestroySpatialTransformerDescriptor", + "cudnnDestroySeqDataDescriptor", + "cudnnDestroyRNNDataDescriptor", + 
"cudnnDestroyFusedOpsVariantParamPack", + "cudnnDestroyFusedOpsPlan", + "cudnnDestroyFusedOpsConstParamPack", + "cudnnDestroyCTCLossDescriptor", + "cudnnDestroyAttnDescriptor", + "cudnnDestroyAlgorithmPerformance", + "cudnnDestroyAlgorithmDescriptor", + "cudnnDeriveNormTensorDescriptor", + "cudnnDebug_t", + "cudnnDebugStruct", + "cudnnCreateTensorTransformDescriptor", + "cudnnCreateSpatialTransformerDescriptor", + "cudnnCreateSeqDataDescriptor", + "cudnnCreateRNNDataDescriptor", + "cudnnCreateFusedOpsVariantParamPack", + "cudnnCreateFusedOpsPlan", + "cudnnCreateFusedOpsConstParamPack", + "cudnnCreateCTCLossDescriptor", + "cudnnCreateAttnDescriptor", + "cudnnCreateAlgorithmPerformance", + "cudnnCreateAlgorithmDescriptor", + "cudnnCopyAlgorithmDescriptor", + "cudnnConvolutionStruct", + "cudnnConvolutionBiasActivationForward", + "cudnnContext", + "cudnnCnnTrainVersionCheck", + "cudnnCnnInferVersionCheck", + "cudnnCallback_t", + "cudnnCTCLoss_v8", + "cudnnCTCLossStruct", + "cudnnCTCLossDescriptor_t", + "cudnnCTCLossAlgo_t", + "cudnnCTCLoss", + "cudnnCTCGradMode_t", + "cudnnBuildRNNDynamic", + "cudnnBnFinalizeStatsMode_t", + "cudnnBatchNormalizationForwardTrainingEx", + "cudnnBatchNormalizationBackwardEx", + "cudnnBatchNormOps_t", + "cudnnBackendUpdateCudaGraph", + "cudnnBackendTensorReordering_t", + "cudnnBackendSetAttribute", + "cudnnBackendPopulateCudaGraph", + "cudnnBackendNumericalNote_t", + "cudnnBackendNormMode_t", + "cudnnBackendNormFwdPhase_t", + "cudnnBackendLayoutType_t", + "cudnnBackendKnobType_t", + "cudnnBackendInitialize", + "cudnnBackendHeurMode_t", + "cudnnBackendGetAttribute", + "cudnnBackendFinalize", + "cudnnBackendExecute", + "cudnnBackendDestroyDescriptor", + "cudnnBackendDescriptor_t", + "cudnnBackendDescriptorType_t", + "cudnnBackendCreateDescriptor", + "cudnnBackendBehaviorNote_t", + "cudnnBackendAttributeType_t", + "cudnnBackendAttributeName_t", + "cudnnAttnStruct", + "cudnnAttnQueryMap_t", + "cudnnAttnDescriptor_t", + "cudnnAlgorithm_t", + 
"cudnnAlgorithmUnionStruct", + "cudnnAlgorithmStruct", + "cudnnAlgorithmPerformance_t", + "cudnnAlgorithmPerformanceStruct", + "cudnnAlgorithmDescriptor_t", + "cudnnAdvVersionCheck", + "cudnnAdvTrainVersionCheck", + "cudnnAdvInferVersionCheck", + "cudnnActivationStruct", "cublasZtrttp", "cublasZtrsm_v2_64", "cublasZtrsm_64", @@ -12396,6 +11726,676 @@ sub warnHipOnlyUnsupportedFunctions { "cublasAsumEx_64", "cublasAsumEx", "cublasAlloc", + "csrsv2Info", + "csrsm2Info", + "__curand_umul", + "CUSPARSE_SPSV_UPDATE_GENERAL", + "CUSPARSE_SPSV_UPDATE_DIAGONAL", + "CUSPARSE_SPSM_UPDATE_GENERAL", + "CUSPARSE_SPSM_UPDATE_DIAGONAL", + "CUSPARSE_SPMV_SELL_ALG1", + "CUSPARSE_SPMM_OP_ALG_DEFAULT", + "CUSPARSE_SPMM_BSR_ALG1", + "CUSPARSE_SPMMA_PREPROCESS", + "CUSPARSE_SPMMA_ALG4", + "CUSPARSE_SPMMA_ALG3", + "CUSPARSE_SPMMA_ALG2", + "CUSPARSE_SPMMA_ALG1", + "CUSPARSE_SIDE_RIGHT", + "CUSPARSE_SIDE_LEFT", + "CUSPARSE_FORMAT_SLICED_ELLPACK", + "CUSPARSE_FORMAT_BSR", + "CUSPARSE_COLOR_ALG1", + "CUSPARSE_COLOR_ALG0", + "CUSPARSE_ALG_NAIVE", + "CUSPARSE_ALG_MERGE_PATH", + "CUSPARSE_ALG1", + "CUSPARSE_ALG0", + "CURAND_REJECTION", + "CURAND_POISSON", + "CURAND_M2", + "CURAND_M1", + "CURAND_KNUTH", + "CURAND_ITR", + "CURAND_HITR", + "CURAND_FAST_REJECTION", + "CURAND_DISCRETE_GAUSS", + "CURAND_DEVICE_API", + "CURAND_DEFINITION", + "CURAND_CHOOSE_BEST", + "CURAND_BINARY_SEARCH", + "CURAND_3RD", + "CUDNN_ZERO_PAD", + "CUDNN_WGRAD_MODE_SET", + "CUDNN_WGRAD_MODE_ADD", + "CUDNN_TYPE_VOID_PTR", + "CUDNN_TYPE_TENSOR_REORDERING_MODE", + "CUDNN_TYPE_SIGNAL_MODE", + "CUDNN_TYPE_RNG_DISTRIBUTION", + "CUDNN_TYPE_RESAMPLE_MODE", + "CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", + "CUDNN_TYPE_POINTWISE_MODE", + "CUDNN_TYPE_PADDING_MODE", + "CUDNN_TYPE_NUMERICAL_NOTE", + "CUDNN_TYPE_NORM_MODE", + "CUDNN_TYPE_NORM_FWD_PHASE", + "CUDNN_TYPE_NAN_PROPOGATION", + "CUDNN_TYPE_LAYOUT_TYPE", + "CUDNN_TYPE_KNOB_TYPE", + "CUDNN_TYPE_INT64", + "CUDNN_TYPE_INT32", + "CUDNN_TYPE_HEUR_MODE", + "CUDNN_TYPE_HANDLE", + 
"CUDNN_TYPE_GENSTATS_MODE", + "CUDNN_TYPE_FRACTION", + "CUDNN_TYPE_FLOAT", + "CUDNN_TYPE_DOUBLE", + "CUDNN_TYPE_DATA_TYPE", + "CUDNN_TYPE_CONVOLUTION_MODE", + "CUDNN_TYPE_CHAR", + "CUDNN_TYPE_BOOLEAN", + "CUDNN_TYPE_BN_FINALIZE_STATS_MODE", + "CUDNN_TYPE_BEHAVIOR_NOTE", + "CUDNN_TYPE_BACKEND_DESCRIPTOR", + "CUDNN_TYPE_ATTRIB_NAME", + "CUDNN_TRANSFORM_UNFOLD", + "CUDNN_TRANSFORM_FOLD", + "CUDNN_TENSOR_REORDERING_NONE", + "CUDNN_TENSOR_REORDERING_INT8x32", + "CUDNN_TENSOR_REORDERING_F16x16", + "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", + "CUDNN_STATUS_VERSION_MISMATCH", + "CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", + "CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", + "CUDNN_STATUS_SPECIFIC_ERROR", + "CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", + "CUDNN_STATUS_RUNTIME_IN_PROGRESS", + "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", + "CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE", + "CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", + "CUDNN_STATUS_NOT_SUPPORTED_SHAPE", + "CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING", + "CUDNN_STATUS_NOT_SUPPORTED_PADDING", + "CUDNN_STATUS_NOT_SUPPORTED_LAYOUT", + "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER", + "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART", + "CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", + "CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", + "CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", + "CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", + "CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH", + "CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", + "CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM", + "CUDNN_STATUS_FULL_ERROR_CODE", + "CUDNN_STATUS_EXECUTION_FAILED_CURAND", + "CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER", + "CUDNN_STATUS_EXECUTION_FAILED_CUDART", + "CUDNN_STATUS_EXECUTION_FAILED_CUBLAS", + 
"CUDNN_STATUS_DEPRECATED", + "CUDNN_STATUS_CATEGORY", + "CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT", + "CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND", + "CUDNN_STATUS_BAD_PARAM_NULL_POINTER", + "CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED", + "CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER", + "CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", + "CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", + "CUDNN_SIGNAL_WAIT", + "CUDNN_SIGNAL_SET", + "CUDNN_SEV_WARNING_EN", + "CUDNN_SEV_WARNING", + "CUDNN_SEV_INFO_EN", + "CUDNN_SEV_INFO", + "CUDNN_SEV_FATAL", + "CUDNN_SEV_ERROR_EN", + "CUDNN_SEV_ERROR", + "CUDNN_SEQDATA_VECT_DIM", + "CUDNN_SEQDATA_TIME_DIM", + "CUDNN_SEQDATA_DIM_COUNT", + "CUDNN_SEQDATA_BEAM_DIM", + "CUDNN_SEQDATA_BATCH_DIM", + "CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", + "CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", + "CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", + "CUDNN_SCALAR_DOUBLE_BN_EPSILON", + "CUDNN_SAMPLER_BILINEAR", + "CUDNN_RNN_PADDED_IO_ENABLED", + "CUDNN_RNN_PADDED_IO_DISABLED", + "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", + "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", + "CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", + "CUDNN_RNN_CLIP_NONE", + "CUDNN_RNN_CLIP_MINMAX", + "CUDNN_RNN_ALGO_COUNT", + "CUDNN_RNG_DISTRIBUTION_UNIFORM", + "CUDNN_RNG_DISTRIBUTION_NORMAL", + "CUDNN_RNG_DISTRIBUTION_BERNOULLI", + "CUDNN_RMS_NORM", + "CUDNN_RESAMPLE_NEAREST", + "CUDNN_RESAMPLE_MAXPOOL", + "CUDNN_RESAMPLE_BILINEAR", + "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL", + "CUDNN_PTR_ZDATA", + "CUDNN_PTR_YSUM", + "CUDNN_PTR_YSQSUM", + "CUDNN_PTR_YDATA", + "CUDNN_PTR_XDATA", + "CUDNN_PTR_WORKSPACE", + "CUDNN_PTR_WDATA", + "CUDNN_PTR_NULL", + "CUDNN_PTR_ELEM_ALIGNED", + "CUDNN_PTR_DZDATA", + "CUDNN_PTR_DYDATA", + "CUDNN_PTR_DXDATA", + "CUDNN_PTR_DWDATA", + "CUDNN_PTR_BN_Z_EQSCALE", + "CUDNN_PTR_BN_Z_EQBIAS", 
+ "CUDNN_PTR_BN_SCALE", + "CUDNN_PTR_BN_SAVED_MEAN", + "CUDNN_PTR_BN_SAVED_INVSTD", + "CUDNN_PTR_BN_RUNNING_VAR", + "CUDNN_PTR_BN_RUNNING_MEAN", + "CUDNN_PTR_BN_EQSCALE", + "CUDNN_PTR_BN_EQBIAS", + "CUDNN_PTR_BN_DSCALE", + "CUDNN_PTR_BN_DBIAS", + "CUDNN_PTR_BN_BIAS", + "CUDNN_PTR_ACTIVATION_BITMASK", + "CUDNN_PTR_16B_ALIGNED", + "CUDNN_POINTWISE_TANH_FWD", + "CUDNN_POINTWISE_TANH_BWD", + "CUDNN_POINTWISE_TAN", + "CUDNN_POINTWISE_SWISH_FWD", + "CUDNN_POINTWISE_SWISH_BWD", + "CUDNN_POINTWISE_SUB", + "CUDNN_POINTWISE_SQRT", + "CUDNN_POINTWISE_SOFTPLUS_FWD", + "CUDNN_POINTWISE_SOFTPLUS_BWD", + "CUDNN_POINTWISE_SIN", + "CUDNN_POINTWISE_SIGMOID_FWD", + "CUDNN_POINTWISE_SIGMOID_BWD", + "CUDNN_POINTWISE_RSQRT", + "CUDNN_POINTWISE_RELU_FWD", + "CUDNN_POINTWISE_RELU_BWD", + "CUDNN_POINTWISE_RECIPROCAL", + "CUDNN_POINTWISE_POW", + "CUDNN_POINTWISE_NEG", + "CUDNN_POINTWISE_MUL", + "CUDNN_POINTWISE_MOD", + "CUDNN_POINTWISE_MIN", + "CUDNN_POINTWISE_MAX", + "CUDNN_POINTWISE_LOGICAL_OR", + "CUDNN_POINTWISE_LOGICAL_NOT", + "CUDNN_POINTWISE_LOGICAL_AND", + "CUDNN_POINTWISE_LOG", + "CUDNN_POINTWISE_IDENTITY", + "CUDNN_POINTWISE_GEN_INDEX", + "CUDNN_POINTWISE_GELU_FWD", + "CUDNN_POINTWISE_GELU_BWD", + "CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", + "CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", + "CUDNN_POINTWISE_FLOOR", + "CUDNN_POINTWISE_EXP", + "CUDNN_POINTWISE_ERF", + "CUDNN_POINTWISE_ELU_FWD", + "CUDNN_POINTWISE_ELU_BWD", + "CUDNN_POINTWISE_DIV", + "CUDNN_POINTWISE_COS", + "CUDNN_POINTWISE_CMP_NEQ", + "CUDNN_POINTWISE_CMP_LT", + "CUDNN_POINTWISE_CMP_LE", + "CUDNN_POINTWISE_CMP_GT", + "CUDNN_POINTWISE_CMP_GE", + "CUDNN_POINTWISE_CMP_EQ", + "CUDNN_POINTWISE_CEIL", + "CUDNN_POINTWISE_BINARY_SELECT", + "CUDNN_POINTWISE_ATAN2", + "CUDNN_POINTWISE_ADD_SQUARE", + "CUDNN_POINTWISE_ADD", + "CUDNN_POINTWISE_ABS", + "CUDNN_PARAM_ZDESC", + "CUDNN_PARAM_ZDATA_PLACEHOLDER", + "CUDNN_PARAM_YSUM_PLACEHOLDER", + "CUDNN_PARAM_YSTATS_DESC", + "CUDNN_PARAM_YSQSUM_PLACEHOLDER", + "CUDNN_PARAM_YDESC", + 
"CUDNN_PARAM_YDATA_PLACEHOLDER", + "CUDNN_PARAM_XDESC", + "CUDNN_PARAM_XDATA_PLACEHOLDER", + "CUDNN_PARAM_WDESC", + "CUDNN_PARAM_WDATA_PLACEHOLDER", + "CUDNN_PARAM_DZDESC", + "CUDNN_PARAM_DZDATA_PLACEHOLDER", + "CUDNN_PARAM_DYDESC", + "CUDNN_PARAM_DYDATA_PLACEHOLDER", + "CUDNN_PARAM_DXDESC", + "CUDNN_PARAM_DXDATA_PLACEHOLDER", + "CUDNN_PARAM_DWDESC", + "CUDNN_PARAM_DWDATA_PLACEHOLDER", + "CUDNN_PARAM_CONV_DESC", + "CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", + "CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_SCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", + "CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", + "CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", + "CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", + "CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", + "CUDNN_PARAM_BN_MODE", + "CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_EQSCALEBIAS_DESC", + "CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_BIAS_PLACEHOLDER", + "CUDNN_PARAM_ACTIVATION_DESC", + "CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", + "CUDNN_PARAM_ACTIVATION_BITMASK_DESC", + "CUDNN_OP_TENSOR_NOT", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", + "CUDNN_NUMERICAL_NOTE_WINOGRAD", + "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", + "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", + "CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP", + "CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION", + "CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", + "CUDNN_NUMERICAL_NOTE_FFT", + "CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", + "CUDNN_NO_REORDER", + "CUDNN_NORM_PER_CHANNEL", + "CUDNN_NORM_PER_ACTIVATION", + "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", + "CUDNN_NORM_OPS_NORM_ACTIVATION", + "CUDNN_NORM_OPS_NORM", + "CUDNN_NORM_FWD_TRAINING", + "CUDNN_NORM_FWD_INFERENCE", + "CUDNN_NORM_ALGO_STANDARD", + "CUDNN_NORM_ALGO_PERSIST", + "CUDNN_NON_DETERMINISTIC", + 
"CUDNN_NEG_INF_PAD", + "CUDNN_MH_ATTN_V_WEIGHTS", + "CUDNN_MH_ATTN_V_BIASES", + "CUDNN_MH_ATTN_Q_WEIGHTS", + "CUDNN_MH_ATTN_Q_BIASES", + "CUDNN_MH_ATTN_O_WEIGHTS", + "CUDNN_MH_ATTN_O_BIASES", + "CUDNN_MH_ATTN_K_WEIGHTS", + "CUDNN_MH_ATTN_K_BIASES", + "CUDNN_LRN_MIN_N", + "CUDNN_LRN_MIN_K", + "CUDNN_LRN_MIN_BETA", + "CUDNN_LRN_MAX_N", + "CUDNN_LOSS_NORMALIZATION_SOFTMAX", + "CUDNN_LOSS_NORMALIZATION_NONE", + "CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK", + "CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK", + "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", + "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", + "CUDNN_LAYOUT_TYPE_COUNT", + "CUDNN_LAYER_NORM", + "CUDNN_KNOB_TYPE_WORKSPACE", + "CUDNN_KNOB_TYPE_WINO_TILE", + "CUDNN_KNOB_TYPE_USE_TEX", + "CUDNN_KNOB_TYPE_TILE_SIZE", + "CUDNN_KNOB_TYPE_TILE_ROWS", + "CUDNN_KNOB_TYPE_TILE_COLS", + "CUDNN_KNOB_TYPE_TILE_CGA_N", + "CUDNN_KNOB_TYPE_TILE_CGA_M", + "CUDNN_KNOB_TYPE_TILE_CGA", + "CUDNN_KNOB_TYPE_TILEK", + "CUDNN_KNOB_TYPE_SWIZZLE", + "CUDNN_KNOB_TYPE_STAGES", + "CUDNN_KNOB_TYPE_SPLIT_RS", + "CUDNN_KNOB_TYPE_SPLIT_K_SLC", + "CUDNN_KNOB_TYPE_SPLIT_K_BUF", + "CUDNN_KNOB_TYPE_SPLIT_K", + "CUDNN_KNOB_TYPE_SPLIT_H", + "CUDNN_KNOB_TYPE_SPLIT_COLS", + "CUDNN_KNOB_TYPE_SPECFILT", + "CUDNN_KNOB_TYPE_SLICED", + "CUDNN_KNOB_TYPE_SINGLEBUFFER", + "CUDNN_KNOB_TYPE_REDUCTION_MODE", + "CUDNN_KNOB_TYPE_OCCUPANCY", + "CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK", + "CUDNN_KNOB_TYPE_MULTIPLY", + "CUDNN_KNOB_TYPE_LOAD_SIZE", + "CUDNN_KNOB_TYPE_LDGC", + "CUDNN_KNOB_TYPE_LDGB", + "CUDNN_KNOB_TYPE_LDGA", + "CUDNN_KNOB_TYPE_KERNEL_CFG", + "CUDNN_KNOB_TYPE_KBLOCK", + "CUDNN_KNOB_TYPE_IDX_MODE", + "CUDNN_KNOB_TYPE_EDGE", + "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", + "CUDNN_KNOB_TYPE_COUNTS", + "CUDNN_KNOB_TYPE_CHUNK_K", + "CUDNN_KNOB_TYPE_BLOCK_SIZE", + "CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD", + "CUDNN_INSTANCE_NORM", + "CUDNN_HEUR_MODE_INSTANT", + "CUDNN_HEUR_MODE_FALLBACK", + "CUDNN_HEUR_MODE_B", + "CUDNN_HEUR_MODE_A", + "CUDNN_HEUR_MODES_COUNT", + "CUDNN_GROUP_NORM", + "CUDNN_GENSTATS_SUM_SQSUM", + 
"CUDNN_FWD_MODE_TRAINING", + "CUDNN_FWD_MODE_INFERENCE", + "CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", + "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", + "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", + "CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", + "CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", + "CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", + "CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", + "CUDNN_FMA_MATH", + "CUDNN_ERRQUERY_RAWCODE", + "CUDNN_ERRQUERY_NONBLOCKING", + "CUDNN_ERRQUERY_BLOCKING", + "CUDNN_EDGE_VAL_PAD", + "CUDNN_DIVNORM_PRECOMPUTED_MEANS", + "CUDNN_DIM_MAX", + "CUDNN_DETERMINISTIC", + "CUDNN_DEFAULT_REORDER", + "CUDNN_DATA_UINT8x4", + "CUDNN_DATA_UINT8", + "CUDNN_DATA_INT8x32", + "CUDNN_DATA_INT64", + "CUDNN_DATA_FP8_E5M2", + "CUDNN_DATA_FP8_E4M3", + "CUDNN_DATA_FAST_FLOAT_FOR_FP8", + "CUDNN_DATA_BOOLEAN", + "CUDNN_DATA_BFLOAT16", + "CUDNN_CTC_ZERO_OOB_GRADIENTS", + "CUDNN_CTC_SKIP_OOB_GRADIENTS", + "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", + "CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", + "CUDNN_BN_FINALIZE_STATISTICS_TRAINING", + "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", + "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", + "CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", + "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", + "CUDNN_BATCH_NORM", + "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", + "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", + "CUDNN_BATCHNORM_OPS_BN", + "CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR", + "CUDNN_BACKEND_TENSOR_DESCRIPTOR", + "CUDNN_BACKEND_RNG_DESCRIPTOR", + "CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", + "CUDNN_BACKEND_REDUCTION_DESCRIPTOR", + "CUDNN_BACKEND_POINTWISE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", + 
"CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", + "CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR", + "CUDNN_BACKEND_MATMUL_DESCRIPTOR", + "CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR", + "CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR", + "CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR", + "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", + "CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR", + "CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR", + "CUDNN_BACKEND_ENGINE_DESCRIPTOR", + "CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR", + "CUDNN_BACKEND_ENGINECFG_DESCRIPTOR", + "CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR", + "CUDNN_ATTR_VARIANT_PACK_WORKSPACE", + "CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", + "CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES", + "CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS", + "CUDNN_ATTR_TENSOR_VECTOR_COUNT", + "CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION", + "CUDNN_ATTR_TENSOR_UNIQUE_ID", + "CUDNN_ATTR_TENSOR_STRIDES", + "CUDNN_ATTR_TENSOR_REORDERING_MODE", + "CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC", + "CUDNN_ATTR_TENSOR_IS_VIRTUAL", + "CUDNN_ATTR_TENSOR_IS_BY_VALUE", + "CUDNN_ATTR_TENSOR_DIMENSIONS", + "CUDNN_ATTR_TENSOR_DATA_TYPE", + "CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT", + "CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM", + "CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM", + "CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION", + "CUDNN_ATTR_RNG_NORMAL_DIST_MEAN", + "CUDNN_ATTR_RNG_DISTRIBUTION", + 
"CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY", + "CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", + "CUDNN_ATTR_RESAMPLE_STRIDES", + "CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", + "CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", + "CUDNN_ATTR_RESAMPLE_POST_PADDINGS", + "CUDNN_ATTR_RESAMPLE_PADDING_MODE", + "CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", + "CUDNN_ATTR_RESAMPLE_MODE", + "CUDNN_ATTR_RESAMPLE_COMP_TYPE", + "CUDNN_ATTR_REDUCTION_OPERATOR", + "CUDNN_ATTR_REDUCTION_COMP_TYPE", + "CUDNN_ATTR_POINTWISE_SWISH_BETA", + "CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA", + "CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP", + "CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE", + "CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP", + "CUDNN_ATTR_POINTWISE_NAN_PROPAGATION", + "CUDNN_ATTR_POINTWISE_MODE", + "CUDNN_ATTR_POINTWISE_MATH_PREC", + "CUDNN_ATTR_POINTWISE_ELU_ALPHA", + "CUDNN_ATTR_POINTWISE_AXIS", + "CUDNN_ATTR_OPERATION_SIGNAL_YDESC", + "CUDNN_ATTR_OPERATION_SIGNAL_XDESC", + "CUDNN_ATTR_OPERATION_SIGNAL_VALUE", + "CUDNN_ATTR_OPERATION_SIGNAL_MODE", + "CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", + "CUDNN_ATTR_OPERATION_RNG_YDESC", + "CUDNN_ATTR_OPERATION_RNG_SEED", + "CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC", + "CUDNN_ATTR_OPERATION_RNG_DESC", + "CUDNN_ATTR_OPERATION_RESHAPE_YDESC", + "CUDNN_ATTR_OPERATION_RESHAPE_XDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", + "CUDNN_ATTR_OPERATION_REDUCTION_YDESC", + "CUDNN_ATTR_OPERATION_REDUCTION_XDESC", + 
"CUDNN_ATTR_OPERATION_REDUCTION_DESC", + "CUDNN_ATTR_OPERATION_POINTWISE_YDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_XDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_TDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR", + "CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_BDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", + "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", + "CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", + "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_MODE", + "CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", + "CUDNN_ATTR_OPERATION_NORM_BWD_MODE", + "CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT", + 
"CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_CDESC", + "CUDNN_ATTR_OPERATION_MATMUL_BDESC", + "CUDNN_ATTR_OPERATION_MATMUL_ADESC", + "CUDNN_ATTR_OPERATION_GENSTATS_XDESC", + "CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC", + "CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC", + "CUDNN_ATTR_OPERATION_GENSTATS_MODE", + "CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA", + "CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", + "CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", + "CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", + "CUDNN_ATTR_OPERATION_CONCAT_AXIS", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC", + 
"CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC", + "CUDNN_ATTR_OPERATIONGRAPH_OPS", + "CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", + "CUDNN_ATTR_OPERATIONGRAPH_HANDLE", + "CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT", + "CUDNN_ATTR_MATMUL_PADDING_VALUE", + "CUDNN_ATTR_MATMUL_COMP_TYPE", + "CUDNN_ATTR_LAYOUT_INFO_TYPES", + "CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID", + "CUDNN_ATTR_KNOB_INFO_TYPE", + "CUDNN_ATTR_KNOB_INFO_STRIDE", + "CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE", + "CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE", + "CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE", + "CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE", + "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", + "CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", + "CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", + "CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", + "CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", + 
"CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES", + "CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", + "CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", + "CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", + "CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", + "CUDNN_ATTR_EXECUTION_PLAN_HANDLE", + "CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG", + "CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", + "CUDNN_ATTR_ENGINE_SM_COUNT_TARGET", + "CUDNN_ATTR_ENGINE_OPERATION_GRAPH", + "CUDNN_ATTR_ENGINE_NUMERICAL_NOTE", + "CUDNN_ATTR_ENGINE_LAYOUT_INFO", + "CUDNN_ATTR_ENGINE_KNOB_INFO", + "CUDNN_ATTR_ENGINE_GLOBAL_INDEX", + "CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE", + "CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET", + "CUDNN_ATTR_ENGINEHEUR_RESULTS", + "CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH", + "CUDNN_ATTR_ENGINEHEUR_MODE", + "CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", + "CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", + "CUDNN_ATTR_ENGINECFG_KNOB_CHOICES", + "CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO", + "CUDNN_ATTR_ENGINECFG_ENGINE", + "CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS", + "CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS", + "CUDNN_ATTR_CONVOLUTION_POST_PADDINGS", + "CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES", + "CUDNN_ATTR_CONVOLUTION_DILATIONS", + "CUDNN_ATTR_CONVOLUTION_CONV_MODE", + "CUDNN_ATTR_CONVOLUTION_COMP_TYPE", + "CUDNN_ATTN_WKIND_COUNT", + "CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", + "CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", + "CUDNN_ATTN_ENABLE_PROJ_BIASES", + "CUDNN_ATTN_DISABLE_PROJ_BIASES", "CUDA_R_8F_E5M2", "CUDA_R_8F_E4M3", "CUDA_R_64U", @@ -12695,6 +12695,513 @@ sub warnRocOnlyUnsupportedFunctions { my $line_num = shift; my $k = 0; foreach $func ( + "cusparseZhybsv_solve", + "cusparseZhybsv_analysis", + "cusparseZhyb2dense", + "cusparseZhyb2csr", + "cusparseZhyb2csc", + "cusparseZgtsv_nopivot", + "cusparseZgtsvStridedBatch", + "cusparseZgtsv", + "cusparseZgemmi", + "cusparseZgebsr2gebsr_bufferSizeExt", + "cusparseZgebsr2gebsc_bufferSizeExt", + "cusparseZdense2hyb", + "cusparseZcsru2csr_bufferSizeExt", + 
"cusparseZcsru2csr", + "cusparseZcsrsv_solve", + "cusparseZcsrsv_analysis", + "cusparseZcsrsm_solve", + "cusparseZcsrsm_analysis", + "cusparseZcsrmv_mp", + "cusparseZcsrilu0", + "cusparseZcsric0", + "cusparseZcsrgemm", + "cusparseZcsrgeam2_bufferSizeExt", + "cusparseZcsr2gebsr_bufferSizeExt", + "cusparseZcsr2csru", + "cusparseZcsr2csc", + "cusparseZcsc2hyb", + "cusparseZbsrsm2_bufferSizeExt", + "cusparseZbsrilu02_bufferSizeExt", + "cusparseZbsric02_bufferSizeExt", + "cusparseXgebsr2csr", + "cusparseXcsrgemmNnz", + "cusparseSpSV_updateMatrix", + "cusparseSpSV_solve", + "cusparseSpSV_destroyDescr", + "cusparseSpSV_createDescr", + "cusparseSpSV_analysis", + "cusparseSpSVUpdate_t", + "cusparseSpSVDescr_t", + "cusparseSpSVDescr", + "cusparseSpSM_updateMatrix", + "cusparseSpSM_destroyDescr", + "cusparseSpSM_createDescr", + "cusparseSpSM_bufferSize", + "cusparseSpSMUpdate_t", + "cusparseSpSMDescr_t", + "cusparseSpSMDescr", + "cusparseSpMatSetNumBatches", + "cusparseSpMatGetNumBatches", + "cusparseSpMV_preprocess", + "cusparseSpMMOp_destroyPlan", + "cusparseSpMMOp_createPlan", + "cusparseSpMMOpPlan_t", + "cusparseSpMMOpPlan", + "cusparseSpMMOpAlg_t", + "cusparseSpMMOp", + "cusparseSpGEMMreuse_workEstimation", + "cusparseSpGEMMreuse_nnz", + "cusparseSpGEMMreuse_copy", + "cusparseSpGEMMreuse_compute", + "cusparseSpGEMM_workEstimation", + "cusparseSpGEMM_getNumProducts", + "cusparseSpGEMM_estimateMemory", + "cusparseSpGEMM_destroyDescr", + "cusparseSpGEMM_createDescr", + "cusparseSpGEMM_copy", + "cusparseSpGEMM_compute", + "cusparseSpGEMMDescr_t", + "cusparseSpGEMMDescr", + "cusparseSolveAnalysisInfo_t", + "cusparseSolveAnalysisInfo", + "cusparseSideMode_t", + "cusparseShybsv_solve", + "cusparseShybsv_analysis", + "cusparseShyb2dense", + "cusparseShyb2csr", + "cusparseShyb2csc", + "cusparseSgtsv_nopivot", + "cusparseSgtsvStridedBatch", + "cusparseSgtsv", + "cusparseSgemmi", + "cusparseSgebsr2gebsr_bufferSizeExt", + "cusparseSgebsr2gebsc_bufferSizeExt", + "cusparseSdense2hyb", 
+ "cusparseScsru2csr_bufferSizeExt", + "cusparseScsru2csr", + "cusparseScsrsv_solve", + "cusparseScsrsv_analysis", + "cusparseScsrsm_solve", + "cusparseScsrsm_analysis", + "cusparseScsrmv_mp", + "cusparseScsrilu0", + "cusparseScsric0", + "cusparseScsrgemm", + "cusparseScsrgeam2_bufferSizeExt", + "cusparseScsr2gebsr_bufferSizeExt", + "cusparseScsr2csru", + "cusparseScsr2csc", + "cusparseScsc2hyb", + "cusparseSbsrsm2_bufferSizeExt", + "cusparseSbsrilu02_bufferSizeExt", + "cusparseSbsric02_bufferSizeExt", + "cusparseLoggerSetMask", + "cusparseLoggerSetLevel", + "cusparseLoggerSetFile", + "cusparseLoggerSetCallback", + "cusparseLoggerOpenFile", + "cusparseLoggerForceDisable", + "cusparseLoggerCallback_t", + "cusparseHpruneDense2csr_bufferSizeExt", + "cusparseHpruneDense2csrNnzByPercentage", + "cusparseHpruneDense2csrNnz", + "cusparseHpruneDense2csrByPercentage_bufferSizeExt", + "cusparseHpruneDense2csrByPercentage", + "cusparseHpruneDense2csr", + "cusparseHpruneCsr2csr_bufferSizeExt", + "cusparseHpruneCsr2csrNnzByPercentage", + "cusparseHpruneCsr2csrNnz", + "cusparseHpruneCsr2csrByPercentage_bufferSizeExt", + "cusparseHpruneCsr2csrByPercentage", + "cusparseHpruneCsr2csr", + "cusparseGetLevelInfo", + "cusparseDhybsv_solve", + "cusparseDhybsv_analysis", + "cusparseDhyb2dense", + "cusparseDhyb2csr", + "cusparseDhyb2csc", + "cusparseDgtsv_nopivot", + "cusparseDgtsvStridedBatch", + "cusparseDgtsv", + "cusparseDgemmi", + "cusparseDgebsr2gebsr_bufferSizeExt", + "cusparseDgebsr2gebsc_bufferSizeExt", + "cusparseDestroySolveAnalysisInfo", + "cusparseDestroyCsru2csrInfo", + "cusparseDenseToSparse_convert", + "cusparseDdense2hyb", + "cusparseDcsru2csr_bufferSizeExt", + "cusparseDcsru2csr", + "cusparseDcsrsv_solve", + "cusparseDcsrsv_analysis", + "cusparseDcsrsm_solve", + "cusparseDcsrsm_analysis", + "cusparseDcsrmv_mp", + "cusparseDcsrilu0", + "cusparseDcsric0", + "cusparseDcsrgemm", + "cusparseDcsrgeam2_bufferSizeExt", + "cusparseDcsr2gebsr_bufferSizeExt", + "cusparseDcsr2csru", 
+ "cusparseDcsr2csc", + "cusparseDcsc2hyb", + "cusparseDbsrsm2_bufferSizeExt", + "cusparseDbsrilu02_bufferSizeExt", + "cusparseDbsric02_bufferSizeExt", + "cusparseCsrsv_solveEx", + "cusparseCsrsv_analysisEx", + "cusparseCsrmvEx_bufferSize", + "cusparseCsrmvEx", + "cusparseCsrilu0Ex", + "cusparseCsr2cscEx2", + "cusparseCsr2cscEx", + "cusparseCsr2CscAlg_t", + "cusparseCreateSolveAnalysisInfo", + "cusparseCreateSlicedEll", + "cusparseCreateCsru2csrInfo", + "cusparseCreateConstSlicedEll", + "cusparseCreateConstBsr", + "cusparseCreateBsr", + "cusparseConstrainedGeMM_bufferSize", + "cusparseConstrainedGeMM", + "cusparseColorAlg_t", + "cusparseChybsv_solve", + "cusparseChybsv_analysis", + "cusparseChyb2dense", + "cusparseChyb2csr", + "cusparseChyb2csc", + "cusparseCgtsv_nopivot", + "cusparseCgtsvStridedBatch", + "cusparseCgtsv", + "cusparseCgemmi", + "cusparseCgebsr2gebsr_bufferSizeExt", + "cusparseCgebsr2gebsc_bufferSizeExt", + "cusparseCdense2hyb", + "cusparseCcsru2csr_bufferSizeExt", + "cusparseCcsru2csr", + "cusparseCcsrsv_solve", + "cusparseCcsrsv_analysis", + "cusparseCcsrsm_solve", + "cusparseCcsrsm_analysis", + "cusparseCcsrmv_mp", + "cusparseCcsrilu0", + "cusparseCcsric0", + "cusparseCcsrgemm", + "cusparseCcsrgeam2_bufferSizeExt", + "cusparseCcsr2gebsr_bufferSizeExt", + "cusparseCcsr2csru", + "cusparseCcsr2csc", + "cusparseCcsc2hyb", + "cusparseCbsrsm2_bufferSizeExt", + "cusparseCbsrilu02_bufferSizeExt", + "cusparseCbsric02_bufferSizeExt", + "cusparseBsrSetStridedBatch", + "cusparseAlgMode_t", + "curand_mtgp32_specific", + "curand_mtgp32_single_specific", + "curand_mtgp32_single", + "curand_Philox4x32_10", + "curandState_t", + "curandStateXORWOW_t", + "curandStateXORWOW", + "curandState", + "curandMethod_t", + "curandMethod", + "curandHistogramM2_t", + "curandHistogramM2_st", + "curandHistogramM2V_t", + "curandHistogramM2V_st", + "curandHistogramM2K_t", + "curandHistogramM2K_st", + "curandGetProperty", + "curandDistribution_t", + "curandDistribution_st", + 
"curandDistributionShift_t", + "curandDistributionShift_st", + "curandDistributionM2Shift_t", + "curandDistributionM2Shift_st", + "curandDirectionVectors64_t", + "curandDirectionVectors32_t", + "cudnnWgradMode_t", + "cudnnTransformTensorEx", + "cudnnTransformFilter", + "cudnnTensorTransformStruct", + "cudnnTensorTransformDescriptor_t", + "cudnnTensorStruct", + "cudnnSpatialTransformerStruct", + "cudnnSpatialTransformerDescriptor_t", + "cudnnSpatialTfSamplerForward", + "cudnnSpatialTfSamplerBackward", + "cudnnSpatialTfGridGeneratorForward", + "cudnnSpatialTfGridGeneratorBackward", + "cudnnSignalMode_t", + "cudnnSeverity_t", + "cudnnSetTensorTransformDescriptor", + "cudnnSetTensorNdDescriptorEx", + "cudnnSetTensorNdDescriptor", + "cudnnSetTensor4dDescriptor", + "cudnnSetSpatialTransformerNdDescriptor", + "cudnnSetSeqDataDescriptor", + "cudnnSetRNNProjectionLayers", + "cudnnSetRNNPaddingMode", + "cudnnSetRNNMatrixMathType", + "cudnnSetRNNDescriptor_v8", + "cudnnSetRNNDescriptor_v5", + "cudnnSetRNNDescriptor", + "cudnnSetRNNDataDescriptor", + "cudnnSetRNNBiasMode", + "cudnnSetRNNAlgorithmDescriptor", + "cudnnSetPersistentRNNPlan", + "cudnnSetOpTensorDescriptor", + "cudnnSetFusedOpsVariantParamPackAttribute", + "cudnnSetFusedOpsConstParamPackAttribute", + "cudnnSetFilterNdDescriptor", + "cudnnSetFilter4dDescriptor", + "cudnnSetConvolutionReorderType", + "cudnnSetConvolutionNdDescriptor", + "cudnnSetConvolutionMathType", + "cudnnSetConvolution2dDescriptor", + "cudnnSetCallback", + "cudnnSetCTCLossDescriptor_v9", + "cudnnSetCTCLossDescriptor_v8", + "cudnnSetCTCLossDescriptorEx", + "cudnnSetAttnDescriptor", + "cudnnSetAlgorithmPerformance", + "cudnnSetAlgorithmDescriptor", + "cudnnSetActivationDescriptorSwishBeta", + "cudnnSetActivationDescriptor", + "cudnnSeqDataStruct", + "cudnnSeqDataDescriptor_t", + "cudnnSeqDataAxis_t", + "cudnnSaveAlgorithm", + "cudnnSamplerType_t", + "cudnnRuntimeTag_t", + "cudnnRestoreAlgorithm", + "cudnnResampleMode_t", + "cudnnReorderType_t", + 
"cudnnReorderFilterAndBias", + "cudnnReduceTensorStruct", + "cudnnRNNStruct", + "cudnnRNNSetClip_v9", + "cudnnRNNSetClip_v8", + "cudnnRNNSetClip", + "cudnnRNNGetClip_v9", + "cudnnRNNGetClip_v8", + "cudnnRNNGetClip", + "cudnnRNNForwardTrainingEx", + "cudnnRNNForwardInferenceEx", + "cudnnRNNForward", + "cudnnRNNDataStruct", + "cudnnRNNDataLayout_t", + "cudnnRNNDataDescriptor_t", + "cudnnRNNClipMode_t", + "cudnnRNNBackwardWeights_v8", + "cudnnRNNBackwardWeightsEx", + "cudnnRNNBackwardData_v8", + "cudnnRNNBackwardDataEx", + "cudnnQueryRuntimeError", + "cudnnPoolingStruct", + "cudnnPoolingForward", + "cudnnPoolingBackward", + "cudnnPersistentRNNPlan_t", + "cudnnPersistentRNNPlan", + "cudnnOpsVersionCheck", + "cudnnOpsTrainVersionCheck", + "cudnnOpsInferVersionCheck", + "cudnnOpTensorStruct", + "cudnnOpTensorDescriptor_t", + "cudnnOpTensor", + "cudnnNormalizationForwardTraining", + "cudnnNormalizationForwardInference", + "cudnnNormalizationBackward", + "cudnnNormOps_t", + "cudnnNormMode_t", + "cudnnNormAlgo_t", + "cudnnMultiHeadAttnWeightKind_t", + "cudnnMultiHeadAttnForward", + "cudnnMultiHeadAttnBackwardWeights", + "cudnnMultiHeadAttnBackwardData", + "cudnnMathType_t", + "cudnnMakeFusedOpsPlan", + "cudnnLossNormalizationMode_t", + "cudnnLRNStruct", + "cudnnLRNCrossChannelForward", + "cudnnLRNCrossChannelBackward", + "cudnnInitTransformDest", + "cudnnIm2Col", + "cudnnGraphVersionCheck", + "cudnnGetVersion", + "cudnnGetTensorTransformDescriptor", + "cudnnGetTensorSizeInBytes", + "cudnnGetTensorNdDescriptor", + "cudnnGetSeqDataDescriptor", + "cudnnGetRNNWeightSpaceSize", + "cudnnGetRNNWeightParams", + "cudnnGetRNNTempSpaceSizes", + "cudnnGetRNNProjectionLayers", + "cudnnGetRNNPaddingMode", + "cudnnGetRNNMatrixMathType", + "cudnnGetRNNLinLayerMatrixParams", + "cudnnGetRNNLinLayerBiasParams", + "cudnnGetRNNForwardTrainingAlgorithmMaxCount", + "cudnnGetRNNForwardInferenceAlgorithmMaxCount", + "cudnnGetRNNDescriptor_v8", + "cudnnGetRNNDataDescriptor", + "cudnnGetRNNBiasMode", 
+ "cudnnGetRNNBackwardWeightsAlgorithmMaxCount", + "cudnnGetRNNBackwardDataAlgorithmMaxCount", + "cudnnGetProperty", + "cudnnGetOpTensorDescriptor", + "cudnnGetNormalizationTrainingReserveSpaceSize", + "cudnnGetNormalizationForwardTrainingWorkspaceSize", + "cudnnGetNormalizationBackwardWorkspaceSize", + "cudnnGetMultiHeadAttnWeights", + "cudnnGetMultiHeadAttnBuffers", + "cudnnGetMaxDeviceVersion", + "cudnnGetLastErrorString", + "cudnnGetFusedOpsVariantParamPackAttribute", + "cudnnGetFusedOpsConstParamPackAttribute", + "cudnnGetFoldedConvBackwardDataDescriptors", + "cudnnGetFilterSizeInBytes", + "cudnnGetFilterNdDescriptor", + "cudnnGetFilter4dDescriptor", + "cudnnGetCudartVersion", + "cudnnGetConvolutionReorderType", + "cudnnGetConvolutionNdForwardOutputDim", + "cudnnGetConvolutionNdDescriptor", + "cudnnGetConvolutionMathType", + "cudnnGetConvolutionGroupCount", + "cudnnGetConvolutionForwardAlgorithm_v7", + "cudnnGetConvolutionForwardAlgorithmMaxCount", + "cudnnGetConvolutionForwardAlgorithm", + "cudnnGetConvolutionBackwardFilterWorkspaceSize", + "cudnnGetConvolutionBackwardFilterAlgorithm_v7", + "cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", + "cudnnGetConvolutionBackwardFilterAlgorithm", + "cudnnGetConvolutionBackwardDataAlgorithm_v7", + "cudnnGetConvolutionBackwardDataAlgorithmMaxCount", + "cudnnGetConvolutionBackwardDataAlgorithm", + "cudnnGetConvolution2dDescriptor", + "cudnnGetCallback", + "cudnnGetCTCLossWorkspaceSize_v8", + "cudnnGetCTCLossDescriptor_v9", + "cudnnGetCTCLossDescriptor_v8", + "cudnnGetCTCLossDescriptorEx", + "cudnnGetBatchNormalizationTrainingExReserveSpaceSize", + "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", + "cudnnGetBatchNormalizationBackwardExWorkspaceSize", + "cudnnGetAttnDescriptor", + "cudnnGetAlgorithmSpaceSize", + "cudnnGetAlgorithmPerformance", + "cudnnGetAlgorithmDescriptor", + "cudnnGetActivationDescriptorSwishBeta", + "cudnnGetActivationDescriptor", + "cudnnGenStatsMode_t", + "cudnnFusedOps_t", + 
"cudnnFusedOpsVariantParamStruct", + "cudnnFusedOpsVariantParamPack_t", + "cudnnFusedOpsVariantParamLabel_t", + "cudnnFusedOpsPointerPlaceHolder_t", + "cudnnFusedOpsPlan_t", + "cudnnFusedOpsPlanStruct", + "cudnnFusedOpsExecute", + "cudnnFusedOpsConstParamStruct", + "cudnnFusedOpsConstParamPack_t", + "cudnnFusedOpsConstParamLabel_t", + "cudnnFraction_t", + "cudnnFractionStruct", + "cudnnFoldingDirection_t", + "cudnnFindRNNForwardTrainingAlgorithmEx", + "cudnnFindRNNForwardInferenceAlgorithmEx", + "cudnnFindRNNBackwardWeightsAlgorithmEx", + "cudnnFindRNNBackwardDataAlgorithmEx", + "cudnnFindConvolutionForwardAlgorithm", + "cudnnFindConvolutionBackwardFilterAlgorithmEx", + "cudnnFindConvolutionBackwardFilterAlgorithm", + "cudnnFindConvolutionBackwardDataAlgorithmEx", + "cudnnFindConvolutionBackwardDataAlgorithm", + "cudnnFilterStruct", + "cudnnErrQueryMode_t", + "cudnnDropoutStruct", + "cudnnDivisiveNormalizationForward", + "cudnnDivisiveNormalizationBackward", + "cudnnDivNormMode_t", + "cudnnDeterminism_t", + "cudnnDestroyTensorTransformDescriptor", + "cudnnDestroySpatialTransformerDescriptor", + "cudnnDestroySeqDataDescriptor", + "cudnnDestroyRNNDataDescriptor", + "cudnnDestroyPersistentRNNPlan", + "cudnnDestroyOpTensorDescriptor", + "cudnnDestroyFusedOpsVariantParamPack", + "cudnnDestroyFusedOpsPlan", + "cudnnDestroyFusedOpsConstParamPack", + "cudnnDestroyFilterDescriptor", + "cudnnDestroyAttnDescriptor", + "cudnnDestroyAlgorithmPerformance", + "cudnnDestroyAlgorithmDescriptor", + "cudnnDeriveNormTensorDescriptor", + "cudnnDebug_t", + "cudnnDebugStruct", + "cudnnCreateTensorTransformDescriptor", + "cudnnCreateSpatialTransformerDescriptor", + "cudnnCreateSeqDataDescriptor", + "cudnnCreateRNNDataDescriptor", + "cudnnCreatePersistentRNNPlan", + "cudnnCreateOpTensorDescriptor", + "cudnnCreateFusedOpsVariantParamPack", + "cudnnCreateFusedOpsPlan", + "cudnnCreateFusedOpsConstParamPack", + "cudnnCreateFilterDescriptor", + "cudnnCreateAttnDescriptor", + 
"cudnnCreateAlgorithmPerformance", + "cudnnCreateAlgorithmDescriptor", + "cudnnCopyAlgorithmDescriptor", + "cudnnConvolutionStruct", + "cudnnConvolutionFwdPreference_t", + "cudnnConvolutionBwdFilterPreference_t", + "cudnnConvolutionBwdFilterAlgo_t", + "cudnnConvolutionBwdFilterAlgoPerf_t", + "cudnnConvolutionBwdFilterAlgoPerfStruct", + "cudnnConvolutionBwdDataPreference_t", + "cudnnConvolutionBackwardFilter", + "cudnnCnnTrainVersionCheck", + "cudnnCnnInferVersionCheck", + "cudnnCallback_t", + "cudnnCTCLoss_v8", + "cudnnCTCLossStruct", + "cudnnCTCGradMode_t", + "cudnnBuildRNNDynamic", + "cudnnBnFinalizeStatsMode_t", + "cudnnBatchNormalizationForwardTrainingEx", + "cudnnBatchNormalizationBackwardEx", + "cudnnBatchNormOps_t", + "cudnnBackendUpdateCudaGraph", + "cudnnBackendTensorReordering_t", + "cudnnBackendPopulateCudaGraph", + "cudnnBackendNumericalNote_t", + "cudnnBackendNormMode_t", + "cudnnBackendNormFwdPhase_t", + "cudnnBackendLayoutType_t", + "cudnnBackendKnobType_t", + "cudnnBackendInitialize", + "cudnnBackendBehaviorNote_t", + "cudnnAttnStruct", + "cudnnAttnQueryMap_t", + "cudnnAttnDescriptor_t", + "cudnnAlgorithm_t", + "cudnnAlgorithmUnionStruct", + "cudnnAlgorithmStruct", + "cudnnAlgorithmPerformance_t", + "cudnnAlgorithmPerformanceStruct", + "cudnnAlgorithmDescriptor_t", + "cudnnAdvVersionCheck", + "cudnnAdvTrainVersionCheck", + "cudnnAdvInferVersionCheck", + "cudnnAddTensor", + "cudnnActivationStruct", "cublasZtrttp", "cublasZtrmm_v2_64", "cublasZtrmm_64", @@ -12930,6 +13437,395 @@ sub warnRocOnlyUnsupportedFunctions { "cublasAsumEx_64", "cublasAsumEx", "cublasAlloc", + "csru2csrInfo_t", + "csru2csrInfo", + "__curand_umul", + "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", + "CUSPARSE_STATUS_MAPPING_ERROR", + "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES", + "CUSPARSE_STATUS_EXECUTION_FAILED", + "CUSPARSE_SPSV_UPDATE_GENERAL", + "CUSPARSE_SPSV_UPDATE_DIAGONAL", + "CUSPARSE_SPSM_UPDATE_GENERAL", + "CUSPARSE_SPSM_UPDATE_DIAGONAL", + "CUSPARSE_SPMM_OP_ALG_DEFAULT", 
+ "CUSPARSE_SPMM_COO_ALG4", + "CUSPARSE_SPMM_BSR_ALG1", + "CUSPARSE_SPMMA_PREPROCESS", + "CUSPARSE_SPMMA_ALG4", + "CUSPARSE_SPMMA_ALG3", + "CUSPARSE_SPMMA_ALG2", + "CUSPARSE_SPMMA_ALG1", + "CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC", + "CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC", + "CUSPARSE_SPGEMM_ALG3", + "CUSPARSE_SPGEMM_ALG2", + "CUSPARSE_SPGEMM_ALG1", + "CUSPARSE_SIDE_RIGHT", + "CUSPARSE_SIDE_LEFT", + "CUSPARSE_MV_ALG_DEFAULT", + "CUSPARSE_MM_ALG_DEFAULT", + "CUSPARSE_CSRMV_ALG2", + "CUSPARSE_CSRMV_ALG1", + "CUSPARSE_CSRMM_ALG1", + "CUSPARSE_CSR2CSC_ALG_DEFAULT", + "CUSPARSE_CSR2CSC_ALG2", + "CUSPARSE_CSR2CSC_ALG1", + "CUSPARSE_COOMV_ALG", + "CUSPARSE_COOMM_ALG3", + "CUSPARSE_COOMM_ALG2", + "CUSPARSE_COOMM_ALG1", + "CUSPARSE_COLOR_ALG1", + "CUSPARSE_COLOR_ALG0", + "CUSPARSE_ALG_NAIVE", + "CUSPARSE_ALG_MERGE_PATH", + "CUSPARSE_ALG1", + "CUSPARSE_ALG0", + "CURAND_STATUS_PREEXISTING_FAILURE", + "CURAND_STATUS_INITIALIZATION_FAILED", + "CURAND_STATUS_ARCH_MISMATCH", + "CURAND_RNG_TEST", + "CURAND_REJECTION", + "CURAND_POISSON", + "CURAND_M2", + "CURAND_M1", + "CURAND_KNUTH", + "CURAND_ITR", + "CURAND_HITR", + "CURAND_FAST_REJECTION", + "CURAND_DISCRETE_GAUSS", + "CURAND_DEVICE_API", + "CURAND_DEFINITION", + "CURAND_CHOOSE_BEST", + "CURAND_BINARY_SEARCH", + "CURAND_3RD", + "CUDNN_WGRAD_MODE_SET", + "CUDNN_WGRAD_MODE_ADD", + "CUDNN_TRANSFORM_UNFOLD", + "CUDNN_TRANSFORM_FOLD", + "CUDNN_TENSOR_REORDERING_NONE", + "CUDNN_TENSOR_REORDERING_INT8x32", + "CUDNN_TENSOR_REORDERING_F16x16", + "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", + "CUDNN_TENSOR_OP_MATH", + "CUDNN_TENSOR_NCHW_VECT_C", + "CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", + "CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", + "CUDNN_STATUS_SPECIFIC_ERROR", + "CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", + "CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", + "CUDNN_STATUS_RUNTIME_IN_PROGRESS", + "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", + "CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE", + 
"CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", + "CUDNN_STATUS_NOT_SUPPORTED_SHAPE", + "CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING", + "CUDNN_STATUS_NOT_SUPPORTED_PADDING", + "CUDNN_STATUS_NOT_SUPPORTED_LAYOUT", + "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER", + "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART", + "CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", + "CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", + "CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", + "CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", + "CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH", + "CUDNN_STATUS_MAPPING_ERROR", + "CUDNN_STATUS_LICENSE_ERROR", + "CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", + "CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM", + "CUDNN_STATUS_FULL_ERROR_CODE", + "CUDNN_STATUS_EXECUTION_FAILED_CURAND", + "CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER", + "CUDNN_STATUS_EXECUTION_FAILED_CUDART", + "CUDNN_STATUS_EXECUTION_FAILED_CUBLAS", + "CUDNN_STATUS_EXECUTION_FAILED", + "CUDNN_STATUS_DEPRECATED", + "CUDNN_STATUS_CATEGORY", + "CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT", + "CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND", + "CUDNN_STATUS_BAD_PARAM_NULL_POINTER", + "CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED", + "CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER", + "CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", + "CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", + "CUDNN_STATUS_ARCH_MISMATCH", + "CUDNN_SIGNAL_WAIT", + "CUDNN_SIGNAL_SET", + "CUDNN_SEV_WARNING_EN", + "CUDNN_SEV_WARNING", + "CUDNN_SEV_INFO_EN", + "CUDNN_SEV_INFO", + "CUDNN_SEV_FATAL", + "CUDNN_SEV_ERROR_EN", + "CUDNN_SEV_ERROR", + "CUDNN_SEQDATA_VECT_DIM", + "CUDNN_SEQDATA_TIME_DIM", 
+ "CUDNN_SEQDATA_DIM_COUNT", + "CUDNN_SEQDATA_BEAM_DIM", + "CUDNN_SEQDATA_BATCH_DIM", + "CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", + "CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", + "CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", + "CUDNN_SCALAR_DOUBLE_BN_EPSILON", + "CUDNN_SAMPLER_BILINEAR", + "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", + "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", + "CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", + "CUDNN_RNN_CLIP_NONE", + "CUDNN_RNN_CLIP_MINMAX", + "CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H", + "CUDNN_RNN_ALGO_PERSIST_STATIC", + "CUDNN_RNN_ALGO_PERSIST_DYNAMIC", + "CUDNN_RNN_ALGO_COUNT", + "CUDNN_RMS_NORM", + "CUDNN_RESAMPLE_NEAREST", + "CUDNN_RESAMPLE_MAXPOOL", + "CUDNN_RESAMPLE_BILINEAR", + "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL", + "CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS", + "CUDNN_PTR_ZDATA", + "CUDNN_PTR_YSUM", + "CUDNN_PTR_YSQSUM", + "CUDNN_PTR_YDATA", + "CUDNN_PTR_XDATA", + "CUDNN_PTR_WORKSPACE", + "CUDNN_PTR_WDATA", + "CUDNN_PTR_NULL", + "CUDNN_PTR_ELEM_ALIGNED", + "CUDNN_PTR_DZDATA", + "CUDNN_PTR_DYDATA", + "CUDNN_PTR_DXDATA", + "CUDNN_PTR_DWDATA", + "CUDNN_PTR_BN_Z_EQSCALE", + "CUDNN_PTR_BN_Z_EQBIAS", + "CUDNN_PTR_BN_SCALE", + "CUDNN_PTR_BN_SAVED_MEAN", + "CUDNN_PTR_BN_SAVED_INVSTD", + "CUDNN_PTR_BN_RUNNING_VAR", + "CUDNN_PTR_BN_RUNNING_MEAN", + "CUDNN_PTR_BN_EQSCALE", + "CUDNN_PTR_BN_EQBIAS", + "CUDNN_PTR_BN_DSCALE", + "CUDNN_PTR_BN_DBIAS", + "CUDNN_PTR_BN_BIAS", + "CUDNN_PTR_ACTIVATION_BITMASK", + "CUDNN_PTR_16B_ALIGNED", + "CUDNN_POOLING_MAX_DETERMINISTIC", + "CUDNN_POINTWISE_ATAN2", + "CUDNN_PARAM_ZDESC", + "CUDNN_PARAM_ZDATA_PLACEHOLDER", + "CUDNN_PARAM_YSUM_PLACEHOLDER", + "CUDNN_PARAM_YSTATS_DESC", + "CUDNN_PARAM_YSQSUM_PLACEHOLDER", + "CUDNN_PARAM_YDESC", + "CUDNN_PARAM_YDATA_PLACEHOLDER", + "CUDNN_PARAM_XDESC", + "CUDNN_PARAM_XDATA_PLACEHOLDER", + "CUDNN_PARAM_WDESC", + "CUDNN_PARAM_WDATA_PLACEHOLDER", + "CUDNN_PARAM_DZDESC", + 
"CUDNN_PARAM_DZDATA_PLACEHOLDER", + "CUDNN_PARAM_DYDESC", + "CUDNN_PARAM_DYDATA_PLACEHOLDER", + "CUDNN_PARAM_DXDESC", + "CUDNN_PARAM_DXDATA_PLACEHOLDER", + "CUDNN_PARAM_DWDESC", + "CUDNN_PARAM_DWDATA_PLACEHOLDER", + "CUDNN_PARAM_CONV_DESC", + "CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", + "CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_SCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", + "CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", + "CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", + "CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", + "CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", + "CUDNN_PARAM_BN_MODE", + "CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_EQSCALEBIAS_DESC", + "CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_BIAS_PLACEHOLDER", + "CUDNN_PARAM_ACTIVATION_DESC", + "CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", + "CUDNN_PARAM_ACTIVATION_BITMASK_DESC", + "CUDNN_OP_TENSOR_SQRT", + "CUDNN_OP_TENSOR_NOT", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", + "CUDNN_NUMERICAL_NOTE_WINOGRAD", + "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", + "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", + "CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP", + "CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION", + "CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", + "CUDNN_NUMERICAL_NOTE_FFT", + "CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", + "CUDNN_NO_REORDER", + "CUDNN_NORM_PER_CHANNEL", + "CUDNN_NORM_PER_ACTIVATION", + "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", + "CUDNN_NORM_OPS_NORM_ACTIVATION", + "CUDNN_NORM_OPS_NORM", + "CUDNN_NORM_FWD_TRAINING", + "CUDNN_NORM_FWD_INFERENCE", + "CUDNN_NORM_ALGO_STANDARD", + "CUDNN_NORM_ALGO_PERSIST", + "CUDNN_NON_DETERMINISTIC", + "CUDNN_MH_ATTN_V_WEIGHTS", + "CUDNN_MH_ATTN_V_BIASES", + "CUDNN_MH_ATTN_Q_WEIGHTS", + "CUDNN_MH_ATTN_Q_BIASES", + "CUDNN_MH_ATTN_O_WEIGHTS", + 
"CUDNN_MH_ATTN_O_BIASES", + "CUDNN_MH_ATTN_K_WEIGHTS", + "CUDNN_MH_ATTN_K_BIASES", + "CUDNN_LRN_MIN_N", + "CUDNN_LRN_MIN_K", + "CUDNN_LRN_MIN_BETA", + "CUDNN_LRN_MAX_N", + "CUDNN_LOSS_NORMALIZATION_SOFTMAX", + "CUDNN_LOSS_NORMALIZATION_NONE", + "CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK", + "CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK", + "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", + "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", + "CUDNN_LAYOUT_TYPE_COUNT", + "CUDNN_LAYER_NORM", + "CUDNN_KNOB_TYPE_WORKSPACE", + "CUDNN_KNOB_TYPE_WINO_TILE", + "CUDNN_KNOB_TYPE_USE_TEX", + "CUDNN_KNOB_TYPE_TILE_SIZE", + "CUDNN_KNOB_TYPE_TILE_ROWS", + "CUDNN_KNOB_TYPE_TILE_COLS", + "CUDNN_KNOB_TYPE_TILE_CGA_N", + "CUDNN_KNOB_TYPE_TILE_CGA_M", + "CUDNN_KNOB_TYPE_TILE_CGA", + "CUDNN_KNOB_TYPE_TILEK", + "CUDNN_KNOB_TYPE_SWIZZLE", + "CUDNN_KNOB_TYPE_STAGES", + "CUDNN_KNOB_TYPE_SPLIT_RS", + "CUDNN_KNOB_TYPE_SPLIT_K_SLC", + "CUDNN_KNOB_TYPE_SPLIT_K_BUF", + "CUDNN_KNOB_TYPE_SPLIT_K", + "CUDNN_KNOB_TYPE_SPLIT_H", + "CUDNN_KNOB_TYPE_SPLIT_COLS", + "CUDNN_KNOB_TYPE_SPECFILT", + "CUDNN_KNOB_TYPE_SLICED", + "CUDNN_KNOB_TYPE_SINGLEBUFFER", + "CUDNN_KNOB_TYPE_REDUCTION_MODE", + "CUDNN_KNOB_TYPE_OCCUPANCY", + "CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK", + "CUDNN_KNOB_TYPE_MULTIPLY", + "CUDNN_KNOB_TYPE_LOAD_SIZE", + "CUDNN_KNOB_TYPE_LDGC", + "CUDNN_KNOB_TYPE_LDGB", + "CUDNN_KNOB_TYPE_LDGA", + "CUDNN_KNOB_TYPE_KERNEL_CFG", + "CUDNN_KNOB_TYPE_KBLOCK", + "CUDNN_KNOB_TYPE_IDX_MODE", + "CUDNN_KNOB_TYPE_EDGE", + "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", + "CUDNN_KNOB_TYPE_COUNTS", + "CUDNN_KNOB_TYPE_CHUNK_K", + "CUDNN_KNOB_TYPE_BLOCK_SIZE", + "CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD", + "CUDNN_INSTANCE_NORM", + "CUDNN_GROUP_NORM", + "CUDNN_GENSTATS_SUM_SQSUM", + "CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", + "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", + "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", + "CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", + "CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", + "CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", + 
"CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", + "CUDNN_FMA_MATH", + "CUDNN_ERRQUERY_RAWCODE", + "CUDNN_ERRQUERY_NONBLOCKING", + "CUDNN_ERRQUERY_BLOCKING", + "CUDNN_DIVNORM_PRECOMPUTED_MEANS", + "CUDNN_DIM_MAX", + "CUDNN_DETERMINISTIC", + "CUDNN_DEFAULT_REORDER", + "CUDNN_DEFAULT_MATH", + "CUDNN_DATA_UINT8x4", + "CUDNN_DATA_UINT8", + "CUDNN_DATA_INT8x32", + "CUDNN_DATA_FAST_FLOAT_FOR_FP8", + "CUDNN_DATA_BOOLEAN", + "CUDNN_CTC_ZERO_OOB_GRADIENTS", + "CUDNN_CTC_SKIP_OOB_GRADIENTS", + "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", + "CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", + "CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", + "CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", + "CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", + "CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", + "CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", + "CUDNN_CONVOLUTION_FWD_ALGO_COUNT", + "CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT", + "CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", + "CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", + "CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", + "CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", + "CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", + "CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", + "CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", + "CUDNN_BN_MIN_EPSILON", + "CUDNN_BN_FINALIZE_STATISTICS_TRAINING", + "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", + "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", + "CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", + "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", + 
"CUDNN_BATCH_NORM", + "CUDNN_BATCHNORM_SPATIAL_PERSISTENT", + "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", + "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", + "CUDNN_BATCHNORM_OPS_BN", + "CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", + "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", + "CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", + "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", + "CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", + "CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", + "CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", + "CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", + "CUDNN_ATTN_WKIND_COUNT", + "CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", + "CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", + "CUDNN_ATTN_ENABLE_PROJ_BIASES", + "CUDNN_ATTN_DISABLE_PROJ_BIASES", + "CUDNN_ACTIVATION_SWISH", "CUDA_R_8F_E5M2", "CUDA_R_8F_E4M3", "CUDA_R_64U", diff --git a/src/CUDA2HIP_Perl.cpp b/src/CUDA2HIP_Perl.cpp index ef402091..2bc73b2f 100644 --- a/src/CUDA2HIP_Perl.cpp +++ b/src/CUDA2HIP_Perl.cpp @@ -597,7 +597,7 @@ namespace perl { for (auto ma = CUDA_RENAMES_MAP().rbegin(); ma != CUDA_RENAMES_MAP().rend(); ++ma) { TranslateToRoc = false; if (Statistics::isUnsupported(ma->second)) { - if (ma->second.apiType == API_BLAS) { + if (ma->second.apiType == API_BLAS || ma->second.apiType == API_SPARSE || ma->second.apiType == API_RAND || ma->second.apiType == API_DNN) { sHipUnsupported << (countHipOnlyUnsupported ? 
",\n" : "") << tab_2 << "\"" << ma->first.str() << "\""; countHipOnlyUnsupported++; } else { @@ -607,7 +607,7 @@ namespace perl { } TranslateToRoc = true; if (Statistics::isUnsupported(ma->second)) { - if (ma->second.apiType == API_BLAS) { + if (ma->second.apiType == API_BLAS || ma->second.apiType == API_SPARSE || ma->second.apiType == API_RAND || ma->second.apiType == API_DNN) { sRocUnsupported << (countRocOnlyUnsupported ? ",\n" : "") << tab_2 << "\"" << ma->first.str() << "\""; countRocOnlyUnsupported++; } From b6ab7c8e70e4b67e87be9a5aada58a4d6c833573 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 17 Oct 2024 16:20:31 +0100 Subject: [PATCH 07/51] [HIPIFY][perl] Improve warning reporting --- bin/hipify-perl | 14 +++++++------- src/CUDA2HIP_Perl.cpp | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 489c722b..7ddefe06 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -9034,7 +9034,7 @@ sub warnUnsupportedDeviceFunctions { my $mt = m/\b($func)\b\s*\(\s*.*\s*\)/g; if ($mt && !$mt_namespace) { $k += $mt; - print STDERR " warning: $fileName:$line_num: unsupported device function \"$func\": $_\n"; + print STDERR " $fileName:$line_num: warning: unsupported device function \"$func\": $_\n"; } } return $k; @@ -9048,7 +9048,7 @@ sub warnExperimentalFunctions { my $mt = m/($func)/g; if ($mt) { $k += $mt; - print STDERR " warning: $fileName:$line_num: experimental identifier \"$func\" in HIP $val\n"; + print STDERR " $fileName:$line_num: warning: experimental ROCm HIP identifier: $func $val\n"; } } return $k; @@ -9067,7 +9067,7 @@ sub warnDeprecatedFunctions { if (index(lc($func),lc($cudnn)) == 0) { $cuda = $cudnn; } - print STDERR " warning: $fileName:$line_num: deprecated identifier \"$func\" since $cuda $val\n"; + print STDERR " $fileName:$line_num: warning: deprecated CUDA identifier: $func since $cuda $val\n"; } } return $k; @@ -9086,7 +9086,7 @@ sub warnRemovedFunctions { if 
(index(lc($func),lc($cudnn)) == 0) { $cuda = $cudnn; } - print STDERR " warning: $fileName:$line_num: removed identifier \"$func\" since $cuda $val\n"; + print STDERR " $fileName:$line_num: warning: removed CUDA identifier: $func since $cuda $val\n"; } } return $k; @@ -11065,7 +11065,7 @@ sub warnUnsupportedFunctions { my $mt = m/($func)/g; if ($mt) { $k += $mt; - print STDERR " warning: $fileName:$line_num: unsupported identifier \"$func\"\n"; + print STDERR " $fileName:$line_num: warning: unsupported ROCm HIP identifier: $func\n"; } } return $k; @@ -12685,7 +12685,7 @@ sub warnHipOnlyUnsupportedFunctions { my $mt = m/($func)/g; if ($mt) { $k += $mt; - print STDERR " warning: $fileName:$line_num: unsupported identifier \"$func\"\n"; + print STDERR " $fileName:$line_num: warning: unsupported HIP identifier: $func\n"; } } return $k; @@ -14173,7 +14173,7 @@ sub warnRocOnlyUnsupportedFunctions { my $mt = m/($func)/g; if ($mt) { $k += $mt; - print STDERR " warning: $fileName:$line_num: unsupported by ROC identifier \"$func\"\n"; + print STDERR " $fileName:$line_num: warning: unsupported ROC identifier: $func\n"; } } return $k; diff --git a/src/CUDA2HIP_Perl.cpp b/src/CUDA2HIP_Perl.cpp index 2bc73b2f..2c441943 100644 --- a/src/CUDA2HIP_Perl.cpp +++ b/src/CUDA2HIP_Perl.cpp @@ -91,7 +91,7 @@ namespace perl { const string printf = "printf STDERR "; const string no_warns = "no warnings qw/uninitialized/;"; const string hipify_perl = "hipify-perl"; - const string warning = "warning: $fileName:$line_num: "; + const string warning = "$fileName:$line_num: warning: "; const string warningsPlus = "$warnings += $s;"; const string sWarnExperimentalFunctions = "warnExperimentalFunctions"; const string sWarnDeprecatedFunctions = "warnDeprecatedFunctions"; @@ -635,12 +635,12 @@ namespace perl { sRocUnsupported << sCommon.str(); sCommon.str(std::string()); sCommon << tab_2 << "}\n" << tab << "}\n" << tab << return_k << "}" << endl; - sExperimental << tab_3 << print << "\" " << warning 
<< "experimental identifier \\\"$func\\\" in HIP $val\\n\";" << endl << sCommon.str(); - sDeprecated << tab_3 << print << "\" " << warning << "deprecated identifier \\\"$func\\\" since $cuda $val\\n\";" << endl << sCommon.str(); - sRemoved << tab_3 << print << "\" " << warning << "removed identifier \\\"$func\\\" since $cuda $val\\n\";" << endl << sCommon.str(); - sUnsupported << tab_3 << print << "\" " << warning << "unsupported identifier \\\"$func\\\"\\n\";" << endl << sCommon.str(); - sHipUnsupported << tab_3 << print << "\" " << warning << "unsupported identifier \\\"$func\\\"\\n\";" << endl << sCommon.str(); - sRocUnsupported << tab_3 << print << "\" " << warning << "unsupported by ROC identifier \\\"$func\\\"\\n\";" << endl << sCommon.str(); + sExperimental << tab_3 << print << "\" " << warning << "experimental ROCm HIP identifier: $func $val\\n\";" << endl << sCommon.str(); + sDeprecated << tab_3 << print << "\" " << warning << "deprecated CUDA identifier: $func since $cuda $val\\n\";" << endl << sCommon.str(); + sRemoved << tab_3 << print << "\" " << warning << "removed CUDA identifier: $func since $cuda $val\\n\";" << endl << sCommon.str(); + sUnsupported << tab_3 << print << "\" " << warning << "unsupported ROCm HIP identifier: $func\\n\";" << endl << sCommon.str(); + sHipUnsupported << tab_3 << print << "\" " << warning << "unsupported HIP identifier: $func\\n\";" << endl << sCommon.str(); + sRocUnsupported << tab_3 << print << "\" " << warning << "unsupported ROC identifier: $func\\n\";" << endl << sCommon.str(); *streamPtr.get() << sExperimental.str(); *streamPtr.get() << sDeprecated.str(); *streamPtr.get() << sRemoved.str(); From 30f64e5945606b5b875b2ab55321a7e1bfc8af79 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 17 Oct 2024 23:08:38 +0100 Subject: [PATCH 08/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 2 + `rocblas_(s|d|c|z|h)gemm_batched_64` and `hipblas(S|D|C|Z|H)gemmBatched_(v2_)?64` support + Updated synthetic 
tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 20 +++++++------- docs/tables/CUBLAS_API_supported_by_HIP.md | 10 +++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 10 +++---- docs/tables/CUBLAS_API_supported_by_ROC.md | 10 +++---- src/CUDA2HIP_BLAS_API_functions.cpp | 20 ++++++++++---- .../synthetic/libraries/cublas2hipblas_v2.cu | 26 +++++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 26 +++++++++++++++++++ 7 files changed, 92 insertions(+), 30 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 7ddefe06..c42496c4 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1594,6 +1594,7 @@ sub rocSubstitutions { subst("cublasCgeam", "rocblas_cgeam", "library"); subst("cublasCgemm", "rocblas_cgemm", "library"); subst("cublasCgemmBatched", "rocblas_cgemm_batched", "library"); + subst("cublasCgemmBatched_64", "rocblas_cgemm_batched_64", "library"); subst("cublasCgemmStridedBatched", "rocblas_cgemm_strided_batched", "library"); subst("cublasCgemm_64", "rocblas_cgemm_64", "library"); subst("cublasCgemm_v2", "rocblas_cgemm", "library"); @@ -1750,6 +1751,7 @@ sub rocSubstitutions { subst("cublasDgeam", "rocblas_dgeam", "library"); subst("cublasDgemm", "rocblas_dgemm", "library"); subst("cublasDgemmBatched", "rocblas_dgemm_batched", "library"); + subst("cublasDgemmBatched_64", "rocblas_dgemm_batched_64", "library"); subst("cublasDgemmStridedBatched", "rocblas_dgemm_strided_batched", "library"); subst("cublasDgemm_64", "rocblas_dgemm_64", "library"); subst("cublasDgemm_v2", "rocblas_dgemm", "library"); @@ -1893,6 +1895,7 @@ sub rocSubstitutions { subst("cublasHSSgemvStridedBatched_64", "rocblas_hssgemv_strided_batched_64", "library"); subst("cublasHgemm", "rocblas_hgemm", "library"); subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); + subst("cublasHgemmBatched_64", "rocblas_hgemm_batched_64", "library"); subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); 
subst("cublasHgemm_64", "rocblas_hgemm_64", "library"); subst("cublasIcamax", "rocblas_icamax", "library"); @@ -1976,6 +1979,7 @@ sub rocSubstitutions { subst("cublasSgeam", "rocblas_sgeam", "library"); subst("cublasSgemm", "rocblas_sgemm", "library"); subst("cublasSgemmBatched", "rocblas_sgemm_batched", "library"); + subst("cublasSgemmBatched_64", "rocblas_sgemm_batched_64", "library"); subst("cublasSgemmStridedBatched", "rocblas_sgemm_strided_batched", "library"); subst("cublasSgemm_64", "rocblas_sgemm_64", "library"); subst("cublasSgemm_v2", "rocblas_sgemm", "library"); @@ -2123,6 +2127,7 @@ sub rocSubstitutions { subst("cublasZgeam", "rocblas_zgeam", "library"); subst("cublasZgemm", "rocblas_zgemm", "library"); subst("cublasZgemmBatched", "rocblas_zgemm_batched", "library"); + subst("cublasZgemmBatched_64", "rocblas_zgemm_batched_64", "library"); subst("cublasZgemmStridedBatched", "rocblas_zgemm_strided_batched", "library"); subst("cublasZgemm_64", "rocblas_zgemm_64", "library"); subst("cublasZgemm_v2", "rocblas_zgemm", "library"); @@ -4325,6 +4330,7 @@ sub simpleSubstitutions { subst("cublasCgelsBatched", "hipblasCgelsBatched_v2", "library"); subst("cublasCgemm", "hipblasCgemm_v2", "library"); subst("cublasCgemmBatched", "hipblasCgemmBatched_v2", "library"); + subst("cublasCgemmBatched_64", "hipblasCgemmBatched_v2_64", "library"); subst("cublasCgemmStridedBatched", "hipblasCgemmStridedBatched_v2", "library"); subst("cublasCgemm_64", "hipblasCgemm_v2_64", "library"); subst("cublasCgemm_v2", "hipblasCgemm_v2", "library"); @@ -4483,6 +4489,7 @@ sub simpleSubstitutions { subst("cublasDgelsBatched", "hipblasDgelsBatched", "library"); subst("cublasDgemm", "hipblasDgemm", "library"); subst("cublasDgemmBatched", "hipblasDgemmBatched", "library"); + subst("cublasDgemmBatched_64", "hipblasDgemmBatched_64", "library"); subst("cublasDgemmStridedBatched", "hipblasDgemmStridedBatched", "library"); subst("cublasDgemm_64", "hipblasDgemm_64", "library"); 
subst("cublasDgemm_v2", "hipblasDgemm", "library"); @@ -4618,6 +4625,7 @@ sub simpleSubstitutions { subst("cublasGetVectorAsync", "hipblasGetVectorAsync", "library"); subst("cublasHgemm", "hipblasHgemm", "library"); subst("cublasHgemmBatched", "hipblasHgemmBatched", "library"); + subst("cublasHgemmBatched_64", "hipblasHgemmBatched_64", "library"); subst("cublasHgemmStridedBatched", "hipblasHgemmStridedBatched", "library"); subst("cublasHgemm_64", "hipblasHgemm_64", "library"); subst("cublasIcamax", "hipblasIcamax_v2", "library"); @@ -4722,6 +4730,7 @@ sub simpleSubstitutions { subst("cublasSgelsBatched", "hipblasSgelsBatched", "library"); subst("cublasSgemm", "hipblasSgemm", "library"); subst("cublasSgemmBatched", "hipblasSgemmBatched", "library"); + subst("cublasSgemmBatched_64", "hipblasSgemmBatched_64", "library"); subst("cublasSgemmStridedBatched", "hipblasSgemmStridedBatched", "library"); subst("cublasSgemm_64", "hipblasSgemm_64", "library"); subst("cublasSgemm_v2", "hipblasSgemm", "library"); @@ -4863,6 +4872,7 @@ sub simpleSubstitutions { subst("cublasZgelsBatched", "hipblasZgelsBatched_v2", "library"); subst("cublasZgemm", "hipblasZgemm_v2", "library"); subst("cublasZgemmBatched", "hipblasZgemmBatched_v2", "library"); + subst("cublasZgemmBatched_64", "hipblasZgemmBatched_v2_64", "library"); subst("cublasZgemmStridedBatched", "hipblasZgemmStridedBatched_v2", "library"); subst("cublasZgemm_64", "hipblasZgemm_v2_64", "library"); subst("cublasZgemm_v2", "hipblasZgemm_v2", "library"); @@ -11530,7 +11540,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZhemm_v2_64", "cublasZhemm_64", "cublasZgemmStridedBatched_64", - "cublasZgemmBatched_64", "cublasZgemm3m_64", "cublasZgemm3m", "cublasZgeam_64", @@ -11568,7 +11577,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSgemmGroupedBatched", "cublasSgemmEx_64", "cublasSgemmEx", - "cublasSgemmBatched_64", "cublasSgeam_64", "cublasSetVector_64", "cublasSetVectorAsync_64", @@ -11627,7 +11635,6 @@ sub 
warnHipOnlyUnsupportedFunctions { "cublasIamaxEx_64", "cublasIamaxEx", "cublasHgemmStridedBatched_64", - "cublasHgemmBatched_64", "cublasHSSgemvStridedBatched_64", "cublasHSSgemvStridedBatched", "cublasHSSgemvBatched_64", @@ -11673,7 +11680,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", - "cublasDgemmBatched_64", "cublasDgeam_64", "cublasDdgmm_64", "cublasCtrttp", @@ -11712,7 +11718,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", - "cublasCgemmBatched_64", "cublasCgemm3m_64", "cublasCgemm3mStridedBatched_64", "cublasCgemm3mStridedBatched", @@ -13226,7 +13231,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgetrfBatched", "cublasZgeqrfBatched", "cublasZgemmStridedBatched_64", - "cublasZgemmBatched_64", "cublasZgemm3m_64", "cublasZgemm3m", "cublasZgelsBatched", @@ -13258,7 +13262,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgemmGroupedBatched", "cublasSgemmEx_64", "cublasSgemmEx", - "cublasSgemmBatched_64", "cublasSgelsBatched", "cublasSgeam_64", "cublasSetVector_64", @@ -13344,7 +13347,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasIamaxEx_64", "cublasIamaxEx", "cublasHgemmStridedBatched_64", - "cublasHgemmBatched_64", "cublasGetVersion_v2", "cublasGetVersion", "cublasGetVector_64", @@ -13382,7 +13384,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", - "cublasDgemmBatched_64", "cublasDgelsBatched", "cublasDgeam_64", "cublasDdgmm_64", @@ -13422,7 +13423,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", - "cublasCgemmBatched_64", "cublasCgemm3m_64", "cublasCgemm3mStridedBatched_64", "cublasCgemm3mStridedBatched", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 9ef081e8..8a6cc80e 100644 --- 
a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1025,7 +1025,7 @@ |`cublasCgemm3mStridedBatched_64`|12.0| | | | | | | | | | |`cublasCgemm3m_64`|12.0| | | | | | | | | | |`cublasCgemmBatched`| | | | |`hipblasCgemmBatched_v2`|6.0.0| | | | | -|`cublasCgemmBatched_64`|12.0| | | | | | | | | | +|`cublasCgemmBatched_64`|12.0| | | |`hipblasCgemmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | |`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| @@ -1073,7 +1073,7 @@ |`cublasCtrsm_v2_64`|12.0| | | | | | | | | | |`cublasDgemm`| | | | |`hipblasDgemm`|1.8.2| | | | | |`cublasDgemmBatched`| | | | |`hipblasDgemmBatched`|1.8.2| | | | | -|`cublasDgemmBatched_64`|12.0| | | | | | | | | | +|`cublasDgemmBatched_64`|12.0| | | |`hipblasDgemmBatched_64`|6.3.0| | | |6.3.0| |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | | @@ -1119,13 +1119,13 @@ |`cublasHSSgemvStridedBatched_64`|12.0| | | | | | | | | | |`cublasHgemm`|7.5| | | |`hipblasHgemm`|1.8.2| | | | | |`cublasHgemmBatched`|9.0| | | |`hipblasHgemmBatched`|3.0.0| | | | | -|`cublasHgemmBatched_64`|12.0| | | | | | | | | | +|`cublasHgemmBatched_64`|12.0| | | |`hipblasHgemmBatched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | |`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | | -|`cublasSgemmBatched_64`|12.0| | | | | | | | | | +|`cublasSgemmBatched_64`|12.0| | | |`hipblasSgemmBatched_64`|6.3.0| | | |6.3.0| |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | 
|`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | | @@ -1171,7 +1171,7 @@ |`cublasZgemm3m`|8.0| | | | | | | | | | |`cublasZgemm3m_64`|12.0| | | | | | | | | | |`cublasZgemmBatched`| | | | |`hipblasZgemmBatched_v2`|6.0.0| | | | | -|`cublasZgemmBatched_64`|12.0| | | | | | | | | | +|`cublasZgemmBatched_64`|12.0| | | |`hipblasZgemmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | |`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 067167e6..eacf573f 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1025,7 +1025,7 @@ |`cublasCgemm3mStridedBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgemm3m_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgemmBatched`| | | | |`hipblasCgemmBatched_v2`|6.0.0| | | | |`rocblas_cgemm_batched`|3.5.0| | | | | -|`cublasCgemmBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemmBatched_64`|12.0| | | |`hipblasCgemmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| @@ -1073,7 +1073,7 @@ |`cublasCtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasDgemm`| | | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemmBatched`| | | | |`hipblasDgemmBatched`|1.8.2| | | | |`rocblas_dgemm_batched`|3.5.0| | | | | -|`cublasDgemmBatched_64`|12.0| | | 
| | | | | | | | | | | | | +|`cublasDgemmBatched_64`|12.0| | | |`hipblasDgemmBatched_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | @@ -1119,13 +1119,13 @@ |`cublasHSSgemvStridedBatched_64`|12.0| | | | | | | | | |`rocblas_hssgemv_strided_batched_64`|6.2.0| | | | | |`cublasHgemm`|7.5| | | |`hipblasHgemm`|1.8.2| | | | |`rocblas_hgemm`|1.5.0| | | | | |`cublasHgemmBatched`|9.0| | | |`hipblasHgemmBatched`|3.0.0| | | | |`rocblas_hgemm_batched`|3.5.0| | | | | -|`cublasHgemmBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasHgemmBatched_64`|12.0| | | |`hipblasHgemmBatched_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | -|`cublasSgemmBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemmBatched_64`|12.0| | | |`hipblasSgemmBatched_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | | | | | | | |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | @@ -1171,7 +1171,7 @@ |`cublasZgemm3m`|8.0| | | | | | | | | | | | | | | | |`cublasZgemm3m_64`|12.0| | | | | | | | | | | | | | | | 
|`cublasZgemmBatched`| | | | |`hipblasZgemmBatched_v2`|6.0.0| | | | |`rocblas_zgemm_batched`|3.5.0| | | | | -|`cublasZgemmBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemmBatched_64`|12.0| | | |`hipblasZgemmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 1db1997e..20cbb3db 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1025,7 +1025,7 @@ |`cublasCgemm3mStridedBatched_64`|12.0| | | | | | | | | | |`cublasCgemm3m_64`|12.0| | | | | | | | | | |`cublasCgemmBatched`| | | | |`rocblas_cgemm_batched`|3.5.0| | | | | -|`cublasCgemmBatched_64`|12.0| | | | | | | | | | +|`cublasCgemmBatched_64`|12.0| | | |`rocblas_cgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | |`cublasCgemm_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| @@ -1073,7 +1073,7 @@ |`cublasCtrsm_v2_64`|12.0| | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasDgemm`| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemmBatched`| | | | |`rocblas_dgemm_batched`|3.5.0| | | | | -|`cublasDgemmBatched_64`|12.0| | | | | | | | | | +|`cublasDgemmBatched_64`|12.0| | | |`rocblas_dgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | @@ -1119,13 +1119,13 @@ |`cublasHSSgemvStridedBatched_64`|12.0| | | 
|`rocblas_hssgemv_strided_batched_64`|6.2.0| | | | | |`cublasHgemm`|7.5| | | |`rocblas_hgemm`|1.5.0| | | | | |`cublasHgemmBatched`|9.0| | | |`rocblas_hgemm_batched`|3.5.0| | | | | -|`cublasHgemmBatched_64`|12.0| | | | | | | | | | +|`cublasHgemmBatched_64`|12.0| | | |`rocblas_hgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | |`cublasHgemm_64`|12.0| | | |`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | -|`cublasSgemmBatched_64`|12.0| | | | | | | | | | +|`cublasSgemmBatched_64`|12.0| | | |`rocblas_sgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | @@ -1171,7 +1171,7 @@ |`cublasZgemm3m`|8.0| | | | | | | | | | |`cublasZgemm3m_64`|12.0| | | | | | | | | | |`cublasZgemmBatched`| | | | |`rocblas_zgemm_batched`|3.5.0| | | | | -|`cublasZgemmBatched_64`|12.0| | | | | | | | | | +|`cublasZgemmBatched_64`|12.0| | | |`rocblas_zgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | |`cublasZgemm_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 5dfc5338..a8fc6cff 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -411,25 +411,25 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // BATCH GEMM {"cublasSgemmBatched", {"hipblasSgemmBatched", "rocblas_sgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSgemmBatched_64", {"hipblasSgemmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, 
UNSUPPORTED}}, + {"cublasSgemmBatched_64", {"hipblasSgemmBatched_64", "rocblas_sgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasSgemmGroupedBatched", {"hipblasSgemmGroupedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasSgemmGroupedBatched_64", {"hipblasSgemmGroupedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasDgemmBatched", {"hipblasDgemmBatched", "rocblas_dgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDgemmBatched_64", {"hipblasDgemmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDgemmBatched_64", {"hipblasDgemmBatched_64", "rocblas_dgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDgemmGroupedBatched", {"hipblasDgemmGroupedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasDgemmGroupedBatched_64", {"hipblasDgemmGroupedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasHgemmBatched", {"hipblasHgemmBatched", "rocblas_hgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasHgemmBatched_64", {"hipblasHgemmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasHgemmBatched_64", {"hipblasHgemmBatched_64", "rocblas_hgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasSgemmStridedBatched", {"hipblasSgemmStridedBatched", "rocblas_sgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasSgemmStridedBatched_64", {"hipblasSgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasDgemmStridedBatched", {"hipblasDgemmStridedBatched", "rocblas_dgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDgemmStridedBatched_64", {"hipblasDgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemmBatched", {"hipblasCgemmBatched_v2", "rocblas_cgemm_batched", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCgemmBatched_64", {"hipblasCgemmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemmBatched_64", {"hipblasCgemmBatched_v2_64", "rocblas_cgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm3mBatched", {"hipblasCgemm3mBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3mBatched_64", {"hipblasCgemm3mBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasZgemmBatched", {"hipblasZgemmBatched_v2", "rocblas_zgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZgemmBatched_64", {"hipblasZgemmBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemmBatched_64", {"hipblasZgemmBatched_v2_64", "rocblas_zgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemmStridedBatched", {"hipblasCgemmStridedBatched_v2", "rocblas_cgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemmStridedBatched_64", {"hipblasCgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3mStridedBatched", {"hipblasCgemm3mStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, @@ -2028,6 +2028,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCgemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZgemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasHgemmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgemmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, 
@@ -2421,6 +2426,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_cgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_hgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_sgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index d3fc1ad2..20369f55 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -244,6 +244,7 @@ int main() { float** fYarray = nullptr; const float** const fYarray_const = const_cast(fYarray); float** fCarray = nullptr; + const float** const fCarray_const = const_cast(fCarray); float** fTauarray = nullptr; double da = 0.0f; @@ -2855,6 +2856,31 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* AP, int64_t lda, const hipblasHalf* BP, int64_t ldb, const hipblasHalf* beta, hipblasHalf* CP, int64_t ldc); // CHECK: blasStatus = hipblasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); blasStatus = cublasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* const Aarray[], int64_t 
lda, const float* const Barray[], int64_t ldb, const float* beta, float* const Carray[], int64_t ldc, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* const AP[], int64_t lda, const float* const BP[], int64_t ldb, const float* beta, float* const CP[], int64_t ldc, int64_t batchCount); + // CHECK: blasStatus = hipblasSgemmBatched_64(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount_64); + blasStatus = cublasSgemmBatched_64(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* const Aarray[], int64_t lda, const double* const Barray[], int64_t ldb, const double* beta, double* const Carray[], int64_t ldc, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* const AP[], int64_t lda, const double* const BP[], int64_t ldb, const double* beta, double* const CP[], int64_t ldc, int64_t batchCount); + // CHECK: blasStatus = hipblasDgemmBatched_64(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount_64); + blasStatus = cublasDgemmBatched_64(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, 
const cuComplex* alpha, const cuComplex* const Aarray[], int64_t lda, const cuComplex* const Barray[], int64_t ldb, const cuComplex* beta, cuComplex* const Carray[], int64_t ldc, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* const AP[], int64_t lda, const hipComplex* const BP[], int64_t ldb, const hipComplex* beta, hipComplex* const CP[], int64_t ldc, int64_t batchCount); + // CHECK: blasStatus = hipblasCgemmBatched_v2_64(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount_64); + blasStatus = cublasCgemmBatched_64(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int64_t lda, const cuDoubleComplex* const Barray[], int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int64_t ldc, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* const AP[], int64_t lda, const hipDoubleComplex* const BP[], int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* const CP[], int64_t ldc, int64_t batchCount); + // CHECK: blasStatus = hipblasZgemmBatched_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount_64); + blasStatus = 
cublasZgemmBatched_64(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* const Aarray[], int64_t lda, const __half* const Barray[], int64_t ldb, const __half* beta, __half* const Carray[], int64_t ldc, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* const AP[], int64_t lda, const hipblasHalf* const BP[], int64_t ldb, const hipblasHalf* beta, hipblasHalf* const CP[], int64_t ldc, int64_t batchCount); + // CHECK: blasStatus = hipblasHgemmBatched_64(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, hCarray, ldc, batchCount_64); + blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, hCarray, ldc, batchCount_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 1bfd4810..c4425949 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -271,6 +271,7 @@ int main() { float** fYarray = nullptr; const float** const fYarray_const = const_cast(fYarray); float** fCarray = nullptr; + const float** const fCarray_const = const_cast(fCarray); float** fTauarray = nullptr; double da = 0; @@ -3061,6 +3062,31 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const 
rocblas_half* alpha, const rocblas_half* A, int64_t lda, const rocblas_half* B, int64_t ldb, const rocblas_half* beta, rocblas_half* C, int64_t ldc); // CHECK: blasStatus = rocblas_hgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); blasStatus = cublasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* const Aarray[], int64_t lda, const float* const Barray[], int64_t ldb, const float* beta, float* const Carray[], int64_t ldc, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* const A[], int64_t lda, const float* const B[], int64_t ldb, const float* beta, float* const C[], int64_t ldc, int64_t batch_count); + // CHECK: blasStatus = rocblas_sgemm_batched_64(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount_64); + blasStatus = cublasSgemmBatched_64(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* const Aarray[], int64_t lda, const double* const Barray[], int64_t ldb, const double* beta, double* const Carray[], int64_t ldc, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* const A[], int64_t lda, const double* 
const B[], int64_t ldb, const double* beta, double* const C[], int64_t ldc, int64_t batch_count); + // CHECK: blasStatus = rocblas_dgemm_batched_64(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount_64); + blasStatus = cublasDgemmBatched_64(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* const Aarray[], int64_t lda, const cuComplex* const Barray[], int64_t ldb, const cuComplex* beta, cuComplex* const Carray[], int64_t ldc, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], int64_t lda, const rocblas_float_complex* const B[], int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], int64_t ldc, int64_t batch_count); + // CHECK: blasStatus = rocblas_cgemm_batched_64(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount_64); + blasStatus = cublasCgemmBatched_64(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int64_t lda, const cuDoubleComplex* const Barray[], int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int64_t ldc, int64_t 
batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], int64_t lda, const rocblas_double_complex* const B[], int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], int64_t ldc, int64_t batch_count); + // CHECK: blasStatus = rocblas_zgemm_batched_64(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount_64); + blasStatus = cublasZgemmBatched_64(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* const Aarray[], int64_t lda, const __half* const Barray[], int64_t ldb, const __half* beta, __half* const Carray[], int64_t ldc, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* const A[], int64_t lda, const rocblas_half* const B[], int64_t ldb, const rocblas_half* beta, rocblas_half* const C[], int64_t ldc, int64_t batch_count); + // CHECK: blasStatus = rocblas_hgemm_batched_64(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, hCarray, ldc, batchCount_64); + blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, hCarray, ldc, batchCount_64); #endif return 0; From ca0f2e1c36c6cf309be9b548e7f4fb81f8a0b36c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: 
Fri, 18 Oct 2024 15:29:58 +0100 Subject: [PATCH 09/51] [HIPIFY][tests][fix][BLAS] Fix `data loss` warnings and integral types mismatch in function arguments - Step 1 - BLAS --- .../synthetic/libraries/cublas2hipblas_v2.cu | 58 +++++++++--------- .../synthetic/libraries/cublas2rocblas_v2.cu | 59 ++++++++++--------- 2 files changed, 60 insertions(+), 57 deletions(-) diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 20369f55..2b29009f 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -154,7 +154,9 @@ int main() { int lda = 0; int64_t lda_64 = 0; int ldb = 0; + int64_t ldb_64 = 0; int ldc = 0; + int64_t ldc_64 = 0; int res = 0; int64_t res_64 = 0; int incx = 0; @@ -2826,61 +2828,61 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, const float* BP, int64_t ldb, const float* beta, float* CP, int64_t ldc); - // CHECK: blasStatus = hipblasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - // CHECK-NEXT: blasStatus = hipblasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - blasStatus = cublasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - blasStatus = cublasSgemm_v2_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK: blasStatus = hipblasSgemm_64(blasHandle, transa, transb, m_64, 
n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasSgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, const double* BP, int64_t ldb, const double* beta, double* CP, int64_t ldc); - // CHECK: blasStatus = hipblasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - // CHECK-NEXT: blasStatus = hipblasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - blasStatus = cublasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - blasStatus = cublasDgemm_v2_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + // CHECK: blasStatus = hipblasDgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasDgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, 
&da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemm_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const hipComplex* beta, hipComplex* CP, int64_t ldc); - // CHECK: blasStatus = hipblasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // CHECK-NEXT: blasStatus = hipblasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - blasStatus = cublasCgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - blasStatus = cublasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK: blasStatus = hipblasCgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasZgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemm_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); - // CHECK: blasStatus = hipblasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // CHECK-NEXT: blasStatus = hipblasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - blasStatus = cublasZgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - blasStatus = cublasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK: blasStatus = hipblasZgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasHgemm_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, const __half* B, int64_t ldb, const __half* beta, __half* C, int64_t ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* AP, int64_t lda, const hipblasHalf* BP, int64_t ldb, const hipblasHalf* beta, hipblasHalf* CP, int64_t ldc); - // CHECK: blasStatus = hipblasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); - blasStatus = cublasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); + // CHECK: blasStatus = hipblasHgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, hB, ldb_64, hb, hC, ldc_64); + blasStatus = cublasHgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, hB, ldb_64, hb, hC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* const Aarray[], int64_t lda, const float* const Barray[], int64_t ldb, const float* beta, float* const Carray[], int64_t ldc, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* const AP[], int64_t lda, const float* const BP[], int64_t ldb, const float* beta, float* const CP[], int64_t ldc, int64_t batchCount); - // CHECK: blasStatus = hipblasSgemmBatched_64(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount_64); - blasStatus = cublasSgemmBatched_64(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, 
lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount_64); + // CHECK: blasStatus = hipblasSgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, fAarray_const, lda_64, fBarray_const, ldb_64, &fb, fCarray, ldc_64, batchCount_64); + blasStatus = cublasSgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, fAarray_const, lda_64, fBarray_const, ldb_64, &fb, fCarray, ldc_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* const Aarray[], int64_t lda, const double* const Barray[], int64_t ldb, const double* beta, double* const Carray[], int64_t ldc, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* const AP[], int64_t lda, const double* const BP[], int64_t ldb, const double* beta, double* const CP[], int64_t ldc, int64_t batchCount); - // CHECK: blasStatus = hipblasDgemmBatched_64(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount_64); - blasStatus = cublasDgemmBatched_64(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount_64); + // CHECK: blasStatus = hipblasDgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, dAarray_const, lda_64, dBarray_const, ldb_64, &db, dCarray, ldc_64, batchCount_64); + blasStatus = cublasDgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, dAarray_const, lda_64, dBarray_const, ldb_64, &db, dCarray, ldc_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, 
const cuComplex* const Aarray[], int64_t lda, const cuComplex* const Barray[], int64_t ldb, const cuComplex* beta, cuComplex* const Carray[], int64_t ldc, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* const AP[], int64_t lda, const hipComplex* const BP[], int64_t ldb, const hipComplex* beta, hipComplex* const CP[], int64_t ldc, int64_t batchCount); - // CHECK: blasStatus = hipblasCgemmBatched_v2_64(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount_64); - blasStatus = cublasCgemmBatched_64(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount_64); + // CHECK: blasStatus = hipblasCgemmBatched_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, complexAarray_const, lda_64, complexBarray_const, ldb_64, &complexb, complexCarray, ldc_64, batchCount_64); + blasStatus = cublasCgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, complexAarray_const, lda_64, complexBarray_const, ldb_64, &complexb, complexCarray, ldc_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int64_t lda, const cuDoubleComplex* const Barray[], int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int64_t ldc, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* const AP[], 
int64_t lda, const hipDoubleComplex* const BP[], int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* const CP[], int64_t ldc, int64_t batchCount); - // CHECK: blasStatus = hipblasZgemmBatched_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount_64); - blasStatus = cublasZgemmBatched_64(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount_64); + // CHECK: blasStatus = hipblasZgemmBatched_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray_const, ldb_64, &dcomplexb, dcomplexCarray, ldc_64, batchCount_64); + blasStatus = cublasZgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray_const, ldb_64, &dcomplexb, dcomplexCarray, ldc_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* const Aarray[], int64_t lda, const __half* const Barray[], int64_t ldb, const __half* beta, __half* const Carray[], int64_t ldc, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* const AP[], int64_t lda, const hipblasHalf* const BP[], int64_t ldb, const hipblasHalf* beta, hipblasHalf* const CP[], int64_t ldc, int64_t batchCount); - // CHECK: blasStatus = hipblasHgemmBatched_64(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, hCarray, ldc, batchCount_64); - blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, 
hCarray, ldc, batchCount_64); + // CHECK: blasStatus = hipblasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); + blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index c4425949..dd48b9c9 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -157,9 +157,10 @@ int main() { int num = 0; int lda = 0; int64_t lda_64 = 0; - int64_t ldb_64 = 0; int ldb = 0; + int64_t ldb_64 = 0; int ldc = 0; + int64_t ldc_64 = 0; int res = 0; int64_t res_64 = 0; int incx = 0; @@ -3032,61 +3033,61 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); - // CHECK: blasStatus = rocblas_sgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - // CHECK-NEXT: blasStatus = rocblas_sgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - blasStatus = cublasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - blasStatus = cublasSgemm_v2_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK: 
blasStatus = rocblas_sgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_sgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); - // CHECK: blasStatus = rocblas_dgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - // CHECK-NEXT: blasStatus = rocblas_dgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - blasStatus = cublasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - blasStatus = cublasDgemm_v2_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + // CHECK: blasStatus = rocblas_dgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_dgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = 
cublasDgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc); - // CHECK: blasStatus = rocblas_cgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // CHECK-NEXT: blasStatus = rocblas_cgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - blasStatus = cublasCgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - blasStatus = cublasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK: blasStatus = rocblas_cgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_cgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, &complexB, 
ldb_64, &complexb, &complexC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); - // CHECK: blasStatus = rocblas_zgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // CHECK-NEXT: blasStatus = rocblas_zgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - blasStatus = cublasZgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - blasStatus = cublasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK: blasStatus = rocblas_zgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_zgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZgemm_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, 
&dcomplexb, &dcomplexC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, const __half* B, int64_t ldb, const __half* beta, __half* C, int64_t ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* A, int64_t lda, const rocblas_half* B, int64_t ldb, const rocblas_half* beta, rocblas_half* C, int64_t ldc); - // CHECK: blasStatus = rocblas_hgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); - blasStatus = cublasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); + // CHECK: blasStatus = rocblas_hgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, hB, ldb_64, hb, hC, ldc_64); + blasStatus = cublasHgemm_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, hB, ldb_64, hb, hC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* const Aarray[], int64_t lda, const float* const Barray[], int64_t ldb, const float* beta, float* const Carray[], int64_t ldc, int64_t batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* const A[], int64_t lda, const float* const B[], int64_t ldb, const float* beta, float* const C[], int64_t ldc, int64_t batch_count); - // CHECK: blasStatus = rocblas_sgemm_batched_64(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount_64); - blasStatus = 
cublasSgemmBatched_64(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount_64); + // CHECK: blasStatus = rocblas_sgemm_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, fAarray_const, lda_64, fBarray_const, ldb_64, &fb, fCarray, ldc_64, batchCount_64); + blasStatus = cublasSgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, fAarray_const, lda_64, fBarray_const, ldb_64, &fb, fCarray, ldc_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* const Aarray[], int64_t lda, const double* const Barray[], int64_t ldb, const double* beta, double* const Carray[], int64_t ldc, int64_t batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* const A[], int64_t lda, const double* const B[], int64_t ldb, const double* beta, double* const C[], int64_t ldc, int64_t batch_count); - // CHECK: blasStatus = rocblas_dgemm_batched_64(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount_64); - blasStatus = cublasDgemmBatched_64(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount_64); + // CHECK: blasStatus = rocblas_dgemm_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, dAarray_const, lda_64, dBarray_const, ldb_64, &db, dCarray, ldc_64, batchCount_64); + blasStatus = cublasDgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, dAarray_const, lda_64, dBarray_const, ldb_64, &db, dCarray, ldc_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, 
cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* const Aarray[], int64_t lda, const cuComplex* const Barray[], int64_t ldb, const cuComplex* beta, cuComplex* const Carray[], int64_t ldc, int64_t batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], int64_t lda, const rocblas_float_complex* const B[], int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], int64_t ldc, int64_t batch_count); - // CHECK: blasStatus = rocblas_cgemm_batched_64(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount_64); - blasStatus = cublasCgemmBatched_64(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount_64); + // CHECK: blasStatus = rocblas_cgemm_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, complexAarray_const, lda_64, complexBarray_const, ldb_64, &complexb, complexCarray, ldc_64, batchCount_64); + blasStatus = cublasCgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, complexAarray_const, lda_64, complexBarray_const, ldb_64, &complexb, complexCarray, ldc_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int64_t lda, const cuDoubleComplex* const Barray[], int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int64_t ldc, int64_t batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched_64(rocblas_handle handle, rocblas_operation transA, 
rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], int64_t lda, const rocblas_double_complex* const B[], int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], int64_t ldc, int64_t batch_count); - // CHECK: blasStatus = rocblas_zgemm_batched_64(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount_64); - blasStatus = cublasZgemmBatched_64(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount_64); + // CHECK: blasStatus = rocblas_zgemm_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray_const, ldb_64, &dcomplexb, dcomplexCarray, ldc_64, batchCount_64); + blasStatus = cublasZgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray_const, ldb_64, &dcomplexb, dcomplexCarray, ldc_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* const Aarray[], int64_t lda, const __half* const Barray[], int64_t ldb, const __half* beta, __half* const Carray[], int64_t ldc, int64_t batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* const A[], int64_t lda, const rocblas_half* const B[], int64_t ldb, const rocblas_half* beta, rocblas_half* const C[], int64_t ldc, int64_t batch_count); - // CHECK: blasStatus = rocblas_hgemm_batched_64(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, 
hCarray, ldc, batchCount_64); - blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, hCarray, ldc, batchCount_64); + // CHECK: blasStatus = rocblas_hgemm_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); + blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); #endif return 0; From c634dd0db875295a1b8773483dbd00990b969458 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 18 Oct 2024 15:41:51 +0100 Subject: [PATCH 10/51] [HIPIFY][RAND] Formatting --- src/CUDA2HIP_RAND_API_functions.cpp | 100 +++---- src/CUDA2HIP_RAND_API_types.cpp | 430 ++++++++++++++-------------- 2 files changed, 265 insertions(+), 265 deletions(-) diff --git a/src/CUDA2HIP_RAND_API_functions.cpp b/src/CUDA2HIP_RAND_API_functions.cpp index 95599386..4d1647cb 100644 --- a/src/CUDA2HIP_RAND_API_functions.cpp +++ b/src/CUDA2HIP_RAND_API_functions.cpp @@ -87,59 +87,59 @@ const std::map CUDA_RAND_FUNCTION_MAP { }; const std::map CUDA_RAND_FUNCTION_VER_MAP { - {"curandGetProperty", {CUDA_80, CUDA_0, CUDA_0 }}, - {"__curand_umul", {CUDA_115, CUDA_0, CUDA_0 }}, + {"curandGetProperty", {CUDA_80, CUDA_0, CUDA_0, }}, + {"__curand_umul", {CUDA_115, CUDA_0, CUDA_0, }}, }; const std::map HIP_RAND_FUNCTION_VER_MAP { - {"hiprandCreateGenerator", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandCreateGeneratorHost", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandCreatePoissonDistribution", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandDestroyDistribution", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandDestroyGenerator", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerate", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerateLogNormal", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerateLogNormalDouble", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerateNormal", {HIP_1050, HIP_0, HIP_0 
}}, - {"hiprandGenerateNormalDouble", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGeneratePoisson", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerateSeeds", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerateUniform", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerateUniformDouble", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGetVersion", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandSetGeneratorOffset", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandSetPseudoRandomGeneratorSeed", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandSetQuasiRandomGeneratorDimensions", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandSetStream", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandMakeMTGP32Constants", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandMakeMTGP32KernelState", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_init", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_log_normal", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_log_normal_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_log_normal2", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_log_normal2_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_log_normal4", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_log_normal4_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_normal", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_normal_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_normal2", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_normal2_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_normal4", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_normal4_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_uniform", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_uniform_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_uniform2_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_uniform4", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_uniform4_double", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_discrete", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_discrete4", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_poisson", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprand_poisson4", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGetDirectionVectors32", 
{HIP_6000, HIP_0, HIP_0 }}, - {"hiprandGetDirectionVectors64", {HIP_6000, HIP_0, HIP_0 }}, - {"hiprandGetScrambleConstants32", {HIP_6000, HIP_0, HIP_0 }}, - {"hiprandGetScrambleConstants64", {HIP_6000, HIP_0, HIP_0 }}, + {"hiprandCreateGenerator", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandCreateGeneratorHost", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandCreatePoissonDistribution", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandDestroyDistribution", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandDestroyGenerator", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerate", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerateLogNormal", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerateLogNormalDouble", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerateNormal", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerateNormalDouble", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGeneratePoisson", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerateSeeds", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerateUniform", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerateUniformDouble", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGetVersion", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandSetGeneratorOffset", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandSetPseudoRandomGeneratorSeed", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandSetQuasiRandomGeneratorDimensions", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandSetStream", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandMakeMTGP32Constants", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandMakeMTGP32KernelState", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_init", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_log_normal", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_log_normal_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_log_normal2", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_log_normal2_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_log_normal4", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_log_normal4_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_normal", {HIP_1050, HIP_0, HIP_0, }}, + 
{"hiprand_normal_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_normal2", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_normal2_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_normal4", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_normal4_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_uniform", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_uniform_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_uniform2_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_uniform4", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_uniform4_double", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_discrete", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_discrete4", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_poisson", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprand_poisson4", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGetDirectionVectors32", {HIP_6000, HIP_0, HIP_0, }}, + {"hiprandGetDirectionVectors64", {HIP_6000, HIP_0, HIP_0, }}, + {"hiprandGetScrambleConstants32", {HIP_6000, HIP_0, HIP_0, }}, + {"hiprandGetScrambleConstants64", {HIP_6000, HIP_0, HIP_0, }}, {"hiprandSetGeneratorOrdering", {HIP_6020, HIP_0, HIP_0, }}, {"hiprandGenerateLongLong", {HIP_5050, HIP_0, HIP_0, }}, diff --git a/src/CUDA2HIP_RAND_API_types.cpp b/src/CUDA2HIP_RAND_API_types.cpp index 0184a0a6..b236fee1 100644 --- a/src/CUDA2HIP_RAND_API_types.cpp +++ b/src/CUDA2HIP_RAND_API_types.cpp @@ -25,242 +25,242 @@ THE SOFTWARE. 
// Map of all functions const std::map CUDA_RAND_TYPE_NAME_MAP { // RAND Host types - {"curandStatus", {"hiprandStatus", "rocrand_status", CONV_TYPE, API_RAND, 1}}, - {"curandStatus_t", {"hiprandStatus_t", "rocrand_status", CONV_TYPE, API_RAND, 1}}, + {"curandStatus", {"hiprandStatus", "rocrand_status", CONV_TYPE, API_RAND, 1}}, + {"curandStatus_t", {"hiprandStatus_t", "rocrand_status", CONV_TYPE, API_RAND, 1}}, // RAND function call status types (enum curandStatus) - {"CURAND_STATUS_SUCCESS", {"HIPRAND_STATUS_SUCCESS", "ROCRAND_STATUS_SUCCESS", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_VERSION_MISMATCH", {"HIPRAND_STATUS_VERSION_MISMATCH", "ROCRAND_STATUS_VERSION_MISMATCH", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_NOT_INITIALIZED", {"HIPRAND_STATUS_NOT_INITIALIZED", "ROCRAND_STATUS_NOT_CREATED", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_ALLOCATION_FAILED", {"HIPRAND_STATUS_ALLOCATION_FAILED", "ROCRAND_STATUS_ALLOCATION_FAILED", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_TYPE_ERROR", {"HIPRAND_STATUS_TYPE_ERROR", "ROCRAND_STATUS_TYPE_ERROR", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_OUT_OF_RANGE", {"HIPRAND_STATUS_OUT_OF_RANGE", "ROCRAND_STATUS_OUT_OF_RANGE", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_LENGTH_NOT_MULTIPLE", {"HIPRAND_STATUS_LENGTH_NOT_MULTIPLE", "ROCRAND_STATUS_LENGTH_NOT_MULTIPLE", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_DOUBLE_PRECISION_REQUIRED", {"HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED", "ROCRAND_STATUS_DOUBLE_PRECISION_REQUIRED", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_LAUNCH_FAILURE", {"HIPRAND_STATUS_LAUNCH_FAILURE", "ROCRAND_STATUS_LAUNCH_FAILURE", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_STATUS_PREEXISTING_FAILURE", {"HIPRAND_STATUS_PREEXISTING_FAILURE", "", CONV_NUMERIC_LITERAL, API_RAND, 1, ROC_UNSUPPORTED}}, - {"CURAND_STATUS_INITIALIZATION_FAILED", {"HIPRAND_STATUS_INITIALIZATION_FAILED", "", CONV_NUMERIC_LITERAL, 
API_RAND, 1, ROC_UNSUPPORTED}}, - {"CURAND_STATUS_ARCH_MISMATCH", {"HIPRAND_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, ROC_UNSUPPORTED}}, - {"CURAND_STATUS_INTERNAL_ERROR", {"HIPRAND_STATUS_INTERNAL_ERROR", "ROCRAND_STATUS_INTERNAL_ERROR", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_SUCCESS", {"HIPRAND_STATUS_SUCCESS", "ROCRAND_STATUS_SUCCESS", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_VERSION_MISMATCH", {"HIPRAND_STATUS_VERSION_MISMATCH", "ROCRAND_STATUS_VERSION_MISMATCH", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_NOT_INITIALIZED", {"HIPRAND_STATUS_NOT_INITIALIZED", "ROCRAND_STATUS_NOT_CREATED", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_ALLOCATION_FAILED", {"HIPRAND_STATUS_ALLOCATION_FAILED", "ROCRAND_STATUS_ALLOCATION_FAILED", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_TYPE_ERROR", {"HIPRAND_STATUS_TYPE_ERROR", "ROCRAND_STATUS_TYPE_ERROR", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_OUT_OF_RANGE", {"HIPRAND_STATUS_OUT_OF_RANGE", "ROCRAND_STATUS_OUT_OF_RANGE", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_LENGTH_NOT_MULTIPLE", {"HIPRAND_STATUS_LENGTH_NOT_MULTIPLE", "ROCRAND_STATUS_LENGTH_NOT_MULTIPLE", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_DOUBLE_PRECISION_REQUIRED", {"HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED", "ROCRAND_STATUS_DOUBLE_PRECISION_REQUIRED", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_LAUNCH_FAILURE", {"HIPRAND_STATUS_LAUNCH_FAILURE", "ROCRAND_STATUS_LAUNCH_FAILURE", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_STATUS_PREEXISTING_FAILURE", {"HIPRAND_STATUS_PREEXISTING_FAILURE", "", CONV_NUMERIC_LITERAL, API_RAND, 1, ROC_UNSUPPORTED}}, + {"CURAND_STATUS_INITIALIZATION_FAILED", {"HIPRAND_STATUS_INITIALIZATION_FAILED", "", CONV_NUMERIC_LITERAL, API_RAND, 1, ROC_UNSUPPORTED}}, + {"CURAND_STATUS_ARCH_MISMATCH", {"HIPRAND_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, ROC_UNSUPPORTED}}, + 
{"CURAND_STATUS_INTERNAL_ERROR", {"HIPRAND_STATUS_INTERNAL_ERROR", "ROCRAND_STATUS_INTERNAL_ERROR", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"curandRngType", {"hiprandRngType_t", "rocrand_rng_type", CONV_TYPE, API_RAND, 1}}, - {"curandRngType_t", {"hiprandRngType_t", "rocrand_rng_type", CONV_TYPE, API_RAND, 1}}, + {"curandRngType", {"hiprandRngType_t", "rocrand_rng_type", CONV_TYPE, API_RAND, 1}}, + {"curandRngType_t", {"hiprandRngType_t", "rocrand_rng_type", CONV_TYPE, API_RAND, 1}}, // RAND generator types (enum curandRngType) - {"CURAND_RNG_TEST", {"HIPRAND_RNG_TEST", "", CONV_NUMERIC_LITERAL, API_RAND, 1, ROC_UNSUPPORTED}}, - {"CURAND_RNG_PSEUDO_DEFAULT", {"HIPRAND_RNG_PSEUDO_DEFAULT", "ROCRAND_RNG_PSEUDO_DEFAULT", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_PSEUDO_XORWOW", {"HIPRAND_RNG_PSEUDO_XORWOW", "ROCRAND_RNG_PSEUDO_XORWOW", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_PSEUDO_MRG32K3A", {"HIPRAND_RNG_PSEUDO_MRG32K3A", "ROCRAND_RNG_PSEUDO_MRG32K3A", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_PSEUDO_MTGP32", {"HIPRAND_RNG_PSEUDO_MTGP32", "ROCRAND_RNG_PSEUDO_MTGP32", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_PSEUDO_MT19937", {"HIPRAND_RNG_PSEUDO_MT19937", "ROCRAND_RNG_PSEUDO_MT19937", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_PSEUDO_PHILOX4_32_10", {"HIPRAND_RNG_PSEUDO_PHILOX4_32_10", "ROCRAND_RNG_PSEUDO_PHILOX4_32_10", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_QUASI_DEFAULT", {"HIPRAND_RNG_QUASI_DEFAULT", "ROCRAND_RNG_QUASI_DEFAULT", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_QUASI_SOBOL32", {"HIPRAND_RNG_QUASI_SOBOL32", "ROCRAND_RNG_QUASI_SOBOL32", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_QUASI_SCRAMBLED_SOBOL32", {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32", "ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL32", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_RNG_QUASI_SOBOL64", {"HIPRAND_RNG_QUASI_SOBOL64", "ROCRAND_RNG_QUASI_SOBOL64", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - 
{"CURAND_RNG_QUASI_SCRAMBLED_SOBOL64", {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64", "ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL64", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_TEST", {"HIPRAND_RNG_TEST", "", CONV_NUMERIC_LITERAL, API_RAND, 1, ROC_UNSUPPORTED}}, + {"CURAND_RNG_PSEUDO_DEFAULT", {"HIPRAND_RNG_PSEUDO_DEFAULT", "ROCRAND_RNG_PSEUDO_DEFAULT", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_PSEUDO_XORWOW", {"HIPRAND_RNG_PSEUDO_XORWOW", "ROCRAND_RNG_PSEUDO_XORWOW", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_PSEUDO_MRG32K3A", {"HIPRAND_RNG_PSEUDO_MRG32K3A", "ROCRAND_RNG_PSEUDO_MRG32K3A", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_PSEUDO_MTGP32", {"HIPRAND_RNG_PSEUDO_MTGP32", "ROCRAND_RNG_PSEUDO_MTGP32", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_PSEUDO_MT19937", {"HIPRAND_RNG_PSEUDO_MT19937", "ROCRAND_RNG_PSEUDO_MT19937", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_PSEUDO_PHILOX4_32_10", {"HIPRAND_RNG_PSEUDO_PHILOX4_32_10", "ROCRAND_RNG_PSEUDO_PHILOX4_32_10", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_QUASI_DEFAULT", {"HIPRAND_RNG_QUASI_DEFAULT", "ROCRAND_RNG_QUASI_DEFAULT", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_QUASI_SOBOL32", {"HIPRAND_RNG_QUASI_SOBOL32", "ROCRAND_RNG_QUASI_SOBOL32", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_QUASI_SCRAMBLED_SOBOL32", {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32", "ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL32", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_QUASI_SOBOL64", {"HIPRAND_RNG_QUASI_SOBOL64", "ROCRAND_RNG_QUASI_SOBOL64", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_RNG_QUASI_SCRAMBLED_SOBOL64", {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64", "ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL64", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"curandOrdering", {"hiprandOrdering", "rocrand_ordering", CONV_TYPE, API_RAND, 1}}, - {"curandOrdering_t", {"hiprandOrdering_t", "rocrand_ordering", CONV_TYPE, API_RAND, 1}}, + {"curandOrdering", {"hiprandOrdering", "rocrand_ordering", CONV_TYPE, 
API_RAND, 1}}, + {"curandOrdering_t", {"hiprandOrdering_t", "rocrand_ordering", CONV_TYPE, API_RAND, 1}}, // RAND ordering of results in memory (enum curandOrdering) - {"CURAND_ORDERING_PSEUDO_BEST", {"HIPRAND_ORDERING_PSEUDO_BEST", "ROCRAND_ORDERING_PSEUDO_BEST", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_ORDERING_PSEUDO_DEFAULT", {"HIPRAND_ORDERING_PSEUDO_DEFAULT", "ROCRAND_ORDERING_PSEUDO_DEFAULT", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_ORDERING_PSEUDO_SEEDED", {"HIPRAND_ORDERING_PSEUDO_SEEDED", "ROCRAND_ORDERING_PSEUDO_SEEDED", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_ORDERING_PSEUDO_LEGACY", {"HIPRAND_ORDERING_PSEUDO_LEGACY", "ROCRAND_ORDERING_PSEUDO_LEGACY", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_ORDERING_PSEUDO_DYNAMIC", {"HIPRAND_ORDERING_PSEUDO_DYNAMIC", "ROCRAND_ORDERING_PSEUDO_DYNAMIC", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_ORDERING_QUASI_DEFAULT", {"HIPRAND_ORDERING_QUASI_DEFAULT", "ROCRAND_ORDERING_QUASI_DEFAULT", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_ORDERING_PSEUDO_BEST", {"HIPRAND_ORDERING_PSEUDO_BEST", "ROCRAND_ORDERING_PSEUDO_BEST", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_ORDERING_PSEUDO_DEFAULT", {"HIPRAND_ORDERING_PSEUDO_DEFAULT", "ROCRAND_ORDERING_PSEUDO_DEFAULT", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_ORDERING_PSEUDO_SEEDED", {"HIPRAND_ORDERING_PSEUDO_SEEDED", "ROCRAND_ORDERING_PSEUDO_SEEDED", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_ORDERING_PSEUDO_LEGACY", {"HIPRAND_ORDERING_PSEUDO_LEGACY", "ROCRAND_ORDERING_PSEUDO_LEGACY", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_ORDERING_PSEUDO_DYNAMIC", {"HIPRAND_ORDERING_PSEUDO_DYNAMIC", "ROCRAND_ORDERING_PSEUDO_DYNAMIC", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_ORDERING_QUASI_DEFAULT", {"HIPRAND_ORDERING_QUASI_DEFAULT", "ROCRAND_ORDERING_QUASI_DEFAULT", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"curandDirectionVectorSet", {"hiprandDirectionVectorSet_t", "rocrand_direction_vector_set", CONV_TYPE, API_RAND, 1}}, - 
{"curandDirectionVectorSet_t", {"hiprandDirectionVectorSet_t", "rocrand_direction_vector_set", CONV_TYPE, API_RAND, 1}}, + {"curandDirectionVectorSet", {"hiprandDirectionVectorSet_t", "rocrand_direction_vector_set", CONV_TYPE, API_RAND, 1}}, + {"curandDirectionVectorSet_t", {"hiprandDirectionVectorSet_t", "rocrand_direction_vector_set", CONV_TYPE, API_RAND, 1}}, // RAND choice of direction vector set (enum curandDirectionVectorSet) - {"CURAND_DIRECTION_VECTORS_32_JOEKUO6", {"HIPRAND_DIRECTION_VECTORS_32_JOEKUO6", "ROCRAND_DIRECTION_VECTORS_32_JOEKUO6", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", "ROCRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_DIRECTION_VECTORS_64_JOEKUO6", {"HIPRAND_DIRECTION_VECTORS_64_JOEKUO6", "ROCRAND_DIRECTION_VECTORS_64_JOEKUO6", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", "ROCRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_DIRECTION_VECTORS_32_JOEKUO6", {"HIPRAND_DIRECTION_VECTORS_32_JOEKUO6", "ROCRAND_DIRECTION_VECTORS_32_JOEKUO6", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", "ROCRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_DIRECTION_VECTORS_64_JOEKUO6", {"HIPRAND_DIRECTION_VECTORS_64_JOEKUO6", "ROCRAND_DIRECTION_VECTORS_64_JOEKUO6", CONV_NUMERIC_LITERAL, API_RAND, 1}}, + {"CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", "ROCRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", CONV_NUMERIC_LITERAL, API_RAND, 1}}, - {"curandGenerator_st", {"hiprandGenerator_st", "rocrand_generator_base_type", CONV_TYPE, API_RAND, 1}}, - {"curandGenerator_t", {"hiprandGenerator_t", 
"rocrand_generator", CONV_TYPE, API_RAND, 1}}, + {"curandGenerator_st", {"hiprandGenerator_st", "rocrand_generator_base_type", CONV_TYPE, API_RAND, 1}}, + {"curandGenerator_t", {"hiprandGenerator_t", "rocrand_generator", CONV_TYPE, API_RAND, 1}}, - {"curandDistribution_st", {"hiprandDistribution_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandDistribution_t", {"hiprandDistribution_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDistribution_st", {"hiprandDistribution_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDistribution_t", {"hiprandDistribution_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandHistogramM2V_st", {"hiprandHistogramM2V_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandHistogramM2V_t", {"hiprandHistogramM2V_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2V_st", {"hiprandHistogramM2V_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2V_t", {"hiprandHistogramM2V_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandDistributionShift_st", {"hiprandDistributionShift_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandDistributionShift_t", {"hiprandDistributionShift_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDistributionShift_st", {"hiprandDistributionShift_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDistributionShift_t", {"hiprandDistributionShift_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandDistributionM2Shift_st", {"hiprandDistributionM2Shift_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandDistributionM2Shift_t", {"hiprandDistributionM2Shift_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandHistogramM2_st", {"hiprandHistogramM2_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandHistogramM2_t", {"hiprandHistogramM2_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandHistogramM2K_st", {"hiprandHistogramM2K_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandHistogramM2K_t", 
{"hiprandHistogramM2K_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandDiscreteDistribution_st", {"hiprandDiscreteDistribution_st", "rocrand_discrete_distribution_st", CONV_TYPE, API_RAND, 1}}, - {"curandDiscreteDistribution_t", {"hiprandDiscreteDistribution_t", "rocrand_discrete_distribution", CONV_TYPE, API_RAND, 1}}, - {"curandMethod", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandMethod_t", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandDirectionVectors32_t", {"hiprandDirectionVectors32_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, - {"curandDirectionVectors64_t", {"hiprandDirectionVectors64_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandDistributionM2Shift_st", {"hiprandDistributionM2Shift_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDistributionM2Shift_t", {"hiprandDistributionM2Shift_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2_st", {"hiprandHistogramM2_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2_t", {"hiprandHistogramM2_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2K_st", {"hiprandHistogramM2K_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2K_t", {"hiprandHistogramM2K_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDiscreteDistribution_st", {"hiprandDiscreteDistribution_st", "rocrand_discrete_distribution_st", CONV_TYPE, API_RAND, 1}}, + {"curandDiscreteDistribution_t", {"hiprandDiscreteDistribution_t", "rocrand_discrete_distribution", CONV_TYPE, API_RAND, 1}}, + {"curandMethod", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandMethod_t", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDirectionVectors32_t", {"hiprandDirectionVectors32_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandDirectionVectors64_t", {"hiprandDirectionVectors64_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, // RAND types for 
Device functions - {"curandStateMtgp32", {"hiprandStateMtgp32", "rocrand_device::mtgp32_engine", CONV_TYPE, API_RAND, 1}}, - {"curandStateMtgp32_t", {"hiprandStateMtgp32_t", "rocrand_state_mtgp32", CONV_TYPE, API_RAND, 1}}, - {"curandStateScrambledSobol64", {"hiprandStateScrambledSobol64", "rocrand_device::scrambled_sobol64_engine", CONV_TYPE, API_RAND, 1}}, - {"curandStateScrambledSobol64_t", {"hiprandStateScrambledSobol64_t", "rocrand_state_scrambled_sobol64", CONV_TYPE, API_RAND, 1}}, - {"curandStateSobol64", {"hiprandStateSobol64", "rocrand_device::sobol64_engine", CONV_TYPE, API_RAND, 1}}, - {"curandStateSobol64_t", {"hiprandStateSobol64_t", "rocrand_state_sobol64", CONV_TYPE, API_RAND, 1}}, - {"curandStateScrambledSobol32", {"hiprandStateScrambledSobol32", "rocrand_device::scrambled_sobol32_engine", CONV_TYPE, API_RAND, 1}}, - {"curandStateScrambledSobol32_t", {"hiprandStateScrambledSobol32_t", "rocrand_state_scrambled_sobol32", CONV_TYPE, API_RAND, 1}}, - {"curandStateSobol32", {"hiprandStateSobol32", "rocrand_device::sobol32_engine", CONV_TYPE, API_RAND, 1}}, - {"curandStateSobol32_t", {"hiprandStateSobol32_t", "rocrand_state_sobol32", CONV_TYPE, API_RAND, 1}}, - {"curandStateMRG32k3a", {"hiprandStateMRG32k3a", "rocrand_device::mrg32k3a_engine", CONV_TYPE, API_RAND, 1}}, - {"curandStateMRG32k3a_t", {"hiprandStateMRG32k3a_t", "rocrand_state_mrg32k3a", CONV_TYPE, API_RAND, 1}}, - {"curandStatePhilox4_32_10", {"hiprandStatePhilox4_32_10", "rocrand_device::philox4x32_10_engine", CONV_TYPE, API_RAND, 1}}, - {"curandStatePhilox4_32_10_t", {"hiprandStatePhilox4_32_10_t", "rocrand_state_philox4x32_10", CONV_TYPE, API_RAND, 1}}, - {"curandStateXORWOW", {"hiprandStateXORWOW", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, - {"curandStateXORWOW_t", {"hiprandStateXORWOW_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, - {"curandState", {"hiprandState", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, - {"curandState_t", {"hiprandState_t", "", CONV_TYPE, API_RAND, 1, 
ROC_UNSUPPORTED}}, + {"curandStateMtgp32", {"hiprandStateMtgp32", "rocrand_device::mtgp32_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateMtgp32_t", {"hiprandStateMtgp32_t", "rocrand_state_mtgp32", CONV_TYPE, API_RAND, 1}}, + {"curandStateScrambledSobol64", {"hiprandStateScrambledSobol64", "rocrand_device::scrambled_sobol64_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateScrambledSobol64_t", {"hiprandStateScrambledSobol64_t", "rocrand_state_scrambled_sobol64", CONV_TYPE, API_RAND, 1}}, + {"curandStateSobol64", {"hiprandStateSobol64", "rocrand_device::sobol64_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateSobol64_t", {"hiprandStateSobol64_t", "rocrand_state_sobol64", CONV_TYPE, API_RAND, 1}}, + {"curandStateScrambledSobol32", {"hiprandStateScrambledSobol32", "rocrand_device::scrambled_sobol32_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateScrambledSobol32_t", {"hiprandStateScrambledSobol32_t", "rocrand_state_scrambled_sobol32", CONV_TYPE, API_RAND, 1}}, + {"curandStateSobol32", {"hiprandStateSobol32", "rocrand_device::sobol32_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateSobol32_t", {"hiprandStateSobol32_t", "rocrand_state_sobol32", CONV_TYPE, API_RAND, 1}}, + {"curandStateMRG32k3a", {"hiprandStateMRG32k3a", "rocrand_device::mrg32k3a_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateMRG32k3a_t", {"hiprandStateMRG32k3a_t", "rocrand_state_mrg32k3a", CONV_TYPE, API_RAND, 1}}, + {"curandStatePhilox4_32_10", {"hiprandStatePhilox4_32_10", "rocrand_device::philox4x32_10_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStatePhilox4_32_10_t", {"hiprandStatePhilox4_32_10_t", "rocrand_state_philox4x32_10", CONV_TYPE, API_RAND, 1}}, + {"curandStateXORWOW", {"hiprandStateXORWOW", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandStateXORWOW_t", {"hiprandStateXORWOW_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandState", {"hiprandState", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandState_t", {"hiprandState_t", "", CONV_TYPE, API_RAND, 1, 
ROC_UNSUPPORTED}}, // RAND method (enum curandMethod) - {"CURAND_CHOOSE_BEST", {"HIPRAND_CHOOSE_BEST", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_ITR", {"HIPRAND_ITR", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_KNUTH", {"HIPRAND_KNUTH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_HITR", {"HIPRAND_HITR", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_M1", {"HIPRAND_M1", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_M2", {"HIPRAND_M2", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_BINARY_SEARCH", {"HIPRAND_BINARY_SEARCH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_DISCRETE_GAUSS", {"HIPRAND_DISCRETE_GAUSS", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_REJECTION", {"HIPRAND_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_DEVICE_API", {"HIPRAND_DEVICE_API", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_FAST_REJECTION", {"HIPRAND_FAST_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_3RD", {"HIPRAND_3RD", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_DEFINITION", {"HIPRAND_DEFINITION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, - {"CURAND_POISSON", {"HIPRAND_POISSON", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_CHOOSE_BEST", {"HIPRAND_CHOOSE_BEST", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_ITR", {"HIPRAND_ITR", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_KNUTH", {"HIPRAND_KNUTH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_HITR", {"HIPRAND_HITR", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_M1", {"HIPRAND_M1", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_M2", {"HIPRAND_M2", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_BINARY_SEARCH", {"HIPRAND_BINARY_SEARCH", "", 
CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_DISCRETE_GAUSS", {"HIPRAND_DISCRETE_GAUSS", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_REJECTION", {"HIPRAND_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_DEVICE_API", {"HIPRAND_DEVICE_API", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_FAST_REJECTION", {"HIPRAND_FAST_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_3RD", {"HIPRAND_3RD", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_DEFINITION", {"HIPRAND_DEFINITION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_POISSON", {"HIPRAND_POISSON", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, }; const std::map CUDA_RAND_TYPE_NAME_VER_MAP { - {"CURAND_ORDERING_PSEUDO_LEGACY", {CUDA_110, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 11001, CURAND_VERSION 10200, CURAND_VER_MAJOR 10 CURAND_VER_MINOR 2 CURAND_VER_PATCH 0 - {"CURAND_ORDERING_PSEUDO_DYNAMIC", {CUDA_115, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 11052, CURAND_VERSION 10207, CURAND_VER_MAJOR 10 CURAND_VER_MINOR 2 CURAND_VER_PATCH 7 + {"CURAND_ORDERING_PSEUDO_LEGACY", {CUDA_110, CUDA_0, CUDA_0, }}, // A: CUDA_VERSION 11001, CURAND_VERSION 10200, CURAND_VER_MAJOR 10 CURAND_VER_MINOR 2 CURAND_VER_PATCH 0 + {"CURAND_ORDERING_PSEUDO_DYNAMIC", {CUDA_115, CUDA_0, CUDA_0, }}, // A: CUDA_VERSION 11052, CURAND_VERSION 10207, CURAND_VER_MAJOR 10 CURAND_VER_MINOR 2 CURAND_VER_PATCH 7 }; const std::map HIP_RAND_TYPE_NAME_VER_MAP { - {"hiprandStatus", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandStatus_t", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandRngType_t", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerator_st", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandGenerator_t", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandDiscreteDistribution_st", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandDiscreteDistribution_t", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandDirectionVectors32_t", {HIP_1050, HIP_0, HIP_0 }}, - 
{"hiprandStateMtgp32", {HIP_1080, HIP_0, HIP_0 }}, - {"hiprandStateMtgp32_t", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandStateSobol32", {HIP_1080, HIP_0, HIP_0 }}, - {"hiprandStateSobol32_t", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandStateMRG32k3a", {HIP_1080, HIP_0, HIP_0 }}, - {"hiprandStateMRG32k3a_t", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandStatePhilox4_32_10", {HIP_1080, HIP_0, HIP_0 }}, - {"hiprandStatePhilox4_32_10_t", {HIP_1080, HIP_0, HIP_0 }}, - {"hiprandStateXORWOW", {HIP_1080, HIP_0, HIP_0 }}, - {"hiprandStateXORWOW_t", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandState", {HIP_1080, HIP_0, HIP_0 }}, - {"hiprandState_t", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_SUCCESS", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_VERSION_MISMATCH", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_NOT_INITIALIZED", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_ALLOCATION_FAILED", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_TYPE_ERROR", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_OUT_OF_RANGE", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_LENGTH_NOT_MULTIPLE", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_LAUNCH_FAILURE", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_PREEXISTING_FAILURE", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_INITIALIZATION_FAILED", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_ARCH_MISMATCH", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_STATUS_INTERNAL_ERROR", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_TEST", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_PSEUDO_DEFAULT", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_PSEUDO_XORWOW", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_PSEUDO_MRG32K3A", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_PSEUDO_MTGP32", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_PSEUDO_MT19937", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_PSEUDO_PHILOX4_32_10", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_QUASI_DEFAULT", {HIP_1050, HIP_0, HIP_0 }}, - 
{"HIPRAND_RNG_QUASI_SOBOL32", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_QUASI_SOBOL64", {HIP_1050, HIP_0, HIP_0 }}, - {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64", {HIP_1050, HIP_0, HIP_0 }}, - {"hiprandDirectionVectorSet_t", {HIP_6000, HIP_0, HIP_0 }}, - {"HIPRAND_DIRECTION_VECTORS_32_JOEKUO6", {HIP_6000, HIP_0, HIP_0 }}, - {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6",{HIP_6000, HIP_0, HIP_0 }}, - {"HIPRAND_DIRECTION_VECTORS_64_JOEKUO6", {HIP_6000, HIP_0, HIP_0 }}, - {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6",{HIP_6000, HIP_0, HIP_0 }}, - {"hiprandDirectionVectors64_t", {HIP_6000, HIP_0, HIP_0 }}, - {"hiprandOrdering", {HIP_6020, HIP_0, HIP_0, }}, - {"hiprandOrdering_t", {HIP_6020, HIP_0, HIP_0, }}, - {"HIPRAND_ORDERING_PSEUDO_BEST", {HIP_6020, HIP_0, HIP_0, }}, - {"HIPRAND_ORDERING_PSEUDO_DEFAULT", {HIP_6020, HIP_0, HIP_0, }}, - {"HIPRAND_ORDERING_PSEUDO_SEEDED", {HIP_6020, HIP_0, HIP_0, }}, - {"HIPRAND_ORDERING_PSEUDO_LEGACY", {HIP_6020, HIP_0, HIP_0, }}, - {"HIPRAND_ORDERING_PSEUDO_DYNAMIC", {HIP_6020, HIP_0, HIP_0, }}, - {"HIPRAND_ORDERING_QUASI_DEFAULT", {HIP_6020, HIP_0, HIP_0, }}, - {"hiprandStateScrambledSobol32", {HIP_6020, HIP_0, HIP_0, }}, - {"hiprandStateScrambledSobol32_t", {HIP_6020, HIP_0, HIP_0, }}, - {"hiprandStateScrambledSobol64", {HIP_6020, HIP_0, HIP_0, }}, - {"hiprandStateScrambledSobol64_t", {HIP_6020, HIP_0, HIP_0, }}, - {"hiprandStateSobol64", {HIP_6020, HIP_0, HIP_0, }}, - {"hiprandStateSobol64_t", {HIP_6020, HIP_0, HIP_0, }}, + {"hiprandStatus", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandStatus_t", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandRngType_t", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerator_st", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandGenerator_t", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandDiscreteDistribution_st", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandDiscreteDistribution_t", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandDirectionVectors32_t", {HIP_1050, HIP_0, 
HIP_0, }}, + {"hiprandStateMtgp32", {HIP_1080, HIP_0, HIP_0, }}, + {"hiprandStateMtgp32_t", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandStateSobol32", {HIP_1080, HIP_0, HIP_0, }}, + {"hiprandStateSobol32_t", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandStateMRG32k3a", {HIP_1080, HIP_0, HIP_0, }}, + {"hiprandStateMRG32k3a_t", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandStatePhilox4_32_10", {HIP_1080, HIP_0, HIP_0, }}, + {"hiprandStatePhilox4_32_10_t", {HIP_1080, HIP_0, HIP_0, }}, + {"hiprandStateXORWOW", {HIP_1080, HIP_0, HIP_0, }}, + {"hiprandStateXORWOW_t", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandState", {HIP_1080, HIP_0, HIP_0, }}, + {"hiprandState_t", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_SUCCESS", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_VERSION_MISMATCH", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_NOT_INITIALIZED", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_ALLOCATION_FAILED", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_TYPE_ERROR", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_OUT_OF_RANGE", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_LENGTH_NOT_MULTIPLE", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_LAUNCH_FAILURE", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_PREEXISTING_FAILURE", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_INITIALIZATION_FAILED", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_ARCH_MISMATCH", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_STATUS_INTERNAL_ERROR", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_TEST", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_PSEUDO_DEFAULT", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_PSEUDO_XORWOW", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_PSEUDO_MRG32K3A", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_PSEUDO_MTGP32", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_PSEUDO_MT19937", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_PSEUDO_PHILOX4_32_10", {HIP_1050, HIP_0, HIP_0, }}, + 
{"HIPRAND_RNG_QUASI_DEFAULT", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_QUASI_SOBOL32", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_QUASI_SOBOL64", {HIP_1050, HIP_0, HIP_0, }}, + {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64", {HIP_1050, HIP_0, HIP_0, }}, + {"hiprandDirectionVectorSet_t", {HIP_6000, HIP_0, HIP_0, }}, + {"HIPRAND_DIRECTION_VECTORS_32_JOEKUO6", {HIP_6000, HIP_0, HIP_0, }}, + {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", {HIP_6000, HIP_0, HIP_0, }}, + {"HIPRAND_DIRECTION_VECTORS_64_JOEKUO6", {HIP_6000, HIP_0, HIP_0, }}, + {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", {HIP_6000, HIP_0, HIP_0, }}, + {"hiprandDirectionVectors64_t", {HIP_6000, HIP_0, HIP_0, }}, + {"hiprandOrdering", {HIP_6020, HIP_0, HIP_0, }}, + {"hiprandOrdering_t", {HIP_6020, HIP_0, HIP_0, }}, + {"HIPRAND_ORDERING_PSEUDO_BEST", {HIP_6020, HIP_0, HIP_0, }}, + {"HIPRAND_ORDERING_PSEUDO_DEFAULT", {HIP_6020, HIP_0, HIP_0, }}, + {"HIPRAND_ORDERING_PSEUDO_SEEDED", {HIP_6020, HIP_0, HIP_0, }}, + {"HIPRAND_ORDERING_PSEUDO_LEGACY", {HIP_6020, HIP_0, HIP_0, }}, + {"HIPRAND_ORDERING_PSEUDO_DYNAMIC", {HIP_6020, HIP_0, HIP_0, }}, + {"HIPRAND_ORDERING_QUASI_DEFAULT", {HIP_6020, HIP_0, HIP_0, }}, + {"hiprandStateScrambledSobol32", {HIP_6020, HIP_0, HIP_0, }}, + {"hiprandStateScrambledSobol32_t", {HIP_6020, HIP_0, HIP_0, }}, + {"hiprandStateScrambledSobol64", {HIP_6020, HIP_0, HIP_0, }}, + {"hiprandStateScrambledSobol64_t", {HIP_6020, HIP_0, HIP_0, }}, + {"hiprandStateSobol64", {HIP_6020, HIP_0, HIP_0, }}, + {"hiprandStateSobol64_t", {HIP_6020, HIP_0, HIP_0, }}, - {"rocrand_status", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_SUCCESS", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_VERSION_MISMATCH", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_NOT_CREATED", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_ALLOCATION_FAILED", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_TYPE_ERROR", {HIP_1050, HIP_0, HIP_0 }}, - 
{"ROCRAND_STATUS_OUT_OF_RANGE", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_LENGTH_NOT_MULTIPLE", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_DOUBLE_PRECISION_REQUIRED", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_LAUNCH_FAILURE", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_STATUS_INTERNAL_ERROR", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_rng_type", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_PSEUDO_DEFAULT", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_PSEUDO_XORWOW", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_PSEUDO_MRG32K3A", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_PSEUDO_MTGP32", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_PSEUDO_MT19937", {HIP_5050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_PSEUDO_PHILOX4_32_10", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_QUASI_DEFAULT", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_QUASI_SOBOL32", {HIP_1050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL32", {HIP_5040, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_QUASI_SOBOL64", {HIP_4050, HIP_0, HIP_0 }}, - {"ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL64", {HIP_5040, HIP_0, HIP_0 }}, - {"rocrand_ordering", {HIP_5050, HIP_0, HIP_0 }}, - {"ROCRAND_ORDERING_PSEUDO_BEST", {HIP_5050, HIP_0, HIP_0 }}, - {"ROCRAND_ORDERING_PSEUDO_DEFAULT", {HIP_5050, HIP_0, HIP_0 }}, - {"ROCRAND_ORDERING_PSEUDO_SEEDED", {HIP_5050, HIP_0, HIP_0 }}, - {"ROCRAND_ORDERING_PSEUDO_LEGACY", {HIP_5050, HIP_0, HIP_0 }}, - {"ROCRAND_ORDERING_PSEUDO_DYNAMIC", {HIP_5050, HIP_0, HIP_0 }}, - {"ROCRAND_ORDERING_QUASI_DEFAULT", {HIP_5050, HIP_0, HIP_0 }}, - {"rocrand_direction_vector_set", {HIP_6000, HIP_0, HIP_0 }}, - {"ROCRAND_DIRECTION_VECTORS_32_JOEKUO6", {HIP_6000, HIP_0, HIP_0 }}, - {"ROCRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6",{HIP_6000, HIP_0, HIP_0 }}, - {"ROCRAND_DIRECTION_VECTORS_64_JOEKUO6", {HIP_6000, HIP_0, HIP_0 }}, - {"ROCRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6",{HIP_6000, HIP_0, HIP_0 }}, - {"rocrand_generator_base_type", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_generator", {HIP_1050, HIP_0, HIP_0 }}, - 
{"rocrand_discrete_distribution_st", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_discrete_distribution", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_device::philox4x32_10_engine", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_state_philox4x32_10", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_device::mtgp32_engine", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_state_mtgp32", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_device::scrambled_sobol32_engine", {HIP_5040, HIP_0, HIP_0 }}, - {"rocrand_state_scrambled_sobol32", {HIP_5040, HIP_0, HIP_0 }}, - {"rocrand_device::scrambled_sobol64_engine", {HIP_5040, HIP_0, HIP_0 }}, - {"rocrand_state_scrambled_sobol64", {HIP_5040, HIP_0, HIP_0 }}, - {"rocrand_device::sobol32_engine", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_state_sobol32", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_device::sobol64_engine", {HIP_4050, HIP_0, HIP_0 }}, - {"rocrand_state_sobol64", {HIP_4050, HIP_0, HIP_0 }}, - {"rocrand_device::mrg32k3a_engine", {HIP_1050, HIP_0, HIP_0 }}, - {"rocrand_state_mrg32k3a", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_status", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_SUCCESS", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_VERSION_MISMATCH", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_NOT_CREATED", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_ALLOCATION_FAILED", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_TYPE_ERROR", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_OUT_OF_RANGE", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_LENGTH_NOT_MULTIPLE", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_DOUBLE_PRECISION_REQUIRED", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_LAUNCH_FAILURE", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_STATUS_INTERNAL_ERROR", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_rng_type", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_PSEUDO_DEFAULT", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_PSEUDO_XORWOW", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_PSEUDO_MRG32K3A", {HIP_1050, HIP_0, HIP_0, }}, + 
{"ROCRAND_RNG_PSEUDO_MTGP32", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_PSEUDO_MT19937", {HIP_5050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_PSEUDO_PHILOX4_32_10", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_QUASI_DEFAULT", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_QUASI_SOBOL32", {HIP_1050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL32", {HIP_5040, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_QUASI_SOBOL64", {HIP_4050, HIP_0, HIP_0, }}, + {"ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL64", {HIP_5040, HIP_0, HIP_0, }}, + {"rocrand_ordering", {HIP_5050, HIP_0, HIP_0, }}, + {"ROCRAND_ORDERING_PSEUDO_BEST", {HIP_5050, HIP_0, HIP_0, }}, + {"ROCRAND_ORDERING_PSEUDO_DEFAULT", {HIP_5050, HIP_0, HIP_0, }}, + {"ROCRAND_ORDERING_PSEUDO_SEEDED", {HIP_5050, HIP_0, HIP_0, }}, + {"ROCRAND_ORDERING_PSEUDO_LEGACY", {HIP_5050, HIP_0, HIP_0, }}, + {"ROCRAND_ORDERING_PSEUDO_DYNAMIC", {HIP_5050, HIP_0, HIP_0, }}, + {"ROCRAND_ORDERING_QUASI_DEFAULT", {HIP_5050, HIP_0, HIP_0, }}, + {"rocrand_direction_vector_set", {HIP_6000, HIP_0, HIP_0, }}, + {"ROCRAND_DIRECTION_VECTORS_32_JOEKUO6", {HIP_6000, HIP_0, HIP_0, }}, + {"ROCRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", {HIP_6000, HIP_0, HIP_0, }}, + {"ROCRAND_DIRECTION_VECTORS_64_JOEKUO6", {HIP_6000, HIP_0, HIP_0, }}, + {"ROCRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", {HIP_6000, HIP_0, HIP_0, }}, + {"rocrand_generator_base_type", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_generator", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_discrete_distribution_st", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_discrete_distribution", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_device::philox4x32_10_engine", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_state_philox4x32_10", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_device::mtgp32_engine", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_state_mtgp32", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_device::scrambled_sobol32_engine", {HIP_5040, HIP_0, HIP_0, }}, + {"rocrand_state_scrambled_sobol32", {HIP_5040, HIP_0, HIP_0, }}, + 
{"rocrand_device::scrambled_sobol64_engine", {HIP_5040, HIP_0, HIP_0, }}, + {"rocrand_state_scrambled_sobol64", {HIP_5040, HIP_0, HIP_0, }}, + {"rocrand_device::sobol32_engine", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_state_sobol32", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_device::sobol64_engine", {HIP_4050, HIP_0, HIP_0, }}, + {"rocrand_state_sobol64", {HIP_4050, HIP_0, HIP_0, }}, + {"rocrand_device::mrg32k3a_engine", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_state_mrg32k3a", {HIP_1050, HIP_0, HIP_0, }}, }; From b89d3ba893186fec9b8122f953eac0ac78443df1 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 18 Oct 2024 17:09:01 +0100 Subject: [PATCH 11/51] [HIPIFY][SOLVER] Sync with `CUDA 12.6.2` - Step 1 - Solver API + Updated the regenerated `hipify-perl` and `SOLVER` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 4 ++++ docs/tables/CUSOLVER_API_supported_by_HIP.md | 4 ++++ docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md | 4 ++++ docs/tables/CUSOLVER_API_supported_by_ROC.md | 4 ++++ src/CUDA2HIP_SOLVER_API_functions.cpp | 8 ++++++++ 5 files changed, 24 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index c42496c4..0c743d8c 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -9342,6 +9342,8 @@ sub warnUnsupportedFunctions { "cusolverDnXsyevdx", "cusolverDnXsyevd_bufferSize", "cusolverDnXsyevd", + "cusolverDnXsyevBatched_bufferSize", + "cusolverDnXsyevBatched", "cusolverDnXlarft_bufferSize", "cusolverDnXlarft", "cusolverDnXgesvdr_bufferSize", @@ -9350,6 +9352,8 @@ sub warnUnsupportedFunctions { "cusolverDnXgesvdp", "cusolverDnXgesvd_bufferSize", "cusolverDnXgesvd", + "cusolverDnXgeev_bufferSize", + "cusolverDnXgeev", "cusolverDnSyevdx_bufferSize", "cusolverDnSyevdx", "cusolverDnSyevd_bufferSize", diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP.md b/docs/tables/CUSOLVER_API_supported_by_HIP.md index d5f054e0..c2535454 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP.md @@ 
-426,6 +426,8 @@ |`cusolverDnSyevd_bufferSize`|11.0|11.1| | | | | | | | | |`cusolverDnSyevdx`|11.0|11.1| | | | | | | | | |`cusolverDnSyevdx_bufferSize`|11.0|11.1| | | | | | | | | +|`cusolverDnXgeev`|12.6| | | | | | | | | | +|`cusolverDnXgeev_bufferSize`|12.6| | | | | | | | | | |`cusolverDnXgeqrf`|11.1| | | |`hipsolverDnXgeqrf`|6.3.0| | | |6.3.0| |`cusolverDnXgeqrf_bufferSize`|11.1| | | |`hipsolverDnXgeqrf_bufferSize`|6.3.0| | | |6.3.0| |`cusolverDnXgesvd`|11.1| | | | | | | | | | @@ -447,6 +449,8 @@ |`cusolverDnXpotrf`|11.1| | | |`hipsolverDnXpotrf`|6.3.0| | | |6.3.0| |`cusolverDnXpotrf_bufferSize`|11.1| | | |`hipsolverDnXpotrf_bufferSize`|6.3.0| | | |6.3.0| |`cusolverDnXpotrs`|11.1| | | |`hipsolverDnXpotrs`|6.3.0| | | |6.3.0| +|`cusolverDnXsyevBatched`|12.6| | | | | | | | | | +|`cusolverDnXsyevBatched_bufferSize`|12.6| | | | | | | | | | |`cusolverDnXsyevd`|11.1| | | | | | | | | | |`cusolverDnXsyevd_bufferSize`|11.1| | | | | | | | | | |`cusolverDnXsyevdx`|11.1| | | | | | | | | | diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md index 44ca42b8..44259ab8 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md @@ -426,6 +426,8 @@ |`cusolverDnSyevd_bufferSize`|11.0|11.1| | | | | | | | | | | | | | | |`cusolverDnSyevdx`|11.0|11.1| | | | | | | | | | | | | | | |`cusolverDnSyevdx_bufferSize`|11.0|11.1| | | | | | | | | | | | | | | +|`cusolverDnXgeev`|12.6| | | | | | | | | | | | | | | | +|`cusolverDnXgeev_bufferSize`|12.6| | | | | | | | | | | | | | | | |`cusolverDnXgeqrf`|11.1| | | |`hipsolverDnXgeqrf`|6.3.0| | | |6.3.0| | | | | | | |`cusolverDnXgeqrf_bufferSize`|11.1| | | |`hipsolverDnXgeqrf_bufferSize`|6.3.0| | | |6.3.0| | | | | | | |`cusolverDnXgesvd`|11.1| | | | | | | | | | | | | | | | @@ -447,6 +449,8 @@ |`cusolverDnXpotrf`|11.1| | | |`hipsolverDnXpotrf`|6.3.0| | | |6.3.0| | | | | | | |`cusolverDnXpotrf_bufferSize`|11.1| | | 
|`hipsolverDnXpotrf_bufferSize`|6.3.0| | | |6.3.0| | | | | | | |`cusolverDnXpotrs`|11.1| | | |`hipsolverDnXpotrs`|6.3.0| | | |6.3.0| | | | | | | +|`cusolverDnXsyevBatched`|12.6| | | | | | | | | | | | | | | | +|`cusolverDnXsyevBatched_bufferSize`|12.6| | | | | | | | | | | | | | | | |`cusolverDnXsyevd`|11.1| | | | | | | | | | | | | | | | |`cusolverDnXsyevd_bufferSize`|11.1| | | | | | | | | | | | | | | | |`cusolverDnXsyevdx`|11.1| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUSOLVER_API_supported_by_ROC.md b/docs/tables/CUSOLVER_API_supported_by_ROC.md index f27b6b21..b7956085 100644 --- a/docs/tables/CUSOLVER_API_supported_by_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_ROC.md @@ -426,6 +426,8 @@ |`cusolverDnSyevd_bufferSize`|11.0|11.1| | | | | | | | | |`cusolverDnSyevdx`|11.0|11.1| | | | | | | | | |`cusolverDnSyevdx_bufferSize`|11.0|11.1| | | | | | | | | +|`cusolverDnXgeev`|12.6| | | | | | | | | | +|`cusolverDnXgeev_bufferSize`|12.6| | | | | | | | | | |`cusolverDnXgeqrf`|11.1| | | | | | | | | | |`cusolverDnXgeqrf_bufferSize`|11.1| | | | | | | | | | |`cusolverDnXgesvd`|11.1| | | | | | | | | | @@ -447,6 +449,8 @@ |`cusolverDnXpotrf`|11.1| | | | | | | | | | |`cusolverDnXpotrf_bufferSize`|11.1| | | | | | | | | | |`cusolverDnXpotrs`|11.1| | | | | | | | | | +|`cusolverDnXsyevBatched`|12.6| | | | | | | | | | +|`cusolverDnXsyevBatched_bufferSize`|12.6| | | | | | | | | | |`cusolverDnXsyevd`|11.1| | | | | | | | | | |`cusolverDnXsyevd_bufferSize`|11.1| | | | | | | | | | |`cusolverDnXsyevdx`|11.1| | | | | | | | | | diff --git a/src/CUDA2HIP_SOLVER_API_functions.cpp b/src/CUDA2HIP_SOLVER_API_functions.cpp index 0eaf1c92..e24e6484 100644 --- a/src/CUDA2HIP_SOLVER_API_functions.cpp +++ b/src/CUDA2HIP_SOLVER_API_functions.cpp @@ -457,11 +457,15 @@ const std::map CUDA_SOLVER_FUNCTION_MAP { {"cusolverDnXpotrs", {"hipsolverDnXpotrs", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cusolverDnXgeqrf_bufferSize", {"hipsolverDnXgeqrf_bufferSize", 
"", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cusolverDnXgeqrf", {"hipsolverDnXgeqrf", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnXsyevBatched", {"hipsolverDnXsyevBatched", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnXsyevBatched_bufferSize", {"hipsolverDnXsyevBatched_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXsyevd_bufferSize", {"hipsolverDnXsyevd_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXsyevd", {"hipsolverDnXsyevd", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXsyevdx_bufferSize", {"hipsolverDnXsyevdx_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXsyevdx", {"hipsolverDnXsyevdx", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXgesvd_bufferSize", {"hipsolverDnXgesvd_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnXgeev", {"hipsolverDnXgeev", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnXgeev_bufferSize", {"hipsolverDnXgeev_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXgesvd", {"hipsolverDnXgesvd", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXgesvdp_bufferSize", {"hipsolverDnXgesvdp_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXgesvdp", {"hipsolverDnXgesvdp", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, @@ -1161,6 +1165,10 @@ const std::map CUDA_SOLVER_FUNCTION_VER_MAP { {"cusolverSpZcsrcholDiag", {CUDA_101, CUDA_0, CUDA_0 }}, // CUSOLVER_VERSION 10200 {"cusolverDnXlarft", {CUDA_124, CUDA_0, CUDA_0 }}, {"cusolverDnXlarft_bufferSize", {CUDA_124, CUDA_0, CUDA_0 }}, + {"cusolverDnXgeev", {CUDA_126, CUDA_0, CUDA_0 }}, // CUSOLVER_VERSION 11701 + {"cusolverDnXgeev_bufferSize", {CUDA_126, CUDA_0, CUDA_0 }}, // CUSOLVER_VERSION 11701 + {"cusolverDnXsyevBatched", {CUDA_126, CUDA_0, CUDA_0 }}, // CUSOLVER_VERSION 11701 + 
{"cusolverDnXsyevBatched_bufferSize", {CUDA_126, CUDA_0, CUDA_0 }}, // CUSOLVER_VERSION 11701 }; const std::map HIP_SOLVER_FUNCTION_VER_MAP { From 1f16423f341ab90530cd60b7e3d32a901000c189 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 18 Oct 2024 17:34:21 +0100 Subject: [PATCH 12/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 2 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 64 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 32 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 32 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 32 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 64 +++++++++++++++++++ 5 files changed, 224 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 0c743d8c..a5d3d574 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12510,9 +12510,20 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_96x64", "CUBLASLT_MATMUL_TILE_96x128", "CUBLASLT_MATMUL_TILE_8x8", + "CUBLASLT_MATMUL_TILE_8x768", + "CUBLASLT_MATMUL_TILE_8x704", + "CUBLASLT_MATMUL_TILE_8x640", "CUBLASLT_MATMUL_TILE_8x64", + "CUBLASLT_MATMUL_TILE_8x576", + "CUBLASLT_MATMUL_TILE_8x512", + "CUBLASLT_MATMUL_TILE_8x448", + "CUBLASLT_MATMUL_TILE_8x384", + "CUBLASLT_MATMUL_TILE_8x320", "CUBLASLT_MATMUL_TILE_8x32", + "CUBLASLT_MATMUL_TILE_8x256", + "CUBLASLT_MATMUL_TILE_8x192", "CUBLASLT_MATMUL_TILE_8x16", + "CUBLASLT_MATMUL_TILE_8x128", "CUBLASLT_MATMUL_TILE_64x96", "CUBLASLT_MATMUL_TILE_64x8", "CUBLASLT_MATMUL_TILE_64x64", @@ -12530,10 +12541,31 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_256x64", "CUBLASLT_MATMUL_TILE_256x32", "CUBLASLT_MATMUL_TILE_256x128", + "CUBLASLT_MATMUL_TILE_24x64", + "CUBLASLT_MATMUL_TILE_24x576", + "CUBLASLT_MATMUL_TILE_24x512", + "CUBLASLT_MATMUL_TILE_24x448", + "CUBLASLT_MATMUL_TILE_24x384", + "CUBLASLT_MATMUL_TILE_24x320", + "CUBLASLT_MATMUL_TILE_24x256", + "CUBLASLT_MATMUL_TILE_24x192", + 
"CUBLASLT_MATMUL_TILE_24x128", "CUBLASLT_MATMUL_TILE_192x128", "CUBLASLT_MATMUL_TILE_16x8", + "CUBLASLT_MATMUL_TILE_16x768", + "CUBLASLT_MATMUL_TILE_16x704", + "CUBLASLT_MATMUL_TILE_16x640", + "CUBLASLT_MATMUL_TILE_16x64", + "CUBLASLT_MATMUL_TILE_16x576", + "CUBLASLT_MATMUL_TILE_16x512", + "CUBLASLT_MATMUL_TILE_16x448", + "CUBLASLT_MATMUL_TILE_16x384", + "CUBLASLT_MATMUL_TILE_16x320", "CUBLASLT_MATMUL_TILE_16x32", + "CUBLASLT_MATMUL_TILE_16x256", + "CUBLASLT_MATMUL_TILE_16x192", "CUBLASLT_MATMUL_TILE_16x16", + "CUBLASLT_MATMUL_TILE_16x128", "CUBLASLT_MATMUL_TILE_160x128", "CUBLASLT_MATMUL_TILE_128x96", "CUBLASLT_MATMUL_TILE_128x64", @@ -13965,9 +13997,20 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_96x64", "CUBLASLT_MATMUL_TILE_96x128", "CUBLASLT_MATMUL_TILE_8x8", + "CUBLASLT_MATMUL_TILE_8x768", + "CUBLASLT_MATMUL_TILE_8x704", + "CUBLASLT_MATMUL_TILE_8x640", "CUBLASLT_MATMUL_TILE_8x64", + "CUBLASLT_MATMUL_TILE_8x576", + "CUBLASLT_MATMUL_TILE_8x512", + "CUBLASLT_MATMUL_TILE_8x448", + "CUBLASLT_MATMUL_TILE_8x384", + "CUBLASLT_MATMUL_TILE_8x320", "CUBLASLT_MATMUL_TILE_8x32", + "CUBLASLT_MATMUL_TILE_8x256", + "CUBLASLT_MATMUL_TILE_8x192", "CUBLASLT_MATMUL_TILE_8x16", + "CUBLASLT_MATMUL_TILE_8x128", "CUBLASLT_MATMUL_TILE_64x96", "CUBLASLT_MATMUL_TILE_64x8", "CUBLASLT_MATMUL_TILE_64x64", @@ -13985,10 +14028,31 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_256x64", "CUBLASLT_MATMUL_TILE_256x32", "CUBLASLT_MATMUL_TILE_256x128", + "CUBLASLT_MATMUL_TILE_24x64", + "CUBLASLT_MATMUL_TILE_24x576", + "CUBLASLT_MATMUL_TILE_24x512", + "CUBLASLT_MATMUL_TILE_24x448", + "CUBLASLT_MATMUL_TILE_24x384", + "CUBLASLT_MATMUL_TILE_24x320", + "CUBLASLT_MATMUL_TILE_24x256", + "CUBLASLT_MATMUL_TILE_24x192", + "CUBLASLT_MATMUL_TILE_24x128", "CUBLASLT_MATMUL_TILE_192x128", "CUBLASLT_MATMUL_TILE_16x8", + "CUBLASLT_MATMUL_TILE_16x768", + "CUBLASLT_MATMUL_TILE_16x704", + "CUBLASLT_MATMUL_TILE_16x640", + "CUBLASLT_MATMUL_TILE_16x64", + "CUBLASLT_MATMUL_TILE_16x576", 
+ "CUBLASLT_MATMUL_TILE_16x512", + "CUBLASLT_MATMUL_TILE_16x448", + "CUBLASLT_MATMUL_TILE_16x384", + "CUBLASLT_MATMUL_TILE_16x320", "CUBLASLT_MATMUL_TILE_16x32", + "CUBLASLT_MATMUL_TILE_16x256", + "CUBLASLT_MATMUL_TILE_16x192", "CUBLASLT_MATMUL_TILE_16x16", + "CUBLASLT_MATMUL_TILE_16x128", "CUBLASLT_MATMUL_TILE_160x128", "CUBLASLT_MATMUL_TILE_128x96", "CUBLASLT_MATMUL_TILE_128x64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 8a6cc80e..0f3b9ad5 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -330,10 +330,31 @@ |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x384`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_24x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | @@ -351,9 +372,20 @@ |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x128`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x64`|11.3| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index eacf573f..b6636f5c 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -330,10 +330,31 @@ |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x256`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x32`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x576`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x8`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x576`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | | | | | | | @@ -351,9 +372,20 @@ |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x256`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x32`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x576`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x8`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x128`|11.3| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x64`|11.3| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 20cbb3db..7584c23e 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -330,10 +330,31 @@ |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x320`|12.6| | | | 
| | | | | | +|`CUBLASLT_MATMUL_TILE_16x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_16x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | @@ -351,9 +372,20 @@ |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x64`|10.1| 
| | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_8x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x128`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x64`|11.3| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 78321ce6..4eb7b5cf 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -240,6 +240,38 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_128x96", {"HIPBLASLT_MATMUL_TILE_128x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_32x256", {"HIPBLASLT_MATMUL_TILE_32x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_256x32", {"HIPBLASLT_MATMUL_TILE_256x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x128", {"HIPBLASLT_MATMUL_TILE_8x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x192", {"HIPBLASLT_MATMUL_TILE_8x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x256", {"HIPBLASLT_MATMUL_TILE_8x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x320", {"HIPBLASLT_MATMUL_TILE_8x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x384", {"HIPBLASLT_MATMUL_TILE_8x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x448", {"HIPBLASLT_MATMUL_TILE_8x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x512", {"HIPBLASLT_MATMUL_TILE_8x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x576", {"HIPBLASLT_MATMUL_TILE_8x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x640", {"HIPBLASLT_MATMUL_TILE_8x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x704", {"HIPBLASLT_MATMUL_TILE_8x704", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_8x768", {"HIPBLASLT_MATMUL_TILE_8x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x64", {"HIPBLASLT_MATMUL_TILE_16x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x128", {"HIPBLASLT_MATMUL_TILE_16x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x192", {"HIPBLASLT_MATMUL_TILE_16x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x256", {"HIPBLASLT_MATMUL_TILE_16x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x320", {"HIPBLASLT_MATMUL_TILE_16x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x384", {"HIPBLASLT_MATMUL_TILE_16x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x448", {"HIPBLASLT_MATMUL_TILE_16x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x512", {"HIPBLASLT_MATMUL_TILE_16x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x576", {"HIPBLASLT_MATMUL_TILE_16x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x640", {"HIPBLASLT_MATMUL_TILE_16x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x704", {"HIPBLASLT_MATMUL_TILE_16x704", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_16x768", {"HIPBLASLT_MATMUL_TILE_16x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x64", {"HIPBLASLT_MATMUL_TILE_24x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x128", {"HIPBLASLT_MATMUL_TILE_24x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x192", {"HIPBLASLT_MATMUL_TILE_24x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x256", {"HIPBLASLT_MATMUL_TILE_24x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x320", {"HIPBLASLT_MATMUL_TILE_24x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x384", {"HIPBLASLT_MATMUL_TILE_24x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x448", {"HIPBLASLT_MATMUL_TILE_24x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x512", {"HIPBLASLT_MATMUL_TILE_24x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x576", {"HIPBLASLT_MATMUL_TILE_24x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ 
-897,6 +929,38 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_ALGO_CONFIG_INNER_SHAPE_ID", {CUDA_118, CUDA_0, CUDA_0 }}, {"CUBLASLT_ALGO_CONFIG_CLUSTER_SHAPE_ID", {CUDA_118, CUDA_0, CUDA_0 }}, {"cublasLtLoggerCallback_t", {CUDA_110, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 11003, CUBLAS_VERSION 11200, CUBLAS_VER_MAJOR 11 CUBLAS_VER_MINOR 2 + {"CUBLASLT_MATMUL_TILE_8x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x704", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_8x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x704", {CUDA_126, CUDA_0, CUDA_0 
}}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_16x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From 4ddc640614311be7932e3b6588172d3455836fe9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 13:45:41 +0000 Subject: [PATCH 13/51] Bump rocm-docs-core from 1.8.2 to 1.8.3 in /docs/sphinx Bumps [rocm-docs-core](https://github.com/ROCm/rocm-docs-core) from 1.8.2 to 1.8.3. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.8.2...v1.8.3) --- updated-dependencies: - dependency-name: rocm-docs-core dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index 1cf51d3d..7576b679 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core==1.8.2 +rocm-docs-core==1.8.3 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 45b4789f..1abfa146 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -92,7 +92,7 @@ requests==2.32.2 # via # pygithub # sphinx -rocm-docs-core==1.8.2 +rocm-docs-core==1.8.3 # via -r requirements.in smmap==5.0.1 # via gitdb From a5b02ee1d32f2a025b696a64d3cd9479e7f407cc Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 21 Oct 2024 18:55:29 +0100 Subject: [PATCH 14/51] [HIPIFY][doc] `LLVM 19.1.2` is the latest supported LLVM release + No patches are needed + Updated the `README.md` accordingly + `hipify-clang` built with `LLVM 19.1.2` works correctly with the latest `CUDA 12.6.1`, even though clang may report that `CUDA 12.6.1` is not fully supported + Tested on `Windows 11` (`VS 2019` and `VS 2022`) and `Ubuntu 23.10` --- docs/hipify-clang.rst | 63 ++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git 
a/docs/hipify-clang.rst b/docs/hipify-clang.rst index dec00c4d..cc3fd808 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -37,7 +37,7 @@ Dependencies * `LLVM+Clang `_ of at least version `4.0.0 `_; the latest stable and recommended release: - `19.1.1 `_. + `19.1.2 `_. * `CUDA `_ of at least version `7.0 `_, the latest supported version is @@ -185,7 +185,8 @@ Dependencies - ✅ - ✅ * - `19.1.0 `_, - `19.1.1 `_:sup:`4` + `19.1.1 `_, + `19.1.2 `_:sup:`4` - `12.6.1 `_:sup:`4` - **Latest stable config** - **Latest stable config** @@ -228,7 +229,7 @@ Dependencies In most cases, you can get a suitable version of ``LLVM+Clang`` with your package manager. However, you can also `download a release archive `_ and build or install it. In case of multiple versions of ``LLVM`` installed, set `CMAKE_PREFIX_PATH `_ so that -``CMake`` can find the desired version of ``LLVM``. For example, ``-DCMAKE_PREFIX_PATH=D:\LLVM\19.1.1\dist``. +``CMake`` can find the desired version of ``LLVM``. For example, ``-DCMAKE_PREFIX_PATH=D:\LLVM\19.1.2\dist``. Usage ============================================================ @@ -261,7 +262,7 @@ header files used during the hipification process: .. code:: shell - ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.6 --clang-resource-directory=/usr/llvm/19.1.1/dist/lib/clang/19 + ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.6 --clang-resource-directory=/usr/llvm/19.1.2/dist/lib/clang/19 For more information, refer to the `Clang manual for compiling CUDA `_. @@ -398,7 +399,7 @@ To ensure LLVM being found or in case of multiple LLVM instances, specify the pa .. code-block:: bash - -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.1/dist + -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.2/dist On Windows, specify the following option for CMake in the first place: ``-G "Visual Studio 17 2022"``. @@ -472,7 +473,7 @@ LLVM <= 9.0.1 LLVM >= 10.0.0 ----------------- -1. Download `LLVM project `_ sources. +1. Download `LLVM project `_ sources. 2. 
Build `LLVM project `_: @@ -575,13 +576,13 @@ LLVM >= 10.0.0 .. code-block:: bash - python /usr/llvm/19.1.1/llvm-project/llvm/utils/lit/setup.py install + python /usr/llvm/19.1.2/llvm-project/llvm/utils/lit/setup.py install **Windows**: .. code-block:: shell - python D:/LLVM/19.1.1/llvm-project/llvm/utils/lit/setup.py install + python D:/LLVM/19.1.2/llvm-project/llvm/utils/lit/setup.py install In case of errors similar to ``ModuleNotFoundError: No module named 'setuptools'``, upgrade the ``setuptools`` package: @@ -595,23 +596,23 @@ LLVM >= 10.0.0 .. code-block:: bash - -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.1/build/bin/llvm-lit + -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.2/build/bin/llvm-lit **Windows**: .. code-block:: shell - -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.1/build/Release/bin/llvm-lit.py + -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.2/build/Release/bin/llvm-lit.py * ``FileCheck``: **Linux**: - Copy from ``/usr/llvm/19.1.1/build/bin/`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. + Copy from ``/usr/llvm/19.1.2/build/bin/`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. **Windows**: - Copy from ``D:/LLVM/19.1.1/build/Release/bin`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. + Copy from ``D:/LLVM/19.1.2/build/Release/bin`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. Alternatively, specify the path to ``FileCheck`` in the ``CMAKE_INSTALL_PREFIX`` option. 
@@ -638,8 +639,8 @@ On Linux, the following configurations are tested: * Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 * Ubuntu 16-19: LLVM 8.0.0 - 14.0.6, CUDA 7.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -* Ubuntu 20-21: LLVM 9.0.0 - 19.1.1, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.5.0 -* Ubuntu 22-23: LLVM 13.0.0 - 19.1.1, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.5.0 +* Ubuntu 20-21: LLVM 9.0.0 - 19.1.2, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.5.0 +* Ubuntu 22-23: LLVM 13.0.0 - 19.1.2, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.5.0 Minimum build system requirements for the above configurations: @@ -657,10 +658,10 @@ Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01`` -DHIPIFY_CLANG_TESTS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.1/dist \ + -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.2/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.6.1 \ -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.5.0 \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.1/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.2/build/bin/llvm-lit \ ../hipify The corresponding successful output is: @@ -684,11 +685,11 @@ The corresponding successful output is: -- - Test hipify-clang : ON -- - Is part of HIP SDK : OFF -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.13") - -- Found LLVM 19.1.1: - -- - CMake module path : /usr/llvm/19.1.1/dist/lib/cmake/llvm - -- - Clang include path : /usr/llvm/19.1.1/dist/include - -- - LLVM Include path : /usr/llvm/19.1.1/dist/include - -- - Binary path : /usr/llvm/19.1.1/dist/bin + -- Found LLVM 19.1.2: + -- - CMake module path : /usr/llvm/19.1.2/dist/lib/cmake/llvm + -- - Clang include path : /usr/llvm/19.1.2/dist/include + -- - LLVM Include path : /usr/llvm/19.1.2/dist/include + -- - Binary path : /usr/llvm/19.1.2/dist/bin -- Linker detection: GNU ld -- ---- The below configuring for hipify-clang testing only ---- -- Found Python: /usr/bin/python3.12 (found version "3.12.7") found 
components: Interpreter @@ -723,7 +724,7 @@ The corresponding successful output is: Running HIPify regression tests =============================================================== CUDA 12.6.68 - will be used for testing - LLVM 19.1.1 - will be used for testing + LLVM 19.1.2 - will be used for testing x86_64 - Platform architecture Linux 6.5.0-15-generic - Platform OS 64 - hipify-clang binary bitness @@ -823,7 +824,7 @@ Tested configurations: - ``2019.16.11.40, 2022.17.11.4`` - ``3.30.4`` - ``3.12.7`` - * - ``19.1.1`` + * - ``19.1.0 - 19.1.2`` - ``7.0 - 12.6.1`` - ``8.0.5 - 9.5.0`` - ``2019.16.11.40, 2022.17.11.4`` @@ -853,11 +854,11 @@ Building with testing support using ``Visual Studio 17 2022`` on ``Windows 11``: -DHIPIFY_CLANG_TESTS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=D:/LLVM/19.1.1/dist \ + -DCMAKE_PREFIX_PATH=D:/LLVM/19.1.2/dist \ -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6" \ -DCUDA_SDK_ROOT_DIR="C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5" \ -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.5.0 \ - -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.1/build/Release/bin/llvm-lit.py \ + -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.2/build/Release/bin/llvm-lit.py \ ../hipify The corresponding successful output is: @@ -881,15 +882,15 @@ The corresponding successful output is: -- - Build hipify-clang : ON -- - Test hipify-clang : ON -- - Is part of HIP SDK : OFF - -- Found LLVM 19.1.1: - -- - CMake module path : D:/LLVM/19.1.1/dist/lib/cmake/llvm - -- - Clang include path : D:/LLVM/19.1.1/dist/include - -- - LLVM Include path : D:/LLVM/19.1.1/dist/include - -- - Binary path : D:/LLVM/19.1.1/dist/bin + -- Found LLVM 19.1.2: + -- - CMake module path : D:/LLVM/19.1.2/dist/lib/cmake/llvm + -- - Clang include path : D:/LLVM/19.1.2/dist/include + -- - LLVM Include path : D:/LLVM/19.1.2/dist/include + -- - Binary path : D:/LLVM/19.1.2/dist/bin -- ---- The below configuring for hipify-clang testing only ---- -- Found 
Python: C:/Users/TT/AppData/Local/Programs/Python/Python312/python.exe (found version "3.12.7") found components: Interpreter -- Found lit: C:/Users/TT/AppData/Local/Programs/Python/Python312/Scripts/lit.exe - -- Found FileCheck: D:/LLVM/19.1.1/dist/bin/FileCheck.exe + -- Found FileCheck: D:/LLVM/19.1.2/dist/bin/FileCheck.exe -- Initial CUDA to configure: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5 From 5e77c869fe3ac0c133ff80b0f768c672dd93c245 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 22 Oct 2024 14:05:36 +0100 Subject: [PATCH 15/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 3 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 72 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 36 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 36 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 36 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 72 +++++++++++++++++++ 5 files changed, 252 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index a5d3d574..36e0be46 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12532,15 +12532,51 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_64x256", "CUBLASLT_MATMUL_TILE_64x128", "CUBLASLT_MATMUL_TILE_512x64", + "CUBLASLT_MATMUL_TILE_48x768", + "CUBLASLT_MATMUL_TILE_48x704", + "CUBLASLT_MATMUL_TILE_48x640", + "CUBLASLT_MATMUL_TILE_48x64", + "CUBLASLT_MATMUL_TILE_48x576", + "CUBLASLT_MATMUL_TILE_48x512", + "CUBLASLT_MATMUL_TILE_48x448", + "CUBLASLT_MATMUL_TILE_48x384", + "CUBLASLT_MATMUL_TILE_48x320", + "CUBLASLT_MATMUL_TILE_48x256", + "CUBLASLT_MATMUL_TILE_48x192", + "CUBLASLT_MATMUL_TILE_48x128", + "CUBLASLT_MATMUL_TILE_40x768", + "CUBLASLT_MATMUL_TILE_40x704", + "CUBLASLT_MATMUL_TILE_40x640", + "CUBLASLT_MATMUL_TILE_40x64", + "CUBLASLT_MATMUL_TILE_40x576", + "CUBLASLT_MATMUL_TILE_40x512", + 
"CUBLASLT_MATMUL_TILE_40x448", + "CUBLASLT_MATMUL_TILE_40x384", + "CUBLASLT_MATMUL_TILE_40x320", + "CUBLASLT_MATMUL_TILE_40x256", + "CUBLASLT_MATMUL_TILE_40x192", + "CUBLASLT_MATMUL_TILE_40x128", "CUBLASLT_MATMUL_TILE_32x8", + "CUBLASLT_MATMUL_TILE_32x768", + "CUBLASLT_MATMUL_TILE_32x704", + "CUBLASLT_MATMUL_TILE_32x640", "CUBLASLT_MATMUL_TILE_32x64", + "CUBLASLT_MATMUL_TILE_32x576", + "CUBLASLT_MATMUL_TILE_32x512", + "CUBLASLT_MATMUL_TILE_32x448", + "CUBLASLT_MATMUL_TILE_32x384", + "CUBLASLT_MATMUL_TILE_32x320", "CUBLASLT_MATMUL_TILE_32x32", "CUBLASLT_MATMUL_TILE_32x256", + "CUBLASLT_MATMUL_TILE_32x192", "CUBLASLT_MATMUL_TILE_32x16", "CUBLASLT_MATMUL_TILE_32x128", "CUBLASLT_MATMUL_TILE_256x64", "CUBLASLT_MATMUL_TILE_256x32", "CUBLASLT_MATMUL_TILE_256x128", + "CUBLASLT_MATMUL_TILE_24x768", + "CUBLASLT_MATMUL_TILE_24x704", + "CUBLASLT_MATMUL_TILE_24x640", "CUBLASLT_MATMUL_TILE_24x64", "CUBLASLT_MATMUL_TILE_24x576", "CUBLASLT_MATMUL_TILE_24x512", @@ -14019,15 +14055,51 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_64x256", "CUBLASLT_MATMUL_TILE_64x128", "CUBLASLT_MATMUL_TILE_512x64", + "CUBLASLT_MATMUL_TILE_48x768", + "CUBLASLT_MATMUL_TILE_48x704", + "CUBLASLT_MATMUL_TILE_48x640", + "CUBLASLT_MATMUL_TILE_48x64", + "CUBLASLT_MATMUL_TILE_48x576", + "CUBLASLT_MATMUL_TILE_48x512", + "CUBLASLT_MATMUL_TILE_48x448", + "CUBLASLT_MATMUL_TILE_48x384", + "CUBLASLT_MATMUL_TILE_48x320", + "CUBLASLT_MATMUL_TILE_48x256", + "CUBLASLT_MATMUL_TILE_48x192", + "CUBLASLT_MATMUL_TILE_48x128", + "CUBLASLT_MATMUL_TILE_40x768", + "CUBLASLT_MATMUL_TILE_40x704", + "CUBLASLT_MATMUL_TILE_40x640", + "CUBLASLT_MATMUL_TILE_40x64", + "CUBLASLT_MATMUL_TILE_40x576", + "CUBLASLT_MATMUL_TILE_40x512", + "CUBLASLT_MATMUL_TILE_40x448", + "CUBLASLT_MATMUL_TILE_40x384", + "CUBLASLT_MATMUL_TILE_40x320", + "CUBLASLT_MATMUL_TILE_40x256", + "CUBLASLT_MATMUL_TILE_40x192", + "CUBLASLT_MATMUL_TILE_40x128", "CUBLASLT_MATMUL_TILE_32x8", + "CUBLASLT_MATMUL_TILE_32x768", + 
"CUBLASLT_MATMUL_TILE_32x704", + "CUBLASLT_MATMUL_TILE_32x640", "CUBLASLT_MATMUL_TILE_32x64", + "CUBLASLT_MATMUL_TILE_32x576", + "CUBLASLT_MATMUL_TILE_32x512", + "CUBLASLT_MATMUL_TILE_32x448", + "CUBLASLT_MATMUL_TILE_32x384", + "CUBLASLT_MATMUL_TILE_32x320", "CUBLASLT_MATMUL_TILE_32x32", "CUBLASLT_MATMUL_TILE_32x256", + "CUBLASLT_MATMUL_TILE_32x192", "CUBLASLT_MATMUL_TILE_32x16", "CUBLASLT_MATMUL_TILE_32x128", "CUBLASLT_MATMUL_TILE_256x64", "CUBLASLT_MATMUL_TILE_256x32", "CUBLASLT_MATMUL_TILE_256x128", + "CUBLASLT_MATMUL_TILE_24x768", + "CUBLASLT_MATMUL_TILE_24x704", + "CUBLASLT_MATMUL_TILE_24x640", "CUBLASLT_MATMUL_TILE_24x64", "CUBLASLT_MATMUL_TILE_24x576", "CUBLASLT_MATMUL_TILE_24x512", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 0f3b9ad5..dee598fd 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -355,15 +355,51 @@ |`CUBLASLT_MATMUL_TILE_24x512`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x16`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x256`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x576`|12.6| | | | | | | | 
| | |`CUBLASLT_MATMUL_TILE_32x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x8`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index b6636f5c..8b692547 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ 
b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -355,15 +355,51 @@ |`CUBLASLT_MATMUL_TILE_24x512`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x576`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x16`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x256`|12.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x32`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x576`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x8`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x384`|12.6| 
| | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x576`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x768`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x576`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 7584c23e..f0a40e7c 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -355,15 +355,51 @@ |`CUBLASLT_MATMUL_TILE_24x512`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x640`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x16`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x256`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_32x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x8`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_40x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x320`|12.6| | | | | | | | | 
| +|`CUBLASLT_MATMUL_TILE_48x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 4eb7b5cf..23c90533 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -272,6 +272,42 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_24x448", {"HIPBLASLT_MATMUL_TILE_24x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_24x512", {"HIPBLASLT_MATMUL_TILE_24x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_24x576", {"HIPBLASLT_MATMUL_TILE_24x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x640", {"HIPBLASLT_MATMUL_TILE_24x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x704", {"HIPBLASLT_MATMUL_TILE_24x704", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_24x768", {"HIPBLASLT_MATMUL_TILE_24x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x192", {"HIPBLASLT_MATMUL_TILE_32x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x320", {"HIPBLASLT_MATMUL_TILE_32x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x384", {"HIPBLASLT_MATMUL_TILE_32x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x448", {"HIPBLASLT_MATMUL_TILE_32x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x512", {"HIPBLASLT_MATMUL_TILE_32x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x576", {"HIPBLASLT_MATMUL_TILE_32x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x640", {"HIPBLASLT_MATMUL_TILE_32x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x704", {"HIPBLASLT_MATMUL_TILE_32x704", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_32x768", {"HIPBLASLT_MATMUL_TILE_32x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x64", {"HIPBLASLT_MATMUL_TILE_40x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x128", {"HIPBLASLT_MATMUL_TILE_40x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x192", {"HIPBLASLT_MATMUL_TILE_40x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x256", {"HIPBLASLT_MATMUL_TILE_40x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x320", {"HIPBLASLT_MATMUL_TILE_40x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x384", {"HIPBLASLT_MATMUL_TILE_40x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x448", {"HIPBLASLT_MATMUL_TILE_40x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x512", {"HIPBLASLT_MATMUL_TILE_40x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x576", {"HIPBLASLT_MATMUL_TILE_40x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x640", {"HIPBLASLT_MATMUL_TILE_40x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x704", {"HIPBLASLT_MATMUL_TILE_40x704", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_40x768", {"HIPBLASLT_MATMUL_TILE_40x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x64", {"HIPBLASLT_MATMUL_TILE_48x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x128", {"HIPBLASLT_MATMUL_TILE_48x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x192", {"HIPBLASLT_MATMUL_TILE_48x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x256", {"HIPBLASLT_MATMUL_TILE_48x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x320", {"HIPBLASLT_MATMUL_TILE_48x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x384", {"HIPBLASLT_MATMUL_TILE_48x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x448", {"HIPBLASLT_MATMUL_TILE_48x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x512", {"HIPBLASLT_MATMUL_TILE_48x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x576", {"HIPBLASLT_MATMUL_TILE_48x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x640", {"HIPBLASLT_MATMUL_TILE_48x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x704", {"HIPBLASLT_MATMUL_TILE_48x704", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_48x768", {"HIPBLASLT_MATMUL_TILE_48x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -961,6 +997,42 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_24x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_24x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_24x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x704", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_24x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x192", {CUDA_126, CUDA_0, CUDA_0 }}, 
// A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x704", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_32x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_40x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x704", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_40x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x704", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_48x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From de363e507cffd19d1c63f13c777cfb55276ecaf7 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 22 Oct 2024 18:57:40 +0100 Subject: [PATCH 16/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 3 + `rocblas_(s|d|c|z|h)gemm_strided_batched_64` and `hipblas(S|D|C|Z|H)gemmStridedBatched_(v2_)?64` support + Updated synthetic tests, the 
regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 25 +++++++++++-------- docs/tables/CUBLAS_API_supported_by_HIP.md | 10 ++++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 10 ++++---- docs/tables/CUBLAS_API_supported_by_ROC.md | 10 ++++---- src/CUDA2HIP_BLAS_API_functions.cpp | 20 +++++++++++---- .../synthetic/libraries/cublas2hipblas_v2.cu | 25 +++++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 25 +++++++++++++++++++ 7 files changed, 95 insertions(+), 30 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 36e0be46..ab714494 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1402,6 +1402,11 @@ my %experimental_funcs = ( "cudaGraphNodeSetParams" => "6.3.0", "cudaGraphExecNodeSetParams" => "6.3.0", "cudaGraphExecGetFlags" => "6.3.0", + "cublasZgemmStridedBatched_64" => "6.3.0", + "cublasSgemmStridedBatched_64" => "6.3.0", + "cublasHgemmStridedBatched_64" => "6.3.0", + "cublasDgemmStridedBatched_64" => "6.3.0", + "cublasCgemmStridedBatched_64" => "6.3.0", "cuGraphNodeSetParams" => "6.3.0", "cuGraphMemcpyNodeSetParams" => "6.3.0", "cuGraphMemcpyNodeGetParams" => "6.3.0", @@ -1558,6 +1563,11 @@ sub experimentalSubstitutions { subst("cudaGraphExecGetFlags", "hipGraphExecGetFlags", "graph"); subst("cudaGraphExecNodeSetParams", "hipGraphExecNodeSetParams", "graph"); subst("cudaGraphNodeSetParams", "hipGraphNodeSetParams", "graph"); + subst("cublasCgemmStridedBatched_64", "hipblasCgemmStridedBatched_v2_64", "library"); + subst("cublasDgemmStridedBatched_64", "hipblasDgemmStridedBatched_64", "library"); + subst("cublasHgemmStridedBatched_64", "hipblasHgemmStridedBatched_64", "library"); + subst("cublasSgemmStridedBatched_64", "hipblasSgemmStridedBatched_64", "library"); + subst("cublasZgemmStridedBatched_64", "hipblasZgemmStridedBatched_v2_64", "library"); subst("cusolverDnGetDeterministicMode", "hipsolverDnGetDeterministicMode", "library"); subst("cusolverDnSetDeterministicMode", 
"hipsolverDnSetDeterministicMode", "library"); subst("cusolverDnXgeqrf", "hipsolverDnXgeqrf", "library"); @@ -1596,6 +1606,7 @@ sub rocSubstitutions { subst("cublasCgemmBatched", "rocblas_cgemm_batched", "library"); subst("cublasCgemmBatched_64", "rocblas_cgemm_batched_64", "library"); subst("cublasCgemmStridedBatched", "rocblas_cgemm_strided_batched", "library"); + subst("cublasCgemmStridedBatched_64", "rocblas_cgemm_strided_batched_64", "library"); subst("cublasCgemm_64", "rocblas_cgemm_64", "library"); subst("cublasCgemm_v2", "rocblas_cgemm", "library"); subst("cublasCgemm_v2_64", "rocblas_cgemm_64", "library"); @@ -1753,6 +1764,7 @@ sub rocSubstitutions { subst("cublasDgemmBatched", "rocblas_dgemm_batched", "library"); subst("cublasDgemmBatched_64", "rocblas_dgemm_batched_64", "library"); subst("cublasDgemmStridedBatched", "rocblas_dgemm_strided_batched", "library"); + subst("cublasDgemmStridedBatched_64", "rocblas_dgemm_strided_batched_64", "library"); subst("cublasDgemm_64", "rocblas_dgemm_64", "library"); subst("cublasDgemm_v2", "rocblas_dgemm", "library"); subst("cublasDgemm_v2_64", "rocblas_dgemm_64", "library"); @@ -1897,6 +1909,7 @@ sub rocSubstitutions { subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); subst("cublasHgemmBatched_64", "rocblas_hgemm_batched_64", "library"); subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); + subst("cublasHgemmStridedBatched_64", "rocblas_hgemm_strided_batched_64", "library"); subst("cublasHgemm_64", "rocblas_hgemm_64", "library"); subst("cublasIcamax", "rocblas_icamax", "library"); subst("cublasIcamax_64", "rocblas_icamax_64", "library"); @@ -1981,6 +1994,7 @@ sub rocSubstitutions { subst("cublasSgemmBatched", "rocblas_sgemm_batched", "library"); subst("cublasSgemmBatched_64", "rocblas_sgemm_batched_64", "library"); subst("cublasSgemmStridedBatched", "rocblas_sgemm_strided_batched", "library"); + subst("cublasSgemmStridedBatched_64", "rocblas_sgemm_strided_batched_64", 
"library"); subst("cublasSgemm_64", "rocblas_sgemm_64", "library"); subst("cublasSgemm_v2", "rocblas_sgemm", "library"); subst("cublasSgemm_v2_64", "rocblas_sgemm_64", "library"); @@ -2129,6 +2143,7 @@ sub rocSubstitutions { subst("cublasZgemmBatched", "rocblas_zgemm_batched", "library"); subst("cublasZgemmBatched_64", "rocblas_zgemm_batched_64", "library"); subst("cublasZgemmStridedBatched", "rocblas_zgemm_strided_batched", "library"); + subst("cublasZgemmStridedBatched_64", "rocblas_zgemm_strided_batched_64", "library"); subst("cublasZgemm_64", "rocblas_zgemm_64", "library"); subst("cublasZgemm_v2", "rocblas_zgemm", "library"); subst("cublasZgemm_v2_64", "rocblas_zgemm_64", "library"); @@ -11543,7 +11558,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZher2k_64", "cublasZhemm_v2_64", "cublasZhemm_64", - "cublasZgemmStridedBatched_64", "cublasZgemm3m_64", "cublasZgemm3m", "cublasZgeam_64", @@ -11576,7 +11590,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSsymm_64", "cublasSmatinvBatched", "cublasShutdown", - "cublasSgemmStridedBatched_64", "cublasSgemmGroupedBatched_64", "cublasSgemmGroupedBatched", "cublasSgemmEx_64", @@ -11638,7 +11651,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx_64", "cublasIamaxEx", - "cublasHgemmStridedBatched_64", "cublasHSSgemvStridedBatched_64", "cublasHSSgemvStridedBatched", "cublasHSSgemvBatched_64", @@ -11681,7 +11693,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDsymm_v2_64", "cublasDsymm_64", "cublasDmatinvBatched", - "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", "cublasDgeam_64", @@ -11719,7 +11730,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCher2k_64", "cublasChemm_v2_64", "cublasChemm_64", - "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", "cublasCgemm3m_64", @@ -13302,7 +13312,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgetriBatched", "cublasZgetrfBatched", "cublasZgeqrfBatched", - "cublasZgemmStridedBatched_64", 
"cublasZgemm3m_64", "cublasZgemm3m", "cublasZgelsBatched", @@ -13329,7 +13338,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgetriBatched", "cublasSgetrfBatched", "cublasSgeqrfBatched", - "cublasSgemmStridedBatched_64", "cublasSgemmGroupedBatched_64", "cublasSgemmGroupedBatched", "cublasSgemmEx_64", @@ -13418,7 +13426,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx_64", "cublasIamaxEx", - "cublasHgemmStridedBatched_64", "cublasGetVersion_v2", "cublasGetVersion", "cublasGetVector_64", @@ -13453,7 +13460,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgetriBatched", "cublasDgetrfBatched", "cublasDgeqrfBatched", - "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", "cublasDgelsBatched", @@ -13492,7 +13498,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgetriBatched", "cublasCgetrfBatched", "cublasCgeqrfBatched", - "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", "cublasCgemm3m_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index dee598fd..ee8ad63c 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1095,7 +1095,7 @@ |`cublasCgemmBatched`| | | | |`hipblasCgemmBatched_v2`|6.0.0| | | | | |`cublasCgemmBatched_64`|12.0| | | |`hipblasCgemmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | | -|`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasCgemmStridedBatched_64`|12.0| | | |`hipblasCgemmStridedBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | | |`cublasCgemm_v2_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| @@ -1145,7 +1145,7 @@ |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | 
|`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | | -|`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasDgemmStridedBatched_64`|12.0| | | |`hipblasDgemmStridedBatched_64`|6.3.0| | | |6.3.0| |`cublasDgemm_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | | |`cublasDgemm_v2_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0| @@ -1189,7 +1189,7 @@ |`cublasHgemmBatched`|9.0| | | |`hipblasHgemmBatched`|3.0.0| | | | | |`cublasHgemmBatched_64`|12.0| | | |`hipblasHgemmBatched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | | -|`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasHgemmStridedBatched_64`|12.0| | | |`hipblasHgemmStridedBatched_64`|6.3.0| | | |6.3.0| |`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | | @@ -1197,7 +1197,7 @@ |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | | -|`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasSgemmStridedBatched_64`|12.0| | | |`hipblasSgemmStridedBatched_64`|6.3.0| | | |6.3.0| |`cublasSgemm_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | | |`cublasSgemm_v2_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0| @@ -1241,7 +1241,7 @@ |`cublasZgemmBatched`| | | | |`hipblasZgemmBatched_v2`|6.0.0| | | | | |`cublasZgemmBatched_64`|12.0| | | |`hipblasZgemmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | | -|`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasZgemmStridedBatched_64`|12.0| | | |`hipblasZgemmStridedBatched_v2_64`|6.3.0| | | |6.3.0| 
|`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | | |`cublasZgemm_v2_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 8b692547..a95fbf83 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1095,7 +1095,7 @@ |`cublasCgemmBatched`| | | | |`hipblasCgemmBatched_v2`|6.0.0| | | | |`rocblas_cgemm_batched`|3.5.0| | | | | |`cublasCgemmBatched_64`|12.0| | | |`hipblasCgemmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | -|`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemmStridedBatched_64`|12.0| | | |`hipblasCgemmStridedBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | |`rocblas_cgemm`|1.5.0| | | | | |`cublasCgemm_v2_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| @@ -1145,7 +1145,7 @@ |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | -|`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemmStridedBatched_64`|12.0| | | |`hipblasDgemmStridedBatched_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasDgemm_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| 
| | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemm_v2_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_64`|6.3.0| | | |6.3.0| @@ -1189,7 +1189,7 @@ |`cublasHgemmBatched`|9.0| | | |`hipblasHgemmBatched`|3.0.0| | | | |`rocblas_hgemm_batched`|3.5.0| | | | | |`cublasHgemmBatched_64`|12.0| | | |`hipblasHgemmBatched_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | -|`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasHgemmStridedBatched_64`|12.0| | | |`hipblasHgemmStridedBatched_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | @@ -1197,7 +1197,7 @@ |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | | | | | | | |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | -|`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemmStridedBatched_64`|12.0| | | |`hipblasSgemmStridedBatched_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasSgemm_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemm_v2_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_64`|6.3.0| | | |6.3.0| @@ -1241,7 +1241,7 @@ |`cublasZgemmBatched`| | | | |`hipblasZgemmBatched_v2`|6.0.0| | | | |`rocblas_zgemm_batched`|3.5.0| | | | | 
|`cublasZgemmBatched_64`|12.0| | | |`hipblasZgemmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | -|`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemmStridedBatched_64`|12.0| | | |`hipblasZgemmStridedBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | |`rocblas_zgemm`|1.5.0| | | | | |`cublasZgemm_v2_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index f0a40e7c..a85c584a 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1095,7 +1095,7 @@ |`cublasCgemmBatched`| | | | |`rocblas_cgemm_batched`|3.5.0| | | | | |`cublasCgemmBatched_64`|12.0| | | |`rocblas_cgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | -|`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasCgemmStridedBatched_64`|12.0| | | |`rocblas_cgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemm_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`rocblas_cgemm`|1.5.0| | | | | |`cublasCgemm_v2_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| @@ -1145,7 +1145,7 @@ |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | -|`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasDgemmStridedBatched_64`|12.0| | | |`rocblas_dgemm_strided_batched_64`|6.3.0| | | |6.3.0| 
|`cublasDgemm_64`|12.0| | | |`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemm_v2_64`|12.0| | | |`rocblas_dgemm_64`|6.3.0| | | |6.3.0| @@ -1189,7 +1189,7 @@ |`cublasHgemmBatched`|9.0| | | |`rocblas_hgemm_batched`|3.5.0| | | | | |`cublasHgemmBatched_64`|12.0| | | |`rocblas_hgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | -|`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasHgemmStridedBatched_64`|12.0| | | |`rocblas_hgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemm_64`|12.0| | | |`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | @@ -1197,7 +1197,7 @@ |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | -|`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasSgemmStridedBatched_64`|12.0| | | |`rocblas_sgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasSgemm_64`|12.0| | | |`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemm_v2_64`|12.0| | | |`rocblas_sgemm_64`|6.3.0| | | |6.3.0| @@ -1241,7 +1241,7 @@ |`cublasZgemmBatched`| | | | |`rocblas_zgemm_batched`|3.5.0| | | | | |`cublasZgemmBatched_64`|12.0| | | |`rocblas_zgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | -|`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasZgemmStridedBatched_64`|12.0| | | |`rocblas_zgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemm_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`rocblas_zgemm`|1.5.0| | | | | |`cublasZgemm_v2_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| diff --git 
a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index a8fc6cff..568cc17c 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -421,9 +421,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasHgemmBatched", {"hipblasHgemmBatched", "rocblas_hgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasHgemmBatched_64", {"hipblasHgemmBatched_64", "rocblas_hgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasSgemmStridedBatched", {"hipblasSgemmStridedBatched", "rocblas_sgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSgemmStridedBatched_64", {"hipblasSgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSgemmStridedBatched_64", {"hipblasSgemmStridedBatched_64", "rocblas_sgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasDgemmStridedBatched", {"hipblasDgemmStridedBatched", "rocblas_dgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDgemmStridedBatched_64", {"hipblasDgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDgemmStridedBatched_64", {"hipblasDgemmStridedBatched_64", "rocblas_dgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasCgemmBatched", {"hipblasCgemmBatched_v2", "rocblas_cgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemmBatched_64", {"hipblasCgemmBatched_v2_64", "rocblas_cgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm3mBatched", {"hipblasCgemm3mBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, @@ -431,13 +431,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZgemmBatched", {"hipblasZgemmBatched_v2", "rocblas_zgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZgemmBatched_64", {"hipblasZgemmBatched_v2_64", 
"rocblas_zgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemmStridedBatched", {"hipblasCgemmStridedBatched_v2", "rocblas_cgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCgemmStridedBatched_64", {"hipblasCgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemmStridedBatched_64", {"hipblasCgemmStridedBatched_v2_64", "rocblas_cgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasCgemm3mStridedBatched", {"hipblasCgemm3mStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3mStridedBatched_64", {"hipblasCgemm3mStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasZgemmStridedBatched", {"hipblasZgemmStridedBatched_v2", "rocblas_zgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZgemmStridedBatched_64", {"hipblasZgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemmStridedBatched_64", {"hipblasZgemmStridedBatched_v2_64", "rocblas_zgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasHgemmStridedBatched", {"hipblasHgemmStridedBatched", "rocblas_hgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasHgemmStridedBatched_64", {"hipblasHgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasHgemmStridedBatched_64", {"hipblasHgemmStridedBatched_64", "rocblas_hgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasGemmGroupedBatchedEx", {"hipblasGemmGroupedBatchedEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasGemmGroupedBatchedEx_64", {"hipblasGemmGroupedBatchedEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, @@ -2033,6 +2033,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { 
{"hipblasDgemmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCgemmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZgemmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasHgemmStridedBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemmStridedBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgemmStridedBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemmStridedBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemmStridedBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2431,6 +2436,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_cgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_hgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_sgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 2b29009f..18433fa7 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2883,6 +2883,31 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* const AP[], int64_t lda, const hipblasHalf* const 
BP[], int64_t ldb, const hipblasHalf* beta, hipblasHalf* const CP[], int64_t ldc, int64_t batchCount); // CHECK: blasStatus = hipblasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, long long int strideA, const float* B, int64_t ldb, long long int strideB, const float* beta, float* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, long long strideA, const float* BP, int64_t ldb, long long strideB, const float* beta, float* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasSgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, strideA, &fB, ldb_64, strideB, &fb, &fC, ldc_64, strideC, batchCount_64); + blasStatus = cublasSgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, strideA, &fB, ldb_64, strideB, &fb, &fC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, long long int strideA, const double* B, int64_t ldb, long long int strideB, const double* beta, double* C, int64_t ldc, long long int strideC, int64_t 
batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, long long strideA, const double* BP, int64_t ldb, long long strideB, const double* beta, double* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasDgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, strideA, &dB, ldb_64, strideB, &db, &dC, ldc_64, strideC, batchCount_64); + blasStatus = cublasDgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, strideA, &dB, ldb_64, strideB, &db, &dC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, long long int strideA, const cuComplex* B, int64_t ldb, long long int strideB, const cuComplex* beta, cuComplex* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmStridedBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, long long strideA, const hipComplex* BP, int64_t ldb, long long strideB, const hipComplex* beta, hipComplex* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasCgemmStridedBatched_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, strideA, &complexB, ldb_64, strideB, &complexb, &complexC, ldc_64, strideC, batchCount_64); + blasStatus = cublasCgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, strideA, &complexB, ldb_64, strideB, &complexb, 
&complexC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, long long int strideA, const cuDoubleComplex* B, int64_t ldb, long long int strideB, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmStridedBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, long long strideA, const hipDoubleComplex* BP, int64_t ldb, long long strideB, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasZgemmStridedBatched_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexB, ldb_64, strideB, &dcomplexb, &dcomplexC, ldc_64, strideC, batchCount_64); + blasStatus = cublasZgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexB, ldb_64, strideB, &dcomplexb, &dcomplexC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, long long int strideA, const __half* B, int64_t ldb, long long int strideB, const __half* beta, __half* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const 
hipblasHalf* AP, int64_t lda, long long strideA, const hipblasHalf* BP, int64_t ldb, long long strideB, const hipblasHalf* beta, hipblasHalf* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); + blasStatus = cublasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index dd48b9c9..cc1c1168 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3088,6 +3088,31 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* const A[], int64_t lda, const rocblas_half* const B[], int64_t ldb, const rocblas_half* beta, rocblas_half* const C[], int64_t ldc, int64_t batch_count); // CHECK: blasStatus = rocblas_hgemm_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, long long int strideA, const float* B, int64_t ldb, long long int strideB, const float* beta, float* C, int64_t ldc, long long int 
strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, rocblas_stride stride_a, const float* B, int64_t ldb, rocblas_stride stride_b, const float* beta, float* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_sgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, strideA, &fB, ldb_64, strideB, &fb, &fC, ldc_64, strideC, batchCount_64); + blasStatus = cublasSgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, strideA, &fB, ldb_64, strideB, &fb, &fC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, long long int strideA, const double* B, int64_t ldb, long long int strideB, const double* beta, double* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, rocblas_stride stride_a, const double* B, int64_t ldb, rocblas_stride stride_b, const double* beta, double* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_dgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, strideA, &dB, ldb_64, strideB, &db, &dC, ldc_64, strideC, batchCount_64); + blasStatus = cublasDgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, strideA, &dB, ldb_64, strideB, &db, &dC, ldc_64, strideC, batchCount_64); + + // 
CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, long long int strideA, const cuComplex* B, int64_t ldb, long long int strideB, const cuComplex* beta, cuComplex* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, rocblas_stride stride_a, const rocblas_float_complex* B, int64_t ldb, rocblas_stride stride_b, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_cgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, strideA, &complexB, ldb_64, strideB, &complexb, &complexC, ldc_64, strideC, batchCount_64); + blasStatus = cublasCgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, strideA, &complexB, ldb_64, strideB, &complexb, &complexC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, long long int strideA, const cuDoubleComplex* B, int64_t ldb, long long int strideB, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_double_complex* alpha, const 
rocblas_double_complex* A, int64_t lda, rocblas_stride stride_a, const rocblas_double_complex* B, int64_t ldb, rocblas_stride stride_b, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_zgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexB, ldb_64, strideB, &dcomplexb, &dcomplexC, ldc_64, strideC, batchCount_64); + blasStatus = cublasZgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexB, ldb_64, strideB, &dcomplexb, &dcomplexC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, long long int strideA, const __half* B, int64_t ldb, long long int strideB, const __half* beta, __half* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* A, int64_t lda, rocblas_stride stride_a, const rocblas_half* B, int64_t ldb, rocblas_stride stride_b, const rocblas_half* beta, rocblas_half* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_hgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); + blasStatus = cublasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); #endif return 0; From 6c8afa4bfd786b783db9993595400b65702ed06b Mon Sep 17 00:00:00 2001 
From: Evgeny Mankov Date: Tue, 22 Oct 2024 21:55:30 +0100 Subject: [PATCH 17/51] [HIPIFY][doc] Updated `CHANGELOG.md` to conform to the documentation standard --- CHANGELOG.md | 128 +++++++++++++++++++++++++-------------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64f823f5..8a2e9344 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 6.3.0 -### Additions +### Added * CUDA 12.6.1 support * cuDNN 9.5.0 support @@ -15,7 +15,7 @@ Documentation for HIPIFY is available at * Initial support for direct hipification of `cuRAND` into `rocRAND` under the `--roc` option * [#1650] Added a filtering ability for the supplementary hipification scripts -### Fixes +### Resolved issues * Correct `roc` header files support @@ -25,11 +25,11 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 6.2.4 -### Additions +### Added * cuDNN 9.3.0 support -### Fixes +### Resolved issues * Removed some post HIP 6.2 APIs from support * Added hipification support for HIP functions `hipSetValidDevices`, `hipMemcpy2DArrayToArray`, `hipMemcpyAtoA`, `hipMemcpyAtoD`, `hipMemcpyAtoA`, `hipMemcpyAtoHAsync`, and `hipMemcpyHtoAAsync` @@ -37,7 +37,7 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 6.2.1 -### Additions +### Added * CUDA 12.5.1 support * cuDNN 9.2.1 support @@ -47,21 +47,21 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 6.2.0 -### Additions +### Added * CUDA 12.4.1 support * cuDNN 9.1.1 support * LLVM 18.1.6 support * Full `hipBLASLt` support -### Fixes +### Resolved issues * Apply `reinterpret_cast` for an explicit conversion between `pointer-to-function` and `pointer-to-object`; affected functions: `hipFuncGetAttributes`, `hipFuncSetAttribute`, `hipFuncSetCacheConfig`, `hipFuncSetSharedMemConfig`, `hipLaunchKernel`, and `hipLaunchCooperativeKernel` ## HIPIFY for ROCm 6.1.2 -### Additions +### Added * cuDNN 9.0.0 support 
* LLVM 18.1.2 support @@ -69,13 +69,13 @@ Documentation for HIPIFY is available at * `--clang-resource-directory` to specify the clang resource path - the path to the parent folder for the `include` folder that contains `__clang_cuda_runtime_wrapper.h` and other header files used during the hipification process -### Fixes +### Resolved issues * Clang resource files used during hipification are now searchable and also can be specified by the `--clang-resource-directory` option ## HIPIFY for ROCm 6.1.0 -### Additions +### Added * CUDA 12.3.2 support * cuDNN 8.9.7 support @@ -85,19 +85,19 @@ Documentation for HIPIFY is available at * New options: * `--amap` to hipify as much as possible, ignoring `--default-preprocessor` behavior -### Fixes +### Resolved issues * Code blocks skipped by the Preprocessor are not hipified anymore under the `--default-preprocessor` option ## HIPIFY for ROCm 6.0.2 -### Fixes +### Resolved issues * Use the new locations of header files of some HIP and ROCm libraries (`hipRAND`, `hipFFT`, `rocSOLVER`) ## HIPIFY for ROCm 6.0.0 -### Additions +### Added * CUDA 12.2.2 support * cuDNN 8.9.5 support @@ -113,7 +113,7 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 5.7.0 -### Additions +### Added * CUDA 12.2.0 support * cuDNN 8.9.2 support @@ -134,7 +134,7 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 5.6.0 -### Additions +### Added * CUDA 12.1.0 support * cuDNN 8.8.1 support @@ -145,14 +145,14 @@ Documentation for HIPIFY is available at * `--no-warnings-on-undocumented-features` * `--versions` -### Fixes +### Resolved issues * Accessing `half2 struct` members (undocumented feature) * Retargeted `INSTALL` to the `bin` subfolder ## HIPIFY for ROCm 5.5.0 -### Additions +### Added * Partial CUDA 12.0.0 support * cuDNN 8.7.0 support @@ -161,26 +161,26 @@ Documentation for HIPIFY is available at * rocBLAS and MIOpen synthetic tests * LLVM 15.0.7 support -### Changes +### Changed * Synthetic unit tests for `cuBLAS2rocBLAS` 
and `cuDNN2MIOpen` ## HIPIFY for ROCm 5.4.1 -### Additions +### Added * CUDA 11.8.0 support * cuDNN 8.6.0 support * Device types support * LLVM 15.0.4 support -### Fixes +### Resolved issues * Removed `RPATH` definitions (Linux) ## HIPIFY for ROCm 5.4.0 -### Additions +### Added * hipRTC support * Error handling API support @@ -189,7 +189,7 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 5.3.0 -### Additions +### Added * CUDA 11.7.0 support * cuDNN 8.4.1 support @@ -198,19 +198,19 @@ Documentation for HIPIFY is available at * New options: * `--hip-kernel-execution-syntax` -### Fixes - -* Patches for LLVM 14.0.x (Windows only) -* Add `GNUInstallDirs` for CMake on Linux - -### Changes +### Changed * LLVM 3.8.0 is out of support * HIPIFY-specific options support in unit testing +### Resolved issues + +* Patches for LLVM 14.0.x (Windows only) +* Add `GNUInstallDirs` for CMake on Linux + ## HIPIFY for ROCm 5.2.0 -### Additions +### Added * CUDA 11.6.0 support * cuDNN 8.3.3 support @@ -218,19 +218,19 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 5.1.0 -### Additions +### Added * CUDA 11.5.1 support * cuDNN 8.3.2 support -### Fixes +### Resolved issues * Hipification of `cuOccupancyMaxPotentialBlockSize` and `cuOccupancyMaxPotentialBlockSizeWithFlags` ## HIPIFY for ROCm 5.0.0 -### Additions +### Added * CUDA 11.4.2 support * cuDNN 8.3.2 support @@ -242,52 +242,52 @@ Documentation for HIPIFY is available at * `--experimental` * `--cuda-kernel-execution-syntax` -### Fixes +### Changed + +* Support for different formats of locally generated documentation +* Experimentally supported APIs + +### Resolved issues * Packaging for Debian and RPM Linux distributions * Undo argument typecasting for four driver API functions (`cuStreamWaitValue32`, `cuStreamWaitValue64`, `cuStreamWriteValue32`, and `cuStreamWriteValue64`) because the arguments in the corresponding HIP functions are now `uint` -### Changes - -* Support for different formats of locally 
generated documentation -* Experimentally supported APIs - ## HIPIFY for ROCm 4.5.0 -### Additions +### Added * cuDNN 8.2.4 support * Initial graph API support * GNU C/C++ 11.1 support * LLVM 12.0.1 support -### Fixes - -* Abandoned `HIP_DYNAMIC_SHARED` - -### Changes +### Changed * Synthetic unit tests * `-std=c++14` by default +### Resolved issues + +* Abandoned `HIP_DYNAMIC_SHARED` + ## HIPIFY for ROCm 4.3.0 -### Additions +### Added * CUDA 11.3.0 support * cuDNN 8.2.0 support * LLVM 12.0.0 support -### Fixes +### Resolved issues * Added missing type casting arguments for `cuStreamWaitValue32(64)` and `cuStreamWriteValue32(64)` ## HIPIFY for ROCm 4.2.0 -### Additions +### Added * CUDA 11.2.2 support * cuDNN 8.1.1 support @@ -296,43 +296,43 @@ Documentation for HIPIFY is available at * New options: * `--doc-format=`, with `full` (default), `strict`, and `compact` options -### Changes +### Changed * Tests on kernel launch syntax ## HIPIFY for ROCm 4.1.0 -### Additions +### Added * CUDA 11.2.0 support * Stream-ordered memory API support * cuDNN 8.1.1 support * LLVM 11.0.1 support -### Fixes +### Changed -* Patches for LLVM 10.0.x and 11.0.0 (Windows and Linux) +* Initial support for API versioning -### Changes +### Resolved issues -* Initial support for API versioning +* Patches for LLVM 10.0.x and 11.0.0 (Windows and Linux) ## HIPIFY for ROCm 4.0.0 -### Additions +### Added * LLVM 11.0.0 support ## HIPIFY for ROCm 3.10.0 -### Changes +### Changed * Revised CUDA and HIP API and data type versioning * Revised and removed deprecated CUDA and HIP APIs and data types ## HIPIFY for ROCm 3.9.0 -### Additions +### Added * CUDA 11.0.1 support * `CUDA2HIP` documentation generation in Markdown and CSV formats @@ -341,24 +341,24 @@ Documentation for HIPIFY is available at * `--md` (generate Markdown documentation) * `--csv` (generate CSV documentation) - ### Changes + ### Changed * Improved `hipify-perl` generation ## HIPIFY for ROCm 3.8.0 -### Additions +### Added * cuDNN 
8.0.2 support * `compile_commands.json` support (`-p `) -### Changes +### Changed * Improved `hipify-perl` generation ## HIPIFY for ROCm 3.7.0 -### Additions +### Added * CUDA 11.0.0 support * Linux packaging @@ -366,22 +366,22 @@ Documentation for HIPIFY is available at ## HIPIFY for ROCm 3.6.0 -### Additions +### Added * `deprecated` flag for all corresponding CUDA and HIP APIs -### Changes +### Changed * Added warning for all deprecated APIs ## HIPIFY for ROCm 3.5.0 -### Additions +### Added * CUDA 10.2.0 support * cuDNN 7.6.5 support * LLVM 10.0.0 support -### Changes +### Changed * `hipify-clang` and `clang` options separator (`--`) support From 1524768f4c12142fcd02339687e7e334990ec85b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 23 Oct 2024 16:53:04 +0100 Subject: [PATCH 18/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 4 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 76 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 38 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 38 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 38 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 76 +++++++++++++++++++ 5 files changed, 266 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index ab714494..6191c4e1 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12534,13 +12534,51 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_8x192", "CUBLASLT_MATMUL_TILE_8x16", "CUBLASLT_MATMUL_TILE_8x128", + "CUBLASLT_MATMUL_TILE_80x64", + "CUBLASLT_MATMUL_TILE_80x576", + "CUBLASLT_MATMUL_TILE_80x512", + "CUBLASLT_MATMUL_TILE_80x448", + "CUBLASLT_MATMUL_TILE_80x384", + "CUBLASLT_MATMUL_TILE_80x320", + "CUBLASLT_MATMUL_TILE_80x256", + "CUBLASLT_MATMUL_TILE_80x192", + "CUBLASLT_MATMUL_TILE_80x128", + "CUBLASLT_MATMUL_TILE_72x640", + "CUBLASLT_MATMUL_TILE_72x64", + "CUBLASLT_MATMUL_TILE_72x576", + "CUBLASLT_MATMUL_TILE_72x512", + "CUBLASLT_MATMUL_TILE_72x448", + 
"CUBLASLT_MATMUL_TILE_72x384", + "CUBLASLT_MATMUL_TILE_72x320", + "CUBLASLT_MATMUL_TILE_72x256", + "CUBLASLT_MATMUL_TILE_72x192", + "CUBLASLT_MATMUL_TILE_72x128", "CUBLASLT_MATMUL_TILE_64x96", "CUBLASLT_MATMUL_TILE_64x8", + "CUBLASLT_MATMUL_TILE_64x768", + "CUBLASLT_MATMUL_TILE_64x704", + "CUBLASLT_MATMUL_TILE_64x640", "CUBLASLT_MATMUL_TILE_64x64", + "CUBLASLT_MATMUL_TILE_64x576", "CUBLASLT_MATMUL_TILE_64x512", + "CUBLASLT_MATMUL_TILE_64x448", + "CUBLASLT_MATMUL_TILE_64x384", + "CUBLASLT_MATMUL_TILE_64x320", "CUBLASLT_MATMUL_TILE_64x32", "CUBLASLT_MATMUL_TILE_64x256", + "CUBLASLT_MATMUL_TILE_64x192", "CUBLASLT_MATMUL_TILE_64x128", + "CUBLASLT_MATMUL_TILE_56x768", + "CUBLASLT_MATMUL_TILE_56x704", + "CUBLASLT_MATMUL_TILE_56x640", + "CUBLASLT_MATMUL_TILE_56x576", + "CUBLASLT_MATMUL_TILE_56x512", + "CUBLASLT_MATMUL_TILE_56x448", + "CUBLASLT_MATMUL_TILE_56x384", + "CUBLASLT_MATMUL_TILE_56x320", + "CUBLASLT_MATMUL_TILE_56x256", + "CUBLASLT_MATMUL_TILE_56x192", + "CUBLASLT_MATMUL_TILE_56x128", "CUBLASLT_MATMUL_TILE_512x64", "CUBLASLT_MATMUL_TILE_48x768", "CUBLASLT_MATMUL_TILE_48x704", @@ -14052,13 +14090,51 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_8x192", "CUBLASLT_MATMUL_TILE_8x16", "CUBLASLT_MATMUL_TILE_8x128", + "CUBLASLT_MATMUL_TILE_80x64", + "CUBLASLT_MATMUL_TILE_80x576", + "CUBLASLT_MATMUL_TILE_80x512", + "CUBLASLT_MATMUL_TILE_80x448", + "CUBLASLT_MATMUL_TILE_80x384", + "CUBLASLT_MATMUL_TILE_80x320", + "CUBLASLT_MATMUL_TILE_80x256", + "CUBLASLT_MATMUL_TILE_80x192", + "CUBLASLT_MATMUL_TILE_80x128", + "CUBLASLT_MATMUL_TILE_72x640", + "CUBLASLT_MATMUL_TILE_72x64", + "CUBLASLT_MATMUL_TILE_72x576", + "CUBLASLT_MATMUL_TILE_72x512", + "CUBLASLT_MATMUL_TILE_72x448", + "CUBLASLT_MATMUL_TILE_72x384", + "CUBLASLT_MATMUL_TILE_72x320", + "CUBLASLT_MATMUL_TILE_72x256", + "CUBLASLT_MATMUL_TILE_72x192", + "CUBLASLT_MATMUL_TILE_72x128", "CUBLASLT_MATMUL_TILE_64x96", "CUBLASLT_MATMUL_TILE_64x8", + "CUBLASLT_MATMUL_TILE_64x768", + "CUBLASLT_MATMUL_TILE_64x704", + 
"CUBLASLT_MATMUL_TILE_64x640", "CUBLASLT_MATMUL_TILE_64x64", + "CUBLASLT_MATMUL_TILE_64x576", "CUBLASLT_MATMUL_TILE_64x512", + "CUBLASLT_MATMUL_TILE_64x448", + "CUBLASLT_MATMUL_TILE_64x384", + "CUBLASLT_MATMUL_TILE_64x320", "CUBLASLT_MATMUL_TILE_64x32", "CUBLASLT_MATMUL_TILE_64x256", + "CUBLASLT_MATMUL_TILE_64x192", "CUBLASLT_MATMUL_TILE_64x128", + "CUBLASLT_MATMUL_TILE_56x768", + "CUBLASLT_MATMUL_TILE_56x704", + "CUBLASLT_MATMUL_TILE_56x640", + "CUBLASLT_MATMUL_TILE_56x576", + "CUBLASLT_MATMUL_TILE_56x512", + "CUBLASLT_MATMUL_TILE_56x448", + "CUBLASLT_MATMUL_TILE_56x384", + "CUBLASLT_MATMUL_TILE_56x320", + "CUBLASLT_MATMUL_TILE_56x256", + "CUBLASLT_MATMUL_TILE_56x192", + "CUBLASLT_MATMUL_TILE_56x128", "CUBLASLT_MATMUL_TILE_512x64", "CUBLASLT_MATMUL_TILE_48x768", "CUBLASLT_MATMUL_TILE_48x704", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index ee8ad63c..1e33e012 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -401,13 +401,51 @@ |`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | 
| | | |`CUBLASLT_MATMUL_TILE_64x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md 
b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index a95fbf83..853e7afa 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -401,13 +401,51 @@ |`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x576`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x32`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x576`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x576`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index a85c584a..d3e5d316 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -401,13 +401,51 @@ |`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x576`|12.6| | | | | | | | | 
| +|`CUBLASLT_MATMUL_TILE_72x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_72x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_80x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 23c90533..330a3223 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -308,6 +308,44 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_48x640", {"HIPBLASLT_MATMUL_TILE_48x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_48x704", {"HIPBLASLT_MATMUL_TILE_48x704", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_48x768", {"HIPBLASLT_MATMUL_TILE_48x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x128", {"HIPBLASLT_MATMUL_TILE_56x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x192", {"HIPBLASLT_MATMUL_TILE_56x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x256", {"HIPBLASLT_MATMUL_TILE_56x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x320", {"HIPBLASLT_MATMUL_TILE_56x320", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x384", {"HIPBLASLT_MATMUL_TILE_56x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x448", {"HIPBLASLT_MATMUL_TILE_56x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x512", {"HIPBLASLT_MATMUL_TILE_56x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x576", {"HIPBLASLT_MATMUL_TILE_56x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x640", {"HIPBLASLT_MATMUL_TILE_56x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x704", {"HIPBLASLT_MATMUL_TILE_56x704", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_56x768", {"HIPBLASLT_MATMUL_TILE_56x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x192", {"HIPBLASLT_MATMUL_TILE_64x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x320", {"HIPBLASLT_MATMUL_TILE_64x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x384", {"HIPBLASLT_MATMUL_TILE_64x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x448", {"HIPBLASLT_MATMUL_TILE_64x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x576", {"HIPBLASLT_MATMUL_TILE_64x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x640", {"HIPBLASLT_MATMUL_TILE_64x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x704", {"HIPBLASLT_MATMUL_TILE_64x704", "", CONV_NUMERIC_LITERAL, 
API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x768", {"HIPBLASLT_MATMUL_TILE_64x768", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x64", {"HIPBLASLT_MATMUL_TILE_72x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x128", {"HIPBLASLT_MATMUL_TILE_72x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x192", {"HIPBLASLT_MATMUL_TILE_72x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x256", {"HIPBLASLT_MATMUL_TILE_72x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x320", {"HIPBLASLT_MATMUL_TILE_72x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x384", {"HIPBLASLT_MATMUL_TILE_72x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x448", {"HIPBLASLT_MATMUL_TILE_72x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x512", {"HIPBLASLT_MATMUL_TILE_72x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x576", {"HIPBLASLT_MATMUL_TILE_72x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_72x640", {"HIPBLASLT_MATMUL_TILE_72x640", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x64", {"HIPBLASLT_MATMUL_TILE_80x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x128", {"HIPBLASLT_MATMUL_TILE_80x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x192", {"HIPBLASLT_MATMUL_TILE_80x192", "", CONV_NUMERIC_LITERAL, 
API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x256", {"HIPBLASLT_MATMUL_TILE_80x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x320", {"HIPBLASLT_MATMUL_TILE_80x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x384", {"HIPBLASLT_MATMUL_TILE_80x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x448", {"HIPBLASLT_MATMUL_TILE_80x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x512", {"HIPBLASLT_MATMUL_TILE_80x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_80x576", {"HIPBLASLT_MATMUL_TILE_80x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1033,6 +1071,44 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_48x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_48x704", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_48x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 
120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x704", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_56x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x320", {CUDA_126, CUDA_0, 
CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x704", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x768", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_72x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_72x640", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_80x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From d63588eccbe7e40fccbbe098b5b919ec5d099faf Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 23 Oct 2024 19:46:26 +0100 Subject: [PATCH 19/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 4 + `rocblas_(c|z)herk(x)?_64` and `hipblas(C|Z)herk(x)?_v2_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 24 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 12 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 12 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 12 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 20 +++++++++++----- .../synthetic/libraries/cublas2hipblas_v2.cu | 24 +++++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 24 +++++++++++++++++++ 7 files changed, 92 insertions(+), 36 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 6191c4e1..03f4d3e1 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1647,8 +1647,11 @@ sub rocSubstitutions { subst("cublasCher_v2", "rocblas_cher", "library"); subst("cublasCher_v2_64", "rocblas_cher_64", "library"); subst("cublasCherk", "rocblas_cherk", "library"); + subst("cublasCherk_64", "rocblas_cherk_64", "library"); subst("cublasCherk_v2", "rocblas_cherk", "library"); + subst("cublasCherk_v2_64", "rocblas_cherk_64", "library"); subst("cublasCherkx", "rocblas_cherkx", "library"); + subst("cublasCherkx_64", "rocblas_cherkx_64", "library"); subst("cublasChpmv", "rocblas_chpmv", "library"); subst("cublasChpmv_64", 
"rocblas_chpmv_64", "library"); subst("cublasChpmv_v2", "rocblas_chpmv", "library"); @@ -2184,8 +2187,11 @@ sub rocSubstitutions { subst("cublasZher_v2", "rocblas_zher", "library"); subst("cublasZher_v2_64", "rocblas_zher_64", "library"); subst("cublasZherk", "rocblas_zherk", "library"); + subst("cublasZherk_64", "rocblas_zherk_64", "library"); subst("cublasZherk_v2", "rocblas_zherk", "library"); + subst("cublasZherk_v2_64", "rocblas_zherk_64", "library"); subst("cublasZherkx", "rocblas_zherkx", "library"); + subst("cublasZherkx_64", "rocblas_zherkx_64", "library"); subst("cublasZhpmv", "rocblas_zhpmv", "library"); subst("cublasZhpmv_64", "rocblas_zhpmv_64", "library"); subst("cublasZhpmv_v2", "rocblas_zhpmv", "library"); @@ -4391,8 +4397,11 @@ sub simpleSubstitutions { subst("cublasCher_v2", "hipblasCher_v2", "library"); subst("cublasCher_v2_64", "hipblasCher_v2_64", "library"); subst("cublasCherk", "hipblasCherk_v2", "library"); + subst("cublasCherk_64", "hipblasCherk_v2_64", "library"); subst("cublasCherk_v2", "hipblasCherk_v2", "library"); + subst("cublasCherk_v2_64", "hipblasCherk_v2_64", "library"); subst("cublasCherkx", "hipblasCherkx_v2", "library"); + subst("cublasCherkx_64", "hipblasCherkx_v2_64", "library"); subst("cublasChpmv", "hipblasChpmv_v2", "library"); subst("cublasChpmv_64", "hipblasChpmv_v2_64", "library"); subst("cublasChpmv_v2", "hipblasChpmv_v2", "library"); @@ -4933,8 +4942,11 @@ sub simpleSubstitutions { subst("cublasZher_v2", "hipblasZher_v2", "library"); subst("cublasZher_v2_64", "hipblasZher_v2_64", "library"); subst("cublasZherk", "hipblasZherk_v2", "library"); + subst("cublasZherk_64", "hipblasZherk_v2_64", "library"); subst("cublasZherk_v2", "hipblasZherk_v2", "library"); + subst("cublasZherk_v2_64", "hipblasZherk_v2_64", "library"); subst("cublasZherkx", "hipblasZherkx_v2", "library"); + subst("cublasZherkx_64", "hipblasZherkx_v2_64", "library"); subst("cublasZhpmv", "hipblasZhpmv_v2", "library"); subst("cublasZhpmv_64", 
"hipblasZhpmv_v2_64", "library"); subst("cublasZhpmv_v2", "hipblasZhpmv_v2", "library"); @@ -11551,9 +11563,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZsymm_v2_64", "cublasZsymm_64", "cublasZmatinvBatched", - "cublasZherkx_64", - "cublasZherk_v2_64", - "cublasZherk_64", "cublasZher2k_v2_64", "cublasZher2k_64", "cublasZhemm_v2_64", @@ -11719,9 +11728,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCopyEx", "cublasContext", "cublasCmatinvBatched", - "cublasCherkx_64", - "cublasCherk_v2_64", - "cublasCherk_64", "cublasCherkEx_64", "cublasCherkEx", "cublasCherk3mEx_64", @@ -13339,9 +13345,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZsymm_v2_64", "cublasZsymm_64", "cublasZmatinvBatched", - "cublasZherkx_64", - "cublasZherk_v2_64", - "cublasZherk_64", "cublasZher2k_v2_64", "cublasZher2k_64", "cublasZhemm_v2_64", @@ -13521,9 +13524,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCopyEx_64", "cublasCopyEx", "cublasCmatinvBatched", - "cublasCherkx_64", - "cublasCherk_v2_64", - "cublasCherk_64", "cublasCherkEx_64", "cublasCherkEx", "cublasCherk3mEx_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 1e33e012..d1ab8247 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1150,11 +1150,11 @@ |`cublasCher2k_v2`| | | | |`hipblasCher2k_v2`|6.0.0| | | | | |`cublasCher2k_v2_64`|12.0| | | | | | | | | | |`cublasCherk`| | | | |`hipblasCherk_v2`|6.0.0| | | | | -|`cublasCherk_64`|12.0| | | | | | | | | | +|`cublasCherk_64`|12.0| | | |`hipblasCherk_v2_64`|6.3.0| | | |6.3.0| |`cublasCherk_v2`| | | | |`hipblasCherk_v2`|6.0.0| | | | | -|`cublasCherk_v2_64`|12.0| | | | | | | | | | +|`cublasCherk_v2_64`|12.0| | | |`hipblasCherk_v2_64`|6.3.0| | | |6.3.0| |`cublasCherkx`| | | | |`hipblasCherkx_v2`|6.0.0| | | | | -|`cublasCherkx_64`|12.0| | | | | | | | | | +|`cublasCherkx_64`|12.0| | | |`hipblasCherkx_v2_64`|6.3.0| | | |6.3.0| |`cublasCsymm`| | | | 
|`hipblasCsymm_v2`|6.0.0| | | | | |`cublasCsymm_64`|12.0| | | | | | | | | | |`cublasCsymm_v2`| | | | |`hipblasCsymm_v2`|6.0.0| | | | | @@ -1296,11 +1296,11 @@ |`cublasZher2k_v2`| | | | |`hipblasZher2k_v2`|6.0.0| | | | | |`cublasZher2k_v2_64`|12.0| | | | | | | | | | |`cublasZherk`| | | | |`hipblasZherk_v2`|6.0.0| | | | | -|`cublasZherk_64`|12.0| | | | | | | | | | +|`cublasZherk_64`|12.0| | | |`hipblasZherk_v2_64`|6.3.0| | | |6.3.0| |`cublasZherk_v2`| | | | |`hipblasZherk_v2`|6.0.0| | | | | -|`cublasZherk_v2_64`|12.0| | | | | | | | | | +|`cublasZherk_v2_64`|12.0| | | |`hipblasZherk_v2_64`|6.3.0| | | |6.3.0| |`cublasZherkx`| | | | |`hipblasZherkx_v2`|6.0.0| | | | | -|`cublasZherkx_64`|12.0| | | | | | | | | | +|`cublasZherkx_64`|12.0| | | |`hipblasZherkx_v2_64`|6.3.0| | | |6.3.0| |`cublasZsymm`| | | | |`hipblasZsymm_v2`|6.0.0| | | | | |`cublasZsymm_64`|12.0| | | | | | | | | | |`cublasZsymm_v2`| | | | |`hipblasZsymm_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 853e7afa..eb805c4a 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1150,11 +1150,11 @@ |`cublasCher2k_v2`| | | | |`hipblasCher2k_v2`|6.0.0| | | | |`rocblas_cher2k`|3.5.0| | | | | |`cublasCher2k_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasCherk`| | | | |`hipblasCherk_v2`|6.0.0| | | | |`rocblas_cherk`|3.5.0| | | | | -|`cublasCherk_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCherk_64`|12.0| | | |`hipblasCherk_v2_64`|6.3.0| | | |6.3.0|`rocblas_cherk_64`|6.3.0| | | |6.3.0| |`cublasCherk_v2`| | | | |`hipblasCherk_v2`|6.0.0| | | | |`rocblas_cherk`|3.5.0| | | | | -|`cublasCherk_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCherk_v2_64`|12.0| | | |`hipblasCherk_v2_64`|6.3.0| | | |6.3.0|`rocblas_cherk_64`|6.3.0| | | |6.3.0| |`cublasCherkx`| | | | |`hipblasCherkx_v2`|6.0.0| | | | |`rocblas_cherkx`|3.5.0| | | | | 
-|`cublasCherkx_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCherkx_64`|12.0| | | |`hipblasCherkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_cherkx_64`|6.3.0| | | |6.3.0| |`cublasCsymm`| | | | |`hipblasCsymm_v2`|6.0.0| | | | |`rocblas_csymm`|3.5.0| | | | | |`cublasCsymm_64`|12.0| | | | | | | | | | | | | | | | |`cublasCsymm_v2`| | | | |`hipblasCsymm_v2`|6.0.0| | | | |`rocblas_csymm`|3.5.0| | | | | @@ -1296,11 +1296,11 @@ |`cublasZher2k_v2`| | | | |`hipblasZher2k_v2`|6.0.0| | | | |`rocblas_zher2k`|3.5.0| | | | | |`cublasZher2k_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasZherk`| | | | |`hipblasZherk_v2`|6.0.0| | | | |`rocblas_zherk`|3.5.0| | | | | -|`cublasZherk_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZherk_64`|12.0| | | |`hipblasZherk_v2_64`|6.3.0| | | |6.3.0|`rocblas_zherk_64`|6.3.0| | | |6.3.0| |`cublasZherk_v2`| | | | |`hipblasZherk_v2`|6.0.0| | | | |`rocblas_zherk`|3.5.0| | | | | -|`cublasZherk_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZherk_v2_64`|12.0| | | |`hipblasZherk_v2_64`|6.3.0| | | |6.3.0|`rocblas_zherk_64`|6.3.0| | | |6.3.0| |`cublasZherkx`| | | | |`hipblasZherkx_v2`|6.0.0| | | | |`rocblas_zherkx`|3.5.0| | | | | -|`cublasZherkx_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZherkx_64`|12.0| | | |`hipblasZherkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_zherkx_64`|6.3.0| | | |6.3.0| |`cublasZsymm`| | | | |`hipblasZsymm_v2`|6.0.0| | | | |`rocblas_zsymm`|3.5.0| | | | | |`cublasZsymm_64`|12.0| | | | | | | | | | | | | | | | |`cublasZsymm_v2`| | | | |`hipblasZsymm_v2`|6.0.0| | | | |`rocblas_zsymm`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index d3e5d316..47c59316 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1150,11 +1150,11 @@ |`cublasCher2k_v2`| | | | |`rocblas_cher2k`|3.5.0| | | | | |`cublasCher2k_v2_64`|12.0| | | | | | | | | | |`cublasCherk`| | | | |`rocblas_cherk`|3.5.0| | | | | -|`cublasCherk_64`|12.0| | 
| | | | | | | | +|`cublasCherk_64`|12.0| | | |`rocblas_cherk_64`|6.3.0| | | |6.3.0| |`cublasCherk_v2`| | | | |`rocblas_cherk`|3.5.0| | | | | -|`cublasCherk_v2_64`|12.0| | | | | | | | | | +|`cublasCherk_v2_64`|12.0| | | |`rocblas_cherk_64`|6.3.0| | | |6.3.0| |`cublasCherkx`| | | | |`rocblas_cherkx`|3.5.0| | | | | -|`cublasCherkx_64`|12.0| | | | | | | | | | +|`cublasCherkx_64`|12.0| | | |`rocblas_cherkx_64`|6.3.0| | | |6.3.0| |`cublasCsymm`| | | | |`rocblas_csymm`|3.5.0| | | | | |`cublasCsymm_64`|12.0| | | | | | | | | | |`cublasCsymm_v2`| | | | |`rocblas_csymm`|3.5.0| | | | | @@ -1296,11 +1296,11 @@ |`cublasZher2k_v2`| | | | |`rocblas_zher2k`|3.5.0| | | | | |`cublasZher2k_v2_64`|12.0| | | | | | | | | | |`cublasZherk`| | | | |`rocblas_zherk`|3.5.0| | | | | -|`cublasZherk_64`|12.0| | | | | | | | | | +|`cublasZherk_64`|12.0| | | |`rocblas_zherk_64`|6.3.0| | | |6.3.0| |`cublasZherk_v2`| | | | |`rocblas_zherk`|3.5.0| | | | | -|`cublasZherk_v2_64`|12.0| | | | | | | | | | +|`cublasZherk_v2_64`|12.0| | | |`rocblas_zherk_64`|6.3.0| | | |6.3.0| |`cublasZherkx`| | | | |`rocblas_zherkx`|3.5.0| | | | | -|`cublasZherkx_64`|12.0| | | | | | | | | | +|`cublasZherkx_64`|12.0| | | |`rocblas_zherkx_64`|6.3.0| | | |6.3.0| |`cublasZsymm`| | | | |`rocblas_zsymm`|3.5.0| | | | | |`cublasZsymm_64`|12.0| | | | | | | | | | |`cublasZsymm_v2`| | | | |`rocblas_zsymm`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 568cc17c..1fc1e134 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -487,9 +487,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // HERK {"cublasCherk", {"hipblasCherk_v2", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCherk_64", {"hipblasCherk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCherk_64", {"hipblasCherk_v2_64", "rocblas_cherk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZherk", 
{"hipblasZherk_v2", "rocblas_zherk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZherk_64", {"hipblasZherk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZherk_64", {"hipblasZherk_v2_64", "rocblas_zherk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // SYR2K {"cublasSsyr2k", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -519,9 +519,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // HERKX - eXtended HERK {"cublasCherkx", {"hipblasCherkx_v2", "rocblas_cherkx", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCherkx_64", {"hipblasCherkx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCherkx_64", {"hipblasCherkx_v2_64", "rocblas_cherkx_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZherkx", {"hipblasZherkx_v2", "rocblas_zherkx", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZherkx_64", {"hipblasZherkx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZherkx_64", {"hipblasZherkx_v2_64", "rocblas_zherkx_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // SYMM {"cublasSsymm", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -864,7 +864,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // HERK {"cublasCherk_v2", {"hipblasCherk_v2", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCherk_v2_64", {"hipblasCherk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCherk_v2_64", {"hipblasCherk_v2_64", "rocblas_cherk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // IO in Int8 complex/cuComplex, computation in cuComplex {"cublasCherkEx", {"hipblasCherkEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, {"cublasCherkEx_64", {"hipblasCherkEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, @@ -872,7 +872,7 @@ const std::map 
CUDA_BLAS_FUNCTION_MAP { {"cublasCherk3mEx", {"hipblasCherk3mEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, {"cublasCherk3mEx_64", {"hipblasCherk3mEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, {"cublasZherk_v2", {"hipblasZherk_v2", "rocblas_zherk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZherk_v2_64", {"hipblasZherk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZherk_v2_64", {"hipblasZherk_v2_64", "rocblas_zherk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // SYR2K {"cublasSsyr2k_v2", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -2038,6 +2038,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDgemmStridedBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCgemmStridedBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZgemmStridedBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCherk_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZherk_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCherkx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZherkx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2441,6 +2445,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_cgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cherk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zherk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cherkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zherkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu 
b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 18433fa7..676db71e 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2908,6 +2908,30 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* AP, int64_t lda, long long strideA, const hipblasHalf* BP, int64_t ldb, long long strideB, const hipblasHalf* beta, hipblasHalf* CP, int64_t ldc, long long strideC, int64_t batchCount); // CHECK: blasStatus = hipblasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); blasStatus = cublasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const cuComplex* A, int64_t lda, const float* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherk_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const float* alpha, const hipComplex* A, int64_t lda, const float* beta, hipComplex* C, int64_t ldc); + // CHECK: blasStatus = hipblasCherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + blasStatus = cublasCherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + blasStatus = 
cublasCherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const cuDoubleComplex* A, int64_t lda, const double* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherk_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const double* alpha, const hipDoubleComplex* AP, int64_t lda, const double* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZherk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZherk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZherk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZherk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const float* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherkx_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const float* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCherkx_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, 
&complexC, ldc_64); + blasStatus = cublasCherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const double* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherkx_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const double* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZherkx_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index cc1c1168..37d12d82 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3113,6 +3113,30 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* A, int64_t lda, rocblas_stride stride_a, const rocblas_half* B, int64_t ldb, rocblas_stride stride_b, const rocblas_half* beta, rocblas_half* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); // CHECK: blasStatus = 
rocblas_hgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); blasStatus = cublasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const cuComplex* A, int64_t lda, const float* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cherk_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, int64_t n, int64_t k, const float* alpha, const rocblas_float_complex* A, int64_t lda, const float* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_cherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_cherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + blasStatus = cublasCherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + blasStatus = cublasCherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const cuDoubleComplex* A, int64_t lda, const double* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zherk_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, int64_t n, int64_t k, const double* alpha, const rocblas_double_complex* A, int64_t lda, const double* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: 
blasStatus = rocblas_zherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_zherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const float* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cherkx_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const float* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_cherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + blasStatus = cublasCherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const double* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zherkx_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const 
rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const double* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); #endif return 0; From e748ae6b0e46295d8459153eb2f3160a078ea7d1 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 24 Oct 2024 14:24:59 +0100 Subject: [PATCH 20/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 5 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 80 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 40 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 40 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 40 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 80 +++++++++++++++++++ 5 files changed, 280 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 03f4d3e1..b1eef9e1 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12524,6 +12524,12 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_UNDEFINED", "CUBLASLT_MATMUL_TILE_END", "CUBLASLT_MATMUL_TILE_96x64", + "CUBLASLT_MATMUL_TILE_96x512", + "CUBLASLT_MATMUL_TILE_96x448", + "CUBLASLT_MATMUL_TILE_96x384", + "CUBLASLT_MATMUL_TILE_96x320", + "CUBLASLT_MATMUL_TILE_96x256", + "CUBLASLT_MATMUL_TILE_96x192", "CUBLASLT_MATMUL_TILE_96x128", "CUBLASLT_MATMUL_TILE_8x8", "CUBLASLT_MATMUL_TILE_8x768", @@ -12540,6 +12546,14 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_8x192", "CUBLASLT_MATMUL_TILE_8x16", "CUBLASLT_MATMUL_TILE_8x128", + "CUBLASLT_MATMUL_TILE_88x64", + "CUBLASLT_MATMUL_TILE_88x512", + "CUBLASLT_MATMUL_TILE_88x448", + "CUBLASLT_MATMUL_TILE_88x384", + 
"CUBLASLT_MATMUL_TILE_88x320", + "CUBLASLT_MATMUL_TILE_88x256", + "CUBLASLT_MATMUL_TILE_88x192", + "CUBLASLT_MATMUL_TILE_88x128", "CUBLASLT_MATMUL_TILE_80x64", "CUBLASLT_MATMUL_TILE_80x576", "CUBLASLT_MATMUL_TILE_80x512", @@ -12657,13 +12671,39 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_16x16", "CUBLASLT_MATMUL_TILE_16x128", "CUBLASLT_MATMUL_TILE_160x128", + "CUBLASLT_MATMUL_TILE_136x64", + "CUBLASLT_MATMUL_TILE_136x320", + "CUBLASLT_MATMUL_TILE_136x256", + "CUBLASLT_MATMUL_TILE_136x192", + "CUBLASLT_MATMUL_TILE_136x128", "CUBLASLT_MATMUL_TILE_128x96", "CUBLASLT_MATMUL_TILE_128x64", + "CUBLASLT_MATMUL_TILE_128x384", + "CUBLASLT_MATMUL_TILE_128x320", "CUBLASLT_MATMUL_TILE_128x32", "CUBLASLT_MATMUL_TILE_128x256", "CUBLASLT_MATMUL_TILE_128x192", "CUBLASLT_MATMUL_TILE_128x160", "CUBLASLT_MATMUL_TILE_128x128", + "CUBLASLT_MATMUL_TILE_120x64", + "CUBLASLT_MATMUL_TILE_120x384", + "CUBLASLT_MATMUL_TILE_120x320", + "CUBLASLT_MATMUL_TILE_120x256", + "CUBLASLT_MATMUL_TILE_120x192", + "CUBLASLT_MATMUL_TILE_120x128", + "CUBLASLT_MATMUL_TILE_112x64", + "CUBLASLT_MATMUL_TILE_112x384", + "CUBLASLT_MATMUL_TILE_112x320", + "CUBLASLT_MATMUL_TILE_112x256", + "CUBLASLT_MATMUL_TILE_112x192", + "CUBLASLT_MATMUL_TILE_112x128", + "CUBLASLT_MATMUL_TILE_104x64", + "CUBLASLT_MATMUL_TILE_104x448", + "CUBLASLT_MATMUL_TILE_104x384", + "CUBLASLT_MATMUL_TILE_104x320", + "CUBLASLT_MATMUL_TILE_104x256", + "CUBLASLT_MATMUL_TILE_104x192", + "CUBLASLT_MATMUL_TILE_104x128", "CUBLASLT_MATMUL_STAGES_UNDEFINED", "CUBLASLT_MATMUL_STAGES_END", "CUBLASLT_MATMUL_STAGES_8xAUTO", @@ -14074,6 +14114,12 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_UNDEFINED", "CUBLASLT_MATMUL_TILE_END", "CUBLASLT_MATMUL_TILE_96x64", + "CUBLASLT_MATMUL_TILE_96x512", + "CUBLASLT_MATMUL_TILE_96x448", + "CUBLASLT_MATMUL_TILE_96x384", + "CUBLASLT_MATMUL_TILE_96x320", + "CUBLASLT_MATMUL_TILE_96x256", + "CUBLASLT_MATMUL_TILE_96x192", "CUBLASLT_MATMUL_TILE_96x128", "CUBLASLT_MATMUL_TILE_8x8", 
"CUBLASLT_MATMUL_TILE_8x768", @@ -14090,6 +14136,14 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_8x192", "CUBLASLT_MATMUL_TILE_8x16", "CUBLASLT_MATMUL_TILE_8x128", + "CUBLASLT_MATMUL_TILE_88x64", + "CUBLASLT_MATMUL_TILE_88x512", + "CUBLASLT_MATMUL_TILE_88x448", + "CUBLASLT_MATMUL_TILE_88x384", + "CUBLASLT_MATMUL_TILE_88x320", + "CUBLASLT_MATMUL_TILE_88x256", + "CUBLASLT_MATMUL_TILE_88x192", + "CUBLASLT_MATMUL_TILE_88x128", "CUBLASLT_MATMUL_TILE_80x64", "CUBLASLT_MATMUL_TILE_80x576", "CUBLASLT_MATMUL_TILE_80x512", @@ -14207,13 +14261,39 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_16x16", "CUBLASLT_MATMUL_TILE_16x128", "CUBLASLT_MATMUL_TILE_160x128", + "CUBLASLT_MATMUL_TILE_136x64", + "CUBLASLT_MATMUL_TILE_136x320", + "CUBLASLT_MATMUL_TILE_136x256", + "CUBLASLT_MATMUL_TILE_136x192", + "CUBLASLT_MATMUL_TILE_136x128", "CUBLASLT_MATMUL_TILE_128x96", "CUBLASLT_MATMUL_TILE_128x64", + "CUBLASLT_MATMUL_TILE_128x384", + "CUBLASLT_MATMUL_TILE_128x320", "CUBLASLT_MATMUL_TILE_128x32", "CUBLASLT_MATMUL_TILE_128x256", "CUBLASLT_MATMUL_TILE_128x192", "CUBLASLT_MATMUL_TILE_128x160", "CUBLASLT_MATMUL_TILE_128x128", + "CUBLASLT_MATMUL_TILE_120x64", + "CUBLASLT_MATMUL_TILE_120x384", + "CUBLASLT_MATMUL_TILE_120x320", + "CUBLASLT_MATMUL_TILE_120x256", + "CUBLASLT_MATMUL_TILE_120x192", + "CUBLASLT_MATMUL_TILE_120x128", + "CUBLASLT_MATMUL_TILE_112x64", + "CUBLASLT_MATMUL_TILE_112x384", + "CUBLASLT_MATMUL_TILE_112x320", + "CUBLASLT_MATMUL_TILE_112x256", + "CUBLASLT_MATMUL_TILE_112x192", + "CUBLASLT_MATMUL_TILE_112x128", + "CUBLASLT_MATMUL_TILE_104x64", + "CUBLASLT_MATMUL_TILE_104x448", + "CUBLASLT_MATMUL_TILE_104x384", + "CUBLASLT_MATMUL_TILE_104x320", + "CUBLASLT_MATMUL_TILE_104x256", + "CUBLASLT_MATMUL_TILE_104x192", + "CUBLASLT_MATMUL_TILE_104x128", "CUBLASLT_MATMUL_STAGES_UNDEFINED", "CUBLASLT_MATMUL_STAGES_END", "CUBLASLT_MATMUL_STAGES_8xAUTO", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md 
index d1ab8247..f2c13bc6 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -322,13 +322,39 @@ |`CUBLASLT_MATMUL_STAGES_8xAUTO`|11.8| | | | | | | | | | |`CUBLASLT_MATMUL_STAGES_END`|11.0| | | | | | | | | | |`CUBLASLT_MATMUL_STAGES_UNDEFINED`|11.0| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x128`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_136x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | @@ -446,6 +472,14 @@ |`CUBLASLT_MATMUL_TILE_80x512`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | @@ -462,6 +496,12 @@ |`CUBLASLT_MATMUL_TILE_8x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x512`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x64`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_END`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_UNDEFINED`|10.1| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 
eb805c4a..75126a24 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -322,13 +322,39 @@ |`CUBLASLT_MATMUL_STAGES_8xAUTO`|11.8| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_STAGES_END`|11.0| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_STAGES_UNDEFINED`|11.0| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | 
| | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | | | | | | | @@ -446,6 +472,14 @@ |`CUBLASLT_MATMUL_TILE_80x512`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x576`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x512`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | | | | | | | @@ -462,6 +496,12 @@ |`CUBLASLT_MATMUL_TILE_8x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x8`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x128`|11.3| | | | | | | | | | 
| | | | | | +|`CUBLASLT_MATMUL_TILE_96x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x512`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x64`|11.3| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_END`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_UNDEFINED`|10.1| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 47c59316..2a476058 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -322,13 +322,39 @@ |`CUBLASLT_MATMUL_STAGES_8xAUTO`|11.8| | | | | | | | | | |`CUBLASLT_MATMUL_STAGES_END`|11.0| | | | | | | | | | |`CUBLASLT_MATMUL_STAGES_UNDEFINED`|11.0| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_104x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_112x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x320`|12.6| | | | 
| | | | | | +|`CUBLASLT_MATMUL_TILE_120x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_120x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_136x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | @@ -446,6 +472,14 @@ |`CUBLASLT_MATMUL_TILE_80x512`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x512`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_88x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x16`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x192`|12.6| | | | | | | | | | @@ -462,6 +496,12 @@ |`CUBLASLT_MATMUL_TILE_8x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_8x8`|10.1| | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_96x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_96x512`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_96x64`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_END`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_UNDEFINED`|10.1| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 330a3223..05cb227f 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -346,6 +346,46 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_80x448", {"HIPBLASLT_MATMUL_TILE_80x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_80x512", {"HIPBLASLT_MATMUL_TILE_80x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_80x576", {"HIPBLASLT_MATMUL_TILE_80x576", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_88x64", {"HIPBLASLT_MATMUL_TILE_88x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_88x128", {"HIPBLASLT_MATMUL_TILE_88x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_88x192", {"HIPBLASLT_MATMUL_TILE_88x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_88x256", {"HIPBLASLT_MATMUL_TILE_88x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_88x320", {"HIPBLASLT_MATMUL_TILE_88x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_88x384", 
{"HIPBLASLT_MATMUL_TILE_88x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_88x448", {"HIPBLASLT_MATMUL_TILE_88x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_88x512", {"HIPBLASLT_MATMUL_TILE_88x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_96x192", {"HIPBLASLT_MATMUL_TILE_96x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_96x256", {"HIPBLASLT_MATMUL_TILE_96x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_96x320", {"HIPBLASLT_MATMUL_TILE_96x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_96x384", {"HIPBLASLT_MATMUL_TILE_96x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_96x448", {"HIPBLASLT_MATMUL_TILE_96x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_96x512", {"HIPBLASLT_MATMUL_TILE_96x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_104x64", {"HIPBLASLT_MATMUL_TILE_104x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_104x128", {"HIPBLASLT_MATMUL_TILE_104x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_104x192", {"HIPBLASLT_MATMUL_TILE_104x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_104x256", {"HIPBLASLT_MATMUL_TILE_104x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_104x320", {"HIPBLASLT_MATMUL_TILE_104x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_104x384", {"HIPBLASLT_MATMUL_TILE_104x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_104x448", {"HIPBLASLT_MATMUL_TILE_104x448", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_112x64", {"HIPBLASLT_MATMUL_TILE_112x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_112x128", {"HIPBLASLT_MATMUL_TILE_112x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_112x192", {"HIPBLASLT_MATMUL_TILE_112x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_112x256", {"HIPBLASLT_MATMUL_TILE_112x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_112x320", {"HIPBLASLT_MATMUL_TILE_112x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_112x384", {"HIPBLASLT_MATMUL_TILE_112x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_120x64", {"HIPBLASLT_MATMUL_TILE_120x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_120x128", {"HIPBLASLT_MATMUL_TILE_120x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_120x192", {"HIPBLASLT_MATMUL_TILE_120x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_120x256", {"HIPBLASLT_MATMUL_TILE_120x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_120x320", {"HIPBLASLT_MATMUL_TILE_120x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_120x384", {"HIPBLASLT_MATMUL_TILE_120x384", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x320", {"HIPBLASLT_MATMUL_TILE_128x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x384", {"HIPBLASLT_MATMUL_TILE_128x384", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_136x64", {"HIPBLASLT_MATMUL_TILE_136x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_136x128", {"HIPBLASLT_MATMUL_TILE_136x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_136x192", {"HIPBLASLT_MATMUL_TILE_136x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_136x256", {"HIPBLASLT_MATMUL_TILE_136x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_136x320", {"HIPBLASLT_MATMUL_TILE_136x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1109,6 +1149,46 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_80x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_80x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_80x576", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 
CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_88x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_88x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_88x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_88x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_88x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_88x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_88x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_88x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_96x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_96x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_96x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_96x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: 
CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_96x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_96x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_104x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_104x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_104x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_104x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_104x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_104x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_104x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_112x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_112x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_112x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_112x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_112x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_112x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_120x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_120x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_120x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_120x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_120x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_120x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x384", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 
120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_136x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_136x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_136x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_136x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_136x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From ab9a773e2e1d8f3053b54a0f61ceabb50b6de4f7 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 24 Oct 2024 20:09:30 +0100 Subject: [PATCH 21/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 5 + `rocblas_(c|z)her2k(x)?_64` and `hipblas(C|Z)her2k(x)?_v2_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++++------- docs/tables/CUBLAS_API_supported_by_HIP.md | 8 +++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 8 +++---- docs/tables/CUBLAS_API_supported_by_ROC.md | 8 +++---- src/CUDA2HIP_BLAS_API_functions.cpp | 12 ++++++---- .../synthetic/libraries/cublas2hipblas_v2.cu | 22 +++++++++++++++---- .../synthetic/libraries/cublas2rocblas_v2.cu | 22 +++++++++++++++---- 7 files changed, 64 insertions(+), 32 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index b1eef9e1..11879b48 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1642,7 +1642,9 @@ sub rocSubstitutions { subst("cublasCher2_v2", 
"rocblas_cher2", "library"); subst("cublasCher2_v2_64", "rocblas_cher2_64", "library"); subst("cublasCher2k", "rocblas_cher2k", "library"); + subst("cublasCher2k_64", "rocblas_cher2k_64", "library"); subst("cublasCher2k_v2", "rocblas_cher2k", "library"); + subst("cublasCher2k_v2_64", "rocblas_cher2k_64", "library"); subst("cublasCher_64", "rocblas_cher_64", "library"); subst("cublasCher_v2", "rocblas_cher", "library"); subst("cublasCher_v2_64", "rocblas_cher_64", "library"); @@ -2182,7 +2184,9 @@ sub rocSubstitutions { subst("cublasZher2_v2", "rocblas_zher2", "library"); subst("cublasZher2_v2_64", "rocblas_zher2_64", "library"); subst("cublasZher2k", "rocblas_zher2k", "library"); + subst("cublasZher2k_64", "rocblas_zher2k_64", "library"); subst("cublasZher2k_v2", "rocblas_zher2k", "library"); + subst("cublasZher2k_v2_64", "rocblas_zher2k_64", "library"); subst("cublasZher_64", "rocblas_zher_64", "library"); subst("cublasZher_v2", "rocblas_zher", "library"); subst("cublasZher_v2_64", "rocblas_zher_64", "library"); @@ -4392,7 +4396,9 @@ sub simpleSubstitutions { subst("cublasCher2_v2", "hipblasCher2_v2", "library"); subst("cublasCher2_v2_64", "hipblasCher2_v2_64", "library"); subst("cublasCher2k", "hipblasCher2k_v2", "library"); + subst("cublasCher2k_64", "hipblasCher2k_v2_64", "library"); subst("cublasCher2k_v2", "hipblasCher2k_v2", "library"); + subst("cublasCher2k_v2_64", "hipblasCher2k_v2_64", "library"); subst("cublasCher_64", "hipblasCher_v2_64", "library"); subst("cublasCher_v2", "hipblasCher_v2", "library"); subst("cublasCher_v2_64", "hipblasCher_v2_64", "library"); @@ -4937,7 +4943,9 @@ sub simpleSubstitutions { subst("cublasZher2_v2", "hipblasZher2_v2", "library"); subst("cublasZher2_v2_64", "hipblasZher2_v2_64", "library"); subst("cublasZher2k", "hipblasZher2k_v2", "library"); + subst("cublasZher2k_64", "hipblasZher2k_v2_64", "library"); subst("cublasZher2k_v2", "hipblasZher2k_v2", "library"); + subst("cublasZher2k_v2_64", "hipblasZher2k_v2_64", 
"library"); subst("cublasZher_64", "hipblasZher_v2_64", "library"); subst("cublasZher_v2", "hipblasZher_v2", "library"); subst("cublasZher_v2_64", "hipblasZher_v2_64", "library"); @@ -11563,8 +11571,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZsymm_v2_64", "cublasZsymm_64", "cublasZmatinvBatched", - "cublasZher2k_v2_64", - "cublasZher2k_64", "cublasZhemm_v2_64", "cublasZhemm_64", "cublasZgemm3m_64", @@ -11732,8 +11738,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCherkEx", "cublasCherk3mEx_64", "cublasCherk3mEx", - "cublasCher2k_v2_64", - "cublasCher2k_64", "cublasChemm_v2_64", "cublasChemm_64", "cublasCgemmEx_64", @@ -13385,8 +13389,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZsymm_v2_64", "cublasZsymm_64", "cublasZmatinvBatched", - "cublasZher2k_v2_64", - "cublasZher2k_64", "cublasZhemm_v2_64", "cublasZhemm_64", "cublasZgetrsBatched", @@ -13568,8 +13570,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCherkEx", "cublasCherk3mEx_64", "cublasCherk3mEx", - "cublasCher2k_v2_64", - "cublasCher2k_64", "cublasChemm_v2_64", "cublasChemm_64", "cublasCgetrsBatched", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index f2c13bc6..f877723f 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1186,9 +1186,9 @@ |`cublasChemm_v2`| | | | |`hipblasChemm_v2`|6.0.0| | | | | |`cublasChemm_v2_64`|12.0| | | | | | | | | | |`cublasCher2k`| | | | |`hipblasCher2k_v2`|6.0.0| | | | | -|`cublasCher2k_64`|12.0| | | | | | | | | | +|`cublasCher2k_64`|12.0| | | |`hipblasCher2k_v2_64`|6.3.0| | | |6.3.0| |`cublasCher2k_v2`| | | | |`hipblasCher2k_v2`|6.0.0| | | | | -|`cublasCher2k_v2_64`|12.0| | | | | | | | | | +|`cublasCher2k_v2_64`|12.0| | | |`hipblasCher2k_v2_64`|6.3.0| | | |6.3.0| |`cublasCherk`| | | | |`hipblasCherk_v2`|6.0.0| | | | | |`cublasCherk_64`|12.0| | | |`hipblasCherk_v2_64`|6.3.0| | | |6.3.0| |`cublasCherk_v2`| | | | |`hipblasCherk_v2`|6.0.0| | | | | @@ 
-1332,9 +1332,9 @@ |`cublasZhemm_v2`| | | | |`hipblasZhemm_v2`|6.0.0| | | | | |`cublasZhemm_v2_64`|12.0| | | | | | | | | | |`cublasZher2k`| | | | |`hipblasZher2k_v2`|6.0.0| | | | | -|`cublasZher2k_64`|12.0| | | | | | | | | | +|`cublasZher2k_64`|12.0| | | |`hipblasZher2k_v2_64`|6.3.0| | | |6.3.0| |`cublasZher2k_v2`| | | | |`hipblasZher2k_v2`|6.0.0| | | | | -|`cublasZher2k_v2_64`|12.0| | | | | | | | | | +|`cublasZher2k_v2_64`|12.0| | | |`hipblasZher2k_v2_64`|6.3.0| | | |6.3.0| |`cublasZherk`| | | | |`hipblasZherk_v2`|6.0.0| | | | | |`cublasZherk_64`|12.0| | | |`hipblasZherk_v2_64`|6.3.0| | | |6.3.0| |`cublasZherk_v2`| | | | |`hipblasZherk_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 75126a24..abf257c1 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1186,9 +1186,9 @@ |`cublasChemm_v2`| | | | |`hipblasChemm_v2`|6.0.0| | | | |`rocblas_chemm`|3.5.0| | | | | |`cublasChemm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasCher2k`| | | | |`hipblasCher2k_v2`|6.0.0| | | | |`rocblas_cher2k`|3.5.0| | | | | -|`cublasCher2k_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCher2k_64`|12.0| | | |`hipblasCher2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_cher2k_64`|6.3.0| | | |6.3.0| |`cublasCher2k_v2`| | | | |`hipblasCher2k_v2`|6.0.0| | | | |`rocblas_cher2k`|3.5.0| | | | | -|`cublasCher2k_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCher2k_v2_64`|12.0| | | |`hipblasCher2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_cher2k_64`|6.3.0| | | |6.3.0| |`cublasCherk`| | | | |`hipblasCherk_v2`|6.0.0| | | | |`rocblas_cherk`|3.5.0| | | | | |`cublasCherk_64`|12.0| | | |`hipblasCherk_v2_64`|6.3.0| | | |6.3.0|`rocblas_cherk_64`|6.3.0| | | |6.3.0| |`cublasCherk_v2`| | | | |`hipblasCherk_v2`|6.0.0| | | | |`rocblas_cherk`|3.5.0| | | | | @@ -1332,9 +1332,9 @@ |`cublasZhemm_v2`| | | | |`hipblasZhemm_v2`|6.0.0| | | | |`rocblas_zhemm`|3.5.0| | 
| | | |`cublasZhemm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasZher2k`| | | | |`hipblasZher2k_v2`|6.0.0| | | | |`rocblas_zher2k`|3.5.0| | | | | -|`cublasZher2k_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZher2k_64`|12.0| | | |`hipblasZher2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_zher2k_64`|6.3.0| | | |6.3.0| |`cublasZher2k_v2`| | | | |`hipblasZher2k_v2`|6.0.0| | | | |`rocblas_zher2k`|3.5.0| | | | | -|`cublasZher2k_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZher2k_v2_64`|12.0| | | |`hipblasZher2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_zher2k_64`|6.3.0| | | |6.3.0| |`cublasZherk`| | | | |`hipblasZherk_v2`|6.0.0| | | | |`rocblas_zherk`|3.5.0| | | | | |`cublasZherk_64`|12.0| | | |`hipblasZherk_v2_64`|6.3.0| | | |6.3.0|`rocblas_zherk_64`|6.3.0| | | |6.3.0| |`cublasZherk_v2`| | | | |`hipblasZherk_v2`|6.0.0| | | | |`rocblas_zherk`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 2a476058..2fa9f121 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1186,9 +1186,9 @@ |`cublasChemm_v2`| | | | |`rocblas_chemm`|3.5.0| | | | | |`cublasChemm_v2_64`|12.0| | | | | | | | | | |`cublasCher2k`| | | | |`rocblas_cher2k`|3.5.0| | | | | -|`cublasCher2k_64`|12.0| | | | | | | | | | +|`cublasCher2k_64`|12.0| | | |`rocblas_cher2k_64`|6.3.0| | | |6.3.0| |`cublasCher2k_v2`| | | | |`rocblas_cher2k`|3.5.0| | | | | -|`cublasCher2k_v2_64`|12.0| | | | | | | | | | +|`cublasCher2k_v2_64`|12.0| | | |`rocblas_cher2k_64`|6.3.0| | | |6.3.0| |`cublasCherk`| | | | |`rocblas_cherk`|3.5.0| | | | | |`cublasCherk_64`|12.0| | | |`rocblas_cherk_64`|6.3.0| | | |6.3.0| |`cublasCherk_v2`| | | | |`rocblas_cherk`|3.5.0| | | | | @@ -1332,9 +1332,9 @@ |`cublasZhemm_v2`| | | | |`rocblas_zhemm`|3.5.0| | | | | |`cublasZhemm_v2_64`|12.0| | | | | | | | | | |`cublasZher2k`| | | | |`rocblas_zher2k`|3.5.0| | | | | -|`cublasZher2k_64`|12.0| | | | | | | | | | +|`cublasZher2k_64`|12.0| | | 
|`rocblas_zher2k_64`|6.3.0| | | |6.3.0| |`cublasZher2k_v2`| | | | |`rocblas_zher2k`|3.5.0| | | | | -|`cublasZher2k_v2_64`|12.0| | | | | | | | | | +|`cublasZher2k_v2_64`|12.0| | | |`rocblas_zher2k_64`|6.3.0| | | |6.3.0| |`cublasZherk`| | | | |`rocblas_zherk`|3.5.0| | | | | |`cublasZherk_64`|12.0| | | |`rocblas_zherk_64`|6.3.0| | | |6.3.0| |`cublasZherk_v2`| | | | |`rocblas_zherk`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 1fc1e134..45de7c59 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -513,9 +513,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // HER2K {"cublasCher2k", {"hipblasCher2k_v2", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCher2k_64", {"hipblasCher2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCher2k_64", {"hipblasCher2k_v2_64", "rocblas_cher2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZher2k", {"hipblasZher2k_v2", "rocblas_zher2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZher2k_64", {"hipblasZher2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZher2k_64", {"hipblasZher2k_v2_64", "rocblas_zher2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // HERKX - eXtended HERK {"cublasCherkx", {"hipblasCherkx_v2", "rocblas_cherkx", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -886,9 +886,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // HER2K {"cublasCher2k_v2", {"hipblasCher2k_v2", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCher2k_v2_64", {"hipblasCher2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCher2k_v2_64", {"hipblasCher2k_v2_64", "rocblas_cher2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZher2k_v2", {"hipblasZher2k_v2", "rocblas_zher2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - 
{"cublasZher2k_v2_64", {"hipblasZher2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZher2k_v2_64", {"hipblasZher2k_v2_64", "rocblas_zher2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // SYMM {"cublasSsymm_v2", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -2042,6 +2042,8 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasZherk_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCherkx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZherkx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCher2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZher2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2449,6 +2451,8 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_zherk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_cherkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zherkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cher2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zher2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 676db71e..65cbb611 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2911,10 +2911,10 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const cuComplex* A, int64_t lda, const float* beta, cuComplex* C, int64_t ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherk_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const float* 
alpha, const hipComplex* A, int64_t lda, const float* beta, hipComplex* C, int64_t ldc); - // CHECK: blasStatus = hipblasCherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); - // CHECK-NEXT: blasStatus = hipblasCherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); - blasStatus = cublasCherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); - blasStatus = cublasCherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + // CHECK: blasStatus = hipblasCherk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCherk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + blasStatus = cublasCherk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + blasStatus = cublasCherk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const cuDoubleComplex* A, int64_t lda, const double* beta, cuDoubleComplex* C, int64_t ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherk_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const double* alpha, const hipDoubleComplex* AP, int64_t lda, const double* beta, hipDoubleComplex* CP, int64_t ldc); @@ -2932,6 +2932,20 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherkx_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipDoubleComplex* alpha, const 
hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const double* beta, hipDoubleComplex* CP, int64_t ldc); // CHECK: blasStatus = hipblasZherkx_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); blasStatus = cublasZherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const float* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCher2k_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const float* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + blasStatus = cublasCher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + blasStatus = cublasCher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, 
int64_t ldb, const double* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZher2k_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const double* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 37d12d82..8570864b 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3116,10 +3116,10 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const cuComplex* A, int64_t lda, const float* beta, cuComplex* C, int64_t ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cherk_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, int64_t n, int64_t k, const float* alpha, const rocblas_float_complex* A, int64_t lda, const float* beta, rocblas_float_complex* C, int64_t ldc); - // CHECK: blasStatus = 
rocblas_cherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); - // CHECK-NEXT: blasStatus = rocblas_cherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); - blasStatus = cublasCherk_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); - blasStatus = cublasCherk_v2_64(blasHandle, blasFillMode, blasOperation,n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + // CHECK: blasStatus = rocblas_cherk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_cherk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + blasStatus = cublasCherk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); + blasStatus = cublasCherk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &complexA, lda_64, &fb, &complexC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const cuDoubleComplex* A, int64_t lda, const double* beta, cuDoubleComplex* C, int64_t ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zherk_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, int64_t n, int64_t k, const double* alpha, const rocblas_double_complex* A, int64_t lda, const double* beta, rocblas_double_complex* C, int64_t ldc); @@ -3137,6 +3137,20 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zherkx_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const double* beta, rocblas_double_complex* C, 
int64_t ldc); // CHECK: blasStatus = rocblas_zherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); blasStatus = cublasZherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const float* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher2k_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const float* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_cher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_cher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + blasStatus = cublasCher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + blasStatus = cublasCher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const double* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT 
rocblas_status rocblas_zher2k_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const double* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_zher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + blasStatus = cublasZher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); #endif return 0; From dd6de7007ebf60d7be16a3f65d2a44a8e6661016 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 25 Oct 2024 12:46:22 +0100 Subject: [PATCH 22/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 6 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 86 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 43 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 43 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 43 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 86 +++++++++++++++++++ 5 files changed, 301 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 11879b48..f3dbbd17 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12658,7 +12658,33 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_24x256", "CUBLASLT_MATMUL_TILE_24x192", "CUBLASLT_MATMUL_TILE_24x128", + "CUBLASLT_MATMUL_TILE_232x64", + "CUBLASLT_MATMUL_TILE_232x192", + "CUBLASLT_MATMUL_TILE_232x128", + 
"CUBLASLT_MATMUL_TILE_224x64", + "CUBLASLT_MATMUL_TILE_224x192", + "CUBLASLT_MATMUL_TILE_224x128", + "CUBLASLT_MATMUL_TILE_216x64", + "CUBLASLT_MATMUL_TILE_216x192", + "CUBLASLT_MATMUL_TILE_216x128", + "CUBLASLT_MATMUL_TILE_208x64", + "CUBLASLT_MATMUL_TILE_208x192", + "CUBLASLT_MATMUL_TILE_208x128", + "CUBLASLT_MATMUL_TILE_200x64", + "CUBLASLT_MATMUL_TILE_200x192", + "CUBLASLT_MATMUL_TILE_200x128", + "CUBLASLT_MATMUL_TILE_192x64", + "CUBLASLT_MATMUL_TILE_192x256", + "CUBLASLT_MATMUL_TILE_192x192", "CUBLASLT_MATMUL_TILE_192x128", + "CUBLASLT_MATMUL_TILE_184x64", + "CUBLASLT_MATMUL_TILE_184x256", + "CUBLASLT_MATMUL_TILE_184x192", + "CUBLASLT_MATMUL_TILE_184x128", + "CUBLASLT_MATMUL_TILE_176x64", + "CUBLASLT_MATMUL_TILE_176x256", + "CUBLASLT_MATMUL_TILE_176x192", + "CUBLASLT_MATMUL_TILE_176x128", "CUBLASLT_MATMUL_TILE_16x8", "CUBLASLT_MATMUL_TILE_16x768", "CUBLASLT_MATMUL_TILE_16x704", @@ -12674,7 +12700,24 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_16x192", "CUBLASLT_MATMUL_TILE_16x16", "CUBLASLT_MATMUL_TILE_16x128", + "CUBLASLT_MATMUL_TILE_168x64", + "CUBLASLT_MATMUL_TILE_168x256", + "CUBLASLT_MATMUL_TILE_168x192", + "CUBLASLT_MATMUL_TILE_168x128", + "CUBLASLT_MATMUL_TILE_160x64", + "CUBLASLT_MATMUL_TILE_160x256", + "CUBLASLT_MATMUL_TILE_160x192", "CUBLASLT_MATMUL_TILE_160x128", + "CUBLASLT_MATMUL_TILE_152x64", + "CUBLASLT_MATMUL_TILE_152x320", + "CUBLASLT_MATMUL_TILE_152x256", + "CUBLASLT_MATMUL_TILE_152x192", + "CUBLASLT_MATMUL_TILE_152x128", + "CUBLASLT_MATMUL_TILE_144x64", + "CUBLASLT_MATMUL_TILE_144x320", + "CUBLASLT_MATMUL_TILE_144x256", + "CUBLASLT_MATMUL_TILE_144x192", + "CUBLASLT_MATMUL_TILE_144x128", "CUBLASLT_MATMUL_TILE_136x64", "CUBLASLT_MATMUL_TILE_136x320", "CUBLASLT_MATMUL_TILE_136x256", @@ -14244,7 +14287,33 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_24x256", "CUBLASLT_MATMUL_TILE_24x192", "CUBLASLT_MATMUL_TILE_24x128", + "CUBLASLT_MATMUL_TILE_232x64", + "CUBLASLT_MATMUL_TILE_232x192", + 
"CUBLASLT_MATMUL_TILE_232x128", + "CUBLASLT_MATMUL_TILE_224x64", + "CUBLASLT_MATMUL_TILE_224x192", + "CUBLASLT_MATMUL_TILE_224x128", + "CUBLASLT_MATMUL_TILE_216x64", + "CUBLASLT_MATMUL_TILE_216x192", + "CUBLASLT_MATMUL_TILE_216x128", + "CUBLASLT_MATMUL_TILE_208x64", + "CUBLASLT_MATMUL_TILE_208x192", + "CUBLASLT_MATMUL_TILE_208x128", + "CUBLASLT_MATMUL_TILE_200x64", + "CUBLASLT_MATMUL_TILE_200x192", + "CUBLASLT_MATMUL_TILE_200x128", + "CUBLASLT_MATMUL_TILE_192x64", + "CUBLASLT_MATMUL_TILE_192x256", + "CUBLASLT_MATMUL_TILE_192x192", "CUBLASLT_MATMUL_TILE_192x128", + "CUBLASLT_MATMUL_TILE_184x64", + "CUBLASLT_MATMUL_TILE_184x256", + "CUBLASLT_MATMUL_TILE_184x192", + "CUBLASLT_MATMUL_TILE_184x128", + "CUBLASLT_MATMUL_TILE_176x64", + "CUBLASLT_MATMUL_TILE_176x256", + "CUBLASLT_MATMUL_TILE_176x192", + "CUBLASLT_MATMUL_TILE_176x128", "CUBLASLT_MATMUL_TILE_16x8", "CUBLASLT_MATMUL_TILE_16x768", "CUBLASLT_MATMUL_TILE_16x704", @@ -14260,7 +14329,24 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_16x192", "CUBLASLT_MATMUL_TILE_16x16", "CUBLASLT_MATMUL_TILE_16x128", + "CUBLASLT_MATMUL_TILE_168x64", + "CUBLASLT_MATMUL_TILE_168x256", + "CUBLASLT_MATMUL_TILE_168x192", + "CUBLASLT_MATMUL_TILE_168x128", + "CUBLASLT_MATMUL_TILE_160x64", + "CUBLASLT_MATMUL_TILE_160x256", + "CUBLASLT_MATMUL_TILE_160x192", "CUBLASLT_MATMUL_TILE_160x128", + "CUBLASLT_MATMUL_TILE_152x64", + "CUBLASLT_MATMUL_TILE_152x320", + "CUBLASLT_MATMUL_TILE_152x256", + "CUBLASLT_MATMUL_TILE_152x192", + "CUBLASLT_MATMUL_TILE_152x128", + "CUBLASLT_MATMUL_TILE_144x64", + "CUBLASLT_MATMUL_TILE_144x320", + "CUBLASLT_MATMUL_TILE_144x256", + "CUBLASLT_MATMUL_TILE_144x192", + "CUBLASLT_MATMUL_TILE_144x128", "CUBLASLT_MATMUL_TILE_136x64", "CUBLASLT_MATMUL_TILE_136x320", "CUBLASLT_MATMUL_TILE_136x256", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index f877723f..7ec4909a 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ 
b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -355,7 +355,24 @@ |`CUBLASLT_MATMUL_TILE_136x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x320`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x192`|12.6| | | | | | | | | | @@ -371,7 +388,33 @@ |`CUBLASLT_MATMUL_TILE_16x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x8`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x256`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_184x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_200x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_200x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_200x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index abf257c1..7f77207c 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -355,7 +355,24 @@ |`CUBLASLT_MATMUL_TILE_136x256`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x320`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x256`|12.6| | | | | | | | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_144x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x192`|12.6| | | | | | | | | | | | | | | | @@ -371,7 +388,33 @@ |`CUBLASLT_MATMUL_TILE_16x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x8`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x64`|12.6| | | | | | | | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_200x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_200x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_200x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 2fa9f121..7df319b4 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -355,7 +355,24 @@ |`CUBLASLT_MATMUL_TILE_136x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x320`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x128`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_144x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_144x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_152x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_160x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_160x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_168x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x16`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x192`|12.6| | | | | | | | | | @@ -371,7 +388,33 @@ |`CUBLASLT_MATMUL_TILE_16x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_16x8`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_176x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_184x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x64`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_200x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_200x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_200x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_208x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_216x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_224x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_232x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 05cb227f..ec73b52c 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -386,6 +386,49 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_136x192", {"HIPBLASLT_MATMUL_TILE_136x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_136x256", {"HIPBLASLT_MATMUL_TILE_136x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_136x320", {"HIPBLASLT_MATMUL_TILE_136x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x64", {"CUBLASLT_MATMUL_TILE_144x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x128", {"CUBLASLT_MATMUL_TILE_144x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_144x192", {"CUBLASLT_MATMUL_TILE_144x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x256", {"CUBLASLT_MATMUL_TILE_144x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x320", {"CUBLASLT_MATMUL_TILE_144x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x64", {"CUBLASLT_MATMUL_TILE_152x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x128", {"CUBLASLT_MATMUL_TILE_152x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x192", {"CUBLASLT_MATMUL_TILE_152x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x256", {"CUBLASLT_MATMUL_TILE_152x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x320", {"CUBLASLT_MATMUL_TILE_152x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_160x64", {"CUBLASLT_MATMUL_TILE_160x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_160x192", {"CUBLASLT_MATMUL_TILE_160x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_160x256", {"CUBLASLT_MATMUL_TILE_160x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_168x64", {"CUBLASLT_MATMUL_TILE_168x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_168x128", {"CUBLASLT_MATMUL_TILE_168x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_168x192", {"CUBLASLT_MATMUL_TILE_168x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_168x256", {"CUBLASLT_MATMUL_TILE_168x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_176x64", {"CUBLASLT_MATMUL_TILE_176x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_176x128", {"CUBLASLT_MATMUL_TILE_176x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_176x192", {"CUBLASLT_MATMUL_TILE_176x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_176x256", {"CUBLASLT_MATMUL_TILE_176x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_184x64", {"CUBLASLT_MATMUL_TILE_184x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_184x128", {"CUBLASLT_MATMUL_TILE_184x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_184x192", {"CUBLASLT_MATMUL_TILE_184x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_184x256", {"CUBLASLT_MATMUL_TILE_184x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x64", {"CUBLASLT_MATMUL_TILE_192x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x192", {"CUBLASLT_MATMUL_TILE_192x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x256", {"CUBLASLT_MATMUL_TILE_192x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_200x64", {"CUBLASLT_MATMUL_TILE_200x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_200x128", {"CUBLASLT_MATMUL_TILE_200x128", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_200x192", {"CUBLASLT_MATMUL_TILE_200x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_208x64", {"CUBLASLT_MATMUL_TILE_208x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_208x128", {"CUBLASLT_MATMUL_TILE_208x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_208x192", {"CUBLASLT_MATMUL_TILE_208x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_216x64", {"CUBLASLT_MATMUL_TILE_216x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_216x128", {"CUBLASLT_MATMUL_TILE_216x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_216x192", {"CUBLASLT_MATMUL_TILE_216x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_224x64", {"CUBLASLT_MATMUL_TILE_224x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_224x128", {"CUBLASLT_MATMUL_TILE_224x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_224x192", {"CUBLASLT_MATMUL_TILE_224x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_232x64", {"CUBLASLT_MATMUL_TILE_232x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_232x128", {"CUBLASLT_MATMUL_TILE_232x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_232x192", {"CUBLASLT_MATMUL_TILE_232x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1189,6 +1232,49 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_136x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_136x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_136x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_144x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_144x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_144x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_144x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_144x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_152x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_152x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: 
CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_152x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_152x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_152x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_160x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_160x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_160x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_168x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_168x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_168x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_168x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_176x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_176x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_176x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_176x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_184x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_184x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_184x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_184x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_200x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_200x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 
120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_200x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_208x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_208x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_208x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_216x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_216x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_216x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_224x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_224x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_224x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_232x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_232x128", {CUDA_126, 
CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_232x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From c2341762c4cf70c8acdb30060e77428b9c9f3618 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 25 Oct 2024 13:47:44 +0100 Subject: [PATCH 23/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 6 + `rocblas_(s|d|c|z)symm_64` and `hipblas(S|D|C|Z)symm(_v2)?_v2_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 32 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 16 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 24 +++++++++----- .../synthetic/libraries/cublas2hipblas_v2.cu | 28 ++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 ++++++++++++++++ 7 files changed, 112 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index f3dbbd17..78c3fb34 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1691,7 +1691,9 @@ sub rocSubstitutions { subst("cublasCswap_v2", "rocblas_cswap", "library"); subst("cublasCswap_v2_64", "rocblas_cswap_64", "library"); subst("cublasCsymm", "rocblas_csymm", "library"); + subst("cublasCsymm_64", "rocblas_csymm_64", "library"); subst("cublasCsymm_v2", "rocblas_csymm", "library"); + subst("cublasCsymm_v2_64", "rocblas_csymm_64", "library"); subst("cublasCsymv", "rocblas_csymv", "library"); subst("cublasCsymv_64", "rocblas_csymv_64", "library"); subst("cublasCsymv_v2", "rocblas_csymv", "library"); @@ -1830,7 +1832,9 @@ sub rocSubstitutions { subst("cublasDswap_v2", "rocblas_dswap", "library"); subst("cublasDswap_v2_64", 
"rocblas_dswap_64", "library"); subst("cublasDsymm", "rocblas_dsymm", "library"); + subst("cublasDsymm_64", "rocblas_dsymm_64", "library"); subst("cublasDsymm_v2", "rocblas_dsymm", "library"); + subst("cublasDsymm_v2_64", "rocblas_dsymm_64", "library"); subst("cublasDsymv", "rocblas_dsymv", "library"); subst("cublasDsymv_64", "rocblas_dsymv_64", "library"); subst("cublasDsymv_v2", "rocblas_dsymv", "library"); @@ -2056,7 +2060,9 @@ sub rocSubstitutions { subst("cublasSswap_v2", "rocblas_sswap", "library"); subst("cublasSswap_v2_64", "rocblas_sswap_64", "library"); subst("cublasSsymm", "rocblas_ssymm", "library"); + subst("cublasSsymm_64", "rocblas_ssymm_64", "library"); subst("cublasSsymm_v2", "rocblas_ssymm", "library"); + subst("cublasSsymm_v2_64", "rocblas_ssymm_64", "library"); subst("cublasSsymv", "rocblas_ssymv", "library"); subst("cublasSsymv_64", "rocblas_ssymv_64", "library"); subst("cublasSsymv_v2", "rocblas_ssymv", "library"); @@ -2223,7 +2229,9 @@ sub rocSubstitutions { subst("cublasZswap_v2", "rocblas_zswap", "library"); subst("cublasZswap_v2_64", "rocblas_zswap_64", "library"); subst("cublasZsymm", "rocblas_zsymm", "library"); + subst("cublasZsymm_64", "rocblas_zsymm_64", "library"); subst("cublasZsymm_v2", "rocblas_zsymm", "library"); + subst("cublasZsymm_v2_64", "rocblas_zsymm_64", "library"); subst("cublasZsymv", "rocblas_zsymv", "library"); subst("cublasZsymv_64", "rocblas_zsymv_64", "library"); subst("cublasZsymv_v2", "rocblas_zsymv", "library"); @@ -4445,7 +4453,9 @@ sub simpleSubstitutions { subst("cublasCswap_v2", "hipblasCswap_v2", "library"); subst("cublasCswap_v2_64", "hipblasCswap_v2_64", "library"); subst("cublasCsymm", "hipblasCsymm_v2", "library"); + subst("cublasCsymm_64", "hipblasCsymm_v2_64", "library"); subst("cublasCsymm_v2", "hipblasCsymm_v2", "library"); + subst("cublasCsymm_v2_64", "hipblasCsymm_v2_64", "library"); subst("cublasCsymv", "hipblasCsymv_v2", "library"); subst("cublasCsymv_64", "hipblasCsymv_v2_64", "library"); 
subst("cublasCsymv_v2", "hipblasCsymv_v2", "library"); @@ -4585,7 +4595,9 @@ sub simpleSubstitutions { subst("cublasDswap_v2", "hipblasDswap", "library"); subst("cublasDswap_v2_64", "hipblasDswap_64", "library"); subst("cublasDsymm", "hipblasDsymm", "library"); + subst("cublasDsymm_64", "hipblasDsymm_64", "library"); subst("cublasDsymm_v2", "hipblasDsymm", "library"); + subst("cublasDsymm_v2_64", "hipblasDsymm_64", "library"); subst("cublasDsymv", "hipblasDsymv", "library"); subst("cublasDsymv_64", "hipblasDsymv_64", "library"); subst("cublasDsymv_v2", "hipblasDsymv", "library"); @@ -4822,7 +4834,9 @@ sub simpleSubstitutions { subst("cublasSswap_v2", "hipblasSswap", "library"); subst("cublasSswap_v2_64", "hipblasSswap_64", "library"); subst("cublasSsymm", "hipblasSsymm", "library"); + subst("cublasSsymm_64", "hipblasSsymm_64", "library"); subst("cublasSsymm_v2", "hipblasSsymm", "library"); + subst("cublasSsymm_v2_64", "hipblasSsymm_64", "library"); subst("cublasSsymv", "hipblasSsymv", "library"); subst("cublasSsymv_64", "hipblasSsymv_64", "library"); subst("cublasSsymv_v2", "hipblasSsymv", "library"); @@ -4982,7 +4996,9 @@ sub simpleSubstitutions { subst("cublasZswap_v2", "hipblasZswap_v2", "library"); subst("cublasZswap_v2_64", "hipblasZswap_v2_64", "library"); subst("cublasZsymm", "hipblasZsymm_v2", "library"); + subst("cublasZsymm_64", "hipblasZsymm_v2_64", "library"); subst("cublasZsymm_v2", "hipblasZsymm_v2", "library"); + subst("cublasZsymm_v2_64", "hipblasZsymm_v2_64", "library"); subst("cublasZsymv", "hipblasZsymv_v2", "library"); subst("cublasZsymv_64", "hipblasZsymv_v2_64", "library"); subst("cublasZsymv_v2", "hipblasZsymv_v2", "library"); @@ -11568,8 +11584,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZsyrk_64", "cublasZsyr2k_v2_64", "cublasZsyr2k_64", - "cublasZsymm_v2_64", - "cublasZsymm_64", "cublasZmatinvBatched", "cublasZhemm_v2_64", "cublasZhemm_64", @@ -11601,8 +11615,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSsyrk_64", 
"cublasSsyr2k_v2_64", "cublasSsyr2k_64", - "cublasSsymm_v2_64", - "cublasSsymm_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSgemmGroupedBatched_64", @@ -11705,8 +11717,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDsyrk_64", "cublasDsyr2k_v2_64", "cublasDsyr2k_64", - "cublasDsymm_v2_64", - "cublasDsymm_64", "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", @@ -11728,8 +11738,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCsyrk3mEx", "cublasCsyr2k_v2_64", "cublasCsyr2k_64", - "cublasCsymm_v2_64", - "cublasCsymm_64", "cublasCopyEx_64", "cublasCopyEx", "cublasContext", @@ -13429,8 +13437,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZsyrk_64", "cublasZsyr2k_v2_64", "cublasZsyr2k_64", - "cublasZsymm_v2_64", - "cublasZsymm_64", "cublasZmatinvBatched", "cublasZhemm_v2_64", "cublasZhemm_64", @@ -13456,8 +13462,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSsyrk_64", "cublasSsyr2k_v2_64", "cublasSsyr2k_64", - "cublasSsymm_v2_64", - "cublasSsymm_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSgetrsBatched", @@ -13579,8 +13583,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDsyrk_64", "cublasDsyr2k_v2_64", "cublasDsyr2k_64", - "cublasDsymm_v2_64", - "cublasDsymm_64", "cublasDmatinvBatched", "cublasDgetrsBatched", "cublasDgetriBatched", @@ -13604,8 +13606,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCsyrk3mEx", "cublasCsyr2k_v2_64", "cublasCsyr2k_64", - "cublasCsymm_v2_64", - "cublasCsymm_64", "cublasCopyEx_64", "cublasCopyEx", "cublasCmatinvBatched", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 7ec4909a..d3b89b71 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1239,9 +1239,9 @@ |`cublasCherkx`| | | | |`hipblasCherkx_v2`|6.0.0| | | | | |`cublasCherkx_64`|12.0| | | |`hipblasCherkx_v2_64`|6.3.0| | | |6.3.0| |`cublasCsymm`| | | | |`hipblasCsymm_v2`|6.0.0| | | | | 
-|`cublasCsymm_64`|12.0| | | | | | | | | | +|`cublasCsymm_64`|12.0| | | |`hipblasCsymm_v2_64`|6.3.0| | | |6.3.0| |`cublasCsymm_v2`| | | | |`hipblasCsymm_v2`|6.0.0| | | | | -|`cublasCsymm_v2_64`|12.0| | | | | | | | | | +|`cublasCsymm_v2_64`|12.0| | | |`hipblasCsymm_v2_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | | |`cublasCsyr2k_64`|12.0| | | | | | | | | | |`cublasCsyr2k_v2`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | | @@ -1275,9 +1275,9 @@ |`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | | |`cublasDgemvStridedBatched_64`|12.0| | | |`hipblasDgemvStridedBatched_64`|6.2.0| | | | | |`cublasDsymm`| | | | |`hipblasDsymm`|3.6.0| | | | | -|`cublasDsymm_64`|12.0| | | | | | | | | | +|`cublasDsymm_64`|12.0| | | |`hipblasDsymm_64`|6.3.0| | | |6.3.0| |`cublasDsymm_v2`| | | | |`hipblasDsymm`|3.6.0| | | | | -|`cublasDsymm_v2_64`|12.0| | | | | | | | | | +|`cublasDsymm_v2_64`|12.0| | | |`hipblasDsymm_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k`| | | | |`hipblasDsyr2k`|3.5.0| | | | | |`cublasDsyr2k_64`|12.0| | | | | | | | | | |`cublasDsyr2k_v2`| | | | |`hipblasDsyr2k`|3.5.0| | | | | @@ -1327,9 +1327,9 @@ |`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | | |`cublasSgemvStridedBatched_64`|12.0| | | |`hipblasSgemvStridedBatched_64`|6.2.0| | | | | |`cublasSsymm`| | | | |`hipblasSsymm`|3.6.0| | | | | -|`cublasSsymm_64`|12.0| | | | | | | | | | +|`cublasSsymm_64`|12.0| | | |`hipblasSsymm_64`|6.3.0| | | |6.3.0| |`cublasSsymm_v2`| | | | |`hipblasSsymm`|3.6.0| | | | | -|`cublasSsymm_v2_64`|12.0| | | | | | | | | | +|`cublasSsymm_v2_64`|12.0| | | |`hipblasSsymm_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k`| | | | |`hipblasSsyr2k`|3.5.0| | | | | |`cublasSsyr2k_64`|12.0| | | | | | | | | | |`cublasSsyr2k_v2`| | | | |`hipblasSsyr2k`|3.5.0| | | | | @@ -1385,9 +1385,9 @@ |`cublasZherkx`| | | | |`hipblasZherkx_v2`|6.0.0| | | | | |`cublasZherkx_64`|12.0| | | |`hipblasZherkx_v2_64`|6.3.0| | | |6.3.0| |`cublasZsymm`| | | | 
|`hipblasZsymm_v2`|6.0.0| | | | | -|`cublasZsymm_64`|12.0| | | | | | | | | | +|`cublasZsymm_64`|12.0| | | |`hipblasZsymm_v2_64`|6.3.0| | | |6.3.0| |`cublasZsymm_v2`| | | | |`hipblasZsymm_v2`|6.0.0| | | | | -|`cublasZsymm_v2_64`|12.0| | | | | | | | | | +|`cublasZsymm_v2_64`|12.0| | | |`hipblasZsymm_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | | |`cublasZsyr2k_64`|12.0| | | | | | | | | | |`cublasZsyr2k_v2`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 7f77207c..7e323ec4 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1239,9 +1239,9 @@ |`cublasCherkx`| | | | |`hipblasCherkx_v2`|6.0.0| | | | |`rocblas_cherkx`|3.5.0| | | | | |`cublasCherkx_64`|12.0| | | |`hipblasCherkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_cherkx_64`|6.3.0| | | |6.3.0| |`cublasCsymm`| | | | |`hipblasCsymm_v2`|6.0.0| | | | |`rocblas_csymm`|3.5.0| | | | | -|`cublasCsymm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsymm_64`|12.0| | | |`hipblasCsymm_v2_64`|6.3.0| | | |6.3.0|`rocblas_csymm_64`|6.3.0| | | |6.3.0| |`cublasCsymm_v2`| | | | |`hipblasCsymm_v2`|6.0.0| | | | |`rocblas_csymm`|3.5.0| | | | | -|`cublasCsymm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsymm_v2_64`|12.0| | | |`hipblasCsymm_v2_64`|6.3.0| | | |6.3.0|`rocblas_csymm_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | |`rocblas_csyr2k`|3.5.0| | | | | |`cublasCsyr2k_64`|12.0| | | | | | | | | | | | | | | | |`cublasCsyr2k_v2`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | |`rocblas_csyr2k`|3.5.0| | | | | @@ -1275,9 +1275,9 @@ |`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | |`rocblas_dgemv_strided_batched`|3.5.0| | | | | |`cublasDgemvStridedBatched_64`|12.0| | | |`hipblasDgemvStridedBatched_64`|6.2.0| | | | |`rocblas_dgemv_strided_batched_64`|6.2.0| | | | 
| |`cublasDsymm`| | | | |`hipblasDsymm`|3.6.0| | | | |`rocblas_dsymm`|3.5.0| | | | | -|`cublasDsymm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsymm_64`|12.0| | | |`hipblasDsymm_64`|6.3.0| | | |6.3.0|`rocblas_dsymm_64`|6.3.0| | | |6.3.0| |`cublasDsymm_v2`| | | | |`hipblasDsymm`|3.6.0| | | | |`rocblas_dsymm`|3.5.0| | | | | -|`cublasDsymm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsymm_v2_64`|12.0| | | |`hipblasDsymm_64`|6.3.0| | | |6.3.0|`rocblas_dsymm_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k`| | | | |`hipblasDsyr2k`|3.5.0| | | | |`rocblas_dsyr2k`|3.5.0| | | | | |`cublasDsyr2k_64`|12.0| | | | | | | | | | | | | | | | |`cublasDsyr2k_v2`| | | | |`hipblasDsyr2k`|3.5.0| | | | |`rocblas_dsyr2k`|3.5.0| | | | | @@ -1327,9 +1327,9 @@ |`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | |`rocblas_sgemv_strided_batched`|3.5.0| | | | | |`cublasSgemvStridedBatched_64`|12.0| | | |`hipblasSgemvStridedBatched_64`|6.2.0| | | | |`rocblas_sgemv_strided_batched_64`|6.2.0| | | | | |`cublasSsymm`| | | | |`hipblasSsymm`|3.6.0| | | | |`rocblas_ssymm`|3.5.0| | | | | -|`cublasSsymm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsymm_64`|12.0| | | |`hipblasSsymm_64`|6.3.0| | | |6.3.0|`rocblas_ssymm_64`|6.3.0| | | |6.3.0| |`cublasSsymm_v2`| | | | |`hipblasSsymm`|3.6.0| | | | |`rocblas_ssymm`|3.5.0| | | | | -|`cublasSsymm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsymm_v2_64`|12.0| | | |`hipblasSsymm_64`|6.3.0| | | |6.3.0|`rocblas_ssymm_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k`| | | | |`hipblasSsyr2k`|3.5.0| | | | |`rocblas_ssyr2k`|3.5.0| | | | | |`cublasSsyr2k_64`|12.0| | | | | | | | | | | | | | | | |`cublasSsyr2k_v2`| | | | |`hipblasSsyr2k`|3.5.0| | | | |`rocblas_ssyr2k`|3.5.0| | | | | @@ -1385,9 +1385,9 @@ |`cublasZherkx`| | | | |`hipblasZherkx_v2`|6.0.0| | | | |`rocblas_zherkx`|3.5.0| | | | | |`cublasZherkx_64`|12.0| | | |`hipblasZherkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_zherkx_64`|6.3.0| | | |6.3.0| |`cublasZsymm`| | | | 
|`hipblasZsymm_v2`|6.0.0| | | | |`rocblas_zsymm`|3.5.0| | | | | -|`cublasZsymm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsymm_64`|12.0| | | |`hipblasZsymm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsymm_64`|6.3.0| | | |6.3.0| |`cublasZsymm_v2`| | | | |`hipblasZsymm_v2`|6.0.0| | | | |`rocblas_zsymm`|3.5.0| | | | | -|`cublasZsymm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsymm_v2_64`|12.0| | | |`hipblasZsymm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsymm_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | |`rocblas_zsyr2k`|3.5.0| | | | | |`cublasZsyr2k_64`|12.0| | | | | | | | | | | | | | | | |`cublasZsyr2k_v2`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | |`rocblas_zsyr2k`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 7df319b4..8bdde27c 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1239,9 +1239,9 @@ |`cublasCherkx`| | | | |`rocblas_cherkx`|3.5.0| | | | | |`cublasCherkx_64`|12.0| | | |`rocblas_cherkx_64`|6.3.0| | | |6.3.0| |`cublasCsymm`| | | | |`rocblas_csymm`|3.5.0| | | | | -|`cublasCsymm_64`|12.0| | | | | | | | | | +|`cublasCsymm_64`|12.0| | | |`rocblas_csymm_64`|6.3.0| | | |6.3.0| |`cublasCsymm_v2`| | | | |`rocblas_csymm`|3.5.0| | | | | -|`cublasCsymm_v2_64`|12.0| | | | | | | | | | +|`cublasCsymm_v2_64`|12.0| | | |`rocblas_csymm_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k`| | | | |`rocblas_csyr2k`|3.5.0| | | | | |`cublasCsyr2k_64`|12.0| | | | | | | | | | |`cublasCsyr2k_v2`| | | | |`rocblas_csyr2k`|3.5.0| | | | | @@ -1275,9 +1275,9 @@ |`cublasDgemvStridedBatched`|11.6| | | |`rocblas_dgemv_strided_batched`|3.5.0| | | | | |`cublasDgemvStridedBatched_64`|12.0| | | |`rocblas_dgemv_strided_batched_64`|6.2.0| | | | | |`cublasDsymm`| | | | |`rocblas_dsymm`|3.5.0| | | | | -|`cublasDsymm_64`|12.0| | | | | | | | | | +|`cublasDsymm_64`|12.0| | | |`rocblas_dsymm_64`|6.3.0| | | |6.3.0| |`cublasDsymm_v2`| | | | 
|`rocblas_dsymm`|3.5.0| | | | | -|`cublasDsymm_v2_64`|12.0| | | | | | | | | | +|`cublasDsymm_v2_64`|12.0| | | |`rocblas_dsymm_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k`| | | | |`rocblas_dsyr2k`|3.5.0| | | | | |`cublasDsyr2k_64`|12.0| | | | | | | | | | |`cublasDsyr2k_v2`| | | | |`rocblas_dsyr2k`|3.5.0| | | | | @@ -1327,9 +1327,9 @@ |`cublasSgemvStridedBatched`|11.6| | | |`rocblas_sgemv_strided_batched`|3.5.0| | | | | |`cublasSgemvStridedBatched_64`|12.0| | | |`rocblas_sgemv_strided_batched_64`|6.2.0| | | | | |`cublasSsymm`| | | | |`rocblas_ssymm`|3.5.0| | | | | -|`cublasSsymm_64`|12.0| | | | | | | | | | +|`cublasSsymm_64`|12.0| | | |`rocblas_ssymm_64`|6.3.0| | | |6.3.0| |`cublasSsymm_v2`| | | | |`rocblas_ssymm`|3.5.0| | | | | -|`cublasSsymm_v2_64`|12.0| | | | | | | | | | +|`cublasSsymm_v2_64`|12.0| | | |`rocblas_ssymm_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k`| | | | |`rocblas_ssyr2k`|3.5.0| | | | | |`cublasSsyr2k_64`|12.0| | | | | | | | | | |`cublasSsyr2k_v2`| | | | |`rocblas_ssyr2k`|3.5.0| | | | | @@ -1385,9 +1385,9 @@ |`cublasZherkx`| | | | |`rocblas_zherkx`|3.5.0| | | | | |`cublasZherkx_64`|12.0| | | |`rocblas_zherkx_64`|6.3.0| | | |6.3.0| |`cublasZsymm`| | | | |`rocblas_zsymm`|3.5.0| | | | | -|`cublasZsymm_64`|12.0| | | | | | | | | | +|`cublasZsymm_64`|12.0| | | |`rocblas_zsymm_64`|6.3.0| | | |6.3.0| |`cublasZsymm_v2`| | | | |`rocblas_zsymm`|3.5.0| | | | | -|`cublasZsymm_v2_64`|12.0| | | | | | | | | | +|`cublasZsymm_v2_64`|12.0| | | |`rocblas_zsymm_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k`| | | | |`rocblas_zsyr2k`|3.5.0| | | | | |`cublasZsyr2k_64`|12.0| | | | | | | | | | |`cublasZsyr2k_v2`| | | | |`rocblas_zsyr2k`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 45de7c59..378e1c21 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -525,13 +525,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SYMM {"cublasSsymm", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsymm_64", {"hipblasSsymm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSsymm_64", {"hipblasSsymm_64", "rocblas_ssymm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDsymm", {"hipblasDsymm", "rocblas_dsymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsymm_64", {"hipblasDsymm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDsymm_64", {"hipblasDsymm_64", "rocblas_dsymm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCsymm", {"hipblasCsymm_v2", "rocblas_csymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsymm_64", {"hipblasCsymm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCsymm_64", {"hipblasCsymm_v2_64", "rocblas_csymm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZsymm", {"hipblasZsymm_v2", "rocblas_zsymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsymm_64", {"hipblasZsymm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZsymm_64", {"hipblasZsymm_v2_64", "rocblas_zsymm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // HEMM {"cublasChemm", {"hipblasChemm_v2", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -892,13 +892,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SYMM {"cublasSsymm_v2", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSsymm_v2_64", {"hipblasSsymm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSsymm_v2_64", {"hipblasSsymm_64", "rocblas_ssymm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDsymm_v2", {"hipblasDsymm", "rocblas_dsymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDsymm_v2_64", {"hipblasDsymm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + 
{"cublasDsymm_v2_64", {"hipblasDsymm_64", "rocblas_dsymm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCsymm_v2", {"hipblasCsymm_v2", "rocblas_csymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCsymm_v2_64", {"hipblasCsymm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCsymm_v2_64", {"hipblasCsymm_v2_64", "rocblas_csymm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZsymm_v2", {"hipblasZsymm_v2", "rocblas_zsymm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZsymm_v2_64", {"hipblasZsymm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZsymm_v2_64", {"hipblasZsymm_v2_64", "rocblas_zsymm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // HEMM {"cublasChemm_v2", {"hipblasChemm_v2", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -2044,6 +2044,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasZherkx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCher2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZher2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSsymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDsymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCsymm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZsymm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2453,6 +2457,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_zherkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_cher2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zher2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ssymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dsymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_csymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zsymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map 
HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 65cbb611..cf2188bd 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2946,6 +2946,34 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); blasStatus = cublasZher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); blasStatus = cublasZher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsymm_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const float* alpha, const float* AP, int64_t lda, const float* BP, int64_t ldb, const float* beta, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasSsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + + // CUDA: 
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsymm_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const double* alpha, const double* AP, int64_t lda, const double* BP, int64_t ldb, const double* beta, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasDsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsymm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const hipComplex* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, 
&complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsymm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 8570864b..103d4661 100644 --- 
a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3151,6 +3151,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); blasStatus = cublasZher2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); blasStatus = cublasZher2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &db, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssymm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ssymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_ssymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t 
ldb, const double* beta, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsymm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_dsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csymm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_csymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_csymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, 
ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsymm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_zsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; From f90bc293361c79d74d5de71d2f32d0482de82dd8 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 25 Oct 2024 17:43:59 +0100 Subject: [PATCH 24/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 7 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 80 +++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 40 +++++ 
.../CUBLAS_API_supported_by_HIP_and_ROC.md | 40 +++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 40 +++++ src/CUDA2HIP_BLAS_API_types.cpp | 166 +++++++++++++----- 5 files changed, 323 insertions(+), 43 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 78c3fb34..cd7e8dfa 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12636,6 +12636,21 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_40x256", "CUBLASLT_MATMUL_TILE_40x192", "CUBLASLT_MATMUL_TILE_40x128", + "CUBLASLT_MATMUL_TILE_392x64", + "CUBLASLT_MATMUL_TILE_384x64", + "CUBLASLT_MATMUL_TILE_384x128", + "CUBLASLT_MATMUL_TILE_376x64", + "CUBLASLT_MATMUL_TILE_376x128", + "CUBLASLT_MATMUL_TILE_368x64", + "CUBLASLT_MATMUL_TILE_368x128", + "CUBLASLT_MATMUL_TILE_360x64", + "CUBLASLT_MATMUL_TILE_360x128", + "CUBLASLT_MATMUL_TILE_352x64", + "CUBLASLT_MATMUL_TILE_352x128", + "CUBLASLT_MATMUL_TILE_344x64", + "CUBLASLT_MATMUL_TILE_344x128", + "CUBLASLT_MATMUL_TILE_336x64", + "CUBLASLT_MATMUL_TILE_336x128", "CUBLASLT_MATMUL_TILE_32x8", "CUBLASLT_MATMUL_TILE_32x768", "CUBLASLT_MATMUL_TILE_32x704", @@ -12651,8 +12666,27 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_32x192", "CUBLASLT_MATMUL_TILE_32x16", "CUBLASLT_MATMUL_TILE_32x128", + "CUBLASLT_MATMUL_TILE_328x64", + "CUBLASLT_MATMUL_TILE_328x128", + "CUBLASLT_MATMUL_TILE_320x64", + "CUBLASLT_MATMUL_TILE_320x128", + "CUBLASLT_MATMUL_TILE_312x64", + "CUBLASLT_MATMUL_TILE_312x128", + "CUBLASLT_MATMUL_TILE_304x64", + "CUBLASLT_MATMUL_TILE_304x128", + "CUBLASLT_MATMUL_TILE_296x64", + "CUBLASLT_MATMUL_TILE_296x128", + "CUBLASLT_MATMUL_TILE_288x64", + "CUBLASLT_MATMUL_TILE_288x128", + "CUBLASLT_MATMUL_TILE_280x64", + "CUBLASLT_MATMUL_TILE_280x128", + "CUBLASLT_MATMUL_TILE_272x64", + "CUBLASLT_MATMUL_TILE_272x128", + "CUBLASLT_MATMUL_TILE_264x64", + "CUBLASLT_MATMUL_TILE_264x128", "CUBLASLT_MATMUL_TILE_256x64", "CUBLASLT_MATMUL_TILE_256x32", + "CUBLASLT_MATMUL_TILE_256x192", "CUBLASLT_MATMUL_TILE_256x128", 
"CUBLASLT_MATMUL_TILE_24x768", "CUBLASLT_MATMUL_TILE_24x704", @@ -12666,6 +12700,12 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_24x256", "CUBLASLT_MATMUL_TILE_24x192", "CUBLASLT_MATMUL_TILE_24x128", + "CUBLASLT_MATMUL_TILE_248x64", + "CUBLASLT_MATMUL_TILE_248x192", + "CUBLASLT_MATMUL_TILE_248x128", + "CUBLASLT_MATMUL_TILE_240x64", + "CUBLASLT_MATMUL_TILE_240x192", + "CUBLASLT_MATMUL_TILE_240x128", "CUBLASLT_MATMUL_TILE_232x64", "CUBLASLT_MATMUL_TILE_232x192", "CUBLASLT_MATMUL_TILE_232x128", @@ -14257,6 +14297,21 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_40x256", "CUBLASLT_MATMUL_TILE_40x192", "CUBLASLT_MATMUL_TILE_40x128", + "CUBLASLT_MATMUL_TILE_392x64", + "CUBLASLT_MATMUL_TILE_384x64", + "CUBLASLT_MATMUL_TILE_384x128", + "CUBLASLT_MATMUL_TILE_376x64", + "CUBLASLT_MATMUL_TILE_376x128", + "CUBLASLT_MATMUL_TILE_368x64", + "CUBLASLT_MATMUL_TILE_368x128", + "CUBLASLT_MATMUL_TILE_360x64", + "CUBLASLT_MATMUL_TILE_360x128", + "CUBLASLT_MATMUL_TILE_352x64", + "CUBLASLT_MATMUL_TILE_352x128", + "CUBLASLT_MATMUL_TILE_344x64", + "CUBLASLT_MATMUL_TILE_344x128", + "CUBLASLT_MATMUL_TILE_336x64", + "CUBLASLT_MATMUL_TILE_336x128", "CUBLASLT_MATMUL_TILE_32x8", "CUBLASLT_MATMUL_TILE_32x768", "CUBLASLT_MATMUL_TILE_32x704", @@ -14272,8 +14327,27 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_32x192", "CUBLASLT_MATMUL_TILE_32x16", "CUBLASLT_MATMUL_TILE_32x128", + "CUBLASLT_MATMUL_TILE_328x64", + "CUBLASLT_MATMUL_TILE_328x128", + "CUBLASLT_MATMUL_TILE_320x64", + "CUBLASLT_MATMUL_TILE_320x128", + "CUBLASLT_MATMUL_TILE_312x64", + "CUBLASLT_MATMUL_TILE_312x128", + "CUBLASLT_MATMUL_TILE_304x64", + "CUBLASLT_MATMUL_TILE_304x128", + "CUBLASLT_MATMUL_TILE_296x64", + "CUBLASLT_MATMUL_TILE_296x128", + "CUBLASLT_MATMUL_TILE_288x64", + "CUBLASLT_MATMUL_TILE_288x128", + "CUBLASLT_MATMUL_TILE_280x64", + "CUBLASLT_MATMUL_TILE_280x128", + "CUBLASLT_MATMUL_TILE_272x64", + "CUBLASLT_MATMUL_TILE_272x128", + "CUBLASLT_MATMUL_TILE_264x64", + 
"CUBLASLT_MATMUL_TILE_264x128", "CUBLASLT_MATMUL_TILE_256x64", "CUBLASLT_MATMUL_TILE_256x32", + "CUBLASLT_MATMUL_TILE_256x192", "CUBLASLT_MATMUL_TILE_256x128", "CUBLASLT_MATMUL_TILE_24x768", "CUBLASLT_MATMUL_TILE_24x704", @@ -14287,6 +14361,12 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_24x256", "CUBLASLT_MATMUL_TILE_24x192", "CUBLASLT_MATMUL_TILE_24x128", + "CUBLASLT_MATMUL_TILE_248x64", + "CUBLASLT_MATMUL_TILE_248x192", + "CUBLASLT_MATMUL_TILE_248x128", + "CUBLASLT_MATMUL_TILE_240x64", + "CUBLASLT_MATMUL_TILE_240x192", + "CUBLASLT_MATMUL_TILE_240x128", "CUBLASLT_MATMUL_TILE_232x64", "CUBLASLT_MATMUL_TILE_232x192", "CUBLASLT_MATMUL_TILE_232x128", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index d3b89b71..abdbdc41 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -415,6 +415,12 @@ |`CUBLASLT_MATMUL_TILE_232x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_232x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_232x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | @@ -428,8 +434,27 @@ |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_264x128`|12.6| | | | | 
| | | | | +|`CUBLASLT_MATMUL_TILE_264x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_272x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_272x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_280x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_280x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_288x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_288x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_296x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_296x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_304x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_304x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_312x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_312x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_328x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_328x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x16`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x192`|12.6| | | | | | | | | | @@ -445,6 +470,21 @@ |`CUBLASLT_MATMUL_TILE_32x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x8`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_336x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_336x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_344x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_344x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_352x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_352x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_360x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_360x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_368x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_368x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_376x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 7e323ec4..cec7369c 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -415,6 +415,12 @@ |`CUBLASLT_MATMUL_TILE_232x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_232x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_232x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | | | | | | | @@ -428,8 +434,27 @@ |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_264x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_264x64`|12.6| | | | | | | | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_272x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_272x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_280x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_280x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_288x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_288x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_296x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_296x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_304x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_304x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_312x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_312x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_328x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_328x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x16`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x192`|12.6| | | | | | | | | | | | | | | | @@ -445,6 +470,21 @@ |`CUBLASLT_MATMUL_TILE_32x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x8`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_336x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_336x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_344x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_344x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_352x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_352x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_360x128`|12.6| | | | | | | | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_360x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_368x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_368x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_376x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 8bdde27c..27a4b28e 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -415,6 +415,12 @@ |`CUBLASLT_MATMUL_TILE_232x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_232x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_232x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_240x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_248x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x256`|12.6| | | | | | | | | | @@ -428,8 +434,27 @@ |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_264x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_264x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_272x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_272x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_280x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_280x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_288x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_288x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_296x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_296x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_304x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_304x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_312x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_312x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_328x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_328x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x16`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x192`|12.6| | | | | | | | | | @@ -445,6 +470,21 @@ |`CUBLASLT_MATMUL_TILE_32x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x8`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_336x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_336x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_344x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_344x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_352x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_352x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_360x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_360x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_368x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_368x64`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_376x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index ec73b52c..747a27c0 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -386,49 +386,89 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_136x192", {"HIPBLASLT_MATMUL_TILE_136x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_136x256", {"HIPBLASLT_MATMUL_TILE_136x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_136x320", {"HIPBLASLT_MATMUL_TILE_136x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_144x64", {"CUBLASLT_MATMUL_TILE_144x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_144x128", {"CUBLASLT_MATMUL_TILE_144x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_144x192", {"CUBLASLT_MATMUL_TILE_144x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_144x256", {"CUBLASLT_MATMUL_TILE_144x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_144x320", {"CUBLASLT_MATMUL_TILE_144x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_152x64", {"CUBLASLT_MATMUL_TILE_152x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - 
{"CUBLASLT_MATMUL_TILE_152x128", {"CUBLASLT_MATMUL_TILE_152x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_152x192", {"CUBLASLT_MATMUL_TILE_152x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_152x256", {"CUBLASLT_MATMUL_TILE_152x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_152x320", {"CUBLASLT_MATMUL_TILE_152x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_160x64", {"CUBLASLT_MATMUL_TILE_160x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_160x192", {"CUBLASLT_MATMUL_TILE_160x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_160x256", {"CUBLASLT_MATMUL_TILE_160x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_168x64", {"CUBLASLT_MATMUL_TILE_168x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_168x128", {"CUBLASLT_MATMUL_TILE_168x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_168x192", {"CUBLASLT_MATMUL_TILE_168x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_168x256", {"CUBLASLT_MATMUL_TILE_168x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_176x64", {"CUBLASLT_MATMUL_TILE_176x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_176x128", {"CUBLASLT_MATMUL_TILE_176x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_176x192", {"CUBLASLT_MATMUL_TILE_176x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_176x256", {"CUBLASLT_MATMUL_TILE_176x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_184x64", {"CUBLASLT_MATMUL_TILE_184x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_184x128", {"CUBLASLT_MATMUL_TILE_184x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_184x192", {"CUBLASLT_MATMUL_TILE_184x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_184x256", {"CUBLASLT_MATMUL_TILE_184x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x64", {"CUBLASLT_MATMUL_TILE_192x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x192", {"CUBLASLT_MATMUL_TILE_192x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x256", {"CUBLASLT_MATMUL_TILE_192x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_200x64", {"CUBLASLT_MATMUL_TILE_200x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_200x128", {"CUBLASLT_MATMUL_TILE_200x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_200x192", {"CUBLASLT_MATMUL_TILE_200x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_208x64", {"CUBLASLT_MATMUL_TILE_208x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_208x128", {"CUBLASLT_MATMUL_TILE_208x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_208x192", {"CUBLASLT_MATMUL_TILE_208x192", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_216x64", {"CUBLASLT_MATMUL_TILE_216x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_216x128", {"CUBLASLT_MATMUL_TILE_216x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_216x192", {"CUBLASLT_MATMUL_TILE_216x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_224x64", {"CUBLASLT_MATMUL_TILE_224x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_224x128", {"CUBLASLT_MATMUL_TILE_224x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_224x192", {"CUBLASLT_MATMUL_TILE_224x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_232x64", {"CUBLASLT_MATMUL_TILE_232x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_232x128", {"CUBLASLT_MATMUL_TILE_232x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_232x192", {"CUBLASLT_MATMUL_TILE_232x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x64", {"HIPBLASLT_MATMUL_TILE_144x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x128", {"HIPBLASLT_MATMUL_TILE_144x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x192", {"HIPBLASLT_MATMUL_TILE_144x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x256", {"HIPBLASLT_MATMUL_TILE_144x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_144x320", {"HIPBLASLT_MATMUL_TILE_144x320", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x64", {"HIPBLASLT_MATMUL_TILE_152x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x128", {"HIPBLASLT_MATMUL_TILE_152x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x192", {"HIPBLASLT_MATMUL_TILE_152x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x256", {"HIPBLASLT_MATMUL_TILE_152x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_152x320", {"HIPBLASLT_MATMUL_TILE_152x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_160x64", {"HIPBLASLT_MATMUL_TILE_160x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_160x192", {"HIPBLASLT_MATMUL_TILE_160x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_160x256", {"HIPBLASLT_MATMUL_TILE_160x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_168x64", {"HIPBLASLT_MATMUL_TILE_168x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_168x128", {"HIPBLASLT_MATMUL_TILE_168x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_168x192", {"HIPBLASLT_MATMUL_TILE_168x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_168x256", {"HIPBLASLT_MATMUL_TILE_168x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_176x64", {"HIPBLASLT_MATMUL_TILE_176x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_176x128", 
{"HIPBLASLT_MATMUL_TILE_176x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_176x192", {"HIPBLASLT_MATMUL_TILE_176x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_176x256", {"HIPBLASLT_MATMUL_TILE_176x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_184x64", {"HIPBLASLT_MATMUL_TILE_184x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_184x128", {"HIPBLASLT_MATMUL_TILE_184x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_184x192", {"HIPBLASLT_MATMUL_TILE_184x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_184x256", {"HIPBLASLT_MATMUL_TILE_184x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x64", {"HIPBLASLT_MATMUL_TILE_192x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x192", {"HIPBLASLT_MATMUL_TILE_192x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x256", {"HIPBLASLT_MATMUL_TILE_192x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_200x64", {"HIPBLASLT_MATMUL_TILE_200x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_200x128", {"HIPBLASLT_MATMUL_TILE_200x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_200x192", {"HIPBLASLT_MATMUL_TILE_200x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_208x64", {"HIPBLASLT_MATMUL_TILE_208x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_208x128", {"HIPBLASLT_MATMUL_TILE_208x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_208x192", {"HIPBLASLT_MATMUL_TILE_208x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_216x64", {"HIPBLASLT_MATMUL_TILE_216x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_216x128", {"HIPBLASLT_MATMUL_TILE_216x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_216x192", {"HIPBLASLT_MATMUL_TILE_216x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_224x64", {"HIPBLASLT_MATMUL_TILE_224x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_224x128", {"HIPBLASLT_MATMUL_TILE_224x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_224x192", {"HIPBLASLT_MATMUL_TILE_224x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_232x64", {"HIPBLASLT_MATMUL_TILE_232x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_232x128", {"HIPBLASLT_MATMUL_TILE_232x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_232x192", {"HIPBLASLT_MATMUL_TILE_232x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_240x64", {"HIPBLASLT_MATMUL_TILE_240x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_240x128", {"HIPBLASLT_MATMUL_TILE_240x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_240x192", {"HIPBLASLT_MATMUL_TILE_240x192", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_248x64", {"HIPBLASLT_MATMUL_TILE_248x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_248x128", {"HIPBLASLT_MATMUL_TILE_248x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_248x192", {"HIPBLASLT_MATMUL_TILE_248x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x192", {"HIPBLASLT_MATMUL_TILE_256x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_264x64", {"HIPBLASLT_MATMUL_TILE_264x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_264x128", {"HIPBLASLT_MATMUL_TILE_264x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_272x64", {"HIPBLASLT_MATMUL_TILE_272x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_272x128", {"HIPBLASLT_MATMUL_TILE_272x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_280x64", {"HIPBLASLT_MATMUL_TILE_280x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_280x128", {"HIPBLASLT_MATMUL_TILE_280x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_288x64", {"HIPBLASLT_MATMUL_TILE_288x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_288x128", {"HIPBLASLT_MATMUL_TILE_288x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_296x64", {"HIPBLASLT_MATMUL_TILE_296x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_296x128", {"HIPBLASLT_MATMUL_TILE_296x128", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_304x64", {"HIPBLASLT_MATMUL_TILE_304x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_304x128", {"HIPBLASLT_MATMUL_TILE_304x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_312x64", {"HIPBLASLT_MATMUL_TILE_312x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_312x128", {"HIPBLASLT_MATMUL_TILE_312x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x64", {"HIPBLASLT_MATMUL_TILE_320x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x128", {"HIPBLASLT_MATMUL_TILE_320x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_328x64", {"HIPBLASLT_MATMUL_TILE_328x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_328x128", {"HIPBLASLT_MATMUL_TILE_328x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_336x64", {"HIPBLASLT_MATMUL_TILE_336x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_336x128", {"HIPBLASLT_MATMUL_TILE_336x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_344x64", {"HIPBLASLT_MATMUL_TILE_344x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_344x128", {"HIPBLASLT_MATMUL_TILE_344x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_352x64", {"HIPBLASLT_MATMUL_TILE_352x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_352x128", 
{"HIPBLASLT_MATMUL_TILE_352x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_360x64", {"HIPBLASLT_MATMUL_TILE_360x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_360x128", {"HIPBLASLT_MATMUL_TILE_360x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_368x64", {"HIPBLASLT_MATMUL_TILE_368x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_368x128", {"HIPBLASLT_MATMUL_TILE_368x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_376x64", {"HIPBLASLT_MATMUL_TILE_376x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_376x128", {"HIPBLASLT_MATMUL_TILE_376x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x64", {"HIPBLASLT_MATMUL_TILE_384x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x128", {"HIPBLASLT_MATMUL_TILE_384x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_392x64", {"HIPBLASLT_MATMUL_TILE_392x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1275,6 +1315,46 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_232x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_232x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_232x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_240x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_240x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_240x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_248x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_248x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_248x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_264x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_264x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_272x64", {CUDA_126, CUDA_0, 
CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_272x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_280x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_280x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_288x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_288x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_296x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_296x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_304x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_304x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_312x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_312x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 
CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_328x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_328x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_336x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_336x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_344x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_344x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_352x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_352x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_360x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_360x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, 
CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_368x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_368x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_376x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_376x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_392x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From 25edda3de3156f80b8aaab589abe1703f87892c3 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 28 Oct 2024 10:46:54 +0000 Subject: [PATCH 25/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 7 + `rocblas_(s|d|c|z)syrk_64` and `hipblas(S|D|C|Z)syrk(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 32 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 16 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 24 +++++++++----- 
.../synthetic/libraries/cublas2hipblas_v2.cu | 28 ++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 ++++++++++++++++ 7 files changed, 112 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index cd7e8dfa..68dbcf16 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1709,7 +1709,9 @@ sub rocSubstitutions { subst("cublasCsyr_v2", "rocblas_csyr", "library"); subst("cublasCsyr_v2_64", "rocblas_csyr_64", "library"); subst("cublasCsyrk", "rocblas_csyrk", "library"); + subst("cublasCsyrk_64", "rocblas_csyrk_64", "library"); subst("cublasCsyrk_v2", "rocblas_csyrk", "library"); + subst("cublasCsyrk_v2_64", "rocblas_csyrk_64", "library"); subst("cublasCsyrkx", "rocblas_csyrkx", "library"); subst("cublasCtbmv", "rocblas_ctbmv", "library"); subst("cublasCtbmv_64", "rocblas_ctbmv_64", "library"); @@ -1850,7 +1852,9 @@ sub rocSubstitutions { subst("cublasDsyr_v2", "rocblas_dsyr", "library"); subst("cublasDsyr_v2_64", "rocblas_dsyr_64", "library"); subst("cublasDsyrk", "rocblas_dsyrk", "library"); + subst("cublasDsyrk_64", "rocblas_dsyrk_64", "library"); subst("cublasDsyrk_v2", "rocblas_dsyrk", "library"); + subst("cublasDsyrk_v2_64", "rocblas_dsyrk_64", "library"); subst("cublasDsyrkx", "rocblas_dsyrkx", "library"); subst("cublasDtbmv", "rocblas_dtbmv", "library"); subst("cublasDtbmv_64", "rocblas_dtbmv_64", "library"); @@ -2078,7 +2082,9 @@ sub rocSubstitutions { subst("cublasSsyr_v2", "rocblas_ssyr", "library"); subst("cublasSsyr_v2_64", "rocblas_ssyr_64", "library"); subst("cublasSsyrk", "rocblas_ssyrk", "library"); + subst("cublasSsyrk_64", "rocblas_ssyrk_64", "library"); subst("cublasSsyrk_v2", "rocblas_ssyrk", "library"); + subst("cublasSsyrk_v2_64", "rocblas_ssyrk_64", "library"); subst("cublasSsyrkx", "rocblas_ssyrkx", "library"); subst("cublasStbmv", "rocblas_stbmv", "library"); subst("cublasStbmv_64", "rocblas_stbmv_64", "library"); @@ -2247,7 +2253,9 @@ sub rocSubstitutions { subst("cublasZsyr_v2", "rocblas_zsyr", 
"library"); subst("cublasZsyr_v2_64", "rocblas_zsyr_64", "library"); subst("cublasZsyrk", "rocblas_zsyrk", "library"); + subst("cublasZsyrk_64", "rocblas_zsyrk_64", "library"); subst("cublasZsyrk_v2", "rocblas_zsyrk", "library"); + subst("cublasZsyrk_v2_64", "rocblas_zsyrk_64", "library"); subst("cublasZsyrkx", "rocblas_zsyrkx", "library"); subst("cublasZtbmv", "rocblas_ztbmv", "library"); subst("cublasZtbmv_64", "rocblas_ztbmv_64", "library"); @@ -4471,7 +4479,9 @@ sub simpleSubstitutions { subst("cublasCsyr_v2", "hipblasCsyr_v2", "library"); subst("cublasCsyr_v2_64", "hipblasCsyr_v2_64", "library"); subst("cublasCsyrk", "hipblasCsyrk_v2", "library"); + subst("cublasCsyrk_64", "hipblasCsyrk_v2_64", "library"); subst("cublasCsyrk_v2", "hipblasCsyrk_v2", "library"); + subst("cublasCsyrk_v2_64", "hipblasCsyrk_v2_64", "library"); subst("cublasCsyrkx", "hipblasCsyrkx_v2", "library"); subst("cublasCtbmv", "hipblasCtbmv_v2", "library"); subst("cublasCtbmv_64", "hipblasCtbmv_v2_64", "library"); @@ -4613,7 +4623,9 @@ sub simpleSubstitutions { subst("cublasDsyr_v2", "hipblasDsyr", "library"); subst("cublasDsyr_v2_64", "hipblasDsyr_64", "library"); subst("cublasDsyrk", "hipblasDsyrk", "library"); + subst("cublasDsyrk_64", "hipblasDsyrk_64", "library"); subst("cublasDsyrk_v2", "hipblasDsyrk", "library"); + subst("cublasDsyrk_v2_64", "hipblasDsyrk_64", "library"); subst("cublasDsyrkx", "hipblasDsyrkx", "library"); subst("cublasDtbmv", "hipblasDtbmv", "library"); subst("cublasDtbmv_64", "hipblasDtbmv_64", "library"); @@ -4852,7 +4864,9 @@ sub simpleSubstitutions { subst("cublasSsyr_v2", "hipblasSsyr", "library"); subst("cublasSsyr_v2_64", "hipblasSsyr_64", "library"); subst("cublasSsyrk", "hipblasSsyrk", "library"); + subst("cublasSsyrk_64", "hipblasSsyrk_64", "library"); subst("cublasSsyrk_v2", "hipblasSsyrk", "library"); + subst("cublasSsyrk_v2_64", "hipblasSsyrk_64", "library"); subst("cublasSsyrkx", "hipblasSsyrkx", "library"); subst("cublasStbmv", "hipblasStbmv", 
"library"); subst("cublasStbmv_64", "hipblasStbmv_64", "library"); @@ -5014,7 +5028,9 @@ sub simpleSubstitutions { subst("cublasZsyr_v2", "hipblasZsyr_v2", "library"); subst("cublasZsyr_v2_64", "hipblasZsyr_v2_64", "library"); subst("cublasZsyrk", "hipblasZsyrk_v2", "library"); + subst("cublasZsyrk_64", "hipblasZsyrk_v2_64", "library"); subst("cublasZsyrk_v2", "hipblasZsyrk_v2", "library"); + subst("cublasZsyrk_v2_64", "hipblasZsyrk_v2_64", "library"); subst("cublasZsyrkx", "hipblasZsyrkx_v2", "library"); subst("cublasZtbmv", "hipblasZtbmv_v2", "library"); subst("cublasZtbmv_64", "hipblasZtbmv_v2_64", "library"); @@ -11580,8 +11596,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZtrmm_64", "cublasZtpttr", "cublasZsyrkx_64", - "cublasZsyrk_v2_64", - "cublasZsyrk_64", "cublasZsyr2k_v2_64", "cublasZsyr2k_64", "cublasZmatinvBatched", @@ -11611,8 +11625,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasStrmm_64", "cublasStpttr", "cublasSsyrkx_64", - "cublasSsyrk_v2_64", - "cublasSsyrk_64", "cublasSsyr2k_v2_64", "cublasSsyr2k_64", "cublasSmatinvBatched", @@ -11713,8 +11725,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDtrmm_64", "cublasDtpttr", "cublasDsyrkx_64", - "cublasDsyrk_v2_64", - "cublasDsyrk_64", "cublasDsyr2k_v2_64", "cublasDsyr2k_64", "cublasDmatinvBatched", @@ -11730,8 +11740,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCtrmm_64", "cublasCtpttr", "cublasCsyrkx_64", - "cublasCsyrk_v2_64", - "cublasCsyrk_64", "cublasCsyrkEx_64", "cublasCsyrkEx", "cublasCsyrk3mEx_64", @@ -13473,8 +13481,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZtrmm_64", "cublasZtpttr", "cublasZsyrkx_64", - "cublasZsyrk_v2_64", - "cublasZsyrk_64", "cublasZsyr2k_v2_64", "cublasZsyr2k_64", "cublasZmatinvBatched", @@ -13498,8 +13504,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasStrmm_64", "cublasStpttr", "cublasSsyrkx_64", - "cublasSsyrk_v2_64", - "cublasSsyrk_64", "cublasSsyr2k_v2_64", "cublasSsyr2k_64", "cublasSmatinvBatched", @@ -13619,8 +13623,6 @@ sub 
warnRocOnlyUnsupportedFunctions { "cublasDtrmm_64", "cublasDtpttr", "cublasDsyrkx_64", - "cublasDsyrk_v2_64", - "cublasDsyrk_64", "cublasDsyr2k_v2_64", "cublasDsyr2k_64", "cublasDmatinvBatched", @@ -13638,8 +13640,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCtrmm_64", "cublasCtpttr", "cublasCsyrkx_64", - "cublasCsyrk_v2_64", - "cublasCsyrk_64", "cublasCsyrkEx_64", "cublasCsyrkEx", "cublasCsyrk3mEx_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index abdbdc41..8f6066c5 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1287,9 +1287,9 @@ |`cublasCsyr2k_v2`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | | |`cublasCsyr2k_v2_64`|12.0| | | | | | | | | | |`cublasCsyrk`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | | -|`cublasCsyrk_64`|12.0| | | | | | | | | | +|`cublasCsyrk_64`|12.0| | | |`hipblasCsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasCsyrk_v2`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | | -|`cublasCsyrk_v2_64`|12.0| | | | | | | | | | +|`cublasCsyrk_v2_64`|12.0| | | |`hipblasCsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasCsyrkx`| | | | |`hipblasCsyrkx_v2`|6.0.0| | | | | |`cublasCsyrkx_64`|12.0| | | | | | | | | | |`cublasCtrmm`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | | @@ -1323,9 +1323,9 @@ |`cublasDsyr2k_v2`| | | | |`hipblasDsyr2k`|3.5.0| | | | | |`cublasDsyr2k_v2_64`|12.0| | | | | | | | | | |`cublasDsyrk`| | | | |`hipblasDsyrk`|3.5.0| | | | | -|`cublasDsyrk_64`|12.0| | | | | | | | | | +|`cublasDsyrk_64`|12.0| | | |`hipblasDsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrk_v2`| | | | |`hipblasDsyrk`|3.5.0| | | | | -|`cublasDsyrk_v2_64`|12.0| | | | | | | | | | +|`cublasDsyrk_v2_64`|12.0| | | |`hipblasDsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrkx`| | | | |`hipblasDsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | | | | | | | | |`cublasDtrmm`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | | @@ -1375,9 +1375,9 @@ |`cublasSsyr2k_v2`| | | | |`hipblasSsyr2k`|3.5.0| | | | | |`cublasSsyr2k_v2_64`|12.0| 
| | | | | | | | | |`cublasSsyrk`| | | | |`hipblasSsyrk`|3.5.0| | | | | -|`cublasSsyrk_64`|12.0| | | | | | | | | | +|`cublasSsyrk_64`|12.0| | | |`hipblasSsyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrk_v2`| | | | |`hipblasSsyrk`|3.5.0| | | | | -|`cublasSsyrk_v2_64`|12.0| | | | | | | | | | +|`cublasSsyrk_v2_64`|12.0| | | |`hipblasSsyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrkx`| | | | |`hipblasSsyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | | | | | | | | |`cublasStrmm`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | | @@ -1433,9 +1433,9 @@ |`cublasZsyr2k_v2`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | | |`cublasZsyr2k_v2_64`|12.0| | | | | | | | | | |`cublasZsyrk`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | | -|`cublasZsyrk_64`|12.0| | | | | | | | | | +|`cublasZsyrk_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyrk_v2`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | | -|`cublasZsyrk_v2_64`|12.0| | | | | | | | | | +|`cublasZsyrk_v2_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyrkx`| | | | |`hipblasZsyrkx_v2`|6.0.0| | | | | |`cublasZsyrkx_64`|12.0| | | | | | | | | | |`cublasZtrmm`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index cec7369c..55f789f4 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1287,9 +1287,9 @@ |`cublasCsyr2k_v2`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | |`rocblas_csyr2k`|3.5.0| | | | | |`cublasCsyr2k_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasCsyrk`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | |`rocblas_csyrk`|3.5.0| | | | | -|`cublasCsyrk_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsyrk_64`|12.0| | | |`hipblasCsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyrk_64`|6.3.0| | | |6.3.0| |`cublasCsyrk_v2`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | |`rocblas_csyrk`|3.5.0| | | | | -|`cublasCsyrk_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsyrk_v2_64`|12.0| 
| | |`hipblasCsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyrk_64`|6.3.0| | | |6.3.0| |`cublasCsyrkx`| | | | |`hipblasCsyrkx_v2`|6.0.0| | | | |`rocblas_csyrkx`|3.5.0| | | | | |`cublasCsyrkx_64`|12.0| | | | | | | | | | | | | | | | |`cublasCtrmm`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | @@ -1323,9 +1323,9 @@ |`cublasDsyr2k_v2`| | | | |`hipblasDsyr2k`|3.5.0| | | | |`rocblas_dsyr2k`|3.5.0| | | | | |`cublasDsyr2k_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasDsyrk`| | | | |`hipblasDsyrk`|3.5.0| | | | |`rocblas_dsyrk`|3.5.0| | | | | -|`cublasDsyrk_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsyrk_64`|12.0| | | |`hipblasDsyrk_64`|6.3.0| | | |6.3.0|`rocblas_dsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrk_v2`| | | | |`hipblasDsyrk`|3.5.0| | | | |`rocblas_dsyrk`|3.5.0| | | | | -|`cublasDsyrk_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsyrk_v2_64`|12.0| | | |`hipblasDsyrk_64`|6.3.0| | | |6.3.0|`rocblas_dsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrkx`| | | | |`hipblasDsyrkx`|3.5.0| | | | |`rocblas_dsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | | | | | | | | | | | | | | |`cublasDtrmm`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | @@ -1375,9 +1375,9 @@ |`cublasSsyr2k_v2`| | | | |`hipblasSsyr2k`|3.5.0| | | | |`rocblas_ssyr2k`|3.5.0| | | | | |`cublasSsyr2k_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasSsyrk`| | | | |`hipblasSsyrk`|3.5.0| | | | |`rocblas_ssyrk`|3.5.0| | | | | -|`cublasSsyrk_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsyrk_64`|12.0| | | |`hipblasSsyrk_64`|6.3.0| | | |6.3.0|`rocblas_ssyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrk_v2`| | | | |`hipblasSsyrk`|3.5.0| | | | |`rocblas_ssyrk`|3.5.0| | | | | -|`cublasSsyrk_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsyrk_v2_64`|12.0| | | |`hipblasSsyrk_64`|6.3.0| | | |6.3.0|`rocblas_ssyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrkx`| | | | |`hipblasSsyrkx`|3.5.0| | | | |`rocblas_ssyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | | | | | | | 
| | | | | | | |`cublasStrmm`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | @@ -1433,9 +1433,9 @@ |`cublasZsyr2k_v2`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | |`rocblas_zsyr2k`|3.5.0| | | | | |`cublasZsyr2k_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasZsyrk`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | |`rocblas_zsyrk`|3.5.0| | | | | -|`cublasZsyrk_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsyrk_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrk_v2`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | |`rocblas_zsyrk`|3.5.0| | | | | -|`cublasZsyrk_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsyrk_v2_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrkx`| | | | |`hipblasZsyrkx_v2`|6.0.0| | | | |`rocblas_zsyrkx`|3.5.0| | | | | |`cublasZsyrkx_64`|12.0| | | | | | | | | | | | | | | | |`cublasZtrmm`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 27a4b28e..578bc19d 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1287,9 +1287,9 @@ |`cublasCsyr2k_v2`| | | | |`rocblas_csyr2k`|3.5.0| | | | | |`cublasCsyr2k_v2_64`|12.0| | | | | | | | | | |`cublasCsyrk`| | | | |`rocblas_csyrk`|3.5.0| | | | | -|`cublasCsyrk_64`|12.0| | | | | | | | | | +|`cublasCsyrk_64`|12.0| | | |`rocblas_csyrk_64`|6.3.0| | | |6.3.0| |`cublasCsyrk_v2`| | | | |`rocblas_csyrk`|3.5.0| | | | | -|`cublasCsyrk_v2_64`|12.0| | | | | | | | | | +|`cublasCsyrk_v2_64`|12.0| | | |`rocblas_csyrk_64`|6.3.0| | | |6.3.0| |`cublasCsyrkx`| | | | |`rocblas_csyrkx`|3.5.0| | | | | |`cublasCsyrkx_64`|12.0| | | | | | | | | | |`cublasCtrmm`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | @@ -1323,9 +1323,9 @@ |`cublasDsyr2k_v2`| | | | |`rocblas_dsyr2k`|3.5.0| | | | | |`cublasDsyr2k_v2_64`|12.0| | | | | | | | | | 
|`cublasDsyrk`| | | | |`rocblas_dsyrk`|3.5.0| | | | | -|`cublasDsyrk_64`|12.0| | | | | | | | | | +|`cublasDsyrk_64`|12.0| | | |`rocblas_dsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrk_v2`| | | | |`rocblas_dsyrk`|3.5.0| | | | | -|`cublasDsyrk_v2_64`|12.0| | | | | | | | | | +|`cublasDsyrk_v2_64`|12.0| | | |`rocblas_dsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrkx`| | | | |`rocblas_dsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | | | | | | | | |`cublasDtrmm`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | @@ -1375,9 +1375,9 @@ |`cublasSsyr2k_v2`| | | | |`rocblas_ssyr2k`|3.5.0| | | | | |`cublasSsyr2k_v2_64`|12.0| | | | | | | | | | |`cublasSsyrk`| | | | |`rocblas_ssyrk`|3.5.0| | | | | -|`cublasSsyrk_64`|12.0| | | | | | | | | | +|`cublasSsyrk_64`|12.0| | | |`rocblas_ssyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrk_v2`| | | | |`rocblas_ssyrk`|3.5.0| | | | | -|`cublasSsyrk_v2_64`|12.0| | | | | | | | | | +|`cublasSsyrk_v2_64`|12.0| | | |`rocblas_ssyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrkx`| | | | |`rocblas_ssyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | | | | | | | | |`cublasStrmm`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | @@ -1433,9 +1433,9 @@ |`cublasZsyr2k_v2`| | | | |`rocblas_zsyr2k`|3.5.0| | | | | |`cublasZsyr2k_v2_64`|12.0| | | | | | | | | | |`cublasZsyrk`| | | | |`rocblas_zsyrk`|3.5.0| | | | | -|`cublasZsyrk_64`|12.0| | | | | | | | | | +|`cublasZsyrk_64`|12.0| | | |`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrk_v2`| | | | |`rocblas_zsyrk`|3.5.0| | | | | -|`cublasZsyrk_v2_64`|12.0| | | | | | | | | | +|`cublasZsyrk_v2_64`|12.0| | | |`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrkx`| | | | |`rocblas_zsyrkx`|3.5.0| | | | | |`cublasZsyrkx_64`|12.0| | | | | | | | | | |`cublasZtrmm`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 378e1c21..6a2b42c5 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -477,13 +477,13 @@ const std::map 
CUDA_BLAS_FUNCTION_MAP { // SYRK {"cublasSsyrk", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsyrk_64", {"hipblasSsyrk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSsyrk_64", {"hipblasSsyrk_64", "rocblas_ssyrk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDsyrk", {"hipblasDsyrk", "rocblas_dsyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsyrk_64", {"hipblasDsyrk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDsyrk_64", {"hipblasDsyrk_64", "rocblas_dsyrk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCsyrk", {"hipblasCsyrk_v2", "rocblas_csyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsyrk_64", {"hipblasCsyrk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCsyrk_64", {"hipblasCsyrk_v2_64", "rocblas_csyrk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZsyrk", {"hipblasZsyrk_v2", "rocblas_zsyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsyrk_64", {"hipblasZsyrk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZsyrk_64", {"hipblasZsyrk_v2_64", "rocblas_zsyrk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // HERK {"cublasCherk", {"hipblasCherk_v2", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -847,13 +847,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SYRK {"cublasSsyrk_v2", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSsyrk_v2_64", {"hipblasSsyrk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSsyrk_v2_64", {"hipblasSsyrk_64", "rocblas_ssyrk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDsyrk_v2", {"hipblasDsyrk", "rocblas_dsyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - 
{"cublasDsyrk_v2_64", {"hipblasDsyrk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDsyrk_v2_64", {"hipblasDsyrk_64", "rocblas_dsyrk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCsyrk_v2", {"hipblasCsyrk_v2", "rocblas_csyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCsyrk_v2_64", {"hipblasCsyrk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCsyrk_v2_64", {"hipblasCsyrk_v2_64", "rocblas_csyrk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZsyrk_v2", {"hipblasZsyrk_v2", "rocblas_zsyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZsyrk_v2_64", {"hipblasZsyrk_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZsyrk_v2_64", {"hipblasZsyrk_v2_64", "rocblas_zsyrk_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // IO in Int8 complex/cuComplex, computation in cuComplex {"cublasCsyrkEx", {"hipblasCsyrkEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, @@ -2048,6 +2048,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDsymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCsymm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZsymm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSsyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDsyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCsyrk_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZsyrk_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2461,6 +2465,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dsymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_csymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zsymm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ssyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dsyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + 
{"rocblas_csyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zsyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index cf2188bd..17cd3988 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2974,6 +2974,34 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* beta, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, const float* beta, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fb, &fC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasSsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, 
&fA, lda_64, &fb, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* beta, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrk_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, const double* beta, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &db, &dC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasDsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &db, &dC, ldc_64); + blasStatus = cublasDsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &db, &dC, ldc_64); + blasStatus = cublasDsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &db, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrk_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, 
&complexa, &complexA, lda_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrk_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 103d4661..d89806a1 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3179,6 +3179,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zsymm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsymm_64(blasHandle, 
blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsymm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* beta, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyrk_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* beta, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ssyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fb, &fC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_ssyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fb, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* beta, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyrk_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* beta, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &db, &dC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_dsyrk_64(blasHandle, 
blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &db, &dC, ldc_64); + blasStatus = cublasDsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &db, &dC, ldc_64); + blasStatus = cublasDsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &db, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyrk_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_csyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_csyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyrk_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, int64_t n, int64_t k, const rocblas_double_complex* alpha, const 
rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_zsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; From 579a926663b031517d365f8bc3439018a228d341 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 28 Oct 2024 12:07:56 +0000 Subject: [PATCH 26/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 8 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 92 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 46 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 46 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 46 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 92 +++++++++++++++++++ 5 files changed, 322 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 68dbcf16..ed9d63ce 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12583,6 +12583,11 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_80x256", "CUBLASLT_MATMUL_TILE_80x192", "CUBLASLT_MATMUL_TILE_80x128", + "CUBLASLT_MATMUL_TILE_768x64", + "CUBLASLT_MATMUL_TILE_760x64", + "CUBLASLT_MATMUL_TILE_752x64", + "CUBLASLT_MATMUL_TILE_744x64", + "CUBLASLT_MATMUL_TILE_736x64", "CUBLASLT_MATMUL_TILE_72x640", "CUBLASLT_MATMUL_TILE_72x64", "CUBLASLT_MATMUL_TILE_72x576", @@ -12593,6 +12598,16 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_72x256", 
"CUBLASLT_MATMUL_TILE_72x192", "CUBLASLT_MATMUL_TILE_72x128", + "CUBLASLT_MATMUL_TILE_728x64", + "CUBLASLT_MATMUL_TILE_720x64", + "CUBLASLT_MATMUL_TILE_712x64", + "CUBLASLT_MATMUL_TILE_704x64", + "CUBLASLT_MATMUL_TILE_696x64", + "CUBLASLT_MATMUL_TILE_688x64", + "CUBLASLT_MATMUL_TILE_680x64", + "CUBLASLT_MATMUL_TILE_672x64", + "CUBLASLT_MATMUL_TILE_664x64", + "CUBLASLT_MATMUL_TILE_656x64", "CUBLASLT_MATMUL_TILE_64x96", "CUBLASLT_MATMUL_TILE_64x8", "CUBLASLT_MATMUL_TILE_64x768", @@ -12608,6 +12623,16 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_64x256", "CUBLASLT_MATMUL_TILE_64x192", "CUBLASLT_MATMUL_TILE_64x128", + "CUBLASLT_MATMUL_TILE_648x64", + "CUBLASLT_MATMUL_TILE_640x64", + "CUBLASLT_MATMUL_TILE_632x64", + "CUBLASLT_MATMUL_TILE_624x64", + "CUBLASLT_MATMUL_TILE_616x64", + "CUBLASLT_MATMUL_TILE_608x64", + "CUBLASLT_MATMUL_TILE_600x64", + "CUBLASLT_MATMUL_TILE_592x64", + "CUBLASLT_MATMUL_TILE_584x64", + "CUBLASLT_MATMUL_TILE_576x64", "CUBLASLT_MATMUL_TILE_56x768", "CUBLASLT_MATMUL_TILE_56x704", "CUBLASLT_MATMUL_TILE_56x640", @@ -12619,7 +12644,16 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_56x256", "CUBLASLT_MATMUL_TILE_56x192", "CUBLASLT_MATMUL_TILE_56x128", + "CUBLASLT_MATMUL_TILE_568x64", + "CUBLASLT_MATMUL_TILE_560x64", + "CUBLASLT_MATMUL_TILE_552x64", + "CUBLASLT_MATMUL_TILE_544x64", + "CUBLASLT_MATMUL_TILE_536x64", + "CUBLASLT_MATMUL_TILE_528x64", + "CUBLASLT_MATMUL_TILE_520x64", "CUBLASLT_MATMUL_TILE_512x64", + "CUBLASLT_MATMUL_TILE_504x64", + "CUBLASLT_MATMUL_TILE_496x64", "CUBLASLT_MATMUL_TILE_48x768", "CUBLASLT_MATMUL_TILE_48x704", "CUBLASLT_MATMUL_TILE_48x640", @@ -12632,6 +12666,16 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_48x256", "CUBLASLT_MATMUL_TILE_48x192", "CUBLASLT_MATMUL_TILE_48x128", + "CUBLASLT_MATMUL_TILE_488x64", + "CUBLASLT_MATMUL_TILE_480x64", + "CUBLASLT_MATMUL_TILE_472x64", + "CUBLASLT_MATMUL_TILE_464x64", + "CUBLASLT_MATMUL_TILE_456x64", + "CUBLASLT_MATMUL_TILE_448x64", + 
"CUBLASLT_MATMUL_TILE_440x64", + "CUBLASLT_MATMUL_TILE_432x64", + "CUBLASLT_MATMUL_TILE_424x64", + "CUBLASLT_MATMUL_TILE_416x64", "CUBLASLT_MATMUL_TILE_40x768", "CUBLASLT_MATMUL_TILE_40x704", "CUBLASLT_MATMUL_TILE_40x640", @@ -12644,6 +12688,8 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_40x256", "CUBLASLT_MATMUL_TILE_40x192", "CUBLASLT_MATMUL_TILE_40x128", + "CUBLASLT_MATMUL_TILE_408x64", + "CUBLASLT_MATMUL_TILE_400x64", "CUBLASLT_MATMUL_TILE_392x64", "CUBLASLT_MATMUL_TILE_384x64", "CUBLASLT_MATMUL_TILE_384x128", @@ -14236,6 +14282,11 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_80x256", "CUBLASLT_MATMUL_TILE_80x192", "CUBLASLT_MATMUL_TILE_80x128", + "CUBLASLT_MATMUL_TILE_768x64", + "CUBLASLT_MATMUL_TILE_760x64", + "CUBLASLT_MATMUL_TILE_752x64", + "CUBLASLT_MATMUL_TILE_744x64", + "CUBLASLT_MATMUL_TILE_736x64", "CUBLASLT_MATMUL_TILE_72x640", "CUBLASLT_MATMUL_TILE_72x64", "CUBLASLT_MATMUL_TILE_72x576", @@ -14246,6 +14297,16 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_72x256", "CUBLASLT_MATMUL_TILE_72x192", "CUBLASLT_MATMUL_TILE_72x128", + "CUBLASLT_MATMUL_TILE_728x64", + "CUBLASLT_MATMUL_TILE_720x64", + "CUBLASLT_MATMUL_TILE_712x64", + "CUBLASLT_MATMUL_TILE_704x64", + "CUBLASLT_MATMUL_TILE_696x64", + "CUBLASLT_MATMUL_TILE_688x64", + "CUBLASLT_MATMUL_TILE_680x64", + "CUBLASLT_MATMUL_TILE_672x64", + "CUBLASLT_MATMUL_TILE_664x64", + "CUBLASLT_MATMUL_TILE_656x64", "CUBLASLT_MATMUL_TILE_64x96", "CUBLASLT_MATMUL_TILE_64x8", "CUBLASLT_MATMUL_TILE_64x768", @@ -14261,6 +14322,16 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_64x256", "CUBLASLT_MATMUL_TILE_64x192", "CUBLASLT_MATMUL_TILE_64x128", + "CUBLASLT_MATMUL_TILE_648x64", + "CUBLASLT_MATMUL_TILE_640x64", + "CUBLASLT_MATMUL_TILE_632x64", + "CUBLASLT_MATMUL_TILE_624x64", + "CUBLASLT_MATMUL_TILE_616x64", + "CUBLASLT_MATMUL_TILE_608x64", + "CUBLASLT_MATMUL_TILE_600x64", + "CUBLASLT_MATMUL_TILE_592x64", + "CUBLASLT_MATMUL_TILE_584x64", + 
"CUBLASLT_MATMUL_TILE_576x64", "CUBLASLT_MATMUL_TILE_56x768", "CUBLASLT_MATMUL_TILE_56x704", "CUBLASLT_MATMUL_TILE_56x640", @@ -14272,7 +14343,16 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_56x256", "CUBLASLT_MATMUL_TILE_56x192", "CUBLASLT_MATMUL_TILE_56x128", + "CUBLASLT_MATMUL_TILE_568x64", + "CUBLASLT_MATMUL_TILE_560x64", + "CUBLASLT_MATMUL_TILE_552x64", + "CUBLASLT_MATMUL_TILE_544x64", + "CUBLASLT_MATMUL_TILE_536x64", + "CUBLASLT_MATMUL_TILE_528x64", + "CUBLASLT_MATMUL_TILE_520x64", "CUBLASLT_MATMUL_TILE_512x64", + "CUBLASLT_MATMUL_TILE_504x64", + "CUBLASLT_MATMUL_TILE_496x64", "CUBLASLT_MATMUL_TILE_48x768", "CUBLASLT_MATMUL_TILE_48x704", "CUBLASLT_MATMUL_TILE_48x640", @@ -14285,6 +14365,16 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_48x256", "CUBLASLT_MATMUL_TILE_48x192", "CUBLASLT_MATMUL_TILE_48x128", + "CUBLASLT_MATMUL_TILE_488x64", + "CUBLASLT_MATMUL_TILE_480x64", + "CUBLASLT_MATMUL_TILE_472x64", + "CUBLASLT_MATMUL_TILE_464x64", + "CUBLASLT_MATMUL_TILE_456x64", + "CUBLASLT_MATMUL_TILE_448x64", + "CUBLASLT_MATMUL_TILE_440x64", + "CUBLASLT_MATMUL_TILE_432x64", + "CUBLASLT_MATMUL_TILE_424x64", + "CUBLASLT_MATMUL_TILE_416x64", "CUBLASLT_MATMUL_TILE_40x768", "CUBLASLT_MATMUL_TILE_40x704", "CUBLASLT_MATMUL_TILE_40x640", @@ -14297,6 +14387,8 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_40x256", "CUBLASLT_MATMUL_TILE_40x192", "CUBLASLT_MATMUL_TILE_40x128", + "CUBLASLT_MATMUL_TILE_408x64", + "CUBLASLT_MATMUL_TILE_400x64", "CUBLASLT_MATMUL_TILE_392x64", "CUBLASLT_MATMUL_TILE_384x64", "CUBLASLT_MATMUL_TILE_384x128", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 8f6066c5..1bb0bdc8 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -485,6 +485,8 @@ |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | 
| | | | | | +|`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | @@ -497,6 +499,16 @@ |`CUBLASLT_MATMUL_TILE_40x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_416x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_424x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_432x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_440x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_456x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_464x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_472x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_480x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_488x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x256`|12.6| | | | | | | | | | @@ -509,7 +521,16 @@ |`CUBLASLT_MATMUL_TILE_48x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_496x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_504x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_520x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_528x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_536x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_544x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_552x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_560x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_568x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x128`|12.6| | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_56x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x256`|12.6| | | | | | | | | | @@ -521,6 +542,16 @@ |`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_584x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_592x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_600x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_608x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_616x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_624x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | @@ -536,6 +567,16 @@ |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_656x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_664x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_672x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_680x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_688x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_696x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_712x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_720x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_728x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x256`|12.6| | | | | | | | | | @@ -546,6 +587,11 @@ |`CUBLASLT_MATMUL_TILE_72x576`|12.6| | | | | | | 
| | | |`CUBLASLT_MATMUL_TILE_72x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_736x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_744x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_752x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_760x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 55f789f4..824abff6 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -485,6 +485,8 @@ |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | | | | | | | @@ -497,6 +499,16 @@ |`CUBLASLT_MATMUL_TILE_40x640`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x768`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_416x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_424x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_432x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_440x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_456x64`|12.6| | | | | | | | | | 
| | | | | | +|`CUBLASLT_MATMUL_TILE_464x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_472x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_480x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_488x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x256`|12.6| | | | | | | | | | | | | | | | @@ -509,7 +521,16 @@ |`CUBLASLT_MATMUL_TILE_48x640`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_496x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_504x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_520x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_528x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_536x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_544x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_552x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_560x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_568x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x256`|12.6| | | | | | | | | | | | | | | | @@ -521,6 +542,16 @@ |`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_584x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_592x64`|12.6| | | | | | | | | 
| | | | | | | +|`CUBLASLT_MATMUL_TILE_600x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_608x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_616x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_624x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | | | | | | | @@ -536,6 +567,16 @@ |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_656x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_664x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_672x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_680x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_688x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_696x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_712x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_720x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_728x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x256`|12.6| | | | | | | | | | | | | | | | @@ -546,6 +587,11 @@ |`CUBLASLT_MATMUL_TILE_72x576`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x640`|12.6| | | | | | | | | | 
| | | | | | +|`CUBLASLT_MATMUL_TILE_736x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_744x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_752x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_760x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 578bc19d..6d135850 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -485,6 +485,8 @@ |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x256`|12.6| | | | | | | | | | @@ -497,6 +499,16 @@ |`CUBLASLT_MATMUL_TILE_40x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_40x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_416x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_424x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_432x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_440x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_456x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_464x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_472x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_480x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_488x64`|12.6| | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_48x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x256`|12.6| | | | | | | | | | @@ -509,7 +521,16 @@ |`CUBLASLT_MATMUL_TILE_48x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_496x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_504x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_520x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_528x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_536x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_544x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_552x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_560x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_568x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x256`|12.6| | | | | | | | | | @@ -521,6 +542,16 @@ |`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_584x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_592x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_600x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_608x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_616x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_624x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | @@ -536,6 
+567,16 @@ |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_656x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_664x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_672x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_680x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_688x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_696x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_712x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_720x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_728x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x256`|12.6| | | | | | | | | | @@ -546,6 +587,11 @@ |`CUBLASLT_MATMUL_TILE_72x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_72x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_736x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_744x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_752x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_760x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 747a27c0..b4c7aab3 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -469,6 +469,52 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_384x64", {"HIPBLASLT_MATMUL_TILE_384x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_384x128", {"HIPBLASLT_MATMUL_TILE_384x128", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_392x64", {"HIPBLASLT_MATMUL_TILE_392x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_400x64", {"HIPBLASLT_MATMUL_TILE_400x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_408x64", {"HIPBLASLT_MATMUL_TILE_408x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_416x64", {"HIPBLASLT_MATMUL_TILE_416x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_424x64", {"HIPBLASLT_MATMUL_TILE_424x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_432x64", {"HIPBLASLT_MATMUL_TILE_432x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_440x64", {"HIPBLASLT_MATMUL_TILE_440x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x64", {"HIPBLASLT_MATMUL_TILE_448x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_456x64", {"HIPBLASLT_MATMUL_TILE_456x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_464x64", {"HIPBLASLT_MATMUL_TILE_464x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_472x64", {"HIPBLASLT_MATMUL_TILE_472x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_480x64", {"HIPBLASLT_MATMUL_TILE_480x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_488x64", {"HIPBLASLT_MATMUL_TILE_488x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_496x64", {"HIPBLASLT_MATMUL_TILE_496x64", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_504x64", {"HIPBLASLT_MATMUL_TILE_504x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_520x64", {"HIPBLASLT_MATMUL_TILE_520x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_528x64", {"HIPBLASLT_MATMUL_TILE_528x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_536x64", {"HIPBLASLT_MATMUL_TILE_536x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_544x64", {"HIPBLASLT_MATMUL_TILE_544x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_552x64", {"HIPBLASLT_MATMUL_TILE_552x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_560x64", {"HIPBLASLT_MATMUL_TILE_560x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_568x64", {"HIPBLASLT_MATMUL_TILE_568x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x64", {"HIPBLASLT_MATMUL_TILE_576x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_584x64", {"HIPBLASLT_MATMUL_TILE_584x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_592x64", {"HIPBLASLT_MATMUL_TILE_592x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_600x64", {"HIPBLASLT_MATMUL_TILE_600x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_608x64", {"HIPBLASLT_MATMUL_TILE_608x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_616x64", {"HIPBLASLT_MATMUL_TILE_616x64", 
"", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_624x64", {"HIPBLASLT_MATMUL_TILE_624x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_632x64", {"HIPBLASLT_MATMUL_TILE_632x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x64", {"HIPBLASLT_MATMUL_TILE_640x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_648x64", {"HIPBLASLT_MATMUL_TILE_648x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_656x64", {"HIPBLASLT_MATMUL_TILE_656x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_664x64", {"HIPBLASLT_MATMUL_TILE_664x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_672x64", {"HIPBLASLT_MATMUL_TILE_672x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_680x64", {"HIPBLASLT_MATMUL_TILE_680x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_688x64", {"HIPBLASLT_MATMUL_TILE_688x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_696x64", {"HIPBLASLT_MATMUL_TILE_696x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x64", {"HIPBLASLT_MATMUL_TILE_704x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_712x64", {"HIPBLASLT_MATMUL_TILE_712x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_720x64", {"HIPBLASLT_MATMUL_TILE_720x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_728x64", 
{"HIPBLASLT_MATMUL_TILE_728x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_736x64", {"HIPBLASLT_MATMUL_TILE_736x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_744x64", {"HIPBLASLT_MATMUL_TILE_744x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_752x64", {"HIPBLASLT_MATMUL_TILE_752x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_760x64", {"HIPBLASLT_MATMUL_TILE_760x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x64", {"HIPBLASLT_MATMUL_TILE_768x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1355,6 +1401,52 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_384x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_384x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_392x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_400x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_408x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_416x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_424x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_432x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_440x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_456x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_464x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_472x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_480x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_488x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_496x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_504x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_520x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_528x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_536x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_544x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_552x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_560x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_568x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_584x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_592x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_600x64", {CUDA_126, CUDA_0, CUDA_0 
}}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_608x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_616x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_624x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_632x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_648x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_656x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_664x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_672x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_680x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_688x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_696x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_712x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_720x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_728x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_736x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_744x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_752x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_760x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From 3b7cbb319cf3a75abf7045349c77b5f16d57bd48 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 29 Oct 2024 17:58:36 +0000 Subject: [PATCH 27/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 8 + 
`rocblas_(s|d|c|z)syr2k_64` and `hipblas(S|D|C|Z)syr2k(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 32 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 16 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 24 +++++++++----- .../synthetic/libraries/cublas2hipblas_v2.cu | 28 ++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 ++++++++++++++++ 7 files changed, 112 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index ed9d63ce..36df3e77 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1704,7 +1704,9 @@ sub rocSubstitutions { subst("cublasCsyr2_v2", "rocblas_csyr2", "library"); subst("cublasCsyr2_v2_64", "rocblas_csyr2_64", "library"); subst("cublasCsyr2k", "rocblas_csyr2k", "library"); + subst("cublasCsyr2k_64", "rocblas_csyr2k_64", "library"); subst("cublasCsyr2k_v2", "rocblas_csyr2k", "library"); + subst("cublasCsyr2k_v2_64", "rocblas_csyr2k_64", "library"); subst("cublasCsyr_64", "rocblas_csyr_64", "library"); subst("cublasCsyr_v2", "rocblas_csyr", "library"); subst("cublasCsyr_v2_64", "rocblas_csyr_64", "library"); @@ -1847,7 +1849,9 @@ sub rocSubstitutions { subst("cublasDsyr2_v2", "rocblas_dsyr2", "library"); subst("cublasDsyr2_v2_64", "rocblas_dsyr2_64", "library"); subst("cublasDsyr2k", "rocblas_dsyr2k", "library"); + subst("cublasDsyr2k_64", "rocblas_dsyr2k_64", "library"); subst("cublasDsyr2k_v2", "rocblas_dsyr2k", "library"); + subst("cublasDsyr2k_v2_64", "rocblas_dsyr2k_64", "library"); subst("cublasDsyr_64", "rocblas_dsyr_64", "library"); subst("cublasDsyr_v2", "rocblas_dsyr", "library"); subst("cublasDsyr_v2_64", "rocblas_dsyr_64", "library"); @@ -2077,7 +2081,9 @@ sub rocSubstitutions { subst("cublasSsyr2_v2", "rocblas_ssyr2", "library"); subst("cublasSsyr2_v2_64", "rocblas_ssyr2_64", 
"library"); subst("cublasSsyr2k", "rocblas_ssyr2k", "library"); + subst("cublasSsyr2k_64", "rocblas_ssyr2k_64", "library"); subst("cublasSsyr2k_v2", "rocblas_ssyr2k", "library"); + subst("cublasSsyr2k_v2_64", "rocblas_ssyr2k_64", "library"); subst("cublasSsyr_64", "rocblas_ssyr_64", "library"); subst("cublasSsyr_v2", "rocblas_ssyr", "library"); subst("cublasSsyr_v2_64", "rocblas_ssyr_64", "library"); @@ -2248,7 +2254,9 @@ sub rocSubstitutions { subst("cublasZsyr2_v2", "rocblas_zsyr2", "library"); subst("cublasZsyr2_v2_64", "rocblas_zsyr2_64", "library"); subst("cublasZsyr2k", "rocblas_zsyr2k", "library"); + subst("cublasZsyr2k_64", "rocblas_zsyr2k_64", "library"); subst("cublasZsyr2k_v2", "rocblas_zsyr2k", "library"); + subst("cublasZsyr2k_v2_64", "rocblas_zsyr2k_64", "library"); subst("cublasZsyr_64", "rocblas_zsyr_64", "library"); subst("cublasZsyr_v2", "rocblas_zsyr", "library"); subst("cublasZsyr_v2_64", "rocblas_zsyr_64", "library"); @@ -4474,7 +4482,9 @@ sub simpleSubstitutions { subst("cublasCsyr2_v2", "hipblasCsyr2_v2", "library"); subst("cublasCsyr2_v2_64", "hipblasCsyr2_v2_64", "library"); subst("cublasCsyr2k", "hipblasCsyr2k_v2", "library"); + subst("cublasCsyr2k_64", "hipblasCsyr2k_v2_64", "library"); subst("cublasCsyr2k_v2", "hipblasCsyr2k_v2", "library"); + subst("cublasCsyr2k_v2_64", "hipblasCsyr2k_v2_64", "library"); subst("cublasCsyr_64", "hipblasCsyr_v2_64", "library"); subst("cublasCsyr_v2", "hipblasCsyr_v2", "library"); subst("cublasCsyr_v2_64", "hipblasCsyr_v2_64", "library"); @@ -4618,7 +4628,9 @@ sub simpleSubstitutions { subst("cublasDsyr2_v2", "hipblasDsyr2", "library"); subst("cublasDsyr2_v2_64", "hipblasDsyr2_64", "library"); subst("cublasDsyr2k", "hipblasDsyr2k", "library"); + subst("cublasDsyr2k_64", "hipblasDsyr2k_64", "library"); subst("cublasDsyr2k_v2", "hipblasDsyr2k", "library"); + subst("cublasDsyr2k_v2_64", "hipblasDsyr2k_64", "library"); subst("cublasDsyr_64", "hipblasDsyr_64", "library"); subst("cublasDsyr_v2", "hipblasDsyr", 
"library"); subst("cublasDsyr_v2_64", "hipblasDsyr_64", "library"); @@ -4859,7 +4871,9 @@ sub simpleSubstitutions { subst("cublasSsyr2_v2", "hipblasSsyr2", "library"); subst("cublasSsyr2_v2_64", "hipblasSsyr2_64", "library"); subst("cublasSsyr2k", "hipblasSsyr2k", "library"); + subst("cublasSsyr2k_64", "hipblasSsyr2k_64", "library"); subst("cublasSsyr2k_v2", "hipblasSsyr2k", "library"); + subst("cublasSsyr2k_v2_64", "hipblasSsyr2k_64", "library"); subst("cublasSsyr_64", "hipblasSsyr_64", "library"); subst("cublasSsyr_v2", "hipblasSsyr", "library"); subst("cublasSsyr_v2_64", "hipblasSsyr_64", "library"); @@ -5023,7 +5037,9 @@ sub simpleSubstitutions { subst("cublasZsyr2_v2", "hipblasZsyr2_v2", "library"); subst("cublasZsyr2_v2_64", "hipblasZsyr2_v2_64", "library"); subst("cublasZsyr2k", "hipblasZsyr2k_v2", "library"); + subst("cublasZsyr2k_64", "hipblasZsyr2k_v2_64", "library"); subst("cublasZsyr2k_v2", "hipblasZsyr2k_v2", "library"); + subst("cublasZsyr2k_v2_64", "hipblasZsyr2k_v2_64", "library"); subst("cublasZsyr_64", "hipblasZsyr_v2_64", "library"); subst("cublasZsyr_v2", "hipblasZsyr_v2", "library"); subst("cublasZsyr_v2_64", "hipblasZsyr_v2_64", "library"); @@ -11596,8 +11612,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZtrmm_64", "cublasZtpttr", "cublasZsyrkx_64", - "cublasZsyr2k_v2_64", - "cublasZsyr2k_64", "cublasZmatinvBatched", "cublasZhemm_v2_64", "cublasZhemm_64", @@ -11625,8 +11639,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasStrmm_64", "cublasStpttr", "cublasSsyrkx_64", - "cublasSsyr2k_v2_64", - "cublasSsyr2k_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSgemmGroupedBatched_64", @@ -11725,8 +11737,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDtrmm_64", "cublasDtpttr", "cublasDsyrkx_64", - "cublasDsyr2k_v2_64", - "cublasDsyr2k_64", "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", @@ -11744,8 +11754,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCsyrkEx", "cublasCsyrk3mEx_64", 
"cublasCsyrk3mEx", - "cublasCsyr2k_v2_64", - "cublasCsyr2k_64", "cublasCopyEx_64", "cublasCopyEx", "cublasContext", @@ -13527,8 +13535,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZtrmm_64", "cublasZtpttr", "cublasZsyrkx_64", - "cublasZsyr2k_v2_64", - "cublasZsyr2k_64", "cublasZmatinvBatched", "cublasZhemm_v2_64", "cublasZhemm_64", @@ -13550,8 +13556,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasStrmm_64", "cublasStpttr", "cublasSsyrkx_64", - "cublasSsyr2k_v2_64", - "cublasSsyr2k_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSgetrsBatched", @@ -13669,8 +13673,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDtrmm_64", "cublasDtpttr", "cublasDsyrkx_64", - "cublasDsyr2k_v2_64", - "cublasDsyr2k_64", "cublasDmatinvBatched", "cublasDgetrsBatched", "cublasDgetriBatched", @@ -13690,8 +13692,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCsyrkEx", "cublasCsyrk3mEx_64", "cublasCsyrk3mEx", - "cublasCsyr2k_v2_64", - "cublasCsyr2k_64", "cublasCopyEx_64", "cublasCopyEx", "cublasCmatinvBatched", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 1bb0bdc8..ee71f7c8 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1329,9 +1329,9 @@ |`cublasCsymm_v2`| | | | |`hipblasCsymm_v2`|6.0.0| | | | | |`cublasCsymm_v2_64`|12.0| | | |`hipblasCsymm_v2_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | | -|`cublasCsyr2k_64`|12.0| | | | | | | | | | +|`cublasCsyr2k_64`|12.0| | | |`hipblasCsyr2k_v2_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k_v2`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | | -|`cublasCsyr2k_v2_64`|12.0| | | | | | | | | | +|`cublasCsyr2k_v2_64`|12.0| | | |`hipblasCsyr2k_v2_64`|6.3.0| | | |6.3.0| |`cublasCsyrk`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | | |`cublasCsyrk_64`|12.0| | | |`hipblasCsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasCsyrk_v2`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | | @@ -1365,9 +1365,9 @@ |`cublasDsymm_v2`| | | | 
|`hipblasDsymm`|3.6.0| | | | | |`cublasDsymm_v2_64`|12.0| | | |`hipblasDsymm_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k`| | | | |`hipblasDsyr2k`|3.5.0| | | | | -|`cublasDsyr2k_64`|12.0| | | | | | | | | | +|`cublasDsyr2k_64`|12.0| | | |`hipblasDsyr2k_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k_v2`| | | | |`hipblasDsyr2k`|3.5.0| | | | | -|`cublasDsyr2k_v2_64`|12.0| | | | | | | | | | +|`cublasDsyr2k_v2_64`|12.0| | | |`hipblasDsyr2k_64`|6.3.0| | | |6.3.0| |`cublasDsyrk`| | | | |`hipblasDsyrk`|3.5.0| | | | | |`cublasDsyrk_64`|12.0| | | |`hipblasDsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrk_v2`| | | | |`hipblasDsyrk`|3.5.0| | | | | @@ -1417,9 +1417,9 @@ |`cublasSsymm_v2`| | | | |`hipblasSsymm`|3.6.0| | | | | |`cublasSsymm_v2_64`|12.0| | | |`hipblasSsymm_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k`| | | | |`hipblasSsyr2k`|3.5.0| | | | | -|`cublasSsyr2k_64`|12.0| | | | | | | | | | +|`cublasSsyr2k_64`|12.0| | | |`hipblasSsyr2k_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k_v2`| | | | |`hipblasSsyr2k`|3.5.0| | | | | -|`cublasSsyr2k_v2_64`|12.0| | | | | | | | | | +|`cublasSsyr2k_v2_64`|12.0| | | |`hipblasSsyr2k_64`|6.3.0| | | |6.3.0| |`cublasSsyrk`| | | | |`hipblasSsyrk`|3.5.0| | | | | |`cublasSsyrk_64`|12.0| | | |`hipblasSsyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrk_v2`| | | | |`hipblasSsyrk`|3.5.0| | | | | @@ -1475,9 +1475,9 @@ |`cublasZsymm_v2`| | | | |`hipblasZsymm_v2`|6.0.0| | | | | |`cublasZsymm_v2_64`|12.0| | | |`hipblasZsymm_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | | -|`cublasZsyr2k_64`|12.0| | | | | | | | | | +|`cublasZsyr2k_64`|12.0| | | |`hipblasZsyr2k_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k_v2`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | | -|`cublasZsyr2k_v2_64`|12.0| | | | | | | | | | +|`cublasZsyr2k_v2_64`|12.0| | | |`hipblasZsyr2k_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyrk`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | | |`cublasZsyrk_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyrk_v2`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | | diff --git 
a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 824abff6..266bf47f 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1329,9 +1329,9 @@ |`cublasCsymm_v2`| | | | |`hipblasCsymm_v2`|6.0.0| | | | |`rocblas_csymm`|3.5.0| | | | | |`cublasCsymm_v2_64`|12.0| | | |`hipblasCsymm_v2_64`|6.3.0| | | |6.3.0|`rocblas_csymm_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | |`rocblas_csyr2k`|3.5.0| | | | | -|`cublasCsyr2k_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsyr2k_64`|12.0| | | |`hipblasCsyr2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyr2k_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k_v2`| | | | |`hipblasCsyr2k_v2`|6.0.0| | | | |`rocblas_csyr2k`|3.5.0| | | | | -|`cublasCsyr2k_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsyr2k_v2_64`|12.0| | | |`hipblasCsyr2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyr2k_64`|6.3.0| | | |6.3.0| |`cublasCsyrk`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | |`rocblas_csyrk`|3.5.0| | | | | |`cublasCsyrk_64`|12.0| | | |`hipblasCsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyrk_64`|6.3.0| | | |6.3.0| |`cublasCsyrk_v2`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | |`rocblas_csyrk`|3.5.0| | | | | @@ -1365,9 +1365,9 @@ |`cublasDsymm_v2`| | | | |`hipblasDsymm`|3.6.0| | | | |`rocblas_dsymm`|3.5.0| | | | | |`cublasDsymm_v2_64`|12.0| | | |`hipblasDsymm_64`|6.3.0| | | |6.3.0|`rocblas_dsymm_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k`| | | | |`hipblasDsyr2k`|3.5.0| | | | |`rocblas_dsyr2k`|3.5.0| | | | | -|`cublasDsyr2k_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsyr2k_64`|12.0| | | |`hipblasDsyr2k_64`|6.3.0| | | |6.3.0|`rocblas_dsyr2k_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k_v2`| | | | |`hipblasDsyr2k`|3.5.0| | | | |`rocblas_dsyr2k`|3.5.0| | | | | -|`cublasDsyr2k_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsyr2k_v2_64`|12.0| | | |`hipblasDsyr2k_64`|6.3.0| | | |6.3.0|`rocblas_dsyr2k_64`|6.3.0| | | |6.3.0| 
|`cublasDsyrk`| | | | |`hipblasDsyrk`|3.5.0| | | | |`rocblas_dsyrk`|3.5.0| | | | | |`cublasDsyrk_64`|12.0| | | |`hipblasDsyrk_64`|6.3.0| | | |6.3.0|`rocblas_dsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrk_v2`| | | | |`hipblasDsyrk`|3.5.0| | | | |`rocblas_dsyrk`|3.5.0| | | | | @@ -1417,9 +1417,9 @@ |`cublasSsymm_v2`| | | | |`hipblasSsymm`|3.6.0| | | | |`rocblas_ssymm`|3.5.0| | | | | |`cublasSsymm_v2_64`|12.0| | | |`hipblasSsymm_64`|6.3.0| | | |6.3.0|`rocblas_ssymm_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k`| | | | |`hipblasSsyr2k`|3.5.0| | | | |`rocblas_ssyr2k`|3.5.0| | | | | -|`cublasSsyr2k_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsyr2k_64`|12.0| | | |`hipblasSsyr2k_64`|6.3.0| | | |6.3.0|`rocblas_ssyr2k_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k_v2`| | | | |`hipblasSsyr2k`|3.5.0| | | | |`rocblas_ssyr2k`|3.5.0| | | | | -|`cublasSsyr2k_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsyr2k_v2_64`|12.0| | | |`hipblasSsyr2k_64`|6.3.0| | | |6.3.0|`rocblas_ssyr2k_64`|6.3.0| | | |6.3.0| |`cublasSsyrk`| | | | |`hipblasSsyrk`|3.5.0| | | | |`rocblas_ssyrk`|3.5.0| | | | | |`cublasSsyrk_64`|12.0| | | |`hipblasSsyrk_64`|6.3.0| | | |6.3.0|`rocblas_ssyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrk_v2`| | | | |`hipblasSsyrk`|3.5.0| | | | |`rocblas_ssyrk`|3.5.0| | | | | @@ -1475,9 +1475,9 @@ |`cublasZsymm_v2`| | | | |`hipblasZsymm_v2`|6.0.0| | | | |`rocblas_zsymm`|3.5.0| | | | | |`cublasZsymm_v2_64`|12.0| | | |`hipblasZsymm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsymm_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | |`rocblas_zsyr2k`|3.5.0| | | | | -|`cublasZsyr2k_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsyr2k_64`|12.0| | | |`hipblasZsyr2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyr2k_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k_v2`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | |`rocblas_zsyr2k`|3.5.0| | | | | -|`cublasZsyr2k_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsyr2k_v2_64`|12.0| | | |`hipblasZsyr2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyr2k_64`|6.3.0| | | |6.3.0| 
|`cublasZsyrk`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | |`rocblas_zsyrk`|3.5.0| | | | | |`cublasZsyrk_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrk_v2`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | |`rocblas_zsyrk`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 6d135850..48de2a84 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1329,9 +1329,9 @@ |`cublasCsymm_v2`| | | | |`rocblas_csymm`|3.5.0| | | | | |`cublasCsymm_v2_64`|12.0| | | |`rocblas_csymm_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k`| | | | |`rocblas_csyr2k`|3.5.0| | | | | -|`cublasCsyr2k_64`|12.0| | | | | | | | | | +|`cublasCsyr2k_64`|12.0| | | |`rocblas_csyr2k_64`|6.3.0| | | |6.3.0| |`cublasCsyr2k_v2`| | | | |`rocblas_csyr2k`|3.5.0| | | | | -|`cublasCsyr2k_v2_64`|12.0| | | | | | | | | | +|`cublasCsyr2k_v2_64`|12.0| | | |`rocblas_csyr2k_64`|6.3.0| | | |6.3.0| |`cublasCsyrk`| | | | |`rocblas_csyrk`|3.5.0| | | | | |`cublasCsyrk_64`|12.0| | | |`rocblas_csyrk_64`|6.3.0| | | |6.3.0| |`cublasCsyrk_v2`| | | | |`rocblas_csyrk`|3.5.0| | | | | @@ -1365,9 +1365,9 @@ |`cublasDsymm_v2`| | | | |`rocblas_dsymm`|3.5.0| | | | | |`cublasDsymm_v2_64`|12.0| | | |`rocblas_dsymm_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k`| | | | |`rocblas_dsyr2k`|3.5.0| | | | | -|`cublasDsyr2k_64`|12.0| | | | | | | | | | +|`cublasDsyr2k_64`|12.0| | | |`rocblas_dsyr2k_64`|6.3.0| | | |6.3.0| |`cublasDsyr2k_v2`| | | | |`rocblas_dsyr2k`|3.5.0| | | | | -|`cublasDsyr2k_v2_64`|12.0| | | | | | | | | | +|`cublasDsyr2k_v2_64`|12.0| | | |`rocblas_dsyr2k_64`|6.3.0| | | |6.3.0| |`cublasDsyrk`| | | | |`rocblas_dsyrk`|3.5.0| | | | | |`cublasDsyrk_64`|12.0| | | |`rocblas_dsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrk_v2`| | | | |`rocblas_dsyrk`|3.5.0| | | | | @@ -1417,9 +1417,9 @@ |`cublasSsymm_v2`| | | | |`rocblas_ssymm`|3.5.0| | | | | |`cublasSsymm_v2_64`|12.0| | | 
|`rocblas_ssymm_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k`| | | | |`rocblas_ssyr2k`|3.5.0| | | | | -|`cublasSsyr2k_64`|12.0| | | | | | | | | | +|`cublasSsyr2k_64`|12.0| | | |`rocblas_ssyr2k_64`|6.3.0| | | |6.3.0| |`cublasSsyr2k_v2`| | | | |`rocblas_ssyr2k`|3.5.0| | | | | -|`cublasSsyr2k_v2_64`|12.0| | | | | | | | | | +|`cublasSsyr2k_v2_64`|12.0| | | |`rocblas_ssyr2k_64`|6.3.0| | | |6.3.0| |`cublasSsyrk`| | | | |`rocblas_ssyrk`|3.5.0| | | | | |`cublasSsyrk_64`|12.0| | | |`rocblas_ssyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrk_v2`| | | | |`rocblas_ssyrk`|3.5.0| | | | | @@ -1475,9 +1475,9 @@ |`cublasZsymm_v2`| | | | |`rocblas_zsymm`|3.5.0| | | | | |`cublasZsymm_v2_64`|12.0| | | |`rocblas_zsymm_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k`| | | | |`rocblas_zsyr2k`|3.5.0| | | | | -|`cublasZsyr2k_64`|12.0| | | | | | | | | | +|`cublasZsyr2k_64`|12.0| | | |`rocblas_zsyr2k_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k_v2`| | | | |`rocblas_zsyr2k`|3.5.0| | | | | -|`cublasZsyr2k_v2_64`|12.0| | | | | | | | | | +|`cublasZsyr2k_v2_64`|12.0| | | |`rocblas_zsyr2k_64`|6.3.0| | | |6.3.0| |`cublasZsyrk`| | | | |`rocblas_zsyrk`|3.5.0| | | | | |`cublasZsyrk_64`|12.0| | | |`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrk_v2`| | | | |`rocblas_zsyrk`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 6a2b42c5..79609b64 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -493,13 +493,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SYR2K {"cublasSsyr2k", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsyr2k_64", {"hipblasSsyr2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSsyr2k_64", {"hipblasSsyr2k_64", "rocblas_ssyr2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDsyr2k", {"hipblasDsyr2k", "rocblas_dsyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsyr2k_64", 
{"hipblasDsyr2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDsyr2k_64", {"hipblasDsyr2k_64", "rocblas_dsyr2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCsyr2k", {"hipblasCsyr2k_v2", "rocblas_csyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsyr2k_64", {"hipblasCsyr2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCsyr2k_64", {"hipblasCsyr2k_v2_64", "rocblas_csyr2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZsyr2k", {"hipblasZsyr2k_v2", "rocblas_zsyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsyr2k_64", {"hipblasZsyr2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZsyr2k_64", {"hipblasZsyr2k_v2_64", "rocblas_zsyr2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // SYRKX - eXtended SYRK {"cublasSsyrkx", {"hipblasSsyrkx", "rocblas_ssyrkx", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -876,13 +876,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SYR2K {"cublasSsyr2k_v2", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSsyr2k_v2_64", {"hipblasSsyr2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSsyr2k_v2_64", {"hipblasSsyr2k_64", "rocblas_ssyr2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDsyr2k_v2", {"hipblasDsyr2k", "rocblas_dsyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDsyr2k_v2_64", {"hipblasDsyr2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDsyr2k_v2_64", {"hipblasDsyr2k_64", "rocblas_dsyr2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCsyr2k_v2", {"hipblasCsyr2k_v2", "rocblas_csyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCsyr2k_v2_64", {"hipblasCsyr2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCsyr2k_v2_64", {"hipblasCsyr2k_v2_64", 
"rocblas_csyr2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZsyr2k_v2", {"hipblasZsyr2k_v2", "rocblas_zsyr2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZsyr2k_v2_64", {"hipblasZsyr2k_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZsyr2k_v2_64", {"hipblasZsyr2k_v2_64", "rocblas_zsyr2k_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // HER2K {"cublasCher2k_v2", {"hipblasCher2k_v2", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -2052,6 +2052,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDsyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCsyrk_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZsyrk_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCsyr2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZsyr2k_64_v2", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2469,6 +2473,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dsyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_csyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zsyrk_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ssyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_csyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 17cd3988..8be907e5 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -3002,6 +3002,34 @@ int main() { // 
CHECK-NEXT: blasStatus = hipblasZsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2k_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, const float* BP, int64_t ldb, const float* beta, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasSsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2k_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t 
transA, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, const double* BP, int64_t ldb, const double* beta, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasDsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2k_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const hipComplex* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, 
&complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2k_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index d89806a1..f0fdcd33 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3207,6 +3207,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyrk_64(blasHandle, blasFillMode, blasOperation, n_64, 
k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyrk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr2k_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ssyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_ssyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr2k_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, 
&da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_dsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr2k_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_csyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_csyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* 
alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr2k_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_zsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; From 95c9f4813002d42d6ca82ecbdb525cd02c046f1a Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 29 Oct 2024 18:07:58 +0000 Subject: [PATCH 28/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 8 - Fix typo --- docs/tables/CUBLAS_API_supported_by_HIP.md | 4 ++-- docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md | 4 ++-- src/CUDA2HIP_BLAS_API_functions.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index ee71f7c8..1993760e 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1475,9 +1475,9 @@ |`cublasZsymm_v2`| | | | |`hipblasZsymm_v2`|6.0.0| | | | | 
|`cublasZsymm_v2_64`|12.0| | | |`hipblasZsymm_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | | -|`cublasZsyr2k_64`|12.0| | | |`hipblasZsyr2k_v2_64`| | | | | | +|`cublasZsyr2k_64`|12.0| | | |`hipblasZsyr2k_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k_v2`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | | -|`cublasZsyr2k_v2_64`|12.0| | | |`hipblasZsyr2k_v2_64`| | | | | | +|`cublasZsyr2k_v2_64`|12.0| | | |`hipblasZsyr2k_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyrk`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | | |`cublasZsyrk_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyrk_v2`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 266bf47f..dc35c007 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1475,9 +1475,9 @@ |`cublasZsymm_v2`| | | | |`hipblasZsymm_v2`|6.0.0| | | | |`rocblas_zsymm`|3.5.0| | | | | |`cublasZsymm_v2_64`|12.0| | | |`hipblasZsymm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsymm_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | |`rocblas_zsyr2k`|3.5.0| | | | | -|`cublasZsyr2k_64`|12.0| | | |`hipblasZsyr2k_v2_64`| | | | | |`rocblas_zsyr2k_64`|6.3.0| | | |6.3.0| +|`cublasZsyr2k_64`|12.0| | | |`hipblasZsyr2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyr2k_64`|6.3.0| | | |6.3.0| |`cublasZsyr2k_v2`| | | | |`hipblasZsyr2k_v2`|6.0.0| | | | |`rocblas_zsyr2k`|3.5.0| | | | | -|`cublasZsyr2k_v2_64`|12.0| | | |`hipblasZsyr2k_v2_64`| | | | | |`rocblas_zsyr2k_64`|6.3.0| | | |6.3.0| +|`cublasZsyr2k_v2_64`|12.0| | | |`hipblasZsyr2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyr2k_64`|6.3.0| | | |6.3.0| |`cublasZsyrk`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | |`rocblas_zsyrk`|3.5.0| | | | | |`cublasZsyrk_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrk_v2`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | 
|`rocblas_zsyrk`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 79609b64..66307011 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -2055,7 +2055,7 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasSsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasDsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCsyr2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipblasZsyr2k_64_v2", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZsyr2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, From 8f4aada0a735574ae2cadc137ca5df9c9ab85b4b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 29 Oct 2024 19:50:15 +0000 Subject: [PATCH 29/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 9 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 92 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 46 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 46 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 46 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 92 +++++++++++++++++++ 5 files changed, 322 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 36df3e77..d9354586 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12617,20 +12617,66 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_664x64", "CUBLASLT_MATMUL_TILE_656x64", "CUBLASLT_MATMUL_TILE_64x96", + "CUBLASLT_MATMUL_TILE_64x88", + "CUBLASLT_MATMUL_TILE_64x80", "CUBLASLT_MATMUL_TILE_64x8", "CUBLASLT_MATMUL_TILE_64x768", + "CUBLASLT_MATMUL_TILE_64x72", "CUBLASLT_MATMUL_TILE_64x704", "CUBLASLT_MATMUL_TILE_64x640", "CUBLASLT_MATMUL_TILE_64x64", "CUBLASLT_MATMUL_TILE_64x576", + "CUBLASLT_MATMUL_TILE_64x56", "CUBLASLT_MATMUL_TILE_64x512", + "CUBLASLT_MATMUL_TILE_64x48", 
"CUBLASLT_MATMUL_TILE_64x448", + "CUBLASLT_MATMUL_TILE_64x440", + "CUBLASLT_MATMUL_TILE_64x432", + "CUBLASLT_MATMUL_TILE_64x424", + "CUBLASLT_MATMUL_TILE_64x416", + "CUBLASLT_MATMUL_TILE_64x408", + "CUBLASLT_MATMUL_TILE_64x400", + "CUBLASLT_MATMUL_TILE_64x40", + "CUBLASLT_MATMUL_TILE_64x392", "CUBLASLT_MATMUL_TILE_64x384", + "CUBLASLT_MATMUL_TILE_64x376", + "CUBLASLT_MATMUL_TILE_64x368", + "CUBLASLT_MATMUL_TILE_64x360", + "CUBLASLT_MATMUL_TILE_64x352", + "CUBLASLT_MATMUL_TILE_64x344", + "CUBLASLT_MATMUL_TILE_64x336", + "CUBLASLT_MATMUL_TILE_64x328", "CUBLASLT_MATMUL_TILE_64x320", "CUBLASLT_MATMUL_TILE_64x32", + "CUBLASLT_MATMUL_TILE_64x312", + "CUBLASLT_MATMUL_TILE_64x304", + "CUBLASLT_MATMUL_TILE_64x296", + "CUBLASLT_MATMUL_TILE_64x288", + "CUBLASLT_MATMUL_TILE_64x280", + "CUBLASLT_MATMUL_TILE_64x272", + "CUBLASLT_MATMUL_TILE_64x264", "CUBLASLT_MATMUL_TILE_64x256", + "CUBLASLT_MATMUL_TILE_64x248", + "CUBLASLT_MATMUL_TILE_64x240", + "CUBLASLT_MATMUL_TILE_64x24", + "CUBLASLT_MATMUL_TILE_64x232", + "CUBLASLT_MATMUL_TILE_64x224", + "CUBLASLT_MATMUL_TILE_64x216", + "CUBLASLT_MATMUL_TILE_64x208", + "CUBLASLT_MATMUL_TILE_64x200", "CUBLASLT_MATMUL_TILE_64x192", + "CUBLASLT_MATMUL_TILE_64x184", + "CUBLASLT_MATMUL_TILE_64x176", + "CUBLASLT_MATMUL_TILE_64x168", + "CUBLASLT_MATMUL_TILE_64x160", + "CUBLASLT_MATMUL_TILE_64x16", + "CUBLASLT_MATMUL_TILE_64x152", + "CUBLASLT_MATMUL_TILE_64x144", + "CUBLASLT_MATMUL_TILE_64x136", "CUBLASLT_MATMUL_TILE_64x128", + "CUBLASLT_MATMUL_TILE_64x120", + "CUBLASLT_MATMUL_TILE_64x112", + "CUBLASLT_MATMUL_TILE_64x104", "CUBLASLT_MATMUL_TILE_648x64", "CUBLASLT_MATMUL_TILE_640x64", "CUBLASLT_MATMUL_TILE_632x64", @@ -14308,20 +14354,66 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_664x64", "CUBLASLT_MATMUL_TILE_656x64", "CUBLASLT_MATMUL_TILE_64x96", + "CUBLASLT_MATMUL_TILE_64x88", + "CUBLASLT_MATMUL_TILE_64x80", "CUBLASLT_MATMUL_TILE_64x8", "CUBLASLT_MATMUL_TILE_64x768", + "CUBLASLT_MATMUL_TILE_64x72", 
"CUBLASLT_MATMUL_TILE_64x704", "CUBLASLT_MATMUL_TILE_64x640", "CUBLASLT_MATMUL_TILE_64x64", "CUBLASLT_MATMUL_TILE_64x576", + "CUBLASLT_MATMUL_TILE_64x56", "CUBLASLT_MATMUL_TILE_64x512", + "CUBLASLT_MATMUL_TILE_64x48", "CUBLASLT_MATMUL_TILE_64x448", + "CUBLASLT_MATMUL_TILE_64x440", + "CUBLASLT_MATMUL_TILE_64x432", + "CUBLASLT_MATMUL_TILE_64x424", + "CUBLASLT_MATMUL_TILE_64x416", + "CUBLASLT_MATMUL_TILE_64x408", + "CUBLASLT_MATMUL_TILE_64x400", + "CUBLASLT_MATMUL_TILE_64x40", + "CUBLASLT_MATMUL_TILE_64x392", "CUBLASLT_MATMUL_TILE_64x384", + "CUBLASLT_MATMUL_TILE_64x376", + "CUBLASLT_MATMUL_TILE_64x368", + "CUBLASLT_MATMUL_TILE_64x360", + "CUBLASLT_MATMUL_TILE_64x352", + "CUBLASLT_MATMUL_TILE_64x344", + "CUBLASLT_MATMUL_TILE_64x336", + "CUBLASLT_MATMUL_TILE_64x328", "CUBLASLT_MATMUL_TILE_64x320", "CUBLASLT_MATMUL_TILE_64x32", + "CUBLASLT_MATMUL_TILE_64x312", + "CUBLASLT_MATMUL_TILE_64x304", + "CUBLASLT_MATMUL_TILE_64x296", + "CUBLASLT_MATMUL_TILE_64x288", + "CUBLASLT_MATMUL_TILE_64x280", + "CUBLASLT_MATMUL_TILE_64x272", + "CUBLASLT_MATMUL_TILE_64x264", "CUBLASLT_MATMUL_TILE_64x256", + "CUBLASLT_MATMUL_TILE_64x248", + "CUBLASLT_MATMUL_TILE_64x240", + "CUBLASLT_MATMUL_TILE_64x24", + "CUBLASLT_MATMUL_TILE_64x232", + "CUBLASLT_MATMUL_TILE_64x224", + "CUBLASLT_MATMUL_TILE_64x216", + "CUBLASLT_MATMUL_TILE_64x208", + "CUBLASLT_MATMUL_TILE_64x200", "CUBLASLT_MATMUL_TILE_64x192", + "CUBLASLT_MATMUL_TILE_64x184", + "CUBLASLT_MATMUL_TILE_64x176", + "CUBLASLT_MATMUL_TILE_64x168", + "CUBLASLT_MATMUL_TILE_64x160", + "CUBLASLT_MATMUL_TILE_64x16", + "CUBLASLT_MATMUL_TILE_64x152", + "CUBLASLT_MATMUL_TILE_64x144", + "CUBLASLT_MATMUL_TILE_64x136", "CUBLASLT_MATMUL_TILE_64x128", + "CUBLASLT_MATMUL_TILE_64x120", + "CUBLASLT_MATMUL_TILE_64x112", + "CUBLASLT_MATMUL_TILE_64x104", "CUBLASLT_MATMUL_TILE_648x64", "CUBLASLT_MATMUL_TILE_640x64", "CUBLASLT_MATMUL_TILE_632x64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 
1993760e..53a9eb0c 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -552,20 +552,66 @@ |`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x152`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x184`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x208`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x216`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x224`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x232`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x240`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x248`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x264`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x272`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x280`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x288`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x296`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x304`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x312`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x32`|10.1| | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_64x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x328`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x336`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x344`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x352`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x360`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x368`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x376`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x392`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x400`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x408`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x416`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x424`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x432`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x440`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x48`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x72`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x88`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_656x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_664x64`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index dc35c007..d8944098 100644 --- 
a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -552,20 +552,66 @@ |`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x120`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x136`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x144`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x152`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x160`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x168`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x176`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x184`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x200`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x208`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x216`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x224`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x232`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x240`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x248`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x264`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x272`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x280`|12.6| | 
| | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x288`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x296`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x304`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x312`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x32`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x328`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x336`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x344`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x352`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x360`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x368`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x376`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x392`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x400`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x408`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x416`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x424`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x432`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x440`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x48`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x72`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x88`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_656x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_664x64`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 48de2a84..fbfe9e72 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -552,20 +552,66 @@ |`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x152`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x184`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x208`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x216`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x224`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_64x232`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x240`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x248`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x256`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x264`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x272`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x280`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x288`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x296`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x304`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x312`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x32`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x328`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x336`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x344`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x352`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x360`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x368`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x376`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x392`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x400`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x408`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x416`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x424`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x432`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x440`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x48`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | 
| |`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x72`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x88`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x96`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_656x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_664x64`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index b4c7aab3..d77ed8eb 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -515,6 +515,52 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_752x64", {"HIPBLASLT_MATMUL_TILE_752x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_760x64", {"HIPBLASLT_MATMUL_TILE_760x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_768x64", {"HIPBLASLT_MATMUL_TILE_768x64", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x16", {"HIPBLASLT_MATMUL_TILE_64x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x24", {"HIPBLASLT_MATMUL_TILE_64x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x40", {"HIPBLASLT_MATMUL_TILE_64x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x48", {"HIPBLASLT_MATMUL_TILE_64x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x56", {"HIPBLASLT_MATMUL_TILE_64x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x72", {"HIPBLASLT_MATMUL_TILE_64x72", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x80", {"HIPBLASLT_MATMUL_TILE_64x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x88", {"HIPBLASLT_MATMUL_TILE_64x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x104", {"HIPBLASLT_MATMUL_TILE_64x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x112", {"HIPBLASLT_MATMUL_TILE_64x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x120", {"HIPBLASLT_MATMUL_TILE_64x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x136", {"HIPBLASLT_MATMUL_TILE_64x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x144", {"HIPBLASLT_MATMUL_TILE_64x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x152", {"HIPBLASLT_MATMUL_TILE_64x152", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x160", {"HIPBLASLT_MATMUL_TILE_64x160", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x168", {"HIPBLASLT_MATMUL_TILE_64x168", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x176", {"HIPBLASLT_MATMUL_TILE_64x176", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x184", {"HIPBLASLT_MATMUL_TILE_64x184", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x200", {"HIPBLASLT_MATMUL_TILE_64x200", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x208", {"HIPBLASLT_MATMUL_TILE_64x208", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x216", {"HIPBLASLT_MATMUL_TILE_64x216", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x224", {"HIPBLASLT_MATMUL_TILE_64x224", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x232", {"HIPBLASLT_MATMUL_TILE_64x232", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x240", {"HIPBLASLT_MATMUL_TILE_64x240", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x248", {"HIPBLASLT_MATMUL_TILE_64x248", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x264", {"HIPBLASLT_MATMUL_TILE_64x264", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x272", {"HIPBLASLT_MATMUL_TILE_64x272", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x280", {"HIPBLASLT_MATMUL_TILE_64x280", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x288", {"HIPBLASLT_MATMUL_TILE_64x288", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x296", {"HIPBLASLT_MATMUL_TILE_64x296", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x304", {"HIPBLASLT_MATMUL_TILE_64x304", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x312", {"HIPBLASLT_MATMUL_TILE_64x312", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x328", {"HIPBLASLT_MATMUL_TILE_64x328", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x336", {"HIPBLASLT_MATMUL_TILE_64x336", 
"", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x344", {"HIPBLASLT_MATMUL_TILE_64x344", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x352", {"HIPBLASLT_MATMUL_TILE_64x352", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x360", {"HIPBLASLT_MATMUL_TILE_64x360", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x368", {"HIPBLASLT_MATMUL_TILE_64x368", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x376", {"HIPBLASLT_MATMUL_TILE_64x376", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x392", {"HIPBLASLT_MATMUL_TILE_64x392", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x400", {"HIPBLASLT_MATMUL_TILE_64x400", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x408", {"HIPBLASLT_MATMUL_TILE_64x408", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x416", {"HIPBLASLT_MATMUL_TILE_64x416", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x424", {"HIPBLASLT_MATMUL_TILE_64x424", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x432", {"HIPBLASLT_MATMUL_TILE_64x432", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x440", {"HIPBLASLT_MATMUL_TILE_64x440", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, 
API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1447,6 +1493,52 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_752x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_760x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_768x64", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x88", 
{CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x136", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x144", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x152", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x160", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x168", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x176", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x184", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x200", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 
CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x208", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x216", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x224", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x232", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x240", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x248", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x264", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x272", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x280", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x288", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x296", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x304", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, 
CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x312", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x328", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x336", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x344", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x352", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x360", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x368", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x376", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x392", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x400", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x408", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x416", 
{CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x424", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x432", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x440", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From 7c0813e16e8962cb36d811b28db0641930b592ff Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 30 Oct 2024 18:16:35 +0000 Subject: [PATCH 30/51] [HIPIFY][MIOpen][fix] Get rid of `miopenAcceleratorQueue_t` + Use `hipStream_t` instead + [Reason] `miopen.h`: typedef hipStream_t miopenAcceleratorQueue_t; + Get rid of `ROC_MIOPEN_ONLY` flag, too + [Reason] `ROC_MIOPEN_ONLY` was used only for `cudaStream_t` to `miopenAcceleratorQueue_t` hipification (if the `--miopen` option is used) + Now `cudaStream_t` is always hipified to `hipStream_t` for both ROC and HIP targets --- src/CUDA2HIP_Runtime_API_types.cpp | 2 +- src/Statistics.cpp | 6 +----- src/Statistics.h | 5 +---- tests/unit_tests/synthetic/libraries/cudnn2miopen.cu | 2 +- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp index b260a7f5..dc0aebc6 100644 --- a/src/CUDA2HIP_Runtime_API_types.cpp +++ b/src/CUDA2HIP_Runtime_API_types.cpp @@ -191,7 +191,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // the same - CUstream_st {"CUstream_st", {"ihipStream_t", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, // CUstream - {"cudaStream_t", {"hipStream_t", "miopenAcceleratorQueue_t", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, ROC_MIOPEN_ONLY}}, + 
{"cudaStream_t", {"hipStream_t", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, // CUfunction {"cudaFunction_t", {"hipFunction_t", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 97f76c57..ba625fab 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -367,7 +367,7 @@ void Statistics::setActive(const std::string &name) { bool Statistics::isToRoc(const hipCounter &counter) { return (counter.apiType == API_BLAS || counter.apiType == API_DNN || counter.apiType == API_SPARSE || counter.apiType == API_SOLVER || counter.apiType == API_RUNTIME || counter.apiType == API_COMPLEX || counter.apiType == API_RAND) && - ((TranslateToRoc && !TranslateToMIOpen && !isRocMiopenOnly(counter)) || TranslateToMIOpen); + ((TranslateToRoc && !TranslateToMIOpen) || TranslateToMIOpen); } bool Statistics::isHipExperimental(const hipCounter &counter) { @@ -438,10 +438,6 @@ bool Statistics::isHipSupportedV2Only(const hipCounter &counter) { return HIP_SUPPORTED_V2_ONLY == (counter.supportDegree & HIP_SUPPORTED_V2_ONLY); } -bool Statistics::isRocMiopenOnly(const hipCounter &counter) { - return ROC_MIOPEN_ONLY == (counter.supportDegree & ROC_MIOPEN_ONLY); -} - bool Statistics::isCudaOverloaded(const hipCounter &counter) { return CUDA_OVERLOADED == (counter.supportDegree & CUDA_OVERLOADED); } diff --git a/src/Statistics.h b/src/Statistics.h index fa114715..0206c450 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -197,8 +197,7 @@ enum SupportDegree { REMOVED = 0x400, HIP_EXPERIMENTAL = 0x800, HIP_SUPPORTED_V2_ONLY = 0x1000, - ROC_MIOPEN_ONLY = 0x2000, - CUDA_OVERLOADED = 0x4000 + CUDA_OVERLOADED = 0x2000 }; enum cudaVersions { @@ -503,8 +502,6 @@ class Statistics { static bool isRemoved(const hipCounter &counter); // Check whether the counter is HIP_SUPPORTED_V2_ONLY or not. static bool isHipSupportedV2Only(const hipCounter &counter); - // Check whether the counter is ROC_MIOPEN_ONLY or not. 
- static bool isRocMiopenOnly(const hipCounter &counter); // Check whether the counter is CUDA_OVERLOADED or not. static bool isCudaOverloaded(const hipCounter &counter); // Get string CUDA version. diff --git a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu index 98aa2b92..366a5bd4 100644 --- a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu +++ b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu @@ -56,7 +56,7 @@ int main() { // CHECK: const_ch = miopenGetErrorString(status); const_ch = cudnnGetErrorString(status); - // CHECK: miopenAcceleratorQueue_t streamId; + // CHECK: hipStream_t streamId; cudaStream_t streamId; // CUDA: cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId); From 9a6cc55d04e3cd7a8cd4da9a7aaca499bbb4ed55 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 30 Oct 2024 18:41:52 +0000 Subject: [PATCH 31/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 9 + `rocblas_(s|d|c|z)syrkx_64` and `hipblas(S|D|C|Z)syrkx(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++++-------- docs/tables/CUBLAS_API_supported_by_HIP.md | 8 ++++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 8 ++++---- docs/tables/CUBLAS_API_supported_by_ROC.md | 8 ++++---- src/CUDA2HIP_BLAS_API_functions.cpp | 16 +++++++++++---- .../synthetic/libraries/cublas2hipblas_v2.cu | 20 +++++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 20 +++++++++++++++++++ 7 files changed, 72 insertions(+), 24 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index d9354586..88f74bff 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1715,6 +1715,7 @@ sub rocSubstitutions { subst("cublasCsyrk_v2", "rocblas_csyrk", "library"); subst("cublasCsyrk_v2_64", "rocblas_csyrk_64", "library"); subst("cublasCsyrkx", "rocblas_csyrkx", "library"); + subst("cublasCsyrkx_64", 
"rocblas_csyrkx_64", "library"); subst("cublasCtbmv", "rocblas_ctbmv", "library"); subst("cublasCtbmv_64", "rocblas_ctbmv_64", "library"); subst("cublasCtbmv_v2", "rocblas_ctbmv", "library"); @@ -1860,6 +1861,7 @@ sub rocSubstitutions { subst("cublasDsyrk_v2", "rocblas_dsyrk", "library"); subst("cublasDsyrk_v2_64", "rocblas_dsyrk_64", "library"); subst("cublasDsyrkx", "rocblas_dsyrkx", "library"); + subst("cublasDsyrkx_64", "rocblas_dsyrkx_64", "library"); subst("cublasDtbmv", "rocblas_dtbmv", "library"); subst("cublasDtbmv_64", "rocblas_dtbmv_64", "library"); subst("cublasDtbmv_v2", "rocblas_dtbmv", "library"); @@ -2092,6 +2094,7 @@ sub rocSubstitutions { subst("cublasSsyrk_v2", "rocblas_ssyrk", "library"); subst("cublasSsyrk_v2_64", "rocblas_ssyrk_64", "library"); subst("cublasSsyrkx", "rocblas_ssyrkx", "library"); + subst("cublasSsyrkx_64", "rocblas_ssyrkx_64", "library"); subst("cublasStbmv", "rocblas_stbmv", "library"); subst("cublasStbmv_64", "rocblas_stbmv_64", "library"); subst("cublasStbmv_v2", "rocblas_stbmv", "library"); @@ -2265,6 +2268,7 @@ sub rocSubstitutions { subst("cublasZsyrk_v2", "rocblas_zsyrk", "library"); subst("cublasZsyrk_v2_64", "rocblas_zsyrk_64", "library"); subst("cublasZsyrkx", "rocblas_zsyrkx", "library"); + subst("cublasZsyrkx_64", "rocblas_zsyrkx_64", "library"); subst("cublasZtbmv", "rocblas_ztbmv", "library"); subst("cublasZtbmv_64", "rocblas_ztbmv_64", "library"); subst("cublasZtbmv_v2", "rocblas_ztbmv", "library"); @@ -4493,6 +4497,7 @@ sub simpleSubstitutions { subst("cublasCsyrk_v2", "hipblasCsyrk_v2", "library"); subst("cublasCsyrk_v2_64", "hipblasCsyrk_v2_64", "library"); subst("cublasCsyrkx", "hipblasCsyrkx_v2", "library"); + subst("cublasCsyrkx_64", "hipblasCsyrkx_v2_64", "library"); subst("cublasCtbmv", "hipblasCtbmv_v2", "library"); subst("cublasCtbmv_64", "hipblasCtbmv_v2_64", "library"); subst("cublasCtbmv_v2", "hipblasCtbmv_v2", "library"); @@ -4639,6 +4644,7 @@ sub simpleSubstitutions { subst("cublasDsyrk_v2", 
"hipblasDsyrk", "library"); subst("cublasDsyrk_v2_64", "hipblasDsyrk_64", "library"); subst("cublasDsyrkx", "hipblasDsyrkx", "library"); + subst("cublasDsyrkx_64", "hipblasDsyrkx_64", "library"); subst("cublasDtbmv", "hipblasDtbmv", "library"); subst("cublasDtbmv_64", "hipblasDtbmv_64", "library"); subst("cublasDtbmv_v2", "hipblasDtbmv", "library"); @@ -4882,6 +4888,7 @@ sub simpleSubstitutions { subst("cublasSsyrk_v2", "hipblasSsyrk", "library"); subst("cublasSsyrk_v2_64", "hipblasSsyrk_64", "library"); subst("cublasSsyrkx", "hipblasSsyrkx", "library"); + subst("cublasSsyrkx_64", "hipblasSsyrkx_64", "library"); subst("cublasStbmv", "hipblasStbmv", "library"); subst("cublasStbmv_64", "hipblasStbmv_64", "library"); subst("cublasStbmv_v2", "hipblasStbmv", "library"); @@ -5048,6 +5055,7 @@ sub simpleSubstitutions { subst("cublasZsyrk_v2", "hipblasZsyrk_v2", "library"); subst("cublasZsyrk_v2_64", "hipblasZsyrk_v2_64", "library"); subst("cublasZsyrkx", "hipblasZsyrkx_v2", "library"); + subst("cublasZsyrkx_64", "hipblasZsyrkx_v2_64", "library"); subst("cublasZtbmv", "hipblasZtbmv_v2", "library"); subst("cublasZtbmv_64", "hipblasZtbmv_v2_64", "library"); subst("cublasZtbmv_v2", "hipblasZtbmv_v2", "library"); @@ -11611,7 +11619,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZtrmm_v2_64", "cublasZtrmm_64", "cublasZtpttr", - "cublasZsyrkx_64", "cublasZmatinvBatched", "cublasZhemm_v2_64", "cublasZhemm_64", @@ -11638,7 +11645,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasStrmm_v2_64", "cublasStrmm_64", "cublasStpttr", - "cublasSsyrkx_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSgemmGroupedBatched_64", @@ -11736,7 +11742,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDtrmm_v2_64", "cublasDtrmm_64", "cublasDtpttr", - "cublasDsyrkx_64", "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", @@ -11749,7 +11754,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCtrmm_v2_64", "cublasCtrmm_64", "cublasCtpttr", - "cublasCsyrkx_64", 
"cublasCsyrkEx_64", "cublasCsyrkEx", "cublasCsyrk3mEx_64", @@ -13580,7 +13584,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZtrmm_v2_64", "cublasZtrmm_64", "cublasZtpttr", - "cublasZsyrkx_64", "cublasZmatinvBatched", "cublasZhemm_v2_64", "cublasZhemm_64", @@ -13601,7 +13604,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasStrmm_v2_64", "cublasStrmm_64", "cublasStpttr", - "cublasSsyrkx_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSgetrsBatched", @@ -13718,7 +13720,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDtrmm_v2_64", "cublasDtrmm_64", "cublasDtpttr", - "cublasDsyrkx_64", "cublasDmatinvBatched", "cublasDgetrsBatched", "cublasDgetriBatched", @@ -13733,7 +13734,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCtrmm_v2_64", "cublasCtrmm_64", "cublasCtpttr", - "cublasCsyrkx_64", "cublasCsyrkEx_64", "cublasCsyrkEx", "cublasCsyrk3mEx_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 53a9eb0c..87b2663f 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1383,7 +1383,7 @@ |`cublasCsyrk_v2`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | | |`cublasCsyrk_v2_64`|12.0| | | |`hipblasCsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasCsyrkx`| | | | |`hipblasCsyrkx_v2`|6.0.0| | | | | -|`cublasCsyrkx_64`|12.0| | | | | | | | | | +|`cublasCsyrkx_64`|12.0| | | |`hipblasCsyrkx_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | | |`cublasCtrmm_64`|12.0| | | | | | | | | | |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | | @@ -1419,7 +1419,7 @@ |`cublasDsyrk_v2`| | | | |`hipblasDsyrk`|3.5.0| | | | | |`cublasDsyrk_v2_64`|12.0| | | |`hipblasDsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrkx`| | | | |`hipblasDsyrkx`|3.5.0| | | | | -|`cublasDsyrkx_64`|12.0| | | | | | | | | | +|`cublasDsyrkx_64`|12.0| | | |`hipblasDsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | | |`cublasDtrmm_64`|12.0| | | | | | | | | | 
|`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | | @@ -1471,7 +1471,7 @@ |`cublasSsyrk_v2`| | | | |`hipblasSsyrk`|3.5.0| | | | | |`cublasSsyrk_v2_64`|12.0| | | |`hipblasSsyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrkx`| | | | |`hipblasSsyrkx`|3.5.0| | | | | -|`cublasSsyrkx_64`|12.0| | | | | | | | | | +|`cublasSsyrkx_64`|12.0| | | |`hipblasSsyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | | |`cublasStrmm_64`|12.0| | | | | | | | | | |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | | @@ -1529,7 +1529,7 @@ |`cublasZsyrk_v2`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | | |`cublasZsyrk_v2_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0| |`cublasZsyrkx`| | | | |`hipblasZsyrkx_v2`|6.0.0| | | | | -|`cublasZsyrkx_64`|12.0| | | | | | | | | | +|`cublasZsyrkx_64`|12.0| | | |`hipblasZsyrkx_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | | |`cublasZtrmm_64`|12.0| | | | | | | | | | |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index d8944098..beeaca84 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1383,7 +1383,7 @@ |`cublasCsyrk_v2`| | | | |`hipblasCsyrk_v2`|6.0.0| | | | |`rocblas_csyrk`|3.5.0| | | | | |`cublasCsyrk_v2_64`|12.0| | | |`hipblasCsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyrk_64`|6.3.0| | | |6.3.0| |`cublasCsyrkx`| | | | |`hipblasCsyrkx_v2`|6.0.0| | | | |`rocblas_csyrkx`|3.5.0| | | | | -|`cublasCsyrkx_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsyrkx_64`|12.0| | | |`hipblasCsyrkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyrkx_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | |`cublasCtrmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | @@ 
-1419,7 +1419,7 @@ |`cublasDsyrk_v2`| | | | |`hipblasDsyrk`|3.5.0| | | | |`rocblas_dsyrk`|3.5.0| | | | | |`cublasDsyrk_v2_64`|12.0| | | |`hipblasDsyrk_64`|6.3.0| | | |6.3.0|`rocblas_dsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrkx`| | | | |`hipblasDsyrkx`|3.5.0| | | | |`rocblas_dsyrkx`|3.5.0| | | | | -|`cublasDsyrkx_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsyrkx_64`|12.0| | | |`hipblasDsyrkx_64`|6.3.0| | | |6.3.0|`rocblas_dsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | |`cublasDtrmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | @@ -1471,7 +1471,7 @@ |`cublasSsyrk_v2`| | | | |`hipblasSsyrk`|3.5.0| | | | |`rocblas_ssyrk`|3.5.0| | | | | |`cublasSsyrk_v2_64`|12.0| | | |`hipblasSsyrk_64`|6.3.0| | | |6.3.0|`rocblas_ssyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrkx`| | | | |`hipblasSsyrkx`|3.5.0| | | | |`rocblas_ssyrkx`|3.5.0| | | | | -|`cublasSsyrkx_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsyrkx_64`|12.0| | | |`hipblasSsyrkx_64`|6.3.0| | | |6.3.0|`rocblas_ssyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | |`cublasStrmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | @@ -1529,7 +1529,7 @@ |`cublasZsyrk_v2`| | | | |`hipblasZsyrk_v2`|6.0.0| | | | |`rocblas_zsyrk`|3.5.0| | | | | |`cublasZsyrk_v2_64`|12.0| | | |`hipblasZsyrk_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrkx`| | | | |`hipblasZsyrkx_v2`|6.0.0| | | | |`rocblas_zsyrkx`|3.5.0| | | | | -|`cublasZsyrkx_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsyrkx_64`|12.0| | | |`hipblasZsyrkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyrkx_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | |`cublasZtrmm_64`|12.0| | | | | | 
| | | | | | | | | | |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index fbfe9e72..b40f0993 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1383,7 +1383,7 @@ |`cublasCsyrk_v2`| | | | |`rocblas_csyrk`|3.5.0| | | | | |`cublasCsyrk_v2_64`|12.0| | | |`rocblas_csyrk_64`|6.3.0| | | |6.3.0| |`cublasCsyrkx`| | | | |`rocblas_csyrkx`|3.5.0| | | | | -|`cublasCsyrkx_64`|12.0| | | | | | | | | | +|`cublasCsyrkx_64`|12.0| | | |`rocblas_csyrkx_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | |`cublasCtrmm_64`|12.0| | | | | | | | | | |`cublasCtrmm_v2`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | @@ -1419,7 +1419,7 @@ |`cublasDsyrk_v2`| | | | |`rocblas_dsyrk`|3.5.0| | | | | |`cublasDsyrk_v2_64`|12.0| | | |`rocblas_dsyrk_64`|6.3.0| | | |6.3.0| |`cublasDsyrkx`| | | | |`rocblas_dsyrkx`|3.5.0| | | | | -|`cublasDsyrkx_64`|12.0| | | | | | | | | | +|`cublasDsyrkx_64`|12.0| | | |`rocblas_dsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | |`cublasDtrmm_64`|12.0| | | | | | | | | | |`cublasDtrmm_v2`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | @@ -1471,7 +1471,7 @@ |`cublasSsyrk_v2`| | | | |`rocblas_ssyrk`|3.5.0| | | | | |`cublasSsyrk_v2_64`|12.0| | | |`rocblas_ssyrk_64`|6.3.0| | | |6.3.0| |`cublasSsyrkx`| | | | |`rocblas_ssyrkx`|3.5.0| | | | | -|`cublasSsyrkx_64`|12.0| | | | | | | | | | +|`cublasSsyrkx_64`|12.0| | | |`rocblas_ssyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | |`cublasStrmm_64`|12.0| | | | | | | | | | |`cublasStrmm_v2`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | @@ -1529,7 +1529,7 @@ |`cublasZsyrk_v2`| | | | |`rocblas_zsyrk`|3.5.0| | | | | |`cublasZsyrk_v2_64`|12.0| | | |`rocblas_zsyrk_64`|6.3.0| | | |6.3.0| |`cublasZsyrkx`| | | | |`rocblas_zsyrkx`|3.5.0| | | | | 
-|`cublasZsyrkx_64`|12.0| | | | | | | | | | +|`cublasZsyrkx_64`|12.0| | | |`rocblas_zsyrkx_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | |`cublasZtrmm_64`|12.0| | | | | | | | | | |`cublasZtrmm_v2`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 66307011..c015029e 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -503,13 +503,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SYRKX - eXtended SYRK {"cublasSsyrkx", {"hipblasSsyrkx", "rocblas_ssyrkx", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSsyrkx_64", {"hipblasSsyrkx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSsyrkx_64", {"hipblasSsyrkx_64", "rocblas_ssyrkx_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDsyrkx", {"hipblasDsyrkx", "rocblas_dsyrkx", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDsyrkx_64", {"hipblasDsyrkx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDsyrkx_64", {"hipblasDsyrkx_64", "rocblas_dsyrkx_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCsyrkx", {"hipblasCsyrkx_v2", "rocblas_csyrkx", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCsyrkx_64", {"hipblasCsyrkx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCsyrkx_64", {"hipblasCsyrkx_v2_64", "rocblas_csyrkx_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZsyrkx", {"hipblasZsyrkx_v2", "rocblas_zsyrkx", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZsyrkx_64", {"hipblasZsyrkx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZsyrkx_64", {"hipblasZsyrkx_v2_64", "rocblas_zsyrkx_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // HER2K {"cublasCher2k", {"hipblasCher2k_v2", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -2056,6 +2056,10 @@ const 
std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCsyr2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZsyr2k_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSsyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDsyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCsyrkx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZsyrkx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2477,6 +2481,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_csyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zsyr2k_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ssyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dsyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_csyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zsyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 8be907e5..39da4a13 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -3030,6 +3030,26 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: 
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrkx_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, const float* BP, int64_t ldb, const float* beta, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrkx_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, const double* BP, int64_t ldb, const double* beta, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // HIP: 
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrkx_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const hipComplex* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCsyrkx_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrkx_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZsyrkx_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index f0fdcd33..fbe7bed6 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3235,6 +3235,26 @@ int 
main() { // CHECK-NEXT: blasStatus = rocblas_zsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyr2k_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyr2k_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyrkx_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ssyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + blasStatus = cublasSsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &fa, &fA, lda_64, &fB, ldb_64, &fb, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyrkx_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dsyrkx_64(blasHandle, blasFillMode, 
blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + blasStatus = cublasDsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dA, lda_64, &dB, ldb_64, &db, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyrkx_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_csyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasCsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyrkx_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, 
&dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; From 4294a89c33f9846df64e631f9c01de86aef4145e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 31 Oct 2024 12:42:46 +0000 Subject: [PATCH 32/51] [HIPIFY][tests][build] Python `3.13.0` is supported, Python < `3.0.0` is out of support --- CMakeLists.txt | 4 ++-- docs/hipify-clang.rst | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c982e70..689d7328 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -258,9 +258,9 @@ if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY) set (HIPIFY_CLANG_RES "${LLVM_LIBRARY_DIRS}/clang/${LIB_CLANG_RES}") if(${CMAKE_VERSION} VERSION_LESS "3.27.0") - find_package(PythonInterp 2.7 REQUIRED) + find_package(PythonInterp 3.0 REQUIRED) else() - find_package(Python 2.7...3.13 REQUIRED) + find_package(Python 3.0...3.14 REQUIRED) endif() function (require_program PROGRAM_NAME) diff --git a/docs/hipify-clang.rst b/docs/hipify-clang.rst index cc3fd808..010cef86 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -566,7 +566,7 @@ LLVM >= 10.0.0 -DCUDA_CUB_ROOT_DIR=D:/CUDA/CUB -6. Install `Python `_ version 2.7 or greater. +6. Install `Python `_ version 3.0 or greater. 7. Install ``lit`` and ``FileCheck``; these are distributed with LLVM. @@ -648,7 +648,7 @@ Minimum build system requirements for the above configurations: Recommended build system requirements: -* CMake 3.30.4, GNU C/C++ 13.2, Python 3.12.7. +* CMake 3.30.4, GNU C/C++ 13.2, Python 3.13.0. 
Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01``: @@ -692,7 +692,7 @@ The corresponding successful output is: -- - Binary path : /usr/llvm/19.1.2/dist/bin -- Linker detection: GNU ld -- ---- The below configuring for hipify-clang testing only ---- - -- Found Python: /usr/bin/python3.12 (found version "3.12.7") found components: Interpreter + -- Found Python: /usr/bin/python3.13 (found version "3.13.0") found components: Interpreter -- Found lit: /usr/local/bin/lit -- Found FileCheck: /GIT/LLVM/trunk/dist/FileCheck -- Initial CUDA to configure: @@ -728,7 +728,7 @@ The corresponding successful output is: x86_64 - Platform architecture Linux 6.5.0-15-generic - Platform OS 64 - hipify-clang binary bitness - 64 - python 3.12.7 binary bitness + 64 - python 3.13.0 binary bitness =============================================================== -- Testing: 106 tests, 12 threads -- Testing Time: 6.91s @@ -823,13 +823,13 @@ Tested configurations: - ``8.0.5 - 9.5.0`` - ``2019.16.11.40, 2022.17.11.4`` - ``3.30.4`` - - ``3.12.7`` + - ``3.13.0`` * - ``19.1.0 - 19.1.2`` - ``7.0 - 12.6.1`` - ``8.0.5 - 9.5.0`` - ``2019.16.11.40, 2022.17.11.4`` - ``3.30.4`` - - ``3.12.7`` + - ``3.13.0`` :sup:`5` LLVM 14.x.x is the latest major release supporting Visual Studio 2017. 
@@ -888,8 +888,8 @@ The corresponding successful output is: -- - LLVM Include path : D:/LLVM/19.1.2/dist/include -- - Binary path : D:/LLVM/19.1.2/dist/bin -- ---- The below configuring for hipify-clang testing only ---- - -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python312/python.exe (found version "3.12.7") found components: Interpreter - -- Found lit: C:/Users/TT/AppData/Local/Programs/Python/Python312/Scripts/lit.exe + -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python313/python.exe (found version "3.13.0") found components: Interpreter + -- Found lit: C:/Users/TT/AppData/Local/Programs/Python/Python313/Scripts/lit.exe -- Found FileCheck: D:/LLVM/19.1.2/dist/bin/FileCheck.exe -- Initial CUDA to configure: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6 From 38be7d4b626dd029f126e6747b800c4ad81dca8d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 31 Oct 2024 13:10:15 +0000 Subject: [PATCH 33/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 10 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 96 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 48 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 48 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 48 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 96 +++++++++++++++++++ 5 files changed, 336 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 88f74bff..bf72c2d4 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12625,14 +12625,49 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_64x80", "CUBLASLT_MATMUL_TILE_64x8", "CUBLASLT_MATMUL_TILE_64x768", + "CUBLASLT_MATMUL_TILE_64x760", + "CUBLASLT_MATMUL_TILE_64x752", + "CUBLASLT_MATMUL_TILE_64x744", + "CUBLASLT_MATMUL_TILE_64x736", + "CUBLASLT_MATMUL_TILE_64x728", + "CUBLASLT_MATMUL_TILE_64x720", "CUBLASLT_MATMUL_TILE_64x72", + "CUBLASLT_MATMUL_TILE_64x712", 
"CUBLASLT_MATMUL_TILE_64x704", + "CUBLASLT_MATMUL_TILE_64x696", + "CUBLASLT_MATMUL_TILE_64x688", + "CUBLASLT_MATMUL_TILE_64x680", + "CUBLASLT_MATMUL_TILE_64x672", + "CUBLASLT_MATMUL_TILE_64x664", + "CUBLASLT_MATMUL_TILE_64x656", + "CUBLASLT_MATMUL_TILE_64x648", "CUBLASLT_MATMUL_TILE_64x640", "CUBLASLT_MATMUL_TILE_64x64", + "CUBLASLT_MATMUL_TILE_64x632", + "CUBLASLT_MATMUL_TILE_64x624", + "CUBLASLT_MATMUL_TILE_64x616", + "CUBLASLT_MATMUL_TILE_64x608", + "CUBLASLT_MATMUL_TILE_64x600", + "CUBLASLT_MATMUL_TILE_64x592", + "CUBLASLT_MATMUL_TILE_64x584", "CUBLASLT_MATMUL_TILE_64x576", + "CUBLASLT_MATMUL_TILE_64x568", + "CUBLASLT_MATMUL_TILE_64x560", "CUBLASLT_MATMUL_TILE_64x56", + "CUBLASLT_MATMUL_TILE_64x552", + "CUBLASLT_MATMUL_TILE_64x544", + "CUBLASLT_MATMUL_TILE_64x536", + "CUBLASLT_MATMUL_TILE_64x528", + "CUBLASLT_MATMUL_TILE_64x520", "CUBLASLT_MATMUL_TILE_64x512", + "CUBLASLT_MATMUL_TILE_64x504", + "CUBLASLT_MATMUL_TILE_64x496", + "CUBLASLT_MATMUL_TILE_64x488", + "CUBLASLT_MATMUL_TILE_64x480", "CUBLASLT_MATMUL_TILE_64x48", + "CUBLASLT_MATMUL_TILE_64x472", + "CUBLASLT_MATMUL_TILE_64x464", + "CUBLASLT_MATMUL_TILE_64x456", "CUBLASLT_MATMUL_TILE_64x448", "CUBLASLT_MATMUL_TILE_64x440", "CUBLASLT_MATMUL_TILE_64x432", @@ -12884,14 +12919,27 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_136x192", "CUBLASLT_MATMUL_TILE_136x128", "CUBLASLT_MATMUL_TILE_128x96", + "CUBLASLT_MATMUL_TILE_128x88", + "CUBLASLT_MATMUL_TILE_128x80", + "CUBLASLT_MATMUL_TILE_128x8", + "CUBLASLT_MATMUL_TILE_128x72", "CUBLASLT_MATMUL_TILE_128x64", + "CUBLASLT_MATMUL_TILE_128x56", + "CUBLASLT_MATMUL_TILE_128x48", + "CUBLASLT_MATMUL_TILE_128x40", "CUBLASLT_MATMUL_TILE_128x384", "CUBLASLT_MATMUL_TILE_128x320", "CUBLASLT_MATMUL_TILE_128x32", "CUBLASLT_MATMUL_TILE_128x256", + "CUBLASLT_MATMUL_TILE_128x24", "CUBLASLT_MATMUL_TILE_128x192", "CUBLASLT_MATMUL_TILE_128x160", + "CUBLASLT_MATMUL_TILE_128x16", + "CUBLASLT_MATMUL_TILE_128x136", "CUBLASLT_MATMUL_TILE_128x128", + 
"CUBLASLT_MATMUL_TILE_128x120", + "CUBLASLT_MATMUL_TILE_128x112", + "CUBLASLT_MATMUL_TILE_128x104", "CUBLASLT_MATMUL_TILE_120x64", "CUBLASLT_MATMUL_TILE_120x384", "CUBLASLT_MATMUL_TILE_120x320", @@ -14358,14 +14406,49 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_64x80", "CUBLASLT_MATMUL_TILE_64x8", "CUBLASLT_MATMUL_TILE_64x768", + "CUBLASLT_MATMUL_TILE_64x760", + "CUBLASLT_MATMUL_TILE_64x752", + "CUBLASLT_MATMUL_TILE_64x744", + "CUBLASLT_MATMUL_TILE_64x736", + "CUBLASLT_MATMUL_TILE_64x728", + "CUBLASLT_MATMUL_TILE_64x720", "CUBLASLT_MATMUL_TILE_64x72", + "CUBLASLT_MATMUL_TILE_64x712", "CUBLASLT_MATMUL_TILE_64x704", + "CUBLASLT_MATMUL_TILE_64x696", + "CUBLASLT_MATMUL_TILE_64x688", + "CUBLASLT_MATMUL_TILE_64x680", + "CUBLASLT_MATMUL_TILE_64x672", + "CUBLASLT_MATMUL_TILE_64x664", + "CUBLASLT_MATMUL_TILE_64x656", + "CUBLASLT_MATMUL_TILE_64x648", "CUBLASLT_MATMUL_TILE_64x640", "CUBLASLT_MATMUL_TILE_64x64", + "CUBLASLT_MATMUL_TILE_64x632", + "CUBLASLT_MATMUL_TILE_64x624", + "CUBLASLT_MATMUL_TILE_64x616", + "CUBLASLT_MATMUL_TILE_64x608", + "CUBLASLT_MATMUL_TILE_64x600", + "CUBLASLT_MATMUL_TILE_64x592", + "CUBLASLT_MATMUL_TILE_64x584", "CUBLASLT_MATMUL_TILE_64x576", + "CUBLASLT_MATMUL_TILE_64x568", + "CUBLASLT_MATMUL_TILE_64x560", "CUBLASLT_MATMUL_TILE_64x56", + "CUBLASLT_MATMUL_TILE_64x552", + "CUBLASLT_MATMUL_TILE_64x544", + "CUBLASLT_MATMUL_TILE_64x536", + "CUBLASLT_MATMUL_TILE_64x528", + "CUBLASLT_MATMUL_TILE_64x520", "CUBLASLT_MATMUL_TILE_64x512", + "CUBLASLT_MATMUL_TILE_64x504", + "CUBLASLT_MATMUL_TILE_64x496", + "CUBLASLT_MATMUL_TILE_64x488", + "CUBLASLT_MATMUL_TILE_64x480", "CUBLASLT_MATMUL_TILE_64x48", + "CUBLASLT_MATMUL_TILE_64x472", + "CUBLASLT_MATMUL_TILE_64x464", + "CUBLASLT_MATMUL_TILE_64x456", "CUBLASLT_MATMUL_TILE_64x448", "CUBLASLT_MATMUL_TILE_64x440", "CUBLASLT_MATMUL_TILE_64x432", @@ -14617,14 +14700,27 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_136x192", "CUBLASLT_MATMUL_TILE_136x128", "CUBLASLT_MATMUL_TILE_128x96", 
+ "CUBLASLT_MATMUL_TILE_128x88", + "CUBLASLT_MATMUL_TILE_128x80", + "CUBLASLT_MATMUL_TILE_128x8", + "CUBLASLT_MATMUL_TILE_128x72", "CUBLASLT_MATMUL_TILE_128x64", + "CUBLASLT_MATMUL_TILE_128x56", + "CUBLASLT_MATMUL_TILE_128x48", + "CUBLASLT_MATMUL_TILE_128x40", "CUBLASLT_MATMUL_TILE_128x384", "CUBLASLT_MATMUL_TILE_128x320", "CUBLASLT_MATMUL_TILE_128x32", "CUBLASLT_MATMUL_TILE_128x256", + "CUBLASLT_MATMUL_TILE_128x24", "CUBLASLT_MATMUL_TILE_128x192", "CUBLASLT_MATMUL_TILE_128x160", + "CUBLASLT_MATMUL_TILE_128x16", + "CUBLASLT_MATMUL_TILE_128x136", "CUBLASLT_MATMUL_TILE_128x128", + "CUBLASLT_MATMUL_TILE_128x120", + "CUBLASLT_MATMUL_TILE_128x112", + "CUBLASLT_MATMUL_TILE_128x104", "CUBLASLT_MATMUL_TILE_120x64", "CUBLASLT_MATMUL_TILE_120x384", "CUBLASLT_MATMUL_TILE_120x320", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 87b2663f..6ac39220 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -341,14 +341,27 @@ |`CUBLASLT_MATMUL_TILE_120x320`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_120x384`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_120x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x16`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x24`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x40`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_128x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x88`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x192`|12.6| | | | | | | | | | @@ -600,14 +613,49 @@ |`CUBLASLT_MATMUL_TILE_64x432`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x440`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x456`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x464`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x472`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x480`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x488`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x496`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x504`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x520`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x528`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x536`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x544`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x552`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x56`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x560`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x568`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x584`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x592`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x600`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x608`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_64x616`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x624`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x632`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x648`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x656`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x664`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x672`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x680`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x688`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x696`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x712`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x720`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x728`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x736`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x744`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x752`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x760`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x80`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index beeaca84..fe25a76f 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -341,14 +341,27 @@ |`CUBLASLT_MATMUL_TILE_120x320`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_120x384`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_120x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x120`|12.6| | 
| | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x136`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x16`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x24`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x88`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x192`|12.6| | | | | | | | | | | | | | | | @@ -600,14 +613,49 @@ |`CUBLASLT_MATMUL_TILE_64x432`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x440`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x456`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x464`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x472`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x480`|12.6| | | | | | | | | 
| | | | | | | +|`CUBLASLT_MATMUL_TILE_64x488`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x496`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x504`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x520`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x528`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x536`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x544`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x552`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x56`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x560`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x568`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x584`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x592`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x600`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x608`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x616`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x624`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x632`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x648`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x656`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x664`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x672`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x680`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x688`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x696`|12.6| | | | | | | | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x712`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x720`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x728`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x736`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x744`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x752`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x760`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x80`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index b40f0993..d37445ff 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -341,14 +341,27 @@ |`CUBLASLT_MATMUL_TILE_120x320`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_120x384`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_120x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x16`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x24`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x40`|12.6| | | 
| | | | | | | +|`CUBLASLT_MATMUL_TILE_128x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x88`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x96`|11.8| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_136x192`|12.6| | | | | | | | | | @@ -600,14 +613,49 @@ |`CUBLASLT_MATMUL_TILE_64x432`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x440`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x456`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x464`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x472`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x480`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x488`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x496`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x504`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x512`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x520`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x528`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x536`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x544`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x552`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x56`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x560`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x568`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x576`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x584`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x592`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x600`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x608`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_64x616`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x624`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x632`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x640`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x648`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x656`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x664`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x672`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x680`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x688`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x696`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x704`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x712`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x720`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x728`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x736`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x744`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x752`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_64x760`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x8`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x80`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index d77ed8eb..58f8284c 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -561,6 +561,54 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_64x424", {"HIPBLASLT_MATMUL_TILE_64x424", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_64x432", {"HIPBLASLT_MATMUL_TILE_64x432", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_64x440", {"HIPBLASLT_MATMUL_TILE_64x440", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x456", {"HIPBLASLT_MATMUL_TILE_64x456", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x464", {"HIPBLASLT_MATMUL_TILE_64x464", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x472", {"HIPBLASLT_MATMUL_TILE_64x472", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x480", {"HIPBLASLT_MATMUL_TILE_64x480", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x488", {"HIPBLASLT_MATMUL_TILE_64x488", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x496", {"HIPBLASLT_MATMUL_TILE_64x496", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x504", {"HIPBLASLT_MATMUL_TILE_64x504", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x520", {"HIPBLASLT_MATMUL_TILE_64x520", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x528", {"HIPBLASLT_MATMUL_TILE_64x528", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x536", {"HIPBLASLT_MATMUL_TILE_64x536", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x544", {"HIPBLASLT_MATMUL_TILE_64x544", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x552", {"HIPBLASLT_MATMUL_TILE_64x552", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x560", {"HIPBLASLT_MATMUL_TILE_64x560", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x568", {"HIPBLASLT_MATMUL_TILE_64x568", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x584", {"HIPBLASLT_MATMUL_TILE_64x584", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x592", {"HIPBLASLT_MATMUL_TILE_64x592", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x600", {"HIPBLASLT_MATMUL_TILE_64x600", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x608", {"HIPBLASLT_MATMUL_TILE_64x608", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x616", {"HIPBLASLT_MATMUL_TILE_64x616", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x624", {"HIPBLASLT_MATMUL_TILE_64x624", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x632", {"HIPBLASLT_MATMUL_TILE_64x632", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x648", {"HIPBLASLT_MATMUL_TILE_64x648", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x656", {"HIPBLASLT_MATMUL_TILE_64x656", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x664", {"HIPBLASLT_MATMUL_TILE_64x664", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x672", {"HIPBLASLT_MATMUL_TILE_64x672", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x680", {"HIPBLASLT_MATMUL_TILE_64x680", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x688", {"HIPBLASLT_MATMUL_TILE_64x688", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x696", {"HIPBLASLT_MATMUL_TILE_64x696", "", CONV_NUMERIC_LITERAL, 
API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x712", {"HIPBLASLT_MATMUL_TILE_64x712", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x720", {"HIPBLASLT_MATMUL_TILE_64x720", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x728", {"HIPBLASLT_MATMUL_TILE_64x728", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x736", {"HIPBLASLT_MATMUL_TILE_64x736", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x744", {"HIPBLASLT_MATMUL_TILE_64x744", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x752", {"HIPBLASLT_MATMUL_TILE_64x752", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_64x760", {"HIPBLASLT_MATMUL_TILE_64x760", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x8", {"HIPBLASLT_MATMUL_TILE_128x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x16", {"HIPBLASLT_MATMUL_TILE_128x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x24", {"HIPBLASLT_MATMUL_TILE_128x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x40", {"HIPBLASLT_MATMUL_TILE_128x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x48", {"HIPBLASLT_MATMUL_TILE_128x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x56", {"HIPBLASLT_MATMUL_TILE_128x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x72", {"HIPBLASLT_MATMUL_TILE_128x72", "", CONV_NUMERIC_LITERAL, 
API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x80", {"HIPBLASLT_MATMUL_TILE_128x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x88", {"HIPBLASLT_MATMUL_TILE_128x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x104", {"HIPBLASLT_MATMUL_TILE_128x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x112", {"HIPBLASLT_MATMUL_TILE_128x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x120", {"HIPBLASLT_MATMUL_TILE_128x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x136", {"HIPBLASLT_MATMUL_TILE_128x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1539,6 +1587,54 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_64x424", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_64x432", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_64x440", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x456", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, 
CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x464", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x472", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x480", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x488", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x496", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x504", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x520", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x528", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x536", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x544", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x552", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x560", 
{CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x568", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x584", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x592", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x600", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x608", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x616", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x624", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x632", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x648", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x656", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x664", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 
CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x672", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x680", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x688", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x696", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x712", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x720", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x728", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x736", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x744", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x752", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_64x760", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, 
CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x136", 
{CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From 65efca603316aab14d8a10bdbf5b11cf96eb981e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 1 Nov 2024 17:01:50 +0000 Subject: [PATCH 34/51] [HIPIFY][DNN][doc] cuDNN 9.5.1 is the latest supported version --- docs/hipify-clang.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/hipify-clang.rst b/docs/hipify-clang.rst index 010cef86..153b86dc 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -547,7 +547,7 @@ LLVM >= 10.0.0 .. code-block:: shell - -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.5.0 + -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.5.1 5. [Optional] Install `CUB 1.9.8 `_ for ``CUDA < 11.0`` only; for ``CUDA >= 11.0``, the CUB shipped with CUDA will be used for testing. @@ -639,8 +639,8 @@ On Linux, the following configurations are tested: * Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 * Ubuntu 16-19: LLVM 8.0.0 - 14.0.6, CUDA 7.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -* Ubuntu 20-21: LLVM 9.0.0 - 19.1.2, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.5.0 -* Ubuntu 22-23: LLVM 13.0.0 - 19.1.2, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.5.0 +* Ubuntu 20-21: LLVM 9.0.0 - 19.1.2, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.5.1 +* Ubuntu 22-23: LLVM 13.0.0 - 19.1.2, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.5.1 Minimum build system requirements for the above configurations: @@ -660,7 +660,7 @@ Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01`` -DCMAKE_INSTALL_PREFIX=../dist \ -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.2/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.6.1 \ - -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.5.0 \ + -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.5.1 \ -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.2/build/bin/llvm-lit \ ../hipify @@ -698,7 +698,7 @@ The corresponding successful output is: -- Initial CUDA to configure: -- 
- CUDA Toolkit path : /usr/local/cuda-12.6.1 -- - CUDA Samples path : - -- - cuDNN path : /usr/local/cudnn-9.5.0 + -- - cuDNN path : /usr/local/cudnn-9.5.1 -- - CUB path : -- Found CUDAToolkit: /usr/local/cuda-12.6.1/targets/x86_64-linux/include (found version "12.6.68") -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -707,7 +707,7 @@ The corresponding successful output is: -- Found CUDA config: -- - CUDA Toolkit path : /usr/local/cuda-12.6.1 -- - CUDA Samples path : OFF - -- - cuDNN path : /usr/local/cudnn-9.5.0 + -- - cuDNN path : /usr/local/cudnn-9.5.1 -- - CUB path : /usr/local/cuda-12.6.1/include/cub -- Configuring done (0.5s) -- Generating done (0.0s) @@ -820,13 +820,13 @@ Tested configurations: - ``3.11.4`` * - ``17.0.1`` :sup:`6` - ``18.1.8`` :sup:`7` - ``7.0 - 12.3.2`` - - ``8.0.5 - 9.5.0`` + - ``8.0.5 - 9.5.1`` - ``2019.16.11.40, 2022.17.11.4`` - ``3.30.4`` - ``3.13.0`` * - ``19.1.0 - 19.1.2`` - ``7.0 - 12.6.1`` - - ``8.0.5 - 9.5.0`` + - ``8.0.5 - 9.5.1`` - ``2019.16.11.40, 2022.17.11.4`` - ``3.30.4`` - ``3.13.0`` @@ -857,7 +857,7 @@ Building with testing support using ``Visual Studio 17 2022`` on ``Windows 11``: -DCMAKE_PREFIX_PATH=D:/LLVM/19.1.2/dist \ -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6" \ -DCUDA_SDK_ROOT_DIR="C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5" \ - -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.5.0 \ + -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.5.1 \ -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.2/build/Release/bin/llvm-lit.py \ ../hipify @@ -894,13 +894,13 @@ The corresponding successful output is: -- Initial CUDA to configure: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5 - -- - cuDNN path : D:/CUDA/cuDNN/9.5.0 + -- - cuDNN path : D:/CUDA/cuDNN/9.5.1 -- - CUB path : -- Found CUDAToolkit: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6/include (found version "12.6.68") -- Found CUDA config: -- - 
CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5 - -- - cuDNN path : D:/CUDA/cuDNN/9.5.0 + -- - cuDNN path : D:/CUDA/cuDNN/9.5.1 -- - CUB path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6/include/cub -- Configuring done (2.1s) -- Generating done (0.1s) From 35c2891daa0e00e32a349abaf29fd1dd7d60267a Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 1 Nov 2024 19:01:50 +0000 Subject: [PATCH 35/51] [HIPIFY][doc] `LLVM 19.1.3` is the latest supported LLVM release + No patches are needed + Updated the `README.md` accordingly + `hipify-clang` built with `LLVM 19.1.3` works correctly with the latest supported `CUDA 12.6.1`, even though clang may report that `CUDA 12.6.1` is not fully supported + Tested on `Windows 11` (`VS 2019` and `VS 2022`) and `Ubuntu 23.10` --- docs/hipify-clang.rst | 63 ++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/docs/hipify-clang.rst b/docs/hipify-clang.rst index 153b86dc..acb73b44 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -37,7 +37,7 @@ Dependencies * `LLVM+Clang `_ of at least version `4.0.0 `_; the latest stable and recommended release: - `19.1.2 `_. + `19.1.3 `_. * `CUDA `_ of at least version `7.0 `_, the latest supported version is @@ -186,7 +186,8 @@ Dependencies - ✅ * - `19.1.0 `_, `19.1.1 `_, - `19.1.2 `_:sup:`4` + `19.1.2 `_, + `19.1.3 `_:sup:`4` - `12.6.1 `_:sup:`4` - **Latest stable config** - **Latest stable config** @@ -229,7 +230,7 @@ Dependencies In most cases, you can get a suitable version of ``LLVM+Clang`` with your package manager. However, you can also `download a release archive `_ and build or install it. In case of multiple versions of ``LLVM`` installed, set `CMAKE_PREFIX_PATH `_ so that -``CMake`` can find the desired version of ``LLVM``. For example, ``-DCMAKE_PREFIX_PATH=D:\LLVM\19.1.2\dist``. 
+``CMake`` can find the desired version of ``LLVM``. For example, ``-DCMAKE_PREFIX_PATH=D:\LLVM\19.1.3\dist``. Usage ============================================================ @@ -262,7 +263,7 @@ header files used during the hipification process: .. code:: shell - ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.6 --clang-resource-directory=/usr/llvm/19.1.2/dist/lib/clang/19 + ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.6 --clang-resource-directory=/usr/llvm/19.1.3/dist/lib/clang/19 For more information, refer to the `Clang manual for compiling CUDA `_. @@ -399,7 +400,7 @@ To ensure LLVM being found or in case of multiple LLVM instances, specify the pa .. code-block:: bash - -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.2/dist + -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.3/dist On Windows, specify the following option for CMake in the first place: ``-G "Visual Studio 17 2022"``. @@ -473,7 +474,7 @@ LLVM <= 9.0.1 LLVM >= 10.0.0 ----------------- -1. Download `LLVM project `_ sources. +1. Download `LLVM project `_ sources. 2. Build `LLVM project `_: @@ -576,13 +577,13 @@ LLVM >= 10.0.0 .. code-block:: bash - python /usr/llvm/19.1.2/llvm-project/llvm/utils/lit/setup.py install + python /usr/llvm/19.1.3/llvm-project/llvm/utils/lit/setup.py install **Windows**: .. code-block:: shell - python D:/LLVM/19.1.2/llvm-project/llvm/utils/lit/setup.py install + python D:/LLVM/19.1.3/llvm-project/llvm/utils/lit/setup.py install In case of errors similar to ``ModuleNotFoundError: No module named 'setuptools'``, upgrade the ``setuptools`` package: @@ -596,23 +597,23 @@ LLVM >= 10.0.0 .. code-block:: bash - -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.2/build/bin/llvm-lit + -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.3/build/bin/llvm-lit **Windows**: .. 
code-block:: shell - -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.2/build/Release/bin/llvm-lit.py + -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.3/build/Release/bin/llvm-lit.py * ``FileCheck``: **Linux**: - Copy from ``/usr/llvm/19.1.2/build/bin/`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. + Copy from ``/usr/llvm/19.1.3/build/bin/`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. **Windows**: - Copy from ``D:/LLVM/19.1.2/build/Release/bin`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. + Copy from ``D:/LLVM/19.1.3/build/Release/bin`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. Alternatively, specify the path to ``FileCheck`` in the ``CMAKE_INSTALL_PREFIX`` option. @@ -639,8 +640,8 @@ On Linux, the following configurations are tested: * Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 * Ubuntu 16-19: LLVM 8.0.0 - 14.0.6, CUDA 7.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -* Ubuntu 20-21: LLVM 9.0.0 - 19.1.2, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.5.1 -* Ubuntu 22-23: LLVM 13.0.0 - 19.1.2, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.5.1 +* Ubuntu 20-21: LLVM 9.0.0 - 19.1.3, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.5.1 +* Ubuntu 22-23: LLVM 13.0.0 - 19.1.3, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.5.1 Minimum build system requirements for the above configurations: @@ -658,10 +659,10 @@ Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01`` -DHIPIFY_CLANG_TESTS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.2/dist \ + -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.3/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.6.1 \ -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.5.1 \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.2/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.3/build/bin/llvm-lit \ ../hipify The corresponding successful output is: @@ -685,11 +686,11 @@ The corresponding successful output is: -- - Test hipify-clang : ON -- - Is part of HIP SDK : OFF -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.13") - -- Found LLVM 19.1.2: - -- - CMake module 
path : /usr/llvm/19.1.2/dist/lib/cmake/llvm - -- - Clang include path : /usr/llvm/19.1.2/dist/include - -- - LLVM Include path : /usr/llvm/19.1.2/dist/include - -- - Binary path : /usr/llvm/19.1.2/dist/bin + -- Found LLVM 19.1.3: + -- - CMake module path : /usr/llvm/19.1.3/dist/lib/cmake/llvm + -- - Clang include path : /usr/llvm/19.1.3/dist/include + -- - LLVM Include path : /usr/llvm/19.1.3/dist/include + -- - Binary path : /usr/llvm/19.1.3/dist/bin -- Linker detection: GNU ld -- ---- The below configuring for hipify-clang testing only ---- -- Found Python: /usr/bin/python3.13 (found version "3.13.0") found components: Interpreter @@ -724,7 +725,7 @@ The corresponding successful output is: Running HIPify regression tests =============================================================== CUDA 12.6.68 - will be used for testing - LLVM 19.1.2 - will be used for testing + LLVM 19.1.3 - will be used for testing x86_64 - Platform architecture Linux 6.5.0-15-generic - Platform OS 64 - hipify-clang binary bitness @@ -824,7 +825,7 @@ Tested configurations: - ``2019.16.11.40, 2022.17.11.4`` - ``3.30.4`` - ``3.13.0`` - * - ``19.1.0 - 19.1.2`` + * - ``19.1.0 - 19.1.3`` - ``7.0 - 12.6.1`` - ``8.0.5 - 9.5.1`` - ``2019.16.11.40, 2022.17.11.4`` @@ -854,11 +855,11 @@ Building with testing support using ``Visual Studio 17 2022`` on ``Windows 11``: -DHIPIFY_CLANG_TESTS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=D:/LLVM/19.1.2/dist \ + -DCMAKE_PREFIX_PATH=D:/LLVM/19.1.3/dist \ -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6" \ -DCUDA_SDK_ROOT_DIR="C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5" \ -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.5.1 \ - -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.2/build/Release/bin/llvm-lit.py \ + -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.3/build/Release/bin/llvm-lit.py \ ../hipify The corresponding successful output is: @@ -882,15 +883,15 @@ The corresponding successful output is: -- - Build 
hipify-clang : ON -- - Test hipify-clang : ON -- - Is part of HIP SDK : OFF - -- Found LLVM 19.1.2: - -- - CMake module path : D:/LLVM/19.1.2/dist/lib/cmake/llvm - -- - Clang include path : D:/LLVM/19.1.2/dist/include - -- - LLVM Include path : D:/LLVM/19.1.2/dist/include - -- - Binary path : D:/LLVM/19.1.2/dist/bin + -- Found LLVM 19.1.3: + -- - CMake module path : D:/LLVM/19.1.3/dist/lib/cmake/llvm + -- - Clang include path : D:/LLVM/19.1.3/dist/include + -- - LLVM Include path : D:/LLVM/19.1.3/dist/include + -- - Binary path : D:/LLVM/19.1.3/dist/bin -- ---- The below configuring for hipify-clang testing only ---- -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python313/python.exe (found version "3.13.0") found components: Interpreter -- Found lit: C:/Users/TT/AppData/Local/Programs/Python/Python313/Scripts/lit.exe - -- Found FileCheck: D:/LLVM/19.1.2/dist/bin/FileCheck.exe + -- Found FileCheck: D:/LLVM/19.1.3/dist/bin/FileCheck.exe -- Initial CUDA to configure: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5 From 6a205bd710dde58578d7cd14a6dedfb07115b482 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 4 Nov 2024 14:16:48 +0000 Subject: [PATCH 36/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 10 + `rocblas_(s|d|c|z)geam_64` and `hipblas(S|D|C|Z)geam(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++++-------- docs/tables/CUBLAS_API_supported_by_HIP.md | 8 ++++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 8 ++++---- docs/tables/CUBLAS_API_supported_by_ROC.md | 8 ++++---- src/CUDA2HIP_BLAS_API_functions.cpp | 16 +++++++++++---- .../synthetic/libraries/cublas2hipblas_v2.cu | 20 +++++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 20 +++++++++++++++++++ 7 files changed, 72 insertions(+), 24 deletions(-) diff 
--git a/bin/hipify-perl b/bin/hipify-perl index bf72c2d4..d6ae392a 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1602,6 +1602,7 @@ sub rocSubstitutions { subst("cublasCgbmv_v2", "rocblas_cgbmv", "library"); subst("cublasCgbmv_v2_64", "rocblas_cgbmv_64", "library"); subst("cublasCgeam", "rocblas_cgeam", "library"); + subst("cublasCgeam_64", "rocblas_cgeam_64", "library"); subst("cublasCgemm", "rocblas_cgemm", "library"); subst("cublasCgemmBatched", "rocblas_cgemm_batched", "library"); subst("cublasCgemmBatched_64", "rocblas_cgemm_batched_64", "library"); @@ -1772,6 +1773,7 @@ sub rocSubstitutions { subst("cublasDgbmv_v2", "rocblas_dgbmv", "library"); subst("cublasDgbmv_v2_64", "rocblas_dgbmv_64", "library"); subst("cublasDgeam", "rocblas_dgeam", "library"); + subst("cublasDgeam_64", "rocblas_dgeam_64", "library"); subst("cublasDgemm", "rocblas_dgemm", "library"); subst("cublasDgemmBatched", "rocblas_dgemm_batched", "library"); subst("cublasDgemmBatched_64", "rocblas_dgemm_batched_64", "library"); @@ -2009,6 +2011,7 @@ sub rocSubstitutions { subst("cublasSgbmv_v2", "rocblas_sgbmv", "library"); subst("cublasSgbmv_v2_64", "rocblas_sgbmv_64", "library"); subst("cublasSgeam", "rocblas_sgeam", "library"); + subst("cublasSgeam_64", "rocblas_sgeam_64", "library"); subst("cublasSgemm", "rocblas_sgemm", "library"); subst("cublasSgemmBatched", "rocblas_sgemm_batched", "library"); subst("cublasSgemmBatched_64", "rocblas_sgemm_batched_64", "library"); @@ -2165,6 +2168,7 @@ sub rocSubstitutions { subst("cublasZgbmv_v2", "rocblas_zgbmv", "library"); subst("cublasZgbmv_v2_64", "rocblas_zgbmv_64", "library"); subst("cublasZgeam", "rocblas_zgeam", "library"); + subst("cublasZgeam_64", "rocblas_zgeam_64", "library"); subst("cublasZgemm", "rocblas_zgemm", "library"); subst("cublasZgemmBatched", "rocblas_zgemm_batched", "library"); subst("cublasZgemmBatched_64", "rocblas_zgemm_batched_64", "library"); @@ -4380,6 +4384,7 @@ sub simpleSubstitutions { subst("cublasCgbmv_v2", 
"hipblasCgbmv_v2", "library"); subst("cublasCgbmv_v2_64", "hipblasCgbmv_v2_64", "library"); subst("cublasCgeam", "hipblasCgeam_v2", "library"); + subst("cublasCgeam_64", "hipblasCgeam_v2_64", "library"); subst("cublasCgelsBatched", "hipblasCgelsBatched_v2", "library"); subst("cublasCgemm", "hipblasCgemm_v2", "library"); subst("cublasCgemmBatched", "hipblasCgemmBatched_v2", "library"); @@ -4551,6 +4556,7 @@ sub simpleSubstitutions { subst("cublasDgbmv_v2", "hipblasDgbmv", "library"); subst("cublasDgbmv_v2_64", "hipblasDgbmv_64", "library"); subst("cublasDgeam", "hipblasDgeam", "library"); + subst("cublasDgeam_64", "hipblasDgeam_64", "library"); subst("cublasDgelsBatched", "hipblasDgelsBatched", "library"); subst("cublasDgemm", "hipblasDgemm", "library"); subst("cublasDgemmBatched", "hipblasDgemmBatched", "library"); @@ -4799,6 +4805,7 @@ sub simpleSubstitutions { subst("cublasSgbmv_v2", "hipblasSgbmv", "library"); subst("cublasSgbmv_v2_64", "hipblasSgbmv_64", "library"); subst("cublasSgeam", "hipblasSgeam", "library"); + subst("cublasSgeam_64", "hipblasSgeam_64", "library"); subst("cublasSgelsBatched", "hipblasSgelsBatched", "library"); subst("cublasSgemm", "hipblasSgemm", "library"); subst("cublasSgemmBatched", "hipblasSgemmBatched", "library"); @@ -4948,6 +4955,7 @@ sub simpleSubstitutions { subst("cublasZgbmv_v2", "hipblasZgbmv_v2", "library"); subst("cublasZgbmv_v2_64", "hipblasZgbmv_v2_64", "library"); subst("cublasZgeam", "hipblasZgeam_v2", "library"); + subst("cublasZgeam_64", "hipblasZgeam_v2_64", "library"); subst("cublasZgelsBatched", "hipblasZgelsBatched_v2", "library"); subst("cublasZgemm", "hipblasZgemm_v2", "library"); subst("cublasZgemmBatched", "hipblasZgemmBatched_v2", "library"); @@ -11624,7 +11632,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZhemm_64", "cublasZgemm3m_64", "cublasZgemm3m", - "cublasZgeam_64", "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", @@ -11651,7 +11658,6 @@ sub warnHipOnlyUnsupportedFunctions { 
"cublasSgemmGroupedBatched", "cublasSgemmEx_64", "cublasSgemmEx", - "cublasSgeam_64", "cublasSetVector_64", "cublasSetVectorAsync_64", "cublasSetSmCountTarget", @@ -11745,7 +11751,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", - "cublasDgeam_64", "cublasDdgmm_64", "cublasCtrttp", "cublasCtrsm_v2_64", @@ -11778,7 +11783,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCgemm3mBatched_64", "cublasCgemm3mBatched", "cublasCgemm3m", - "cublasCgeam_64", "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", @@ -13642,7 +13646,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgemm3m_64", "cublasZgemm3m", "cublasZgelsBatched", - "cublasZgeam_64", "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", @@ -13663,7 +13666,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgemmEx_64", "cublasSgemmEx", "cublasSgelsBatched", - "cublasSgeam_64", "cublasSetVector_64", "cublasSetVectorAsync_64", "cublasSetSmCountTarget", @@ -13776,7 +13778,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", "cublasDgelsBatched", - "cublasDgeam_64", "cublasDdgmm_64", "cublasCtrttp", "cublasCtrmm_v2_64", @@ -13810,7 +13811,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgemm3mBatched", "cublasCgemm3m", "cublasCgelsBatched", - "cublasCgeam_64", "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 6ac39220..b8fa8820 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1598,7 +1598,7 @@ |`cublasCdgmm`| | | | |`hipblasCdgmm_v2`|6.0.0| | | | | |`cublasCdgmm_64`|12.0| | | | | | | | | | |`cublasCgeam`| | | | |`hipblasCgeam_v2`|6.0.0| | | | | -|`cublasCgeam_64`|12.0| | | | | | | | | | +|`cublasCgeam_64`|12.0| | | |`hipblasCgeam_v2_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | |`hipblasCgelsBatched_v2`|6.0.0| 
| | | | |`cublasCgemmEx`|8.0| | | | | | | | | | |`cublasCgemmEx_64`|12.0| | | | | | | | | | @@ -1624,7 +1624,7 @@ |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | | |`cublasDdgmm_64`|12.0| | | | | | | | | | |`cublasDgeam`| | | | |`hipblasDgeam`|1.8.2| | | | | -|`cublasDgeam_64`|12.0| | | | | | | | | | +|`cublasDgeam_64`|12.0| | | |`hipblasDgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | |`hipblasDgelsBatched`|5.4.0| | | | | |`cublasDgeqrfBatched`| | | | |`hipblasDgeqrfBatched`|3.5.0| | | | | |`cublasDgetrfBatched`| | | | |`hipblasDgetrfBatched`|3.5.0| | | | | @@ -1660,7 +1660,7 @@ |`cublasSdgmm`| | | | |`hipblasSdgmm`|3.6.0| | | | | |`cublasSdgmm_64`|12.0| | | | | | | | | | |`cublasSgeam`| | | | |`hipblasSgeam`|1.8.2| | | | | -|`cublasSgeam_64`|12.0| | | | | | | | | | +|`cublasSgeam_64`|12.0| | | |`hipblasSgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | |`hipblasSgelsBatched`|5.4.0| | | | | |`cublasSgemmEx`|7.5| | | | | | | | | | |`cublasSgemmEx_64`|12.0| | | | | | | | | | @@ -1679,7 +1679,7 @@ |`cublasZdgmm`| | | | |`hipblasZdgmm_v2`|6.0.0| | | | | |`cublasZdgmm_64`|12.0| | | | | | | | | | |`cublasZgeam`| | | | |`hipblasZgeam_v2`|6.0.0| | | | | -|`cublasZgeam_64`|12.0| | | | | | | | | | +|`cublasZgeam_64`|12.0| | | |`hipblasZgeam_v2_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | |`hipblasZgelsBatched_v2`|6.0.0| | | | | |`cublasZgeqrfBatched`| | | | |`hipblasZgeqrfBatched_v2`|6.0.0| | | | | |`cublasZgetrfBatched`| | | | |`hipblasZgetrfBatched_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index fe25a76f..493f7784 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1598,7 +1598,7 @@ |`cublasCdgmm`| | | | |`hipblasCdgmm_v2`|6.0.0| | | | |`rocblas_cdgmm`|3.5.0| | | | | |`cublasCdgmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgeam`| | | | |`hipblasCgeam_v2`|6.0.0| | | | 
|`rocblas_cgeam`|3.5.0| | | | | -|`cublasCgeam_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgeam_64`|12.0| | | |`hipblasCgeam_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgeam_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | |`hipblasCgelsBatched_v2`|6.0.0| | | | | | | | | | | |`cublasCgemmEx`|8.0| | | | | | | | | | | | | | | | |`cublasCgemmEx_64`|12.0| | | | | | | | | | | | | | | | @@ -1624,7 +1624,7 @@ |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | |`rocblas_ddgmm`|3.5.0| | | | | |`cublasDdgmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasDgeam`| | | | |`hipblasDgeam`|1.8.2| | | | |`rocblas_dgeam`|1.6.4| | | | | -|`cublasDgeam_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgeam_64`|12.0| | | |`hipblasDgeam_64`|6.3.0| | | |6.3.0|`rocblas_dgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | |`hipblasDgelsBatched`|5.4.0| | | | | | | | | | | |`cublasDgeqrfBatched`| | | | |`hipblasDgeqrfBatched`|3.5.0| | | | | | | | | | | |`cublasDgetrfBatched`| | | | |`hipblasDgetrfBatched`|3.5.0| | | | | | | | | | | @@ -1660,7 +1660,7 @@ |`cublasSdgmm`| | | | |`hipblasSdgmm`|3.6.0| | | | |`rocblas_sdgmm`|3.5.0| | | | | |`cublasSdgmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasSgeam`| | | | |`hipblasSgeam`|1.8.2| | | | |`rocblas_sgeam`|1.6.4| | | | | -|`cublasSgeam_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgeam_64`|12.0| | | |`hipblasSgeam_64`|6.3.0| | | |6.3.0|`rocblas_sgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | |`hipblasSgelsBatched`|5.4.0| | | | | | | | | | | |`cublasSgemmEx`|7.5| | | | | | | | | | | | | | | | |`cublasSgemmEx_64`|12.0| | | | | | | | | | | | | | | | @@ -1679,7 +1679,7 @@ |`cublasZdgmm`| | | | |`hipblasZdgmm_v2`|6.0.0| | | | |`rocblas_zdgmm`|3.5.0| | | | | |`cublasZdgmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasZgeam`| | | | |`hipblasZgeam_v2`|6.0.0| | | | |`rocblas_zgeam`|3.5.0| | | | | -|`cublasZgeam_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgeam_64`|12.0| | | |`hipblasZgeam_v2_64`|6.3.0| | | 
|6.3.0|`rocblas_zgeam_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | |`hipblasZgelsBatched_v2`|6.0.0| | | | | | | | | | | |`cublasZgeqrfBatched`| | | | |`hipblasZgeqrfBatched_v2`|6.0.0| | | | | | | | | | | |`cublasZgetrfBatched`| | | | |`hipblasZgetrfBatched_v2`|6.0.0| | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index d37445ff..2671cfd0 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1598,7 +1598,7 @@ |`cublasCdgmm`| | | | |`rocblas_cdgmm`|3.5.0| | | | | |`cublasCdgmm_64`|12.0| | | | | | | | | | |`cublasCgeam`| | | | |`rocblas_cgeam`|3.5.0| | | | | -|`cublasCgeam_64`|12.0| | | | | | | | | | +|`cublasCgeam_64`|12.0| | | |`rocblas_cgeam_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | | | | | | | | |`cublasCgemmEx`|8.0| | | | | | | | | | |`cublasCgemmEx_64`|12.0| | | | | | | | | | @@ -1624,7 +1624,7 @@ |`cublasDdgmm`| | | | |`rocblas_ddgmm`|3.5.0| | | | | |`cublasDdgmm_64`|12.0| | | | | | | | | | |`cublasDgeam`| | | | |`rocblas_dgeam`|1.6.4| | | | | -|`cublasDgeam_64`|12.0| | | | | | | | | | +|`cublasDgeam_64`|12.0| | | |`rocblas_dgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | | | | | | | | |`cublasDgeqrfBatched`| | | | | | | | | | | |`cublasDgetrfBatched`| | | | | | | | | | | @@ -1660,7 +1660,7 @@ |`cublasSdgmm`| | | | |`rocblas_sdgmm`|3.5.0| | | | | |`cublasSdgmm_64`|12.0| | | | | | | | | | |`cublasSgeam`| | | | |`rocblas_sgeam`|1.6.4| | | | | -|`cublasSgeam_64`|12.0| | | | | | | | | | +|`cublasSgeam_64`|12.0| | | |`rocblas_sgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | | | | | | | | |`cublasSgemmEx`|7.5| | | | | | | | | | |`cublasSgemmEx_64`|12.0| | | | | | | | | | @@ -1679,7 +1679,7 @@ |`cublasZdgmm`| | | | |`rocblas_zdgmm`|3.5.0| | | | | |`cublasZdgmm_64`|12.0| | | | | | | | | | |`cublasZgeam`| | | | |`rocblas_zgeam`|3.5.0| | | | | -|`cublasZgeam_64`|12.0| | | | | | | | | | +|`cublasZgeam_64`|12.0| 
| | |`rocblas_zgeam_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | | | | | | | | |`cublasZgeqrfBatched`| | | | | | | | | | | |`cublasZgetrfBatched`| | | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index c015029e..6b8bdf55 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -562,13 +562,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // ------------------------ CUBLAS BLAS - like extension (cublas_api.h) // GEAM {"cublasSgeam", {"hipblasSgeam", "rocblas_sgeam", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasSgeam_64", {"hipblasSgeam_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasSgeam_64", {"hipblasSgeam_64", "rocblas_sgeam_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasDgeam", {"hipblasDgeam", "rocblas_dgeam", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasDgeam_64", {"hipblasDgeam_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasDgeam_64", {"hipblasDgeam_64", "rocblas_dgeam_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasCgeam", {"hipblasCgeam_v2", "rocblas_cgeam", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasCgeam_64", {"hipblasCgeam_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasCgeam_64", {"hipblasCgeam_v2_64", "rocblas_cgeam_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasZgeam", {"hipblasZgeam_v2", "rocblas_zgeam", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasZgeam_64", {"hipblasZgeam_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasZgeam_64", {"hipblasZgeam_v2_64", "rocblas_zgeam_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, // GETRF - Batched LU {"cublasSgetrfBatched", {"hipblasSgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, ROC_UNSUPPORTED}}, @@ -2060,6 +2060,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDsyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCsyrkx_v2_64", 
{HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZsyrkx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgeam_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgeam_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2485,6 +2489,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dsyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_csyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zsyrkx_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_sgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 39da4a13..0ce9f3ea 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -3050,6 +3050,26 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrkx_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); // CHECK: blasStatus = hipblasZsyrkx_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, 
&dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* beta, const float* B, int64_t ldb, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeam_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const float* alpha, const float* AP, int64_t lda, const float* beta, const float* BP, int64_t ldb, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSgeam_64(blasHandle, transa, transb, m_64, n_64, &fa, &fA, lda_64, &fb, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasSgeam_64(blasHandle, transa, transb, m_64, n_64, &fa, &fA, lda_64, &fb, &fB, ldb_64, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* beta, const double* B, int64_t ldb, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgeam_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const double* alpha, const double* AP, int64_t lda, const double* beta, const double* BP, int64_t ldb, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDgeam_64(blasHandle, transa, transb, m_64, n_64, &da, &dA, lda_64, &db, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDgeam_64(blasHandle, transa, transb, m_64, n_64, &da, &dA, lda_64, &db, &dB, ldb_64, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* beta, const cuComplex* B, int64_t ldb, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT 
hipblasStatus_t hipblasCgeam_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* beta, const hipComplex* BP, int64_t ldb, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCgeam_v2_64(blasHandle, transa, transb, m_64, n_64, &complexa, &complexA, lda_64, &complexb, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCgeam_64(blasHandle, transa, transb, m_64, n_64, &complexa, &complexA, lda_64, &complexb, &complexB, ldb_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* beta, const cuDoubleComplex* B, int64_t ldb, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeam_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* beta, const hipDoubleComplex* BP, int64_t ldb, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZgeam_v2_64(blasHandle, transa, transb, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZgeam_64(blasHandle, transa, transb, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexB, ldb_64, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index fbe7bed6..de82f856 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3255,6 +3255,26 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_zsyrkx_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); // CHECK: blasStatus = rocblas_zsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZsyrkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* beta, const float* B, int64_t ldb, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgeam_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* beta, const float* B, int64_t ldb, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_sgeam_64(blasHandle, transa, transb, m_64, n_64, &fa, &fA, lda_64, &fb, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasSgeam_64(blasHandle, transa, transb, m_64, n_64, &fa, &fA, lda_64, &fb, &fB, ldb_64, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* beta, const double* B, int64_t ldb, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgeam_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* beta, const double* 
B, int64_t ldb, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dgeam_64(blasHandle, transa, transb, m_64, n_64, &da, &dA, lda_64, &db, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDgeam_64(blasHandle, transa, transb, m_64, n_64, &da, &dA, lda_64, &db, &dB, ldb_64, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* beta, const cuComplex* B, int64_t ldb, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgeam_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* beta, const rocblas_float_complex* B, int64_t ldb, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_cgeam_64(blasHandle, transa, transb, m_64, n_64, &complexa, &complexA, lda_64, &complexb, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCgeam_64(blasHandle, transa, transb, m_64, n_64, &complexa, &complexA, lda_64, &complexb, &complexB, ldb_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* beta, const cuDoubleComplex* B, int64_t ldb, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgeam_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* beta, const rocblas_double_complex* B, int64_t ldb, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = 
rocblas_zgeam_64(blasHandle, transa, transb, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZgeam_64(blasHandle, transa, transb, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexB, ldb_64, &dcomplexC, ldc_64); #endif return 0; From 97241f85b67ef5bf55ede52b4a17ac301eb6a59e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 4 Nov 2024 21:50:45 +0000 Subject: [PATCH 37/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 11 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 84 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 42 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 42 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 42 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 84 +++++++++++++++++++ 5 files changed, 294 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index d6ae392a..648a7886 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12929,16 +12929,58 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_128x72", "CUBLASLT_MATMUL_TILE_128x64", "CUBLASLT_MATMUL_TILE_128x56", + "CUBLASLT_MATMUL_TILE_128x512", + "CUBLASLT_MATMUL_TILE_128x504", + "CUBLASLT_MATMUL_TILE_128x496", + "CUBLASLT_MATMUL_TILE_128x488", + "CUBLASLT_MATMUL_TILE_128x480", "CUBLASLT_MATMUL_TILE_128x48", + "CUBLASLT_MATMUL_TILE_128x472", + "CUBLASLT_MATMUL_TILE_128x464", + "CUBLASLT_MATMUL_TILE_128x456", + "CUBLASLT_MATMUL_TILE_128x448", + "CUBLASLT_MATMUL_TILE_128x440", + "CUBLASLT_MATMUL_TILE_128x432", + "CUBLASLT_MATMUL_TILE_128x424", + "CUBLASLT_MATMUL_TILE_128x416", + "CUBLASLT_MATMUL_TILE_128x408", + "CUBLASLT_MATMUL_TILE_128x400", "CUBLASLT_MATMUL_TILE_128x40", + "CUBLASLT_MATMUL_TILE_128x392", "CUBLASLT_MATMUL_TILE_128x384", + "CUBLASLT_MATMUL_TILE_128x376", + "CUBLASLT_MATMUL_TILE_128x368", + "CUBLASLT_MATMUL_TILE_128x360", + "CUBLASLT_MATMUL_TILE_128x352", + 
"CUBLASLT_MATMUL_TILE_128x344", + "CUBLASLT_MATMUL_TILE_128x336", + "CUBLASLT_MATMUL_TILE_128x328", "CUBLASLT_MATMUL_TILE_128x320", "CUBLASLT_MATMUL_TILE_128x32", + "CUBLASLT_MATMUL_TILE_128x312", + "CUBLASLT_MATMUL_TILE_128x304", + "CUBLASLT_MATMUL_TILE_128x296", + "CUBLASLT_MATMUL_TILE_128x288", + "CUBLASLT_MATMUL_TILE_128x280", + "CUBLASLT_MATMUL_TILE_128x272", + "CUBLASLT_MATMUL_TILE_128x264", "CUBLASLT_MATMUL_TILE_128x256", + "CUBLASLT_MATMUL_TILE_128x248", + "CUBLASLT_MATMUL_TILE_128x240", "CUBLASLT_MATMUL_TILE_128x24", + "CUBLASLT_MATMUL_TILE_128x232", + "CUBLASLT_MATMUL_TILE_128x224", + "CUBLASLT_MATMUL_TILE_128x216", + "CUBLASLT_MATMUL_TILE_128x208", + "CUBLASLT_MATMUL_TILE_128x200", "CUBLASLT_MATMUL_TILE_128x192", + "CUBLASLT_MATMUL_TILE_128x184", + "CUBLASLT_MATMUL_TILE_128x176", + "CUBLASLT_MATMUL_TILE_128x168", "CUBLASLT_MATMUL_TILE_128x160", "CUBLASLT_MATMUL_TILE_128x16", + "CUBLASLT_MATMUL_TILE_128x152", + "CUBLASLT_MATMUL_TILE_128x144", "CUBLASLT_MATMUL_TILE_128x136", "CUBLASLT_MATMUL_TILE_128x128", "CUBLASLT_MATMUL_TILE_128x120", @@ -14706,16 +14748,58 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_128x72", "CUBLASLT_MATMUL_TILE_128x64", "CUBLASLT_MATMUL_TILE_128x56", + "CUBLASLT_MATMUL_TILE_128x512", + "CUBLASLT_MATMUL_TILE_128x504", + "CUBLASLT_MATMUL_TILE_128x496", + "CUBLASLT_MATMUL_TILE_128x488", + "CUBLASLT_MATMUL_TILE_128x480", "CUBLASLT_MATMUL_TILE_128x48", + "CUBLASLT_MATMUL_TILE_128x472", + "CUBLASLT_MATMUL_TILE_128x464", + "CUBLASLT_MATMUL_TILE_128x456", + "CUBLASLT_MATMUL_TILE_128x448", + "CUBLASLT_MATMUL_TILE_128x440", + "CUBLASLT_MATMUL_TILE_128x432", + "CUBLASLT_MATMUL_TILE_128x424", + "CUBLASLT_MATMUL_TILE_128x416", + "CUBLASLT_MATMUL_TILE_128x408", + "CUBLASLT_MATMUL_TILE_128x400", "CUBLASLT_MATMUL_TILE_128x40", + "CUBLASLT_MATMUL_TILE_128x392", "CUBLASLT_MATMUL_TILE_128x384", + "CUBLASLT_MATMUL_TILE_128x376", + "CUBLASLT_MATMUL_TILE_128x368", + "CUBLASLT_MATMUL_TILE_128x360", + "CUBLASLT_MATMUL_TILE_128x352", + 
"CUBLASLT_MATMUL_TILE_128x344", + "CUBLASLT_MATMUL_TILE_128x336", + "CUBLASLT_MATMUL_TILE_128x328", "CUBLASLT_MATMUL_TILE_128x320", "CUBLASLT_MATMUL_TILE_128x32", + "CUBLASLT_MATMUL_TILE_128x312", + "CUBLASLT_MATMUL_TILE_128x304", + "CUBLASLT_MATMUL_TILE_128x296", + "CUBLASLT_MATMUL_TILE_128x288", + "CUBLASLT_MATMUL_TILE_128x280", + "CUBLASLT_MATMUL_TILE_128x272", + "CUBLASLT_MATMUL_TILE_128x264", "CUBLASLT_MATMUL_TILE_128x256", + "CUBLASLT_MATMUL_TILE_128x248", + "CUBLASLT_MATMUL_TILE_128x240", "CUBLASLT_MATMUL_TILE_128x24", + "CUBLASLT_MATMUL_TILE_128x232", + "CUBLASLT_MATMUL_TILE_128x224", + "CUBLASLT_MATMUL_TILE_128x216", + "CUBLASLT_MATMUL_TILE_128x208", + "CUBLASLT_MATMUL_TILE_128x200", "CUBLASLT_MATMUL_TILE_128x192", + "CUBLASLT_MATMUL_TILE_128x184", + "CUBLASLT_MATMUL_TILE_128x176", + "CUBLASLT_MATMUL_TILE_128x168", "CUBLASLT_MATMUL_TILE_128x160", "CUBLASLT_MATMUL_TILE_128x16", + "CUBLASLT_MATMUL_TILE_128x152", + "CUBLASLT_MATMUL_TILE_128x144", "CUBLASLT_MATMUL_TILE_128x136", "CUBLASLT_MATMUL_TILE_128x128", "CUBLASLT_MATMUL_TILE_128x120", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index b8fa8820..5b24e3df 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -346,16 +346,58 @@ |`CUBLASLT_MATMUL_TILE_128x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x152`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x16`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x184`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x200`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_128x208`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x216`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x224`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x232`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x240`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x248`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x264`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x272`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x280`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x288`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x296`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x304`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x312`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x328`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x336`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x344`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x352`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x360`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x368`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x376`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x392`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x400`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x408`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x416`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x424`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x432`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x440`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x456`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_128x464`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x472`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x480`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x488`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x496`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x504`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x512`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x72`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 493f7784..b5dddcea 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -346,16 +346,58 @@ |`CUBLASLT_MATMUL_TILE_128x120`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x136`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x144`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x152`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x16`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x168`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x176`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x184`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x200`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x208`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x216`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x224`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x232`|12.6| | | | | | | | | | | | | | | 
| |`CUBLASLT_MATMUL_TILE_128x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x240`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x248`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x264`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x272`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x280`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x288`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x296`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x304`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x312`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x328`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x336`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x344`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x352`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x360`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x368`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x376`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x392`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x400`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x408`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x416`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x424`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x432`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x440`|12.6| | | | | | | | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_128x448`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x456`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x464`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x472`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x480`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x488`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x496`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x504`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x512`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x72`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 2671cfd0..ffbdab8d 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -346,16 +346,58 @@ |`CUBLASLT_MATMUL_TILE_128x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x128`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x152`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x16`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x160`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x184`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x192`|11.8| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x208`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x216`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x224`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_128x232`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x240`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x248`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x256`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x264`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x272`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x280`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x288`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x296`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x304`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x312`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x32`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x328`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x336`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x344`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x352`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x360`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x368`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x376`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x384`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x392`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x400`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x408`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x416`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x424`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x432`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x440`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x448`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x456`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x464`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x472`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x48`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_128x480`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x488`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x496`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x504`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_128x512`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x64`|10.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_128x72`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 58f8284c..fa694c65 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -609,6 +609,48 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_128x112", {"HIPBLASLT_MATMUL_TILE_128x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_128x120", {"HIPBLASLT_MATMUL_TILE_128x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_128x136", {"HIPBLASLT_MATMUL_TILE_128x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x144", {"HIPBLASLT_MATMUL_TILE_128x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x152", {"HIPBLASLT_MATMUL_TILE_128x152", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x168", {"HIPBLASLT_MATMUL_TILE_128x168", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x176", {"HIPBLASLT_MATMUL_TILE_128x176", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x184", {"HIPBLASLT_MATMUL_TILE_128x184", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x200", {"HIPBLASLT_MATMUL_TILE_128x200", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_128x208", {"HIPBLASLT_MATMUL_TILE_128x208", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x216", {"HIPBLASLT_MATMUL_TILE_128x216", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x224", {"HIPBLASLT_MATMUL_TILE_128x224", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x232", {"HIPBLASLT_MATMUL_TILE_128x232", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x240", {"HIPBLASLT_MATMUL_TILE_128x240", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x248", {"HIPBLASLT_MATMUL_TILE_128x248", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x264", {"HIPBLASLT_MATMUL_TILE_128x264", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x272", {"HIPBLASLT_MATMUL_TILE_128x272", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x280", {"HIPBLASLT_MATMUL_TILE_128x280", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x288", {"HIPBLASLT_MATMUL_TILE_128x288", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x296", {"HIPBLASLT_MATMUL_TILE_128x296", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x304", {"HIPBLASLT_MATMUL_TILE_128x304", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x312", {"HIPBLASLT_MATMUL_TILE_128x312", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x328", {"HIPBLASLT_MATMUL_TILE_128x328", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x336", {"HIPBLASLT_MATMUL_TILE_128x336", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x344", {"HIPBLASLT_MATMUL_TILE_128x344", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x352", {"HIPBLASLT_MATMUL_TILE_128x352", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x360", {"HIPBLASLT_MATMUL_TILE_128x360", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x368", {"HIPBLASLT_MATMUL_TILE_128x368", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x376", {"HIPBLASLT_MATMUL_TILE_128x376", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x392", {"HIPBLASLT_MATMUL_TILE_128x392", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x400", {"HIPBLASLT_MATMUL_TILE_128x400", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x408", {"HIPBLASLT_MATMUL_TILE_128x408", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x416", {"HIPBLASLT_MATMUL_TILE_128x416", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x424", {"HIPBLASLT_MATMUL_TILE_128x424", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x432", {"HIPBLASLT_MATMUL_TILE_128x432", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x440", {"HIPBLASLT_MATMUL_TILE_128x440", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x448", {"HIPBLASLT_MATMUL_TILE_128x448", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x456", {"HIPBLASLT_MATMUL_TILE_128x456", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x464", {"HIPBLASLT_MATMUL_TILE_128x464", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x472", {"HIPBLASLT_MATMUL_TILE_128x472", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x480", {"HIPBLASLT_MATMUL_TILE_128x480", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x488", {"HIPBLASLT_MATMUL_TILE_128x488", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x496", {"HIPBLASLT_MATMUL_TILE_128x496", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x504", {"HIPBLASLT_MATMUL_TILE_128x504", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_128x512", {"HIPBLASLT_MATMUL_TILE_128x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1635,6 +1677,48 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_128x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_128x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_128x136", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x144", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x152", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x168", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x176", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x184", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x200", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x208", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x216", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x224", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x232", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x240", {CUDA_126, 
CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x248", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x264", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x272", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x280", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x288", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x296", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x304", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x312", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x328", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x336", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x344", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 
CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x352", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x360", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x368", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x376", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x392", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x400", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x408", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x416", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x424", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x432", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x440", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x448", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 
12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x456", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x464", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x472", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x480", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x488", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x496", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x504", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_128x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From c8c9f15b2914c398fd8ac0c1f84a065beb4bbe0d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 5 Nov 2024 13:54:38 +0000 Subject: [PATCH 38/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 12 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 96 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 48 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 48 ++++++++++ 
docs/tables/CUBLAS_API_supported_by_ROC.md | 48 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 96 +++++++++++++++++++ 5 files changed, 336 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 648a7886..96fb0256 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12835,9 +12835,19 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_272x128", "CUBLASLT_MATMUL_TILE_264x64", "CUBLASLT_MATMUL_TILE_264x128", + "CUBLASLT_MATMUL_TILE_256x96", + "CUBLASLT_MATMUL_TILE_256x88", + "CUBLASLT_MATMUL_TILE_256x80", + "CUBLASLT_MATMUL_TILE_256x8", + "CUBLASLT_MATMUL_TILE_256x72", "CUBLASLT_MATMUL_TILE_256x64", + "CUBLASLT_MATMUL_TILE_256x56", + "CUBLASLT_MATMUL_TILE_256x48", + "CUBLASLT_MATMUL_TILE_256x40", "CUBLASLT_MATMUL_TILE_256x32", + "CUBLASLT_MATMUL_TILE_256x24", "CUBLASLT_MATMUL_TILE_256x192", + "CUBLASLT_MATMUL_TILE_256x16", "CUBLASLT_MATMUL_TILE_256x128", "CUBLASLT_MATMUL_TILE_24x768", "CUBLASLT_MATMUL_TILE_24x704", @@ -12872,10 +12882,48 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_200x64", "CUBLASLT_MATMUL_TILE_200x192", "CUBLASLT_MATMUL_TILE_200x128", + "CUBLASLT_MATMUL_TILE_192x96", + "CUBLASLT_MATMUL_TILE_192x88", + "CUBLASLT_MATMUL_TILE_192x80", + "CUBLASLT_MATMUL_TILE_192x8", + "CUBLASLT_MATMUL_TILE_192x72", "CUBLASLT_MATMUL_TILE_192x64", + "CUBLASLT_MATMUL_TILE_192x56", + "CUBLASLT_MATMUL_TILE_192x48", + "CUBLASLT_MATMUL_TILE_192x40", + "CUBLASLT_MATMUL_TILE_192x336", + "CUBLASLT_MATMUL_TILE_192x328", + "CUBLASLT_MATMUL_TILE_192x320", + "CUBLASLT_MATMUL_TILE_192x32", + "CUBLASLT_MATMUL_TILE_192x312", + "CUBLASLT_MATMUL_TILE_192x304", + "CUBLASLT_MATMUL_TILE_192x296", + "CUBLASLT_MATMUL_TILE_192x288", + "CUBLASLT_MATMUL_TILE_192x280", + "CUBLASLT_MATMUL_TILE_192x272", + "CUBLASLT_MATMUL_TILE_192x264", "CUBLASLT_MATMUL_TILE_192x256", + "CUBLASLT_MATMUL_TILE_192x248", + "CUBLASLT_MATMUL_TILE_192x240", + "CUBLASLT_MATMUL_TILE_192x24", + "CUBLASLT_MATMUL_TILE_192x232", + "CUBLASLT_MATMUL_TILE_192x224", + 
"CUBLASLT_MATMUL_TILE_192x216", + "CUBLASLT_MATMUL_TILE_192x208", + "CUBLASLT_MATMUL_TILE_192x200", "CUBLASLT_MATMUL_TILE_192x192", + "CUBLASLT_MATMUL_TILE_192x184", + "CUBLASLT_MATMUL_TILE_192x176", + "CUBLASLT_MATMUL_TILE_192x168", + "CUBLASLT_MATMUL_TILE_192x160", + "CUBLASLT_MATMUL_TILE_192x16", + "CUBLASLT_MATMUL_TILE_192x152", + "CUBLASLT_MATMUL_TILE_192x144", + "CUBLASLT_MATMUL_TILE_192x136", "CUBLASLT_MATMUL_TILE_192x128", + "CUBLASLT_MATMUL_TILE_192x120", + "CUBLASLT_MATMUL_TILE_192x112", + "CUBLASLT_MATMUL_TILE_192x104", "CUBLASLT_MATMUL_TILE_184x64", "CUBLASLT_MATMUL_TILE_184x256", "CUBLASLT_MATMUL_TILE_184x192", @@ -14654,9 +14702,19 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_272x128", "CUBLASLT_MATMUL_TILE_264x64", "CUBLASLT_MATMUL_TILE_264x128", + "CUBLASLT_MATMUL_TILE_256x96", + "CUBLASLT_MATMUL_TILE_256x88", + "CUBLASLT_MATMUL_TILE_256x80", + "CUBLASLT_MATMUL_TILE_256x8", + "CUBLASLT_MATMUL_TILE_256x72", "CUBLASLT_MATMUL_TILE_256x64", + "CUBLASLT_MATMUL_TILE_256x56", + "CUBLASLT_MATMUL_TILE_256x48", + "CUBLASLT_MATMUL_TILE_256x40", "CUBLASLT_MATMUL_TILE_256x32", + "CUBLASLT_MATMUL_TILE_256x24", "CUBLASLT_MATMUL_TILE_256x192", + "CUBLASLT_MATMUL_TILE_256x16", "CUBLASLT_MATMUL_TILE_256x128", "CUBLASLT_MATMUL_TILE_24x768", "CUBLASLT_MATMUL_TILE_24x704", @@ -14691,10 +14749,48 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_200x64", "CUBLASLT_MATMUL_TILE_200x192", "CUBLASLT_MATMUL_TILE_200x128", + "CUBLASLT_MATMUL_TILE_192x96", + "CUBLASLT_MATMUL_TILE_192x88", + "CUBLASLT_MATMUL_TILE_192x80", + "CUBLASLT_MATMUL_TILE_192x8", + "CUBLASLT_MATMUL_TILE_192x72", "CUBLASLT_MATMUL_TILE_192x64", + "CUBLASLT_MATMUL_TILE_192x56", + "CUBLASLT_MATMUL_TILE_192x48", + "CUBLASLT_MATMUL_TILE_192x40", + "CUBLASLT_MATMUL_TILE_192x336", + "CUBLASLT_MATMUL_TILE_192x328", + "CUBLASLT_MATMUL_TILE_192x320", + "CUBLASLT_MATMUL_TILE_192x32", + "CUBLASLT_MATMUL_TILE_192x312", + "CUBLASLT_MATMUL_TILE_192x304", + "CUBLASLT_MATMUL_TILE_192x296", + 
"CUBLASLT_MATMUL_TILE_192x288", + "CUBLASLT_MATMUL_TILE_192x280", + "CUBLASLT_MATMUL_TILE_192x272", + "CUBLASLT_MATMUL_TILE_192x264", "CUBLASLT_MATMUL_TILE_192x256", + "CUBLASLT_MATMUL_TILE_192x248", + "CUBLASLT_MATMUL_TILE_192x240", + "CUBLASLT_MATMUL_TILE_192x24", + "CUBLASLT_MATMUL_TILE_192x232", + "CUBLASLT_MATMUL_TILE_192x224", + "CUBLASLT_MATMUL_TILE_192x216", + "CUBLASLT_MATMUL_TILE_192x208", + "CUBLASLT_MATMUL_TILE_192x200", "CUBLASLT_MATMUL_TILE_192x192", + "CUBLASLT_MATMUL_TILE_192x184", + "CUBLASLT_MATMUL_TILE_192x176", + "CUBLASLT_MATMUL_TILE_192x168", + "CUBLASLT_MATMUL_TILE_192x160", + "CUBLASLT_MATMUL_TILE_192x16", + "CUBLASLT_MATMUL_TILE_192x152", + "CUBLASLT_MATMUL_TILE_192x144", + "CUBLASLT_MATMUL_TILE_192x136", "CUBLASLT_MATMUL_TILE_192x128", + "CUBLASLT_MATMUL_TILE_192x120", + "CUBLASLT_MATMUL_TILE_192x112", + "CUBLASLT_MATMUL_TILE_192x104", "CUBLASLT_MATMUL_TILE_184x64", "CUBLASLT_MATMUL_TILE_184x256", "CUBLASLT_MATMUL_TILE_184x192", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 5b24e3df..306c282f 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -451,10 +451,48 @@ |`CUBLASLT_MATMUL_TILE_184x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_184x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_184x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x152`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x176`|12.6| | | 
| | | | | | | +|`CUBLASLT_MATMUL_TILE_192x184`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x208`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x216`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x224`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x232`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x240`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x248`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x264`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x272`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x280`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x288`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x296`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x304`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x312`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x328`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x336`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x64`|12.6| | | | | | | | | | @@ -489,9 +527,19 @@ |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | 
| | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x16`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x24`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_264x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_264x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_272x128`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index b5dddcea..177c71aa 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -451,10 +451,48 @@ |`CUBLASLT_MATMUL_TILE_184x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_184x256`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_184x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x120`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x136`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x144`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x152`|12.6| | | | | | | | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_192x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x160`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x168`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x176`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x184`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x200`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x208`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x216`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x224`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x232`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x240`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x248`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x256`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x264`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x272`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x280`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x288`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x296`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x304`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x312`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x32`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x320`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x328`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x336`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x56`|12.6| | | | | | | | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_192x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x88`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x96`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x64`|12.6| | | | | | | | | | | | | | | | @@ -489,9 +527,19 @@ |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x16`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x24`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x88`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x96`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_264x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_264x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_272x128`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md 
b/docs/tables/CUBLAS_API_supported_by_ROC.md index ffbdab8d..d59fb203 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -451,10 +451,48 @@ |`CUBLASLT_MATMUL_TILE_184x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_184x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_184x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x128`|11.3| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x152`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x184`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x208`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x216`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x224`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x232`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x240`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x248`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x256`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x264`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x272`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x280`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x288`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x296`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x304`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x312`|12.6| | | | 
| | | | | | +|`CUBLASLT_MATMUL_TILE_192x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x320`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x328`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x336`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_192x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_192x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_200x64`|12.6| | | | | | | | | | @@ -489,9 +527,19 @@ |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x16`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x24`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_264x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_264x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_272x128`|12.6| | | | | | | | | | diff --git 
a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index fa694c65..aaaa92ba 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -651,6 +651,54 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_128x496", {"HIPBLASLT_MATMUL_TILE_128x496", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_128x504", {"HIPBLASLT_MATMUL_TILE_128x504", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_128x512", {"HIPBLASLT_MATMUL_TILE_128x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x8", {"CUBLASLT_MATMUL_TILE_192x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x16", {"CUBLASLT_MATMUL_TILE_192x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x24", {"CUBLASLT_MATMUL_TILE_192x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x32", {"CUBLASLT_MATMUL_TILE_192x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x40", {"CUBLASLT_MATMUL_TILE_192x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x48", {"CUBLASLT_MATMUL_TILE_192x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x56", {"CUBLASLT_MATMUL_TILE_192x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x72", {"CUBLASLT_MATMUL_TILE_192x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x80", {"CUBLASLT_MATMUL_TILE_192x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x88", 
{"CUBLASLT_MATMUL_TILE_192x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x96", {"CUBLASLT_MATMUL_TILE_192x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x104", {"CUBLASLT_MATMUL_TILE_192x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x112", {"CUBLASLT_MATMUL_TILE_192x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x120", {"CUBLASLT_MATMUL_TILE_192x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x136", {"CUBLASLT_MATMUL_TILE_192x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x144", {"CUBLASLT_MATMUL_TILE_192x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x152", {"CUBLASLT_MATMUL_TILE_192x152", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x160", {"CUBLASLT_MATMUL_TILE_192x160", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x168", {"CUBLASLT_MATMUL_TILE_192x168", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x176", {"CUBLASLT_MATMUL_TILE_192x176", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x184", {"CUBLASLT_MATMUL_TILE_192x184", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x200", {"CUBLASLT_MATMUL_TILE_192x200", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x208", {"CUBLASLT_MATMUL_TILE_192x208", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_192x216", {"CUBLASLT_MATMUL_TILE_192x216", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x224", {"CUBLASLT_MATMUL_TILE_192x224", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x232", {"CUBLASLT_MATMUL_TILE_192x232", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x240", {"CUBLASLT_MATMUL_TILE_192x240", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x248", {"CUBLASLT_MATMUL_TILE_192x248", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x264", {"CUBLASLT_MATMUL_TILE_192x264", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x272", {"CUBLASLT_MATMUL_TILE_192x272", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x280", {"CUBLASLT_MATMUL_TILE_192x280", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x288", {"CUBLASLT_MATMUL_TILE_192x288", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x296", {"CUBLASLT_MATMUL_TILE_192x296", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x304", {"CUBLASLT_MATMUL_TILE_192x304", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x312", {"CUBLASLT_MATMUL_TILE_192x312", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x320", {"CUBLASLT_MATMUL_TILE_192x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x328", {"CUBLASLT_MATMUL_TILE_192x328", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x336", {"CUBLASLT_MATMUL_TILE_192x336", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x8", {"CUBLASLT_MATMUL_TILE_256x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x16", {"CUBLASLT_MATMUL_TILE_256x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x24", {"CUBLASLT_MATMUL_TILE_256x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x40", {"CUBLASLT_MATMUL_TILE_256x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x48", {"CUBLASLT_MATMUL_TILE_256x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x56", {"CUBLASLT_MATMUL_TILE_256x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x72", {"CUBLASLT_MATMUL_TILE_256x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x80", {"CUBLASLT_MATMUL_TILE_256x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x88", {"CUBLASLT_MATMUL_TILE_256x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x96", {"CUBLASLT_MATMUL_TILE_256x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1719,6 
+1767,54 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_128x496", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_128x504", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_128x512", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x80", {CUDA_126, CUDA_0, CUDA_0 }}, 
// A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x136", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x144", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x152", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x160", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x168", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x176", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_192x184", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x200", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x208", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x216", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x224", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x232", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x240", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x248", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x264", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x272", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x280", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x288", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 
120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x296", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x304", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x312", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x320", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x328", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_192x336", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x56", {CUDA_126, 
CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From da870e0bd33529aa7330cfcc49ddfa356ba03814 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 5 Nov 2024 14:16:56 +0000 Subject: [PATCH 39/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 11 + `rocblas_(c|z)hemm_64` and `hipblas(C|Z)hemm_v2_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 ++++++++-------- docs/tables/CUBLAS_API_supported_by_HIP.md | 8 ++++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 8 ++++---- docs/tables/CUBLAS_API_supported_by_ROC.md | 8 ++++---- src/CUDA2HIP_BLAS_API_functions.cpp | 12 ++++++++---- .../synthetic/libraries/cublas2hipblas_v2.cu | 14 ++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 14 ++++++++++++++ 7 files changed, 56 insertions(+), 24 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 96fb0256..320d15fb 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1632,7 +1632,9 @@ sub rocSubstitutions { subst("cublasChbmv_v2", "rocblas_chbmv", "library"); subst("cublasChbmv_v2_64", "rocblas_chbmv_64", "library"); subst("cublasChemm", "rocblas_chemm", 
"library"); + subst("cublasChemm_64", "rocblas_chemm_64", "library"); subst("cublasChemm_v2", "rocblas_chemm", "library"); + subst("cublasChemm_v2_64", "rocblas_chemm_64", "library"); subst("cublasChemv", "rocblas_chemv", "library"); subst("cublasChemv_64", "rocblas_chemv_64", "library"); subst("cublasChemv_v2", "rocblas_chemv", "library"); @@ -2198,7 +2200,9 @@ sub rocSubstitutions { subst("cublasZhbmv_v2", "rocblas_zhbmv", "library"); subst("cublasZhbmv_v2_64", "rocblas_zhbmv_64", "library"); subst("cublasZhemm", "rocblas_zhemm", "library"); + subst("cublasZhemm_64", "rocblas_zhemm_64", "library"); subst("cublasZhemm_v2", "rocblas_zhemm", "library"); + subst("cublasZhemm_v2_64", "rocblas_zhemm_64", "library"); subst("cublasZhemv", "rocblas_zhemv", "library"); subst("cublasZhemv_64", "rocblas_zhemv_64", "library"); subst("cublasZhemv_v2", "rocblas_zhemv", "library"); @@ -4418,7 +4422,9 @@ sub simpleSubstitutions { subst("cublasChbmv_v2", "hipblasChbmv_v2", "library"); subst("cublasChbmv_v2_64", "hipblasChbmv_v2_64", "library"); subst("cublasChemm", "hipblasChemm_v2", "library"); + subst("cublasChemm_64", "hipblasChemm_v2_64", "library"); subst("cublasChemm_v2", "hipblasChemm_v2", "library"); + subst("cublasChemm_v2_64", "hipblasChemm_v2_64", "library"); subst("cublasChemv", "hipblasChemv_v2", "library"); subst("cublasChemv_64", "hipblasChemv_v2_64", "library"); subst("cublasChemv_v2", "hipblasChemv_v2", "library"); @@ -4989,7 +4995,9 @@ sub simpleSubstitutions { subst("cublasZhbmv_v2", "hipblasZhbmv_v2", "library"); subst("cublasZhbmv_v2_64", "hipblasZhbmv_v2_64", "library"); subst("cublasZhemm", "hipblasZhemm_v2", "library"); + subst("cublasZhemm_64", "hipblasZhemm_v2_64", "library"); subst("cublasZhemm_v2", "hipblasZhemm_v2", "library"); + subst("cublasZhemm_v2_64", "hipblasZhemm_v2_64", "library"); subst("cublasZhemv", "hipblasZhemv_v2", "library"); subst("cublasZhemv_64", "hipblasZhemv_v2_64", "library"); subst("cublasZhemv_v2", "hipblasZhemv_v2", "library"); 
@@ -11628,8 +11636,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZtrmm_64", "cublasZtpttr", "cublasZmatinvBatched", - "cublasZhemm_v2_64", - "cublasZhemm_64", "cublasZgemm3m_64", "cublasZgemm3m", "cublasZdgmm_64", @@ -11771,8 +11777,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCherkEx", "cublasCherk3mEx_64", "cublasCherk3mEx", - "cublasChemm_v2_64", - "cublasChemm_64", "cublasCgemmEx_64", "cublasCgemmEx", "cublasCgemm3m_64", @@ -13727,8 +13731,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZtrmm_64", "cublasZtpttr", "cublasZmatinvBatched", - "cublasZhemm_v2_64", - "cublasZhemm_64", "cublasZgetrsBatched", "cublasZgetriBatched", "cublasZgetrfBatched", @@ -13884,8 +13886,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCherkEx", "cublasCherk3mEx_64", "cublasCherk3mEx", - "cublasChemm_v2_64", - "cublasChemm_64", "cublasCgetrsBatched", "cublasCgetriBatched", "cublasCgetrfBatched", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 306c282f..918f7186 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1495,9 +1495,9 @@ |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | | |`cublasCgemvStridedBatched_64`|12.0| | | |`hipblasCgemvStridedBatched_v2_64`|6.2.0| | | | | |`cublasChemm`| | | | |`hipblasChemm_v2`|6.0.0| | | | | -|`cublasChemm_64`|12.0| | | | | | | | | | +|`cublasChemm_64`|12.0| | | |`hipblasChemm_v2_64`|6.3.0| | | |6.3.0| |`cublasChemm_v2`| | | | |`hipblasChemm_v2`|6.0.0| | | | | -|`cublasChemm_v2_64`|12.0| | | | | | | | | | +|`cublasChemm_v2_64`|12.0| | | |`hipblasChemm_v2_64`|6.3.0| | | |6.3.0| |`cublasCher2k`| | | | |`hipblasCher2k_v2`|6.0.0| | | | | |`cublasCher2k_64`|12.0| | | |`hipblasCher2k_v2_64`|6.3.0| | | |6.3.0| |`cublasCher2k_v2`| | | | |`hipblasCher2k_v2`|6.0.0| | | | | @@ -1641,9 +1641,9 @@ |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | | 
|`cublasZgemvStridedBatched_64`|12.0| | | |`hipblasZgemvStridedBatched_v2_64`|6.2.0| | | | | |`cublasZhemm`| | | | |`hipblasZhemm_v2`|6.0.0| | | | | -|`cublasZhemm_64`|12.0| | | | | | | | | | +|`cublasZhemm_64`|12.0| | | |`hipblasZhemm_v2_64`|6.3.0| | | |6.3.0| |`cublasZhemm_v2`| | | | |`hipblasZhemm_v2`|6.0.0| | | | | -|`cublasZhemm_v2_64`|12.0| | | | | | | | | | +|`cublasZhemm_v2_64`|12.0| | | |`hipblasZhemm_v2_64`|6.3.0| | | |6.3.0| |`cublasZher2k`| | | | |`hipblasZher2k_v2`|6.0.0| | | | | |`cublasZher2k_64`|12.0| | | |`hipblasZher2k_v2_64`|6.3.0| | | |6.3.0| |`cublasZher2k_v2`| | | | |`hipblasZher2k_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 177c71aa..e94c800e 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1495,9 +1495,9 @@ |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | | |`cublasCgemvStridedBatched_64`|12.0| | | |`hipblasCgemvStridedBatched_v2_64`|6.2.0| | | | |`rocblas_cgemv_strided_batched_64`|6.2.0| | | | | |`cublasChemm`| | | | |`hipblasChemm_v2`|6.0.0| | | | |`rocblas_chemm`|3.5.0| | | | | -|`cublasChemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChemm_64`|12.0| | | |`hipblasChemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_chemm_64`|6.3.0| | | |6.3.0| |`cublasChemm_v2`| | | | |`hipblasChemm_v2`|6.0.0| | | | |`rocblas_chemm`|3.5.0| | | | | -|`cublasChemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChemm_v2_64`|12.0| | | |`hipblasChemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_chemm_64`|6.3.0| | | |6.3.0| |`cublasCher2k`| | | | |`hipblasCher2k_v2`|6.0.0| | | | |`rocblas_cher2k`|3.5.0| | | | | |`cublasCher2k_64`|12.0| | | |`hipblasCher2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_cher2k_64`|6.3.0| | | |6.3.0| |`cublasCher2k_v2`| | | | |`hipblasCher2k_v2`|6.0.0| | | | |`rocblas_cher2k`|3.5.0| | | | | @@ -1641,9 
+1641,9 @@ |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | |`cublasZgemvStridedBatched_64`|12.0| | | |`hipblasZgemvStridedBatched_v2_64`|6.2.0| | | | |`rocblas_zgemv_strided_batched_64`|6.2.0| | | | | |`cublasZhemm`| | | | |`hipblasZhemm_v2`|6.0.0| | | | |`rocblas_zhemm`|3.5.0| | | | | -|`cublasZhemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhemm_64`|12.0| | | |`hipblasZhemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zhemm_64`|6.3.0| | | |6.3.0| |`cublasZhemm_v2`| | | | |`hipblasZhemm_v2`|6.0.0| | | | |`rocblas_zhemm`|3.5.0| | | | | -|`cublasZhemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhemm_v2_64`|12.0| | | |`hipblasZhemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zhemm_64`|6.3.0| | | |6.3.0| |`cublasZher2k`| | | | |`hipblasZher2k_v2`|6.0.0| | | | |`rocblas_zher2k`|3.5.0| | | | | |`cublasZher2k_64`|12.0| | | |`hipblasZher2k_v2_64`|6.3.0| | | |6.3.0|`rocblas_zher2k_64`|6.3.0| | | |6.3.0| |`cublasZher2k_v2`| | | | |`hipblasZher2k_v2`|6.0.0| | | | |`rocblas_zher2k`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index d59fb203..fdac1cea 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1495,9 +1495,9 @@ |`cublasCgemvStridedBatched`|11.6| | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | | |`cublasCgemvStridedBatched_64`|12.0| | | |`rocblas_cgemv_strided_batched_64`|6.2.0| | | | | |`cublasChemm`| | | | |`rocblas_chemm`|3.5.0| | | | | -|`cublasChemm_64`|12.0| | | | | | | | | | +|`cublasChemm_64`|12.0| | | |`rocblas_chemm_64`|6.3.0| | | |6.3.0| |`cublasChemm_v2`| | | | |`rocblas_chemm`|3.5.0| | | | | -|`cublasChemm_v2_64`|12.0| | | | | | | | | | +|`cublasChemm_v2_64`|12.0| | | |`rocblas_chemm_64`|6.3.0| | | |6.3.0| |`cublasCher2k`| | | | |`rocblas_cher2k`|3.5.0| | | | | |`cublasCher2k_64`|12.0| | | |`rocblas_cher2k_64`|6.3.0| | | |6.3.0| |`cublasCher2k_v2`| | 
| | |`rocblas_cher2k`|3.5.0| | | | | @@ -1641,9 +1641,9 @@ |`cublasZgemvStridedBatched`|11.6| | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | |`cublasZgemvStridedBatched_64`|12.0| | | |`rocblas_zgemv_strided_batched_64`|6.2.0| | | | | |`cublasZhemm`| | | | |`rocblas_zhemm`|3.5.0| | | | | -|`cublasZhemm_64`|12.0| | | | | | | | | | +|`cublasZhemm_64`|12.0| | | |`rocblas_zhemm_64`|6.3.0| | | |6.3.0| |`cublasZhemm_v2`| | | | |`rocblas_zhemm`|3.5.0| | | | | -|`cublasZhemm_v2_64`|12.0| | | | | | | | | | +|`cublasZhemm_v2_64`|12.0| | | |`rocblas_zhemm_64`|6.3.0| | | |6.3.0| |`cublasZher2k`| | | | |`rocblas_zher2k`|3.5.0| | | | | |`cublasZher2k_64`|12.0| | | |`rocblas_zher2k_64`|6.3.0| | | |6.3.0| |`cublasZher2k_v2`| | | | |`rocblas_zher2k`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 6b8bdf55..3d12911d 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -535,9 +535,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // HEMM {"cublasChemm", {"hipblasChemm_v2", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChemm_64", {"hipblasChemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasChemm_64", {"hipblasChemm_v2_64", "rocblas_chemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZhemm", {"hipblasZhemm_v2", "rocblas_zhemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhemm_64", {"hipblasZhemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZhemm_64", {"hipblasZhemm_v2_64", "rocblas_zhemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // TRSM {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -902,9 +902,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // HEMM {"cublasChemm_v2", {"hipblasChemm_v2", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_3}}, - {"cublasChemm_v2_64", {"hipblasChemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasChemm_v2_64", {"hipblasChemm_v2_64", "rocblas_chemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZhemm_v2", {"hipblasZhemm_v2", "rocblas_zhemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZhemm_v2_64", {"hipblasZhemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZhemm_v2_64", {"hipblasZhemm_v2_64", "rocblas_zhemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // TRSM {"cublasStrsm_v2", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -2064,6 +2064,8 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCgeam_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZgeam_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasChemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZhemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2493,6 +2495,8 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_cgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_chemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zhemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 0ce9f3ea..7783384f 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -3070,6 +3070,20 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeam_v2_64(hipblasHandle_t handle, hipblasOperation_t 
transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* beta, const hipDoubleComplex* BP, int64_t ldb, hipDoubleComplex* CP, int64_t ldc); // CHECK: blasStatus = hipblasZgeam_v2_64(blasHandle, transa, transb, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexB, ldb_64, &dcomplexC, ldc_64); blasStatus = cublasZgeam_64(blasHandle, transa, transb, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChemm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const hipComplex* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasChemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasChemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasChemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasChemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, 
int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhemm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index de82f856..ffbcfb63 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3275,6 +3275,20 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgeam_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* beta, const rocblas_double_complex* B, int64_t ldb, rocblas_double_complex* C, int64_t ldc); // CHECK: blasStatus = rocblas_zgeam_64(blasHandle, transa, transb, m_64, 
n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexB, ldb_64, &dcomplexC, ldc_64); blasStatus = cublasZgeam_64(blasHandle, transa, transb, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexb, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chemm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_chemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_chemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasChemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + blasStatus = cublasChemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexb, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhemm_64(rocblas_handle handle, rocblas_side side, 
rocblas_fill uplo, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_zhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + blasStatus = cublasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); #endif return 0; From 59b239f7a2c3c1a26a27b9dcc6b3429433f61c3c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 6 Nov 2024 18:33:32 +0100 Subject: [PATCH 40/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 12 + `rocblas_(s|d|c|z)trmm_64` and `hipblas(S|D|C|Z)trmm(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 32 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 16 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++----- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 24 +++++++++----- .../synthetic/libraries/cublas2hipblas_v2.cu | 28 ++++++++++++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 ++++++++++++++++ 7 files changed, 112 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 320d15fb..64f19bb9 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1736,7 +1736,9 @@ sub rocSubstitutions { 
subst("cublasCtpsv_v2", "rocblas_ctpsv", "library"); subst("cublasCtpsv_v2_64", "rocblas_ctpsv_64", "library"); subst("cublasCtrmm", "rocblas_ctrmm", "library"); + subst("cublasCtrmm_64", "rocblas_ctrmm_64", "library"); subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); + subst("cublasCtrmm_v2_64", "rocblas_ctrmm_64", "library"); subst("cublasCtrmv", "rocblas_ctrmv", "library"); subst("cublasCtrmv_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrmv_v2", "rocblas_ctrmv", "library"); @@ -1883,7 +1885,9 @@ sub rocSubstitutions { subst("cublasDtpsv_v2", "rocblas_dtpsv", "library"); subst("cublasDtpsv_v2_64", "rocblas_dtpsv_64", "library"); subst("cublasDtrmm", "rocblas_dtrmm", "library"); + subst("cublasDtrmm_64", "rocblas_dtrmm_64", "library"); subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); + subst("cublasDtrmm_v2_64", "rocblas_dtrmm_64", "library"); subst("cublasDtrmv", "rocblas_dtrmv", "library"); subst("cublasDtrmv_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrmv_v2", "rocblas_dtrmv", "library"); @@ -2117,7 +2121,9 @@ sub rocSubstitutions { subst("cublasStpsv_v2", "rocblas_stpsv", "library"); subst("cublasStpsv_v2_64", "rocblas_stpsv_64", "library"); subst("cublasStrmm", "rocblas_strmm", "library"); + subst("cublasStrmm_64", "rocblas_strmm_64", "library"); subst("cublasStrmm_v2", "rocblas_strmm", "library"); + subst("cublasStrmm_v2_64", "rocblas_strmm_64", "library"); subst("cublasStrmv", "rocblas_strmv", "library"); subst("cublasStrmv_64", "rocblas_strmv_64", "library"); subst("cublasStrmv_v2", "rocblas_strmv", "library"); @@ -2294,7 +2300,9 @@ sub rocSubstitutions { subst("cublasZtpsv_v2", "rocblas_ztpsv", "library"); subst("cublasZtpsv_v2_64", "rocblas_ztpsv_64", "library"); subst("cublasZtrmm", "rocblas_ztrmm", "library"); + subst("cublasZtrmm_64", "rocblas_ztrmm_64", "library"); subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); + subst("cublasZtrmm_v2_64", "rocblas_ztrmm_64", "library"); subst("cublasZtrmv", "rocblas_ztrmv", "library"); 
subst("cublasZtrmv_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrmv_v2", "rocblas_ztrmv", "library"); @@ -4526,7 +4534,9 @@ sub simpleSubstitutions { subst("cublasCtpsv_v2", "hipblasCtpsv_v2", "library"); subst("cublasCtpsv_v2_64", "hipblasCtpsv_v2_64", "library"); subst("cublasCtrmm", "hipblasCtrmm_v2", "library"); + subst("cublasCtrmm_64", "hipblasCtrmm_v2_64", "library"); subst("cublasCtrmm_v2", "hipblasCtrmm_v2", "library"); + subst("cublasCtrmm_v2_64", "hipblasCtrmm_v2_64", "library"); subst("cublasCtrmv", "hipblasCtrmv_v2", "library"); subst("cublasCtrmv_64", "hipblasCtrmv_v2_64", "library"); subst("cublasCtrmv_v2", "hipblasCtrmv_v2", "library"); @@ -4674,7 +4684,9 @@ sub simpleSubstitutions { subst("cublasDtpsv_v2", "hipblasDtpsv", "library"); subst("cublasDtpsv_v2_64", "hipblasDtpsv_64", "library"); subst("cublasDtrmm", "hipblasDtrmm", "library"); + subst("cublasDtrmm_64", "hipblasDtrmm_64", "library"); subst("cublasDtrmm_v2", "hipblasDtrmm", "library"); + subst("cublasDtrmm_v2_64", "hipblasDtrmm_64", "library"); subst("cublasDtrmv", "hipblasDtrmv", "library"); subst("cublasDtrmv_64", "hipblasDtrmv_64", "library"); subst("cublasDtrmv_v2", "hipblasDtrmv", "library"); @@ -4919,7 +4931,9 @@ sub simpleSubstitutions { subst("cublasStpsv_v2", "hipblasStpsv", "library"); subst("cublasStpsv_v2_64", "hipblasStpsv_64", "library"); subst("cublasStrmm", "hipblasStrmm", "library"); + subst("cublasStrmm_64", "hipblasStrmm_64", "library"); subst("cublasStrmm_v2", "hipblasStrmm", "library"); + subst("cublasStrmm_v2_64", "hipblasStrmm_64", "library"); subst("cublasStrmv", "hipblasStrmv", "library"); subst("cublasStrmv_64", "hipblasStrmv_64", "library"); subst("cublasStrmv_v2", "hipblasStrmv", "library"); @@ -5089,7 +5103,9 @@ sub simpleSubstitutions { subst("cublasZtpsv_v2", "hipblasZtpsv_v2", "library"); subst("cublasZtpsv_v2_64", "hipblasZtpsv_v2_64", "library"); subst("cublasZtrmm", "hipblasZtrmm_v2", "library"); + subst("cublasZtrmm_64", "hipblasZtrmm_v2_64", 
"library"); subst("cublasZtrmm_v2", "hipblasZtrmm_v2", "library"); + subst("cublasZtrmm_v2_64", "hipblasZtrmm_v2_64", "library"); subst("cublasZtrmv", "hipblasZtrmv_v2", "library"); subst("cublasZtrmv_64", "hipblasZtrmv_v2_64", "library"); subst("cublasZtrmv_v2", "hipblasZtrmv_v2", "library"); @@ -11632,8 +11648,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZtrsm_v2_64", "cublasZtrsm_64", "cublasZtrsmBatched_64", - "cublasZtrmm_v2_64", - "cublasZtrmm_64", "cublasZtpttr", "cublasZmatinvBatched", "cublasZgemm3m_64", @@ -11655,8 +11669,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasStrsm_v2_64", "cublasStrsm_64", "cublasStrsmBatched_64", - "cublasStrmm_v2_64", - "cublasStrmm_64", "cublasStpttr", "cublasSmatinvBatched", "cublasShutdown", @@ -11751,8 +11763,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDtrsm_v2_64", "cublasDtrsm_64", "cublasDtrsmBatched_64", - "cublasDtrmm_v2_64", - "cublasDtrmm_64", "cublasDtpttr", "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", @@ -11762,8 +11772,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCtrsm_v2_64", "cublasCtrsm_64", "cublasCtrsmBatched_64", - "cublasCtrmm_v2_64", - "cublasCtrmm_64", "cublasCtpttr", "cublasCsyrkEx_64", "cublasCsyrkEx", @@ -13727,8 +13735,6 @@ sub warnRocOnlyUnsupportedFunctions { "cudnnAddTensor", "cudnnActivationStruct", "cublasZtrttp", - "cublasZtrmm_v2_64", - "cublasZtrmm_64", "cublasZtpttr", "cublasZmatinvBatched", "cublasZgetrsBatched", @@ -13744,8 +13750,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrmm_v2_64", - "cublasStrmm_64", "cublasStpttr", "cublasSmatinvBatched", "cublasShutdown", @@ -13859,8 +13863,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrmm_v2_64", - "cublasDtrmm_64", "cublasDtpttr", "cublasDmatinvBatched", "cublasDgetrsBatched", @@ -13872,8 +13874,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgelsBatched", "cublasDdgmm_64", "cublasCtrttp", - 
"cublasCtrmm_v2_64", - "cublasCtrmm_64", "cublasCtpttr", "cublasCsyrkEx_64", "cublasCsyrkEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 918f7186..7561d5c8 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1523,9 +1523,9 @@ |`cublasCsyrkx`| | | | |`hipblasCsyrkx_v2`|6.0.0| | | | | |`cublasCsyrkx_64`|12.0| | | |`hipblasCsyrkx_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | | -|`cublasCtrmm_64`|12.0| | | | | | | | | | +|`cublasCtrmm_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | | -|`cublasCtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasCtrmm_v2_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrsm`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | | |`cublasCtrsm_64`|12.0| | | | | | | | | | |`cublasCtrsm_v2`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | | @@ -1559,9 +1559,9 @@ |`cublasDsyrkx`| | | | |`hipblasDsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | |`hipblasDsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | | -|`cublasDtrmm_64`|12.0| | | | | | | | | | +|`cublasDtrmm_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | | -|`cublasDtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasDtrmm_v2_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm`| | | | |`hipblasDtrsm`|1.8.2| | | | | |`cublasDtrsm_64`|12.0| | | | | | | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | | @@ -1611,9 +1611,9 @@ |`cublasSsyrkx`| | | | |`hipblasSsyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | |`hipblasSsyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | | -|`cublasStrmm_64`|12.0| | | | | | | | | | +|`cublasStrmm_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0| |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | | 
-|`cublasStrmm_v2_64`|12.0| | | | | | | | | | +|`cublasStrmm_v2_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0| |`cublasStrsm`| | | | |`hipblasStrsm`|1.8.2| | | | | |`cublasStrsm_64`|12.0| | | | | | | | | | |`cublasStrsm_v2`| | | | |`hipblasStrsm`|1.8.2| | | | | @@ -1669,9 +1669,9 @@ |`cublasZsyrkx`| | | | |`hipblasZsyrkx_v2`|6.0.0| | | | | |`cublasZsyrkx_64`|12.0| | | |`hipblasZsyrkx_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | | -|`cublasZtrmm_64`|12.0| | | | | | | | | | +|`cublasZtrmm_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | | -|`cublasZtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasZtrmm_v2_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrsm`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | | |`cublasZtrsm_64`|12.0| | | | | | | | | | |`cublasZtrsm_v2`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index e94c800e..ae05ad0e 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1523,9 +1523,9 @@ |`cublasCsyrkx`| | | | |`hipblasCsyrkx_v2`|6.0.0| | | | |`rocblas_csyrkx`|3.5.0| | | | | |`cublasCsyrkx_64`|12.0| | | |`hipblasCsyrkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_csyrkx_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | -|`cublasCtrmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrmm_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | -|`cublasCtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrmm_v2_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrsm`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | 
| | |`cublasCtrsm_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | @@ -1559,9 +1559,9 @@ |`cublasDsyrkx`| | | | |`hipblasDsyrkx`|3.5.0| | | | |`rocblas_dsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | |`hipblasDsyrkx_64`|6.3.0| | | |6.3.0|`rocblas_dsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | -|`cublasDtrmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrmm_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0|`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | -|`cublasDtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrmm_v2_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0|`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | |`cublasDtrsm_64`|12.0| | | | | | | | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | @@ -1611,9 +1611,9 @@ |`cublasSsyrkx`| | | | |`hipblasSsyrkx`|3.5.0| | | | |`rocblas_ssyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | |`hipblasSsyrkx_64`|6.3.0| | | |6.3.0|`rocblas_ssyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | -|`cublasStrmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrmm_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0|`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | -|`cublasStrmm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrmm_v2_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0|`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrsm`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | |`cublasStrsm_64`|12.0| | | | | | | | | 
|`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | @@ -1669,9 +1669,9 @@ |`cublasZsyrkx`| | | | |`hipblasZsyrkx_v2`|6.0.0| | | | |`rocblas_zsyrkx`|3.5.0| | | | | |`cublasZsyrkx_64`|12.0| | | |`hipblasZsyrkx_v2_64`|6.3.0| | | |6.3.0|`rocblas_zsyrkx_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | -|`cublasZtrmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrmm_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | -|`cublasZtrmm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrmm_v2_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrsm`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | |`cublasZtrsm_64`|12.0| | | | | | | | | |`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index fdac1cea..faf0f611 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1523,9 +1523,9 @@ |`cublasCsyrkx`| | | | |`rocblas_csyrkx`|3.5.0| | | | | |`cublasCsyrkx_64`|12.0| | | |`rocblas_csyrkx_64`|6.3.0| | | |6.3.0| |`cublasCtrmm`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | -|`cublasCtrmm_64`|12.0| | | | | | | | | | +|`cublasCtrmm_64`|12.0| | | |`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrmm_v2`| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | -|`cublasCtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasCtrmm_v2_64`|12.0| | | |`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrsm`| | | | |`rocblas_ctrsm`|3.5.0| | | | | |`cublasCtrsm_64`|12.0| | | |`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`rocblas_ctrsm`|3.5.0| | | 
| | @@ -1559,9 +1559,9 @@ |`cublasDsyrkx`| | | | |`rocblas_dsyrkx`|3.5.0| | | | | |`cublasDsyrkx_64`|12.0| | | |`rocblas_dsyrkx_64`|6.3.0| | | |6.3.0| |`cublasDtrmm`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | -|`cublasDtrmm_64`|12.0| | | | | | | | | | +|`cublasDtrmm_64`|12.0| | | |`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrmm_v2`| | | | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | -|`cublasDtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasDtrmm_v2_64`|12.0| | | |`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm`| | | | |`rocblas_dtrsm`|1.5.0| | | | | |`cublasDtrsm_64`|12.0| | | |`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`rocblas_dtrsm`|1.5.0| | | | | @@ -1611,9 +1611,9 @@ |`cublasSsyrkx`| | | | |`rocblas_ssyrkx`|3.5.0| | | | | |`cublasSsyrkx_64`|12.0| | | |`rocblas_ssyrkx_64`|6.3.0| | | |6.3.0| |`cublasStrmm`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | -|`cublasStrmm_64`|12.0| | | | | | | | | | +|`cublasStrmm_64`|12.0| | | |`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrmm_v2`| | | | |`rocblas_strmm`|3.5.0| |6.0.0| | | -|`cublasStrmm_v2_64`|12.0| | | | | | | | | | +|`cublasStrmm_v2_64`|12.0| | | |`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrsm`| | | | |`rocblas_strsm`|1.5.0| | | | | |`cublasStrsm_64`|12.0| | | |`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`rocblas_strsm`|1.5.0| | | | | @@ -1669,9 +1669,9 @@ |`cublasZsyrkx`| | | | |`rocblas_zsyrkx`|3.5.0| | | | | |`cublasZsyrkx_64`|12.0| | | |`rocblas_zsyrkx_64`|6.3.0| | | |6.3.0| |`cublasZtrmm`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | -|`cublasZtrmm_64`|12.0| | | | | | | | | | +|`cublasZtrmm_64`|12.0| | | |`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrmm_v2`| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | -|`cublasZtrmm_v2_64`|12.0| | | | | | | | | | +|`cublasZtrmm_v2_64`|12.0| | | |`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrsm`| | | | |`rocblas_ztrsm`|3.5.0| | | | | |`cublasZtrsm_64`|12.0| | | |`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`rocblas_ztrsm`|3.5.0| 
| | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 3d12911d..436db2a6 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -551,13 +551,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMM {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrmm_64", {"hipblasStrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasStrmm_64", {"hipblasStrmm_64", "rocblas_strmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrmm_64", {"hipblasDtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDtrmm_64", {"hipblasDtrmm_64", "rocblas_dtrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCtrmm", {"hipblasCtrmm_v2", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrmm_64", {"hipblasCtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCtrmm_64", {"hipblasCtrmm_v2_64", "rocblas_ctrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZtrmm", {"hipblasZtrmm_v2", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrmm_64", {"hipblasZtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZtrmm_64", {"hipblasZtrmm_v2_64", "rocblas_ztrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // ------------------------ CUBLAS BLAS - like extension (cublas_api.h) // GEAM @@ -918,13 +918,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMM {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasStrmm_v2_64", {"hipblasStrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasStrmm_v2_64", 
{"hipblasStrmm_64", "rocblas_strmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDtrmm_v2_64", {"hipblasDtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDtrmm_v2_64", {"hipblasDtrmm_64", "rocblas_dtrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCtrmm_v2", {"hipblasCtrmm_v2", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCtrmm_v2_64", {"hipblasCtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCtrmm_v2_64", {"hipblasCtrmm_v2_64", "rocblas_ctrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZtrmm_v2", {"hipblasZtrmm_v2", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZtrmm_v2_64", {"hipblasZtrmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZtrmm_v2_64", {"hipblasZtrmm_v2_64", "rocblas_ztrmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // NRM2 {"cublasSnrm2_v2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, @@ -2066,6 +2066,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasZgeam_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasChemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZhemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtrmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtrmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2497,6 +2501,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_zgeam_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_chemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zhemm_64", {HIP_6030, HIP_0, HIP_0, 
HIP_LATEST}}, + {"rocblas_strmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dtrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ctrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ztrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 7783384f..89919766 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -3084,6 +3084,34 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrmm_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, float* C, int64_t ldc); + // CHECK: blasStatus = hipblasStrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasStrmm_64(blasHandle, 
blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasStrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasStrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmm_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, double* C, int64_t ldc); + // CHECK: blasStatus = hipblasDtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasDtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t 
ldb, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* A, int64_t lda, const hipComplex* B, int64_t ldb, hipComplex* C, int64_t ldc); + // CHECK: blasStatus = hipblasCtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasCtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* A, int64_t lda, const hipDoubleComplex* B, int64_t ldb, hipDoubleComplex* C, int64_t ldc); + // CHECK: blasStatus = hipblasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, 
&dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = hipblasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index ffbcfb63..5ad41633 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3289,6 +3289,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZhemm_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); blasStatus = cublasZhemm_v2_64(blasHandle, blasSideMode, blasFillMode, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexb, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t 
lda, const float* B, int64_t ldb, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_strmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_strmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasStrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + blasStatus = cublasStrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64, &fC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_dtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + blasStatus = cublasDtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64, &dC, ldc_64); + + // CUDA: 
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ctrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_ctrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + blasStatus = cublasCtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmm_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_double_complex* alpha, 
const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ztrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + // CHECK-NEXT: blasStatus = rocblas_ztrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + blasStatus = cublasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); #endif return 0; From 8ea24215c156c1e1957960b31ba720147f96e66d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 6 Nov 2024 21:38:05 +0100 Subject: [PATCH 41/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 13 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 84 ++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 42 ++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 42 ++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 42 ++++ src/CUDA2HIP_BLAS_API_types.cpp | 180 +++++++++++++----- 5 files changed, 342 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 64f19bb9..8a4afeb6 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12800,6 +12800,7 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_408x64", "CUBLASLT_MATMUL_TILE_400x64", "CUBLASLT_MATMUL_TILE_392x64", + "CUBLASLT_MATMUL_TILE_384x8", "CUBLASLT_MATMUL_TILE_384x64", "CUBLASLT_MATMUL_TILE_384x128", "CUBLASLT_MATMUL_TILE_376x64", @@ -12831,8 +12832,31 @@ sub warnHipOnlyUnsupportedFunctions { 
"CUBLASLT_MATMUL_TILE_32x128", "CUBLASLT_MATMUL_TILE_328x64", "CUBLASLT_MATMUL_TILE_328x128", + "CUBLASLT_MATMUL_TILE_320x96", + "CUBLASLT_MATMUL_TILE_320x88", + "CUBLASLT_MATMUL_TILE_320x80", + "CUBLASLT_MATMUL_TILE_320x8", + "CUBLASLT_MATMUL_TILE_320x72", "CUBLASLT_MATMUL_TILE_320x64", + "CUBLASLT_MATMUL_TILE_320x56", + "CUBLASLT_MATMUL_TILE_320x48", + "CUBLASLT_MATMUL_TILE_320x40", + "CUBLASLT_MATMUL_TILE_320x32", + "CUBLASLT_MATMUL_TILE_320x24", + "CUBLASLT_MATMUL_TILE_320x200", + "CUBLASLT_MATMUL_TILE_320x192", + "CUBLASLT_MATMUL_TILE_320x184", + "CUBLASLT_MATMUL_TILE_320x176", + "CUBLASLT_MATMUL_TILE_320x168", + "CUBLASLT_MATMUL_TILE_320x160", + "CUBLASLT_MATMUL_TILE_320x16", + "CUBLASLT_MATMUL_TILE_320x152", + "CUBLASLT_MATMUL_TILE_320x144", + "CUBLASLT_MATMUL_TILE_320x136", "CUBLASLT_MATMUL_TILE_320x128", + "CUBLASLT_MATMUL_TILE_320x120", + "CUBLASLT_MATMUL_TILE_320x112", + "CUBLASLT_MATMUL_TILE_320x104", "CUBLASLT_MATMUL_TILE_312x64", "CUBLASLT_MATMUL_TILE_312x128", "CUBLASLT_MATMUL_TILE_304x64", @@ -12857,10 +12881,28 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_256x48", "CUBLASLT_MATMUL_TILE_256x40", "CUBLASLT_MATMUL_TILE_256x32", + "CUBLASLT_MATMUL_TILE_256x256", + "CUBLASLT_MATMUL_TILE_256x248", + "CUBLASLT_MATMUL_TILE_256x240", "CUBLASLT_MATMUL_TILE_256x24", + "CUBLASLT_MATMUL_TILE_256x232", + "CUBLASLT_MATMUL_TILE_256x224", + "CUBLASLT_MATMUL_TILE_256x216", + "CUBLASLT_MATMUL_TILE_256x208", + "CUBLASLT_MATMUL_TILE_256x200", "CUBLASLT_MATMUL_TILE_256x192", + "CUBLASLT_MATMUL_TILE_256x184", + "CUBLASLT_MATMUL_TILE_256x176", + "CUBLASLT_MATMUL_TILE_256x168", + "CUBLASLT_MATMUL_TILE_256x160", "CUBLASLT_MATMUL_TILE_256x16", + "CUBLASLT_MATMUL_TILE_256x152", + "CUBLASLT_MATMUL_TILE_256x144", + "CUBLASLT_MATMUL_TILE_256x136", "CUBLASLT_MATMUL_TILE_256x128", + "CUBLASLT_MATMUL_TILE_256x120", + "CUBLASLT_MATMUL_TILE_256x112", + "CUBLASLT_MATMUL_TILE_256x104", "CUBLASLT_MATMUL_TILE_24x768", "CUBLASLT_MATMUL_TILE_24x704", 
"CUBLASLT_MATMUL_TILE_24x640", @@ -14655,6 +14697,7 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_408x64", "CUBLASLT_MATMUL_TILE_400x64", "CUBLASLT_MATMUL_TILE_392x64", + "CUBLASLT_MATMUL_TILE_384x8", "CUBLASLT_MATMUL_TILE_384x64", "CUBLASLT_MATMUL_TILE_384x128", "CUBLASLT_MATMUL_TILE_376x64", @@ -14686,8 +14729,31 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_32x128", "CUBLASLT_MATMUL_TILE_328x64", "CUBLASLT_MATMUL_TILE_328x128", + "CUBLASLT_MATMUL_TILE_320x96", + "CUBLASLT_MATMUL_TILE_320x88", + "CUBLASLT_MATMUL_TILE_320x80", + "CUBLASLT_MATMUL_TILE_320x8", + "CUBLASLT_MATMUL_TILE_320x72", "CUBLASLT_MATMUL_TILE_320x64", + "CUBLASLT_MATMUL_TILE_320x56", + "CUBLASLT_MATMUL_TILE_320x48", + "CUBLASLT_MATMUL_TILE_320x40", + "CUBLASLT_MATMUL_TILE_320x32", + "CUBLASLT_MATMUL_TILE_320x24", + "CUBLASLT_MATMUL_TILE_320x200", + "CUBLASLT_MATMUL_TILE_320x192", + "CUBLASLT_MATMUL_TILE_320x184", + "CUBLASLT_MATMUL_TILE_320x176", + "CUBLASLT_MATMUL_TILE_320x168", + "CUBLASLT_MATMUL_TILE_320x160", + "CUBLASLT_MATMUL_TILE_320x16", + "CUBLASLT_MATMUL_TILE_320x152", + "CUBLASLT_MATMUL_TILE_320x144", + "CUBLASLT_MATMUL_TILE_320x136", "CUBLASLT_MATMUL_TILE_320x128", + "CUBLASLT_MATMUL_TILE_320x120", + "CUBLASLT_MATMUL_TILE_320x112", + "CUBLASLT_MATMUL_TILE_320x104", "CUBLASLT_MATMUL_TILE_312x64", "CUBLASLT_MATMUL_TILE_312x128", "CUBLASLT_MATMUL_TILE_304x64", @@ -14712,10 +14778,28 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_256x48", "CUBLASLT_MATMUL_TILE_256x40", "CUBLASLT_MATMUL_TILE_256x32", + "CUBLASLT_MATMUL_TILE_256x256", + "CUBLASLT_MATMUL_TILE_256x248", + "CUBLASLT_MATMUL_TILE_256x240", "CUBLASLT_MATMUL_TILE_256x24", + "CUBLASLT_MATMUL_TILE_256x232", + "CUBLASLT_MATMUL_TILE_256x224", + "CUBLASLT_MATMUL_TILE_256x216", + "CUBLASLT_MATMUL_TILE_256x208", + "CUBLASLT_MATMUL_TILE_256x200", "CUBLASLT_MATMUL_TILE_256x192", + "CUBLASLT_MATMUL_TILE_256x184", + "CUBLASLT_MATMUL_TILE_256x176", + "CUBLASLT_MATMUL_TILE_256x168", + 
"CUBLASLT_MATMUL_TILE_256x160", "CUBLASLT_MATMUL_TILE_256x16", + "CUBLASLT_MATMUL_TILE_256x152", + "CUBLASLT_MATMUL_TILE_256x144", + "CUBLASLT_MATMUL_TILE_256x136", "CUBLASLT_MATMUL_TILE_256x128", + "CUBLASLT_MATMUL_TILE_256x120", + "CUBLASLT_MATMUL_TILE_256x112", + "CUBLASLT_MATMUL_TILE_256x104", "CUBLASLT_MATMUL_TILE_24x768", "CUBLASLT_MATMUL_TILE_24x704", "CUBLASLT_MATMUL_TILE_24x640", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 7561d5c8..0076949a 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -526,10 +526,28 @@ |`CUBLASLT_MATMUL_TILE_24x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x152`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x184`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x208`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x216`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x224`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x232`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x240`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x248`|12.6| | | | | | | | | 
| +|`CUBLASLT_MATMUL_TILE_256x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x40`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x48`|12.6| | | | | | | | | | @@ -554,8 +572,31 @@ |`CUBLASLT_MATMUL_TILE_304x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_312x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_312x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_320x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x152`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x184`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_320x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_328x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_328x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | @@ -587,6 
+628,7 @@ |`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x8`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index ae05ad0e..09ee7a09 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -526,10 +526,28 @@ |`CUBLASLT_MATMUL_TILE_24x640`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x120`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x136`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x144`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x152`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x160`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x168`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x176`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x184`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x200`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x208`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x216`|12.6| | | | | | | | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_256x224`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x232`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x240`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x248`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x256`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x40`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x48`|12.6| | | | | | | | | | | | | | | | @@ -554,8 +572,31 @@ |`CUBLASLT_MATMUL_TILE_304x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_312x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_312x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x120`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_320x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x136`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x144`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x152`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x160`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x168`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x176`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x184`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x192`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x200`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x32`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x40`|12.6| | | | | | | | | | | | | | 
| | +|`CUBLASLT_MATMUL_TILE_320x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_320x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x88`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x96`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_328x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_328x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | | | | | | | @@ -587,6 +628,7 @@ |`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x8`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index faf0f611..182d4f23 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -526,10 +526,28 @@ |`CUBLASLT_MATMUL_TILE_24x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_24x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x128`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x144`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_256x152`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x184`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x192`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x208`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x216`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x224`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x232`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x240`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x248`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_256x256`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x32`|12.1| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x40`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_256x48`|12.6| | | | | | | | | | @@ -554,8 +572,31 @@ |`CUBLASLT_MATMUL_TILE_304x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_312x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_312x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_320x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x152`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x176`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x184`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x192`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_320x200`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_320x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_320x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_328x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_328x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_32x128`|10.1| | | | | | | | | | @@ -587,6 +628,7 @@ |`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x8`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index aaaa92ba..7ef3c214 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -651,54 +651,96 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_128x496", {"HIPBLASLT_MATMUL_TILE_128x496", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_128x504", {"HIPBLASLT_MATMUL_TILE_128x504", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_128x512", {"HIPBLASLT_MATMUL_TILE_128x512", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x8", {"CUBLASLT_MATMUL_TILE_192x8", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x16", {"CUBLASLT_MATMUL_TILE_192x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x24", {"CUBLASLT_MATMUL_TILE_192x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x32", {"CUBLASLT_MATMUL_TILE_192x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x40", {"CUBLASLT_MATMUL_TILE_192x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x48", {"CUBLASLT_MATMUL_TILE_192x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x56", {"CUBLASLT_MATMUL_TILE_192x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x72", {"CUBLASLT_MATMUL_TILE_192x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x80", {"CUBLASLT_MATMUL_TILE_192x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x88", {"CUBLASLT_MATMUL_TILE_192x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x96", {"CUBLASLT_MATMUL_TILE_192x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x104", {"CUBLASLT_MATMUL_TILE_192x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x112", {"CUBLASLT_MATMUL_TILE_192x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x120", {"CUBLASLT_MATMUL_TILE_192x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x136", {"CUBLASLT_MATMUL_TILE_192x136", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x144", {"CUBLASLT_MATMUL_TILE_192x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x152", {"CUBLASLT_MATMUL_TILE_192x152", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x160", {"CUBLASLT_MATMUL_TILE_192x160", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x168", {"CUBLASLT_MATMUL_TILE_192x168", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x176", {"CUBLASLT_MATMUL_TILE_192x176", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x184", {"CUBLASLT_MATMUL_TILE_192x184", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x200", {"CUBLASLT_MATMUL_TILE_192x200", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x208", {"CUBLASLT_MATMUL_TILE_192x208", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x216", {"CUBLASLT_MATMUL_TILE_192x216", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x224", {"CUBLASLT_MATMUL_TILE_192x224", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x232", {"CUBLASLT_MATMUL_TILE_192x232", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x240", {"CUBLASLT_MATMUL_TILE_192x240", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x248", {"CUBLASLT_MATMUL_TILE_192x248", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x264", 
{"CUBLASLT_MATMUL_TILE_192x264", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x272", {"CUBLASLT_MATMUL_TILE_192x272", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x280", {"CUBLASLT_MATMUL_TILE_192x280", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x288", {"CUBLASLT_MATMUL_TILE_192x288", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x296", {"CUBLASLT_MATMUL_TILE_192x296", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x304", {"CUBLASLT_MATMUL_TILE_192x304", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x312", {"CUBLASLT_MATMUL_TILE_192x312", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x320", {"CUBLASLT_MATMUL_TILE_192x320", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x328", {"CUBLASLT_MATMUL_TILE_192x328", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_192x336", {"CUBLASLT_MATMUL_TILE_192x336", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x8", {"CUBLASLT_MATMUL_TILE_256x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x16", {"CUBLASLT_MATMUL_TILE_256x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x24", {"CUBLASLT_MATMUL_TILE_256x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x40", {"CUBLASLT_MATMUL_TILE_256x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - 
{"CUBLASLT_MATMUL_TILE_256x48", {"CUBLASLT_MATMUL_TILE_256x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x56", {"CUBLASLT_MATMUL_TILE_256x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x72", {"CUBLASLT_MATMUL_TILE_256x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x80", {"CUBLASLT_MATMUL_TILE_256x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x88", {"CUBLASLT_MATMUL_TILE_256x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, - {"CUBLASLT_MATMUL_TILE_256x96", {"CUBLASLT_MATMUL_TILE_256x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x8", {"HIPBLASLT_MATMUL_TILE_192x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x16", {"HIPBLASLT_MATMUL_TILE_192x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x24", {"HIPBLASLT_MATMUL_TILE_192x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x32", {"HIPBLASLT_MATMUL_TILE_192x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x40", {"HIPBLASLT_MATMUL_TILE_192x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x48", {"HIPBLASLT_MATMUL_TILE_192x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x56", {"HIPBLASLT_MATMUL_TILE_192x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x72", {"HIPBLASLT_MATMUL_TILE_192x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_192x80", {"HIPBLASLT_MATMUL_TILE_192x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x88", {"HIPBLASLT_MATMUL_TILE_192x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x96", {"HIPBLASLT_MATMUL_TILE_192x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x104", {"HIPBLASLT_MATMUL_TILE_192x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x112", {"HIPBLASLT_MATMUL_TILE_192x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x120", {"HIPBLASLT_MATMUL_TILE_192x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x136", {"HIPBLASLT_MATMUL_TILE_192x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x144", {"HIPBLASLT_MATMUL_TILE_192x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x152", {"HIPBLASLT_MATMUL_TILE_192x152", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x160", {"HIPBLASLT_MATMUL_TILE_192x160", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x168", {"HIPBLASLT_MATMUL_TILE_192x168", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x176", {"HIPBLASLT_MATMUL_TILE_192x176", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x184", {"HIPBLASLT_MATMUL_TILE_192x184", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x200", {"HIPBLASLT_MATMUL_TILE_192x200", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x208", {"HIPBLASLT_MATMUL_TILE_192x208", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x216", {"HIPBLASLT_MATMUL_TILE_192x216", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x224", {"HIPBLASLT_MATMUL_TILE_192x224", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x232", {"HIPBLASLT_MATMUL_TILE_192x232", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x240", {"HIPBLASLT_MATMUL_TILE_192x240", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x248", {"HIPBLASLT_MATMUL_TILE_192x248", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x264", {"HIPBLASLT_MATMUL_TILE_192x264", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x272", {"HIPBLASLT_MATMUL_TILE_192x272", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x280", {"HIPBLASLT_MATMUL_TILE_192x280", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x288", {"HIPBLASLT_MATMUL_TILE_192x288", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x296", {"HIPBLASLT_MATMUL_TILE_192x296", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x304", {"HIPBLASLT_MATMUL_TILE_192x304", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x312", {"HIPBLASLT_MATMUL_TILE_192x312", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x320", {"HIPBLASLT_MATMUL_TILE_192x320", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x328", {"HIPBLASLT_MATMUL_TILE_192x328", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_192x336", {"HIPBLASLT_MATMUL_TILE_192x336", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x8", {"HIPBLASLT_MATMUL_TILE_256x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x16", {"HIPBLASLT_MATMUL_TILE_256x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x24", {"HIPBLASLT_MATMUL_TILE_256x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x40", {"HIPBLASLT_MATMUL_TILE_256x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x48", {"HIPBLASLT_MATMUL_TILE_256x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x56", {"HIPBLASLT_MATMUL_TILE_256x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x72", {"HIPBLASLT_MATMUL_TILE_256x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x80", {"HIPBLASLT_MATMUL_TILE_256x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x88", {"HIPBLASLT_MATMUL_TILE_256x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x96", {"HIPBLASLT_MATMUL_TILE_256x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x104", {"HIPBLASLT_MATMUL_TILE_256x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x112", 
{"HIPBLASLT_MATMUL_TILE_256x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x120", {"HIPBLASLT_MATMUL_TILE_256x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x136", {"HIPBLASLT_MATMUL_TILE_256x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x144", {"HIPBLASLT_MATMUL_TILE_256x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x152", {"HIPBLASLT_MATMUL_TILE_256x152", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x160", {"HIPBLASLT_MATMUL_TILE_256x160", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x168", {"HIPBLASLT_MATMUL_TILE_256x168", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x176", {"HIPBLASLT_MATMUL_TILE_256x176", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x184", {"HIPBLASLT_MATMUL_TILE_256x184", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x200", {"HIPBLASLT_MATMUL_TILE_256x200", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x208", {"HIPBLASLT_MATMUL_TILE_256x208", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x216", {"HIPBLASLT_MATMUL_TILE_256x216", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x224", {"HIPBLASLT_MATMUL_TILE_256x224", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x232", {"HIPBLASLT_MATMUL_TILE_256x232", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_256x240", {"HIPBLASLT_MATMUL_TILE_256x240", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x248", {"HIPBLASLT_MATMUL_TILE_256x248", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_256x256", {"HIPBLASLT_MATMUL_TILE_256x256", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x8", {"HIPBLASLT_MATMUL_TILE_320x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x16", {"HIPBLASLT_MATMUL_TILE_320x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x24", {"HIPBLASLT_MATMUL_TILE_320x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x32", {"HIPBLASLT_MATMUL_TILE_320x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x40", {"HIPBLASLT_MATMUL_TILE_320x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x48", {"HIPBLASLT_MATMUL_TILE_320x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x56", {"HIPBLASLT_MATMUL_TILE_320x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x72", {"HIPBLASLT_MATMUL_TILE_320x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x80", {"HIPBLASLT_MATMUL_TILE_320x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x88", {"HIPBLASLT_MATMUL_TILE_320x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x96", {"HIPBLASLT_MATMUL_TILE_320x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, 
UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x104", {"HIPBLASLT_MATMUL_TILE_320x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x112", {"HIPBLASLT_MATMUL_TILE_320x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x120", {"HIPBLASLT_MATMUL_TILE_320x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x136", {"HIPBLASLT_MATMUL_TILE_320x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x144", {"HIPBLASLT_MATMUL_TILE_320x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x152", {"HIPBLASLT_MATMUL_TILE_320x152", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x160", {"HIPBLASLT_MATMUL_TILE_320x160", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x168", {"HIPBLASLT_MATMUL_TILE_320x168", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x176", {"HIPBLASLT_MATMUL_TILE_320x176", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x184", {"HIPBLASLT_MATMUL_TILE_320x184", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x192", {"HIPBLASLT_MATMUL_TILE_320x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_320x200", {"HIPBLASLT_MATMUL_TILE_320x200", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x8", {"HIPBLASLT_MATMUL_TILE_384x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1815,6 +1857,48 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_256x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_256x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_256x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x136", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x144", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x152", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x160", {CUDA_126, CUDA_0, CUDA_0 }}, // A: 
CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x168", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x176", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x184", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x200", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x208", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x216", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x224", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x232", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x240", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x248", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_256x256", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_320x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x136", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x144", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x152", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x160", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x168", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x176", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x184", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_320x200", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x8", {CUDA_126, 
CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From 23e196bd16b676342c2df6d5e5364b57acd5c25a Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 7 Nov 2024 13:57:04 +0100 Subject: [PATCH 42/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` - Step 13 + `hipblas(S|D|C|Z)trsm(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++------ docs/tables/CUBLAS_API_supported_by_HIP.md | 16 +++++------ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 +++++------ src/CUDA2HIP_BLAS_API_functions.cpp | 20 +++++++------ .../synthetic/libraries/cublas2hipblas_v2.cu | 28 +++++++++++++++++++ 5 files changed, 64 insertions(+), 32 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 8a4afeb6..484d180d 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -4543,7 +4543,9 @@ sub simpleSubstitutions { subst("cublasCtrmv_v2_64", "hipblasCtrmv_v2_64", "library"); subst("cublasCtrsm", "hipblasCtrsm_v2", "library"); subst("cublasCtrsmBatched", "hipblasCtrsmBatched_v2", "library"); + subst("cublasCtrsm_64", "hipblasCtrsm_v2_64", "library"); subst("cublasCtrsm_v2", "hipblasCtrsm_v2", "library"); + subst("cublasCtrsm_v2_64", "hipblasCtrsm_v2_64", "library"); subst("cublasCtrsv", "hipblasCtrsv_v2", "library"); subst("cublasCtrsv_64", "hipblasCtrsv_v2_64", "library"); subst("cublasCtrsv_v2", "hipblasCtrsv_v2", "library"); @@ -4693,7 +4695,9 @@ sub simpleSubstitutions { subst("cublasDtrmv_v2_64", "hipblasDtrmv_64", "library"); subst("cublasDtrsm", "hipblasDtrsm", "library"); subst("cublasDtrsmBatched", "hipblasDtrsmBatched", "library"); + subst("cublasDtrsm_64", "hipblasDtrsm_64", "library"); subst("cublasDtrsm_v2", "hipblasDtrsm", "library"); + subst("cublasDtrsm_v2_64", "hipblasDtrsm_64", "library"); subst("cublasDtrsv", "hipblasDtrsv", "library"); subst("cublasDtrsv_64", 
"hipblasDtrsv_64", "library"); subst("cublasDtrsv_v2", "hipblasDtrsv", "library"); @@ -4940,7 +4944,9 @@ sub simpleSubstitutions { subst("cublasStrmv_v2_64", "hipblasStrmv_64", "library"); subst("cublasStrsm", "hipblasStrsm", "library"); subst("cublasStrsmBatched", "hipblasStrsmBatched", "library"); + subst("cublasStrsm_64", "hipblasStrsm_64", "library"); subst("cublasStrsm_v2", "hipblasStrsm", "library"); + subst("cublasStrsm_v2_64", "hipblasStrsm_64", "library"); subst("cublasStrsv", "hipblasStrsv", "library"); subst("cublasStrsv_64", "hipblasStrsv_64", "library"); subst("cublasStrsv_v2", "hipblasStrsv", "library"); @@ -5112,7 +5118,9 @@ sub simpleSubstitutions { subst("cublasZtrmv_v2_64", "hipblasZtrmv_v2_64", "library"); subst("cublasZtrsm", "hipblasZtrsm_v2", "library"); subst("cublasZtrsmBatched", "hipblasZtrsmBatched_v2", "library"); + subst("cublasZtrsm_64", "hipblasZtrsm_v2_64", "library"); subst("cublasZtrsm_v2", "hipblasZtrsm_v2", "library"); + subst("cublasZtrsm_v2_64", "hipblasZtrsm_v2_64", "library"); subst("cublasZtrsv", "hipblasZtrsv_v2", "library"); subst("cublasZtrsv_64", "hipblasZtrsv_v2_64", "library"); subst("cublasZtrsv_v2", "hipblasZtrsv_v2", "library"); @@ -11645,8 +11653,6 @@ sub warnHipOnlyUnsupportedFunctions { "cudnnAdvInferVersionCheck", "cudnnActivationStruct", "cublasZtrttp", - "cublasZtrsm_v2_64", - "cublasZtrsm_64", "cublasZtrsmBatched_64", "cublasZtpttr", "cublasZmatinvBatched", @@ -11666,8 +11672,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrsm_v2_64", - "cublasStrsm_64", "cublasStrsmBatched_64", "cublasStpttr", "cublasSmatinvBatched", @@ -11760,8 +11764,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrsm_v2_64", - "cublasDtrsm_64", "cublasDtrsmBatched_64", "cublasDtpttr", "cublasDmatinvBatched", @@ -11769,8 +11771,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDgemmGroupedBatched", "cublasDdgmm_64", 
"cublasCtrttp", - "cublasCtrsm_v2_64", - "cublasCtrsm_64", "cublasCtrsmBatched_64", "cublasCtpttr", "cublasCsyrkEx_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 0076949a..6c300f3b 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1569,9 +1569,9 @@ |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | | |`cublasCtrmm_v2_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrsm`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | | -|`cublasCtrsm_64`|12.0| | | | | | | | | | +|`cublasCtrsm_64`|12.0| | | |`hipblasCtrsm_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrsm_v2`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | | -|`cublasCtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasCtrsm_v2_64`|12.0| | | |`hipblasCtrsm_v2_64`|6.3.0| | | |6.3.0| |`cublasDgemm`| | | | |`hipblasDgemm`|1.8.2| | | | | |`cublasDgemmBatched`| | | | |`hipblasDgemmBatched`|1.8.2| | | | | |`cublasDgemmBatched_64`|12.0| | | |`hipblasDgemmBatched_64`|6.3.0| | | |6.3.0| @@ -1605,9 +1605,9 @@ |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | | |`cublasDtrmm_v2_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm`| | | | |`hipblasDtrsm`|1.8.2| | | | | -|`cublasDtrsm_64`|12.0| | | | | | | | | | +|`cublasDtrsm_64`|12.0| | | |`hipblasDtrsm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | | -|`cublasDtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasDtrsm_v2_64`|12.0| | | |`hipblasDtrsm_64`|6.3.0| | | |6.3.0| |`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | |`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | |`cublasHSHgemvBatched`|11.6| | | | | | | | | | @@ -1657,9 +1657,9 @@ |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | | |`cublasStrmm_v2_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0| |`cublasStrsm`| | | | |`hipblasStrsm`|1.8.2| | | | | -|`cublasStrsm_64`|12.0| | | | | | | | | | +|`cublasStrsm_64`|12.0| | | |`hipblasStrsm_64`|6.3.0| | 
| |6.3.0| |`cublasStrsm_v2`| | | | |`hipblasStrsm`|1.8.2| | | | | -|`cublasStrsm_v2_64`|12.0| | | | | | | | | | +|`cublasStrsm_v2_64`|12.0| | | |`hipblasStrsm_64`|6.3.0| | | |6.3.0| |`cublasTSSgemvBatched`|11.6| | | | | | | | | | |`cublasTSSgemvBatched_64`|12.0| | | | | | | | | | |`cublasTSSgemvStridedBatched`|11.6| | | | | | | | | | @@ -1715,9 +1715,9 @@ |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | | |`cublasZtrmm_v2_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrsm`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | | -|`cublasZtrsm_64`|12.0| | | | | | | | | | +|`cublasZtrsm_64`|12.0| | | |`hipblasZtrsm_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrsm_v2`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | | -|`cublasZtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasZtrsm_v2_64`|12.0| | | |`hipblasZtrsm_v2_64`|6.3.0| | | |6.3.0| ## **8. BLAS-like Extension** diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 09ee7a09..89d3f9b9 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1569,9 +1569,9 @@ |`cublasCtrmm_v2`| | | | |`hipblasCtrmm_v2`|6.0.0| | | | |`rocblas_ctrmm`|3.5.0| |6.0.0| | | |`cublasCtrmm_v2_64`|12.0| | | |`hipblasCtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrmm_64`|6.3.0| | | |6.3.0| |`cublasCtrsm`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | +|`cublasCtrsm_64`|12.0| | | |`hipblasCtrsm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasCtrsm_v2`| | | | |`hipblasCtrsm_v2`|6.0.0| | | | |`rocblas_ctrsm`|3.5.0| | | | | -|`cublasCtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_ctrsm_64`|6.2.0| | | | | +|`cublasCtrsm_v2_64`|12.0| | | |`hipblasCtrsm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrsm_64`|6.2.0| | | | | |`cublasDgemm`| | | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemmBatched`| | 
| | |`hipblasDgemmBatched`|1.8.2| | | | |`rocblas_dgemm_batched`|3.5.0| | | | | |`cublasDgemmBatched_64`|12.0| | | |`hipblasDgemmBatched_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_batched_64`|6.3.0| | | |6.3.0| @@ -1605,9 +1605,9 @@ |`cublasDtrmm_v2`| | | | |`hipblasDtrmm`|3.2.0| |6.0.0| | |`rocblas_dtrmm`|3.5.0| |6.0.0| | | |`cublasDtrmm_v2_64`|12.0| | | |`hipblasDtrmm_64`|6.3.0| | | |6.3.0|`rocblas_dtrmm_64`|6.3.0| | | |6.3.0| |`cublasDtrsm`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_64`|12.0| | | | | | | | | |`rocblas_dtrsm_64`|6.2.0| | | | | +|`cublasDtrsm_64`|12.0| | | |`hipblasDtrsm_64`|6.3.0| | | |6.3.0|`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | -|`cublasDtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_dtrsm_64`|6.2.0| | | | | +|`cublasDtrsm_v2_64`|12.0| | | |`hipblasDtrsm_64`|6.3.0| | | |6.3.0|`rocblas_dtrsm_64`|6.2.0| | | | | |`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | | | | | | | |`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | | | | | | | |`cublasHSHgemvBatched`|11.6| | | | | | | | | |`rocblas_hshgemv_batched`|6.0.0| | | | | @@ -1657,9 +1657,9 @@ |`cublasStrmm_v2`| | | | |`hipblasStrmm`|3.2.0| |6.0.0| | |`rocblas_strmm`|3.5.0| |6.0.0| | | |`cublasStrmm_v2_64`|12.0| | | |`hipblasStrmm_64`|6.3.0| | | |6.3.0|`rocblas_strmm_64`|6.3.0| | | |6.3.0| |`cublasStrsm`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_64`|12.0| | | | | | | | | |`rocblas_strsm_64`|6.2.0| | | | | +|`cublasStrsm_64`|12.0| | | |`hipblasStrsm_64`|6.3.0| | | |6.3.0|`rocblas_strsm_64`|6.2.0| | | | | |`cublasStrsm_v2`| | | | |`hipblasStrsm`|1.8.2| | | | |`rocblas_strsm`|1.5.0| | | | | -|`cublasStrsm_v2_64`|12.0| | | | | | | | | |`rocblas_strsm_64`|6.2.0| | | | | +|`cublasStrsm_v2_64`|12.0| | | |`hipblasStrsm_64`|6.3.0| | | |6.3.0|`rocblas_strsm_64`|6.2.0| | | | | |`cublasTSSgemvBatched`|11.6| | | | | | | | | 
|`rocblas_tssgemv_batched`|6.0.0| | | | | |`cublasTSSgemvBatched_64`|12.0| | | | | | | | | |`rocblas_tssgemv_batched_64`|6.2.0| | | | | |`cublasTSSgemvStridedBatched`|11.6| | | | | | | | | |`rocblas_tssgemv_strided_batched`|6.0.0| | | | | @@ -1715,9 +1715,9 @@ |`cublasZtrmm_v2`| | | | |`hipblasZtrmm_v2`|6.0.0| | | | |`rocblas_ztrmm`|3.5.0| |6.0.0| | | |`cublasZtrmm_v2_64`|12.0| | | |`hipblasZtrmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ztrmm_64`|6.3.0| | | |6.3.0| |`cublasZtrsm`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_64`|12.0| | | | | | | | | |`rocblas_ztrsm_64`|6.2.0| | | | | +|`cublasZtrsm_64`|12.0| | | |`hipblasZtrsm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ztrsm_64`|6.2.0| | | | | |`cublasZtrsm_v2`| | | | |`hipblasZtrsm_v2`|6.0.0| | | | |`rocblas_ztrsm`|3.5.0| | | | | -|`cublasZtrsm_v2_64`|12.0| | | | | | | | | |`rocblas_ztrsm_64`|6.2.0| | | | | +|`cublasZtrsm_v2_64`|12.0| | | |`hipblasZtrsm_v2_64`|6.3.0| | | |6.3.0|`rocblas_ztrsm_64`|6.2.0| | | | | ## **8. 
BLAS-like Extension** diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 436db2a6..59470f93 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -541,13 +541,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRSM {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrsm_64", {"hipblasStrsm_64", "rocblas_strsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, + {"cublasStrsm_64", {"hipblasStrsm_64", "rocblas_strsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrsm_64", {"hipblasDtrsm_64", "rocblas_dtrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, + {"cublasDtrsm_64", {"hipblasDtrsm_64", "rocblas_dtrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCtrsm", {"hipblasCtrsm_v2", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrsm_64", {"hipblasCtrsm_64", "rocblas_ctrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, + {"cublasCtrsm_64", {"hipblasCtrsm_v2_64", "rocblas_ctrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZtrsm", {"hipblasZtrsm_v2", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrsm_64", {"hipblasZtrsm_64", "rocblas_ztrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, + {"cublasZtrsm_64", {"hipblasZtrsm_v2_64", "rocblas_ztrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // TRMM {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -908,13 +908,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRSM {"cublasStrsm_v2", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_3}}, - {"cublasStrsm_v2_64", {"hipblasStrsm_64", "rocblas_strsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, + {"cublasStrsm_v2_64", {"hipblasStrsm_64", "rocblas_strsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDtrsm_v2", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDtrsm_v2_64", {"hipblasDtrsm_64", "rocblas_dtrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, + {"cublasDtrsm_v2_64", {"hipblasDtrsm_64", "rocblas_dtrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCtrsm_v2", {"hipblasCtrsm_v2", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCtrsm_v2_64", {"hipblasCtrsm_64", "rocblas_ctrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, + {"cublasCtrsm_v2_64", {"hipblasCtrsm_v2_64", "rocblas_ctrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZtrsm_v2", {"hipblasZtrsm_v2", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZtrsm_v2_64", {"hipblasZtrsm_64", "rocblas_ztrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, + {"cublasZtrsm_v2_64", {"hipblasZtrsm_v2_64", "rocblas_ztrsm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // TRMM {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -2070,6 +2070,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDtrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCtrmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZtrmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStrsm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtrsm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtrsm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtrsm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, diff --git 
a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 89919766..1f92cd9a 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -3112,6 +3112,34 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); blasStatus = cublasZtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); blasStatus = cublasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, float* B, int64_t ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsm_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* AP, int64_t lda, float* BP, int64_t ldb); + // CHECK: blasStatus = hipblasStrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + // CHECK-NEXT: blasStatus = hipblasStrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + blasStatus = cublasStrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + blasStatus = cublasStrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, 
m_64, n_64, &fa, &fA, lda_64, &fB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, double* B, int64_t ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsm_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* AP, int64_t lda, double* BP, int64_t ldb); + // CHECK: blasStatus = hipblasDtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + // CHECK-NEXT: blasStatus = hipblasDtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + blasStatus = cublasDtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + blasStatus = cublasDtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, &dA, lda_64, &dB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, cuComplex* B, int64_t ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* AP, int64_t lda, hipComplex* BP, int64_t ldb); + // CHECK: blasStatus = hipblasCtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + // CHECK-NEXT: blasStatus 
= hipblasCtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + blasStatus = cublasCtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + blasStatus = cublasCtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, &complexA, lda_64, &complexB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* B, int64_t ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, hipDoubleComplex* BP, int64_t ldb); + // CHECK: blasStatus = hipblasZtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + // CHECK-NEXT: blasStatus = hipblasZtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + blasStatus = cublasZtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + blasStatus = cublasZtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); #endif return 0; From 39f1bf4e920234c49a5b364db61438a2cb140351 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 7 Nov 2024 14:26:05 +0100 Subject: [PATCH 43/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` 
- Step 14 - BlasLt API + Updated the regenerated `hipify-perl` and `BLAS` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 100 ++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 50 +++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 50 +++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 50 +++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 100 ++++++++++++++++++ 5 files changed, 350 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 484d180d..832c12d0 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12760,7 +12760,22 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_536x64", "CUBLASLT_MATMUL_TILE_528x64", "CUBLASLT_MATMUL_TILE_520x64", + "CUBLASLT_MATMUL_TILE_512x96", + "CUBLASLT_MATMUL_TILE_512x88", + "CUBLASLT_MATMUL_TILE_512x80", + "CUBLASLT_MATMUL_TILE_512x8", + "CUBLASLT_MATMUL_TILE_512x72", "CUBLASLT_MATMUL_TILE_512x64", + "CUBLASLT_MATMUL_TILE_512x56", + "CUBLASLT_MATMUL_TILE_512x48", + "CUBLASLT_MATMUL_TILE_512x40", + "CUBLASLT_MATMUL_TILE_512x32", + "CUBLASLT_MATMUL_TILE_512x24", + "CUBLASLT_MATMUL_TILE_512x16", + "CUBLASLT_MATMUL_TILE_512x128", + "CUBLASLT_MATMUL_TILE_512x120", + "CUBLASLT_MATMUL_TILE_512x112", + "CUBLASLT_MATMUL_TILE_512x104", "CUBLASLT_MATMUL_TILE_504x64", "CUBLASLT_MATMUL_TILE_496x64", "CUBLASLT_MATMUL_TILE_48x768", @@ -12780,7 +12795,24 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_472x64", "CUBLASLT_MATMUL_TILE_464x64", "CUBLASLT_MATMUL_TILE_456x64", + "CUBLASLT_MATMUL_TILE_448x96", + "CUBLASLT_MATMUL_TILE_448x88", + "CUBLASLT_MATMUL_TILE_448x80", + "CUBLASLT_MATMUL_TILE_448x8", + "CUBLASLT_MATMUL_TILE_448x72", "CUBLASLT_MATMUL_TILE_448x64", + "CUBLASLT_MATMUL_TILE_448x56", + "CUBLASLT_MATMUL_TILE_448x48", + "CUBLASLT_MATMUL_TILE_448x40", + "CUBLASLT_MATMUL_TILE_448x32", + "CUBLASLT_MATMUL_TILE_448x24", + "CUBLASLT_MATMUL_TILE_448x16", + "CUBLASLT_MATMUL_TILE_448x144", + "CUBLASLT_MATMUL_TILE_448x136", + "CUBLASLT_MATMUL_TILE_448x128", + "CUBLASLT_MATMUL_TILE_448x120", 
+ "CUBLASLT_MATMUL_TILE_448x112", + "CUBLASLT_MATMUL_TILE_448x104", "CUBLASLT_MATMUL_TILE_440x64", "CUBLASLT_MATMUL_TILE_432x64", "CUBLASLT_MATMUL_TILE_424x64", @@ -12800,9 +12832,27 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_408x64", "CUBLASLT_MATMUL_TILE_400x64", "CUBLASLT_MATMUL_TILE_392x64", + "CUBLASLT_MATMUL_TILE_384x96", + "CUBLASLT_MATMUL_TILE_384x88", + "CUBLASLT_MATMUL_TILE_384x80", "CUBLASLT_MATMUL_TILE_384x8", + "CUBLASLT_MATMUL_TILE_384x72", "CUBLASLT_MATMUL_TILE_384x64", + "CUBLASLT_MATMUL_TILE_384x56", + "CUBLASLT_MATMUL_TILE_384x48", + "CUBLASLT_MATMUL_TILE_384x40", + "CUBLASLT_MATMUL_TILE_384x32", + "CUBLASLT_MATMUL_TILE_384x24", + "CUBLASLT_MATMUL_TILE_384x168", + "CUBLASLT_MATMUL_TILE_384x160", + "CUBLASLT_MATMUL_TILE_384x16", + "CUBLASLT_MATMUL_TILE_384x152", + "CUBLASLT_MATMUL_TILE_384x144", + "CUBLASLT_MATMUL_TILE_384x136", "CUBLASLT_MATMUL_TILE_384x128", + "CUBLASLT_MATMUL_TILE_384x120", + "CUBLASLT_MATMUL_TILE_384x112", + "CUBLASLT_MATMUL_TILE_384x104", "CUBLASLT_MATMUL_TILE_376x64", "CUBLASLT_MATMUL_TILE_376x128", "CUBLASLT_MATMUL_TILE_368x64", @@ -14657,7 +14707,22 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_536x64", "CUBLASLT_MATMUL_TILE_528x64", "CUBLASLT_MATMUL_TILE_520x64", + "CUBLASLT_MATMUL_TILE_512x96", + "CUBLASLT_MATMUL_TILE_512x88", + "CUBLASLT_MATMUL_TILE_512x80", + "CUBLASLT_MATMUL_TILE_512x8", + "CUBLASLT_MATMUL_TILE_512x72", "CUBLASLT_MATMUL_TILE_512x64", + "CUBLASLT_MATMUL_TILE_512x56", + "CUBLASLT_MATMUL_TILE_512x48", + "CUBLASLT_MATMUL_TILE_512x40", + "CUBLASLT_MATMUL_TILE_512x32", + "CUBLASLT_MATMUL_TILE_512x24", + "CUBLASLT_MATMUL_TILE_512x16", + "CUBLASLT_MATMUL_TILE_512x128", + "CUBLASLT_MATMUL_TILE_512x120", + "CUBLASLT_MATMUL_TILE_512x112", + "CUBLASLT_MATMUL_TILE_512x104", "CUBLASLT_MATMUL_TILE_504x64", "CUBLASLT_MATMUL_TILE_496x64", "CUBLASLT_MATMUL_TILE_48x768", @@ -14677,7 +14742,24 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_472x64", 
"CUBLASLT_MATMUL_TILE_464x64", "CUBLASLT_MATMUL_TILE_456x64", + "CUBLASLT_MATMUL_TILE_448x96", + "CUBLASLT_MATMUL_TILE_448x88", + "CUBLASLT_MATMUL_TILE_448x80", + "CUBLASLT_MATMUL_TILE_448x8", + "CUBLASLT_MATMUL_TILE_448x72", "CUBLASLT_MATMUL_TILE_448x64", + "CUBLASLT_MATMUL_TILE_448x56", + "CUBLASLT_MATMUL_TILE_448x48", + "CUBLASLT_MATMUL_TILE_448x40", + "CUBLASLT_MATMUL_TILE_448x32", + "CUBLASLT_MATMUL_TILE_448x24", + "CUBLASLT_MATMUL_TILE_448x16", + "CUBLASLT_MATMUL_TILE_448x144", + "CUBLASLT_MATMUL_TILE_448x136", + "CUBLASLT_MATMUL_TILE_448x128", + "CUBLASLT_MATMUL_TILE_448x120", + "CUBLASLT_MATMUL_TILE_448x112", + "CUBLASLT_MATMUL_TILE_448x104", "CUBLASLT_MATMUL_TILE_440x64", "CUBLASLT_MATMUL_TILE_432x64", "CUBLASLT_MATMUL_TILE_424x64", @@ -14697,9 +14779,27 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_408x64", "CUBLASLT_MATMUL_TILE_400x64", "CUBLASLT_MATMUL_TILE_392x64", + "CUBLASLT_MATMUL_TILE_384x96", + "CUBLASLT_MATMUL_TILE_384x88", + "CUBLASLT_MATMUL_TILE_384x80", "CUBLASLT_MATMUL_TILE_384x8", + "CUBLASLT_MATMUL_TILE_384x72", "CUBLASLT_MATMUL_TILE_384x64", + "CUBLASLT_MATMUL_TILE_384x56", + "CUBLASLT_MATMUL_TILE_384x48", + "CUBLASLT_MATMUL_TILE_384x40", + "CUBLASLT_MATMUL_TILE_384x32", + "CUBLASLT_MATMUL_TILE_384x24", + "CUBLASLT_MATMUL_TILE_384x168", + "CUBLASLT_MATMUL_TILE_384x160", + "CUBLASLT_MATMUL_TILE_384x16", + "CUBLASLT_MATMUL_TILE_384x152", + "CUBLASLT_MATMUL_TILE_384x144", + "CUBLASLT_MATMUL_TILE_384x136", "CUBLASLT_MATMUL_TILE_384x128", + "CUBLASLT_MATMUL_TILE_384x120", + "CUBLASLT_MATMUL_TILE_384x112", + "CUBLASLT_MATMUL_TILE_384x104", "CUBLASLT_MATMUL_TILE_376x64", "CUBLASLT_MATMUL_TILE_376x128", "CUBLASLT_MATMUL_TILE_368x64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 6c300f3b..9b2ba4a2 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -626,9 +626,27 @@ |`CUBLASLT_MATMUL_TILE_368x64`|12.6| | | | | | | | 
| | |`CUBLASLT_MATMUL_TILE_376x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x120`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x152`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x72`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | @@ -648,7 +666,24 @@ |`CUBLASLT_MATMUL_TILE_424x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_432x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_440x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x120`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x144`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_448x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_448x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_456x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_464x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_472x64`|12.6| | | | | | | | | | @@ -668,7 +703,22 @@ |`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_496x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_504x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x120`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_520x64`|12.6| | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_528x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_536x64`|12.6| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 89d3f9b9..466b842a 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -626,9 +626,27 @@ |`CUBLASLT_MATMUL_TILE_368x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_376x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x120`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x136`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x144`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x152`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x160`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x168`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x32`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x72`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x88`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x96`|12.6| 
| | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | | | | | | | @@ -648,7 +666,24 @@ |`CUBLASLT_MATMUL_TILE_424x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_432x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_440x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x120`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x136`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x144`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x32`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_448x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x88`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x96`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_456x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_464x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_472x64`|12.6| | | | | | | | | | | | | | | | @@ -668,7 +703,22 @@ |`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_496x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_504x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x120`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x128`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x32`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x88`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x96`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_520x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_528x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_536x64`|12.6| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 182d4f23..e534ed47 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -626,9 +626,27 @@ |`CUBLASLT_MATMUL_TILE_368x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_376x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_376x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x120`|12.6| | | 
| | | | | | | |`CUBLASLT_MATMUL_TILE_384x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x152`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x160`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x168`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x72`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_384x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_384x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_392x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_400x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_408x64`|12.6| | | | | | | | | | @@ -648,7 +666,24 @@ |`CUBLASLT_MATMUL_TILE_424x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_432x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_440x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x120`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x136`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x144`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x48`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_448x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_448x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_448x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_456x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_464x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_472x64`|12.6| | | | | | | | | | @@ -668,7 +703,22 @@ |`CUBLASLT_MATMUL_TILE_48x768`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_496x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_504x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x120`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x128`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_512x64`|10.1| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_512x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_520x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_528x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_536x64`|12.6| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 7ef3c214..eecb8099 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ 
-741,6 +741,56 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_320x192", {"HIPBLASLT_MATMUL_TILE_320x192", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_320x200", {"HIPBLASLT_MATMUL_TILE_320x200", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_384x8", {"HIPBLASLT_MATMUL_TILE_384x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x16", {"HIPBLASLT_MATMUL_TILE_384x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x24", {"HIPBLASLT_MATMUL_TILE_384x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x32", {"HIPBLASLT_MATMUL_TILE_384x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x40", {"HIPBLASLT_MATMUL_TILE_384x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x48", {"HIPBLASLT_MATMUL_TILE_384x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x56", {"HIPBLASLT_MATMUL_TILE_384x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x72", {"HIPBLASLT_MATMUL_TILE_384x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x80", {"HIPBLASLT_MATMUL_TILE_384x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x88", {"HIPBLASLT_MATMUL_TILE_384x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x96", {"HIPBLASLT_MATMUL_TILE_384x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x104", {"HIPBLASLT_MATMUL_TILE_384x104", "", 
CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x112", {"HIPBLASLT_MATMUL_TILE_384x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x120", {"HIPBLASLT_MATMUL_TILE_384x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x136", {"HIPBLASLT_MATMUL_TILE_384x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x144", {"HIPBLASLT_MATMUL_TILE_384x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x152", {"HIPBLASLT_MATMUL_TILE_384x152", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x160", {"HIPBLASLT_MATMUL_TILE_384x160", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_384x168", {"HIPBLASLT_MATMUL_TILE_384x168", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x8", {"HIPBLASLT_MATMUL_TILE_448x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x16", {"HIPBLASLT_MATMUL_TILE_448x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x24", {"HIPBLASLT_MATMUL_TILE_448x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x32", {"HIPBLASLT_MATMUL_TILE_448x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x40", {"HIPBLASLT_MATMUL_TILE_448x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x48", {"HIPBLASLT_MATMUL_TILE_448x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x56", 
{"HIPBLASLT_MATMUL_TILE_448x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x72", {"HIPBLASLT_MATMUL_TILE_448x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x80", {"HIPBLASLT_MATMUL_TILE_448x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x88", {"HIPBLASLT_MATMUL_TILE_448x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x96", {"HIPBLASLT_MATMUL_TILE_448x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x104", {"HIPBLASLT_MATMUL_TILE_448x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x112", {"HIPBLASLT_MATMUL_TILE_448x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x120", {"HIPBLASLT_MATMUL_TILE_448x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x128", {"HIPBLASLT_MATMUL_TILE_448x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x136", {"HIPBLASLT_MATMUL_TILE_448x136", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_448x144", {"HIPBLASLT_MATMUL_TILE_448x144", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x8", {"HIPBLASLT_MATMUL_TILE_512x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x16", {"HIPBLASLT_MATMUL_TILE_512x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x24", {"HIPBLASLT_MATMUL_TILE_512x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_512x32", {"HIPBLASLT_MATMUL_TILE_512x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x40", {"HIPBLASLT_MATMUL_TILE_512x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x48", {"HIPBLASLT_MATMUL_TILE_512x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x56", {"HIPBLASLT_MATMUL_TILE_512x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x72", {"HIPBLASLT_MATMUL_TILE_512x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x80", {"HIPBLASLT_MATMUL_TILE_512x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x88", {"HIPBLASLT_MATMUL_TILE_512x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x96", {"HIPBLASLT_MATMUL_TILE_512x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x104", {"HIPBLASLT_MATMUL_TILE_512x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x112", {"HIPBLASLT_MATMUL_TILE_512x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x120", {"HIPBLASLT_MATMUL_TILE_512x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_512x128", {"HIPBLASLT_MATMUL_TILE_512x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, 
{"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1899,6 +1949,56 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_320x192", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_320x200", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_384x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: 
CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x136", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x144", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x152", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x160", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_384x168", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_448x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x136", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_448x144", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x72", {CUDA_126, CUDA_0, 
CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_512x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From af190dbba34eae252f133b27ff9c41e2b6fe45c0 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 8 Nov 2024 15:05:24 +0100 Subject: [PATCH 44/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` - Step 14 + `hipblas(S|D|C|Z)trsm_batched(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 8 ++++---- docs/tables/CUBLAS_API_supported_by_HIP.md | 8 ++++---- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 8 ++++---- src/CUDA2HIP_BLAS_API_functions.cpp | 12 +++++++---- .../synthetic/libraries/cublas2hipblas_v2.cu | 20 +++++++++++++++++++ 5 files changed, 
40 insertions(+), 16 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 832c12d0..f0860edb 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -4543,6 +4543,7 @@ sub simpleSubstitutions { subst("cublasCtrmv_v2_64", "hipblasCtrmv_v2_64", "library"); subst("cublasCtrsm", "hipblasCtrsm_v2", "library"); subst("cublasCtrsmBatched", "hipblasCtrsmBatched_v2", "library"); + subst("cublasCtrsmBatched_64", "hipblasCtrsmBatched_v2_64", "library"); subst("cublasCtrsm_64", "hipblasCtrsm_v2_64", "library"); subst("cublasCtrsm_v2", "hipblasCtrsm_v2", "library"); subst("cublasCtrsm_v2_64", "hipblasCtrsm_v2_64", "library"); @@ -4695,6 +4696,7 @@ sub simpleSubstitutions { subst("cublasDtrmv_v2_64", "hipblasDtrmv_64", "library"); subst("cublasDtrsm", "hipblasDtrsm", "library"); subst("cublasDtrsmBatched", "hipblasDtrsmBatched", "library"); + subst("cublasDtrsmBatched_64", "hipblasDtrsmBatched_64", "library"); subst("cublasDtrsm_64", "hipblasDtrsm_64", "library"); subst("cublasDtrsm_v2", "hipblasDtrsm", "library"); subst("cublasDtrsm_v2_64", "hipblasDtrsm_64", "library"); @@ -4944,6 +4946,7 @@ sub simpleSubstitutions { subst("cublasStrmv_v2_64", "hipblasStrmv_64", "library"); subst("cublasStrsm", "hipblasStrsm", "library"); subst("cublasStrsmBatched", "hipblasStrsmBatched", "library"); + subst("cublasStrsmBatched_64", "hipblasStrsmBatched_64", "library"); subst("cublasStrsm_64", "hipblasStrsm_64", "library"); subst("cublasStrsm_v2", "hipblasStrsm", "library"); subst("cublasStrsm_v2_64", "hipblasStrsm_64", "library"); @@ -5118,6 +5121,7 @@ sub simpleSubstitutions { subst("cublasZtrmv_v2_64", "hipblasZtrmv_v2_64", "library"); subst("cublasZtrsm", "hipblasZtrsm_v2", "library"); subst("cublasZtrsmBatched", "hipblasZtrsmBatched_v2", "library"); + subst("cublasZtrsmBatched_64", "hipblasZtrsmBatched_v2_64", "library"); subst("cublasZtrsm_64", "hipblasZtrsm_v2_64", "library"); subst("cublasZtrsm_v2", "hipblasZtrsm_v2", "library"); subst("cublasZtrsm_v2_64", 
"hipblasZtrsm_v2_64", "library"); @@ -11653,7 +11657,6 @@ sub warnHipOnlyUnsupportedFunctions { "cudnnAdvInferVersionCheck", "cudnnActivationStruct", "cublasZtrttp", - "cublasZtrsmBatched_64", "cublasZtpttr", "cublasZmatinvBatched", "cublasZgemm3m_64", @@ -11672,7 +11675,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrsmBatched_64", "cublasStpttr", "cublasSmatinvBatched", "cublasShutdown", @@ -11764,14 +11766,12 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrsmBatched_64", "cublasDtpttr", "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", "cublasDdgmm_64", "cublasCtrttp", - "cublasCtrsmBatched_64", "cublasCtpttr", "cublasCsyrkEx_64", "cublasCsyrkEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 9b2ba4a2..f4bb304b 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1801,7 +1801,7 @@ |`cublasCsyrkEx_64`|12.0| | | | | | | | | | |`cublasCtpttr`| | | | | | | | | | | |`cublasCtrsmBatched`| | | | |`hipblasCtrsmBatched_v2`|6.0.0| | | | | -|`cublasCtrsmBatched_64`|12.0| | | | | | | | | | +|`cublasCtrsmBatched_64`|12.0| | | |`hipblasCtrsmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrttp`| | | | | | | | | | | |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | | |`cublasDdgmm_64`|12.0| | | | | | | | | | @@ -1819,7 +1819,7 @@ |`cublasDotcEx_64`|12.0| | | |`hipblasDotcEx_v2_64`|6.2.0| | | | | |`cublasDtpttr`| | | | | | | | | | | |`cublasDtrsmBatched`| | | | |`hipblasDtrsmBatched`|3.2.0| | | | | -|`cublasDtrsmBatched_64`|12.0| | | | | | | | | | +|`cublasDtrsmBatched_64`|12.0| | | |`hipblasDtrsmBatched_64`|6.3.0| | | |6.3.0| |`cublasDtrttp`| | | | | | | | | | | |`cublasGemmBatchedEx`|9.1| | | |`hipblasGemmBatchedEx_v2`|6.0.0| | | | | |`cublasGemmBatchedEx_64`|12.0| | | | | | | | | | @@ -1853,7 +1853,7 @@ 
|`cublasSmatinvBatched`| | | | | | | | | | | |`cublasStpttr`| | | | | | | | | | | |`cublasStrsmBatched`| | | | |`hipblasStrsmBatched`|3.2.0| | | | | -|`cublasStrsmBatched_64`|12.0| | | | | | | | | | +|`cublasStrsmBatched_64`|12.0| | | |`hipblasStrsmBatched_64`|6.3.0| | | |6.3.0| |`cublasStrttp`| | | | | | | | | | | |`cublasSwapEx`|10.1| | | | | | | | | | |`cublasSwapEx_64`|12.0| | | | | | | | | | @@ -1870,7 +1870,7 @@ |`cublasZmatinvBatched`| | | | | | | | | | | |`cublasZtpttr`| | | | | | | | | | | |`cublasZtrsmBatched`| | | | |`hipblasZtrsmBatched_v2`|6.0.0| | | | | -|`cublasZtrsmBatched_64`|12.0| | | | | | | | | | +|`cublasZtrsmBatched_64`|12.0| | | |`hipblasZtrsmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasZtrttp`| | | | | | | | | | | ## **9. BLASLt Function Reference** diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 466b842a..1a9b319d 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1801,7 +1801,7 @@ |`cublasCsyrkEx_64`|12.0| | | | | | | | | | | | | | | | |`cublasCtpttr`| | | | | | | | | | | | | | | | | |`cublasCtrsmBatched`| | | | |`hipblasCtrsmBatched_v2`|6.0.0| | | | |`rocblas_ctrsm_batched`|3.5.0| | | | | -|`cublasCtrsmBatched_64`|12.0| | | | | | | | | |`rocblas_ctrsm_batched_64`|6.2.0| | | | | +|`cublasCtrsmBatched_64`|12.0| | | |`hipblasCtrsmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_ctrsm_batched_64`|6.2.0| | | | | |`cublasCtrttp`| | | | | | | | | | | | | | | | | |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | |`rocblas_ddgmm`|3.5.0| | | | | |`cublasDdgmm_64`|12.0| | | | | | | | | | | | | | | | @@ -1819,7 +1819,7 @@ |`cublasDotcEx_64`|12.0| | | |`hipblasDotcEx_v2_64`|6.2.0| | | | |`rocblas_dotc_ex_64`|6.1.0| | | | | |`cublasDtpttr`| | | | | | | | | | | | | | | | | |`cublasDtrsmBatched`| | | | |`hipblasDtrsmBatched`|3.2.0| | | | |`rocblas_dtrsm_batched`|3.5.0| | | | | -|`cublasDtrsmBatched_64`|12.0| | | | | 
| | | | |`rocblas_dtrsm_batched_64`|6.2.0| | | | | +|`cublasDtrsmBatched_64`|12.0| | | |`hipblasDtrsmBatched_64`|6.3.0| | | |6.3.0|`rocblas_dtrsm_batched_64`|6.2.0| | | | | |`cublasDtrttp`| | | | | | | | | | | | | | | | | |`cublasGemmBatchedEx`|9.1| | | |`hipblasGemmBatchedEx_v2`|6.0.0| | | | |`rocblas_gemm_batched_ex`|3.5.0| | | | | |`cublasGemmBatchedEx_64`|12.0| | | | | | | | | | | | | | | | @@ -1853,7 +1853,7 @@ |`cublasSmatinvBatched`| | | | | | | | | | | | | | | | | |`cublasStpttr`| | | | | | | | | | | | | | | | | |`cublasStrsmBatched`| | | | |`hipblasStrsmBatched`|3.2.0| | | | |`rocblas_strsm_batched`|3.5.0| | | | | -|`cublasStrsmBatched_64`|12.0| | | | | | | | | |`rocblas_strsm_batched_64`|6.2.0| | | | | +|`cublasStrsmBatched_64`|12.0| | | |`hipblasStrsmBatched_64`|6.3.0| | | |6.3.0|`rocblas_strsm_batched_64`|6.2.0| | | | | |`cublasStrttp`| | | | | | | | | | | | | | | | | |`cublasSwapEx`|10.1| | | | | | | | | | | | | | | | |`cublasSwapEx_64`|12.0| | | | | | | | | | | | | | | | @@ -1870,7 +1870,7 @@ |`cublasZmatinvBatched`| | | | | | | | | | | | | | | | | |`cublasZtpttr`| | | | | | | | | | | | | | | | | |`cublasZtrsmBatched`| | | | |`hipblasZtrsmBatched_v2`|6.0.0| | | | |`rocblas_ztrsm_batched`|3.5.0| | | | | -|`cublasZtrsmBatched_64`|12.0| | | | | | | | | |`rocblas_ztrsm_batched_64`|6.2.0| | | | | +|`cublasZtrsmBatched_64`|12.0| | | |`hipblasZtrsmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_ztrsm_batched_64`|6.2.0| | | | | |`cublasZtrttp`| | | | | | | | | | | | | | | | | ## **9. 
BLASLt Function Reference** diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 59470f93..7ad123f1 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -590,13 +590,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRSM - Batched Triangular Solver {"cublasStrsmBatched", {"hipblasStrsmBatched", "rocblas_strsm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasStrsmBatched_64", {"hipblasStrsmBatched_64", "rocblas_strsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasStrsmBatched_64", {"hipblasStrsmBatched_64", "rocblas_strsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasDtrsmBatched", {"hipblasDtrsmBatched", "rocblas_dtrsm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasDtrsmBatched_64", {"hipblasDtrsmBatched_64", "rocblas_dtrsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasDtrsmBatched_64", {"hipblasDtrsmBatched_64", "rocblas_dtrsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasCtrsmBatched", {"hipblasCtrsmBatched_v2", "rocblas_ctrsm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasCtrsmBatched_64", {"hipblasCtrsmBatched_64", "rocblas_ctrsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasCtrsmBatched_64", {"hipblasCtrsmBatched_v2_64", "rocblas_ctrsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasZtrsmBatched", {"hipblasZtrsmBatched_v2", "rocblas_ztrsm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasZtrsmBatched_64", {"hipblasZtrsmBatched_64", "rocblas_ztrsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasZtrsmBatched_64", {"hipblasZtrsmBatched_v2_64", "rocblas_ztrsm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, // MATINV - Batched {"cublasSmatinvBatched", {"hipblasSmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, @@ 
-2074,6 +2074,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDtrsm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCtrsm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZtrsm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStrsmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtrsmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtrsmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtrsmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 1f92cd9a..014a8104 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -3140,6 +3140,26 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); blasStatus = cublasZtrsm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); blasStatus = cublasZtrsm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const float* alpha, const float* const A[], int64_t lda, float* const B[], int64_t ldb, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const 
float* alpha, const float* const AP[], int64_t lda, float* const BP[], int64_t ldb, int64_t batchCount); + // CHECK: blasStatus = hipblasStrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, fAarray_const, lda_64, fBarray, ldb_64, batchCount_64); + blasStatus = cublasStrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &fa, fAarray_const, lda_64, fBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* const A[], int64_t lda, double* const B[], int64_t ldb, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const double* alpha, const double* const AP[], int64_t lda, double* const BP[], int64_t ldb, int64_t batchCount); + // CHECK: blasStatus = hipblasDtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, dAarray_const, lda_64, dBarray, ldb_64, batchCount_64); + blasStatus = cublasDtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &da, dAarray_const, lda_64, dBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* const A[], int64_t lda, cuComplex* const B[], int64_t ldb, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, 
hipblasDiagType_t diag, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* const AP[], int64_t lda, hipComplex* const BP[], int64_t ldb, int64_t batchCount); + // CHECK: blasStatus = hipblasCtrsmBatched_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, complexAarray_const, lda_64, complexBarray, ldb_64, batchCount_64); + blasStatus = cublasCtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &complexa, complexAarray_const, lda_64, complexBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched_64(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int64_t lda, cuDoubleComplex* const B[], int64_t ldb, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* const AP[], int64_t lda, hipDoubleComplex* const BP[], int64_t ldb, int64_t batchCount); + // CHECK: blasStatus = hipblasZtrsmBatched_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); + blasStatus = cublasZtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); #endif return 0; From 5b657530ff01bec70756a1711ccc68a072b5a0af Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 8 Nov 2024 15:41:20 +0100 Subject: [PATCH 45/51] [HIPIFY][BLAS] Sync with `CUDA 12.6.2` - Step 15 - BlasLt API - final + Updated the regenerated `hipify-perl` and `BLAS` 
`CUDA2HIP` docs accordingly --- bin/hipify-perl | 94 +++++++++++++++++++ docs/tables/CUBLAS_API_supported_by_HIP.md | 47 ++++++++++ .../CUBLAS_API_supported_by_HIP_and_ROC.md | 47 ++++++++++ docs/tables/CUBLAS_API_supported_by_ROC.md | 47 ++++++++++ src/CUDA2HIP_BLAS_API_types.cpp | 94 +++++++++++++++++++ 5 files changed, 329 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index f0860edb..07e34948 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -12526,6 +12526,10 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLAS_GEMM_ALGO1", "CUBLAS_GEMM_ALGO0_TENSOR_OP", "CUBLAS_GEMM_ALGO0", + "CUBLASLT_SEARCH_RESERVED_09", + "CUBLASLT_SEARCH_RESERVED_08", + "CUBLASLT_SEARCH_RESERVED_07", + "CUBLASLT_SEARCH_RESERVED_06", "CUBLASLT_SEARCH_RESERVED_05", "CUBLASLT_SEARCH_RESERVED_04", "CUBLASLT_SEARCH_RESERVED_03", @@ -12611,7 +12615,16 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_80x256", "CUBLASLT_MATMUL_TILE_80x192", "CUBLASLT_MATMUL_TILE_80x128", + "CUBLASLT_MATMUL_TILE_768x80", + "CUBLASLT_MATMUL_TILE_768x8", + "CUBLASLT_MATMUL_TILE_768x72", "CUBLASLT_MATMUL_TILE_768x64", + "CUBLASLT_MATMUL_TILE_768x56", + "CUBLASLT_MATMUL_TILE_768x48", + "CUBLASLT_MATMUL_TILE_768x40", + "CUBLASLT_MATMUL_TILE_768x32", + "CUBLASLT_MATMUL_TILE_768x24", + "CUBLASLT_MATMUL_TILE_768x16", "CUBLASLT_MATMUL_TILE_760x64", "CUBLASLT_MATMUL_TILE_752x64", "CUBLASLT_MATMUL_TILE_744x64", @@ -12629,7 +12642,17 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_728x64", "CUBLASLT_MATMUL_TILE_720x64", "CUBLASLT_MATMUL_TILE_712x64", + "CUBLASLT_MATMUL_TILE_704x88", + "CUBLASLT_MATMUL_TILE_704x80", + "CUBLASLT_MATMUL_TILE_704x8", + "CUBLASLT_MATMUL_TILE_704x72", "CUBLASLT_MATMUL_TILE_704x64", + "CUBLASLT_MATMUL_TILE_704x56", + "CUBLASLT_MATMUL_TILE_704x48", + "CUBLASLT_MATMUL_TILE_704x40", + "CUBLASLT_MATMUL_TILE_704x32", + "CUBLASLT_MATMUL_TILE_704x24", + "CUBLASLT_MATMUL_TILE_704x16", "CUBLASLT_MATMUL_TILE_696x64", "CUBLASLT_MATMUL_TILE_688x64", 
"CUBLASLT_MATMUL_TILE_680x64", @@ -12733,7 +12756,18 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_64x112", "CUBLASLT_MATMUL_TILE_64x104", "CUBLASLT_MATMUL_TILE_648x64", + "CUBLASLT_MATMUL_TILE_640x96", + "CUBLASLT_MATMUL_TILE_640x88", + "CUBLASLT_MATMUL_TILE_640x80", + "CUBLASLT_MATMUL_TILE_640x8", + "CUBLASLT_MATMUL_TILE_640x72", "CUBLASLT_MATMUL_TILE_640x64", + "CUBLASLT_MATMUL_TILE_640x56", + "CUBLASLT_MATMUL_TILE_640x48", + "CUBLASLT_MATMUL_TILE_640x40", + "CUBLASLT_MATMUL_TILE_640x32", + "CUBLASLT_MATMUL_TILE_640x24", + "CUBLASLT_MATMUL_TILE_640x16", "CUBLASLT_MATMUL_TILE_632x64", "CUBLASLT_MATMUL_TILE_624x64", "CUBLASLT_MATMUL_TILE_616x64", @@ -12741,7 +12775,20 @@ sub warnHipOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_600x64", "CUBLASLT_MATMUL_TILE_592x64", "CUBLASLT_MATMUL_TILE_584x64", + "CUBLASLT_MATMUL_TILE_576x96", + "CUBLASLT_MATMUL_TILE_576x88", + "CUBLASLT_MATMUL_TILE_576x80", + "CUBLASLT_MATMUL_TILE_576x8", + "CUBLASLT_MATMUL_TILE_576x72", "CUBLASLT_MATMUL_TILE_576x64", + "CUBLASLT_MATMUL_TILE_576x56", + "CUBLASLT_MATMUL_TILE_576x48", + "CUBLASLT_MATMUL_TILE_576x40", + "CUBLASLT_MATMUL_TILE_576x32", + "CUBLASLT_MATMUL_TILE_576x24", + "CUBLASLT_MATMUL_TILE_576x16", + "CUBLASLT_MATMUL_TILE_576x112", + "CUBLASLT_MATMUL_TILE_576x104", "CUBLASLT_MATMUL_TILE_56x768", "CUBLASLT_MATMUL_TILE_56x704", "CUBLASLT_MATMUL_TILE_56x640", @@ -14458,6 +14505,10 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLAS_COMPUTE_32F_FAST_16BF", "CUBLAS_COMPUTE_16F_PEDANTIC", "CUBLAS_COMPUTE_16F", + "CUBLASLT_SEARCH_RESERVED_09", + "CUBLASLT_SEARCH_RESERVED_08", + "CUBLASLT_SEARCH_RESERVED_07", + "CUBLASLT_SEARCH_RESERVED_06", "CUBLASLT_SEARCH_RESERVED_05", "CUBLASLT_SEARCH_RESERVED_04", "CUBLASLT_SEARCH_RESERVED_03", @@ -14558,7 +14609,16 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_80x256", "CUBLASLT_MATMUL_TILE_80x192", "CUBLASLT_MATMUL_TILE_80x128", + "CUBLASLT_MATMUL_TILE_768x80", + "CUBLASLT_MATMUL_TILE_768x8", + 
"CUBLASLT_MATMUL_TILE_768x72", "CUBLASLT_MATMUL_TILE_768x64", + "CUBLASLT_MATMUL_TILE_768x56", + "CUBLASLT_MATMUL_TILE_768x48", + "CUBLASLT_MATMUL_TILE_768x40", + "CUBLASLT_MATMUL_TILE_768x32", + "CUBLASLT_MATMUL_TILE_768x24", + "CUBLASLT_MATMUL_TILE_768x16", "CUBLASLT_MATMUL_TILE_760x64", "CUBLASLT_MATMUL_TILE_752x64", "CUBLASLT_MATMUL_TILE_744x64", @@ -14576,7 +14636,17 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_728x64", "CUBLASLT_MATMUL_TILE_720x64", "CUBLASLT_MATMUL_TILE_712x64", + "CUBLASLT_MATMUL_TILE_704x88", + "CUBLASLT_MATMUL_TILE_704x80", + "CUBLASLT_MATMUL_TILE_704x8", + "CUBLASLT_MATMUL_TILE_704x72", "CUBLASLT_MATMUL_TILE_704x64", + "CUBLASLT_MATMUL_TILE_704x56", + "CUBLASLT_MATMUL_TILE_704x48", + "CUBLASLT_MATMUL_TILE_704x40", + "CUBLASLT_MATMUL_TILE_704x32", + "CUBLASLT_MATMUL_TILE_704x24", + "CUBLASLT_MATMUL_TILE_704x16", "CUBLASLT_MATMUL_TILE_696x64", "CUBLASLT_MATMUL_TILE_688x64", "CUBLASLT_MATMUL_TILE_680x64", @@ -14680,7 +14750,18 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_64x112", "CUBLASLT_MATMUL_TILE_64x104", "CUBLASLT_MATMUL_TILE_648x64", + "CUBLASLT_MATMUL_TILE_640x96", + "CUBLASLT_MATMUL_TILE_640x88", + "CUBLASLT_MATMUL_TILE_640x80", + "CUBLASLT_MATMUL_TILE_640x8", + "CUBLASLT_MATMUL_TILE_640x72", "CUBLASLT_MATMUL_TILE_640x64", + "CUBLASLT_MATMUL_TILE_640x56", + "CUBLASLT_MATMUL_TILE_640x48", + "CUBLASLT_MATMUL_TILE_640x40", + "CUBLASLT_MATMUL_TILE_640x32", + "CUBLASLT_MATMUL_TILE_640x24", + "CUBLASLT_MATMUL_TILE_640x16", "CUBLASLT_MATMUL_TILE_632x64", "CUBLASLT_MATMUL_TILE_624x64", "CUBLASLT_MATMUL_TILE_616x64", @@ -14688,7 +14769,20 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLASLT_MATMUL_TILE_600x64", "CUBLASLT_MATMUL_TILE_592x64", "CUBLASLT_MATMUL_TILE_584x64", + "CUBLASLT_MATMUL_TILE_576x96", + "CUBLASLT_MATMUL_TILE_576x88", + "CUBLASLT_MATMUL_TILE_576x80", + "CUBLASLT_MATMUL_TILE_576x8", + "CUBLASLT_MATMUL_TILE_576x72", "CUBLASLT_MATMUL_TILE_576x64", + "CUBLASLT_MATMUL_TILE_576x56", + 
"CUBLASLT_MATMUL_TILE_576x48", + "CUBLASLT_MATMUL_TILE_576x40", + "CUBLASLT_MATMUL_TILE_576x32", + "CUBLASLT_MATMUL_TILE_576x24", + "CUBLASLT_MATMUL_TILE_576x16", + "CUBLASLT_MATMUL_TILE_576x112", + "CUBLASLT_MATMUL_TILE_576x104", "CUBLASLT_MATMUL_TILE_56x768", "CUBLASLT_MATMUL_TILE_56x704", "CUBLASLT_MATMUL_TILE_56x640", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index f4bb304b..209b342c 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -737,7 +737,20 @@ |`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_576x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_584x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_592x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_600x64`|12.6| | | | | | | | | | @@ -745,7 +758,18 @@ |`CUBLASLT_MATMUL_TILE_616x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_624x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x24`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_640x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x104`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x112`|12.6| | | | | | | | | | @@ -849,7 +873,17 @@ |`CUBLASLT_MATMUL_TILE_680x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_688x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_696x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_704x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x88`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_712x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_720x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_728x64`|12.6| | | | | | | | | | @@ -867,7 +901,16 @@ |`CUBLASLT_MATMUL_TILE_744x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_752x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_760x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x24`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_768x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_768x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x80`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | @@ -969,6 +1012,10 @@ |`CUBLASLT_SEARCH_RESERVED_03`|11.0| | | | | | | | | | |`CUBLASLT_SEARCH_RESERVED_04`|11.0| | | | | | | | | | |`CUBLASLT_SEARCH_RESERVED_05`|11.0| | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_06`|12.6| | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_07`|12.6| | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_08`|12.6| | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_09`|12.6| | | | | | | | | | |`cublasLtClusterShape_t`|11.8| | | | | | | | | | |`cublasLtContext`|10.1| | | | | | | | | | |`cublasLtEpilogue_t`|10.1| | | |`hipblasLtEpilogue_t`|5.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 1a9b319d..3e1caf76 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -737,7 +737,20 @@ |`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x104`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x112`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x32`|12.6| | | | | | | | | | 
| | | | | | +|`CUBLASLT_MATMUL_TILE_576x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_576x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x88`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x96`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_584x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_592x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_600x64`|12.6| | | | | | | | | | | | | | | | @@ -745,7 +758,18 @@ |`CUBLASLT_MATMUL_TILE_616x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_624x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x32`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x88`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x96`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x104`|12.6| | | | | | | | | | | | | | | | 
|`CUBLASLT_MATMUL_TILE_64x112`|12.6| | | | | | | | | | | | | | | | @@ -849,7 +873,17 @@ |`CUBLASLT_MATMUL_TILE_680x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_688x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_696x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x32`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_704x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x72`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x80`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x88`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_712x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_720x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_728x64`|12.6| | | | | | | | | | | | | | | | @@ -867,7 +901,16 @@ |`CUBLASLT_MATMUL_TILE_744x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_752x64`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_760x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x16`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x24`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x32`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x40`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x48`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x56`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_768x64`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x72`|12.6| | | | | | | | | | | | | | | 
| +|`CUBLASLT_MATMUL_TILE_768x8`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x80`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | | | | | | | @@ -969,6 +1012,10 @@ |`CUBLASLT_SEARCH_RESERVED_03`|11.0| | | | | | | | | | | | | | | | |`CUBLASLT_SEARCH_RESERVED_04`|11.0| | | | | | | | | | | | | | | | |`CUBLASLT_SEARCH_RESERVED_05`|11.0| | | | | | | | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_06`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_07`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_08`|12.6| | | | | | | | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_09`|12.6| | | | | | | | | | | | | | | | |`cublasLtClusterShape_t`|11.8| | | | | | | | | | | | | | | | |`cublasLtContext`|10.1| | | | | | | | | | | | | | | | |`cublasLtEpilogue_t`|10.1| | | |`hipblasLtEpilogue_t`|5.5.0| | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index e534ed47..fdcbc8e2 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -737,7 +737,20 @@ |`CUBLASLT_MATMUL_TILE_56x640`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x704`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_56x768`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x104`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x112`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_576x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x72`|12.6| | | | | 
| | | | | +|`CUBLASLT_MATMUL_TILE_576x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_576x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_584x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_592x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_600x64`|12.6| | | | | | | | | | @@ -745,7 +758,18 @@ |`CUBLASLT_MATMUL_TILE_616x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_624x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_632x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_640x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x88`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_640x96`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_648x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x104`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_64x112`|12.6| | | | | | | | | | @@ -849,7 +873,17 @@ |`CUBLASLT_MATMUL_TILE_680x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_688x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_696x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_704x64`|12.6| | | | | | | | | | 
+|`CUBLASLT_MATMUL_TILE_704x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x80`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_704x88`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_712x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_720x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_728x64`|12.6| | | | | | | | | | @@ -867,7 +901,16 @@ |`CUBLASLT_MATMUL_TILE_744x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_752x64`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_760x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x16`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x24`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x32`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x40`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x48`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x56`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_768x64`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x72`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x8`|12.6| | | | | | | | | | +|`CUBLASLT_MATMUL_TILE_768x80`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x128`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x192`|12.6| | | | | | | | | | |`CUBLASLT_MATMUL_TILE_80x256`|12.6| | | | | | | | | | @@ -969,6 +1012,10 @@ |`CUBLASLT_SEARCH_RESERVED_03`|11.0| | | | | | | | | | |`CUBLASLT_SEARCH_RESERVED_04`|11.0| | | | | | | | | | |`CUBLASLT_SEARCH_RESERVED_05`|11.0| | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_06`|12.6| | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_07`|12.6| | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_08`|12.6| | | | | | | | | | +|`CUBLASLT_SEARCH_RESERVED_09`|12.6| | | | | | | | | | |`cublasLtClusterShape_t`|11.8| | | | | | | | | | |`cublasLtContext`|10.1| | | | | | | | | | |`cublasLtEpilogue_t`|10.1| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index eecb8099..6ec55479 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ 
b/src/CUDA2HIP_BLAS_API_types.cpp @@ -791,6 +791,49 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_MATMUL_TILE_512x112", {"HIPBLASLT_MATMUL_TILE_512x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_512x120", {"HIPBLASLT_MATMUL_TILE_512x120", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_512x128", {"HIPBLASLT_MATMUL_TILE_512x128", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x8", {"HIPBLASLT_MATMUL_TILE_576x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x16", {"HIPBLASLT_MATMUL_TILE_576x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x24", {"HIPBLASLT_MATMUL_TILE_576x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x32", {"HIPBLASLT_MATMUL_TILE_576x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x40", {"HIPBLASLT_MATMUL_TILE_576x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x48", {"HIPBLASLT_MATMUL_TILE_576x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x56", {"HIPBLASLT_MATMUL_TILE_576x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x72", {"HIPBLASLT_MATMUL_TILE_576x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x80", {"HIPBLASLT_MATMUL_TILE_576x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x88", {"HIPBLASLT_MATMUL_TILE_576x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x96", 
{"HIPBLASLT_MATMUL_TILE_576x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x104", {"HIPBLASLT_MATMUL_TILE_576x104", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_576x112", {"HIPBLASLT_MATMUL_TILE_576x112", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x8", {"HIPBLASLT_MATMUL_TILE_640x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x16", {"HIPBLASLT_MATMUL_TILE_640x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x24", {"HIPBLASLT_MATMUL_TILE_640x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x32", {"HIPBLASLT_MATMUL_TILE_640x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x40", {"HIPBLASLT_MATMUL_TILE_640x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x48", {"HIPBLASLT_MATMUL_TILE_640x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x56", {"HIPBLASLT_MATMUL_TILE_640x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x72", {"HIPBLASLT_MATMUL_TILE_640x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x80", {"HIPBLASLT_MATMUL_TILE_640x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x88", {"HIPBLASLT_MATMUL_TILE_640x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_640x96", {"HIPBLASLT_MATMUL_TILE_640x96", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_704x8", {"HIPBLASLT_MATMUL_TILE_704x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x16", {"HIPBLASLT_MATMUL_TILE_704x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x24", {"HIPBLASLT_MATMUL_TILE_704x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x32", {"HIPBLASLT_MATMUL_TILE_704x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x40", {"HIPBLASLT_MATMUL_TILE_704x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x48", {"HIPBLASLT_MATMUL_TILE_704x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x56", {"HIPBLASLT_MATMUL_TILE_704x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x72", {"HIPBLASLT_MATMUL_TILE_704x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x80", {"HIPBLASLT_MATMUL_TILE_704x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_704x88", {"HIPBLASLT_MATMUL_TILE_704x88", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x8", {"HIPBLASLT_MATMUL_TILE_768x8", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x16", {"HIPBLASLT_MATMUL_TILE_768x16", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x24", {"HIPBLASLT_MATMUL_TILE_768x24", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x32", {"HIPBLASLT_MATMUL_TILE_768x32", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + 
{"CUBLASLT_MATMUL_TILE_768x40", {"HIPBLASLT_MATMUL_TILE_768x40", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x48", {"HIPBLASLT_MATMUL_TILE_768x48", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x56", {"HIPBLASLT_MATMUL_TILE_768x56", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x72", {"HIPBLASLT_MATMUL_TILE_768x72", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_MATMUL_TILE_768x80", {"HIPBLASLT_MATMUL_TILE_768x80", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_TILE_END", {"HIPBLASLT_MATMUL_TILE_END", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulStages_t", {"hipblasLtMatmulStages_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_MATMUL_STAGES_UNDEFINED", {"HIPBLASLT_MATMUL_STAGES_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, @@ -1003,6 +1046,10 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLASLT_SEARCH_RESERVED_03", {"HIPBLASLT_SEARCH_RESERVED_03", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_SEARCH_RESERVED_04", {"HIPBLASLT_SEARCH_RESERVED_04", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"CUBLASLT_SEARCH_RESERVED_05", {"HIPBLASLT_SEARCH_RESERVED_05", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_SEARCH_RESERVED_06", {"HIPBLASLT_SEARCH_RESERVED_06", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_SEARCH_RESERVED_07", {"HIPBLASLT_SEARCH_RESERVED_07", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_SEARCH_RESERVED_08", {"HIPBLASLT_SEARCH_RESERVED_08", "", CONV_NUMERIC_LITERAL, API_BLAS, 
SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, + {"CUBLASLT_SEARCH_RESERVED_09", {"HIPBLASLT_SEARCH_RESERVED_09", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, UNSUPPORTED}}, {"cublasLtMatmulPreferenceAttributes_t", {"hipblasLtMatmulPreferenceAttributes_t", "", CONV_TYPE, API_BLAS, SEC::BLAS_LT_DATA_TYPES, ROC_UNSUPPORTED}}, {"CUBLASLT_MATMUL_PREF_SEARCH_MODE", {"HIPBLASLT_MATMUL_PREF_SEARCH_MODE", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, ROC_UNSUPPORTED}}, {"CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES", {"HIPBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES", "", CONV_NUMERIC_LITERAL, API_BLAS, SEC::BLAS_LT_DATA_TYPES, ROC_UNSUPPORTED}}, @@ -1999,6 +2046,53 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLASLT_MATMUL_TILE_512x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_512x120", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 {"CUBLASLT_MATMUL_TILE_512x128", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + { "CUBLASLT_MATMUL_TILE_576x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: 
CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x104", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_576x112", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + 
{"CUBLASLT_MATMUL_TILE_640x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_640x96", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, 
CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x48", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_704x88", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x8", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x16", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x24", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x32", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x40", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x48", {CUDA_126, CUDA_0, CUDA_0 
}}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x56", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x72", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_MATMUL_TILE_768x80", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_SEARCH_RESERVED_06", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_SEARCH_RESERVED_07", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_SEARCH_RESERVED_08", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 + {"CUBLASLT_SEARCH_RESERVED_09", {CUDA_126, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 12062, CUBLAS_VERSION 120603, CUBLAS_VER_MAJOR 12 CUBLAS_VER_MINOR 6 CUBLAS_VER_PATCH 3 }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { From 8c7c26cac46c9a6953f541d84544a5a9edda624d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 11 Nov 2024 13:05:15 +0100 Subject: [PATCH 46/51] [HIPIFY][doc] CUDA `12.6.2` is the latest supported release (LLVM 20.x) + `CUDA 12.6.2` is partially supported by LLVM >= 20.0.0, but might work with the `hipify-clang` built against LLVM 19.x + Tested on Windows 11 and Ubuntu 23.10 --- docs/hipify-clang.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/hipify-clang.rst b/docs/hipify-clang.rst index acb73b44..a03cf71e 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -41,7 
+41,7 @@ Dependencies * `CUDA `_ of at least version `7.0 `_, the latest supported version is - `12.6.1 `_. + `12.6.2 `_. .. list-table:: @@ -188,7 +188,7 @@ Dependencies `19.1.1 `_, `19.1.2 `_, `19.1.3 `_:sup:`4` - - `12.6.1 `_:sup:`4` + - `12.6.2 `_:sup:`4` - **Latest stable config** - **Latest stable config** @@ -640,8 +640,8 @@ On Linux, the following configurations are tested: * Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 * Ubuntu 16-19: LLVM 8.0.0 - 14.0.6, CUDA 7.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -* Ubuntu 20-21: LLVM 9.0.0 - 19.1.3, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.5.1 -* Ubuntu 22-23: LLVM 13.0.0 - 19.1.3, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.5.1 +* Ubuntu 20-21: LLVM 9.0.0 - 19.1.3, CUDA 7.0 - 12.6.2, cuDNN 5.1.10 - 9.5.1 +* Ubuntu 22-23: LLVM 13.0.0 - 19.1.3, CUDA 7.0 - 12.6.2, cuDNN 8.0.5 - 9.5.1 Minimum build system requirements for the above configurations: @@ -660,7 +660,7 @@ Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01`` -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.3/dist \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.6.1 \ + -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.6.2 \ -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.5.1 \ -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.3/build/bin/llvm-lit \ ../hipify @@ -697,19 +697,19 @@ The corresponding successful output is: -- Found lit: /usr/local/bin/lit -- Found FileCheck: /GIT/LLVM/trunk/dist/FileCheck -- Initial CUDA to configure: - -- - CUDA Toolkit path : /usr/local/cuda-12.6.1 + -- - CUDA Toolkit path : /usr/local/cuda-12.6.2 -- - CUDA Samples path : -- - cuDNN path : /usr/local/cudnn-9.5.1 -- - CUB path : - -- Found CUDAToolkit: /usr/local/cuda-12.6.1/targets/x86_64-linux/include (found version "12.6.68") + -- Found CUDAToolkit: /usr/local/cuda-12.6.2/targets/x86_64-linux/include (found version "12.6.68") -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found 
Threads: TRUE -- Found CUDA config: - -- - CUDA Toolkit path : /usr/local/cuda-12.6.1 + -- - CUDA Toolkit path : /usr/local/cuda-12.6.2 -- - CUDA Samples path : OFF -- - cuDNN path : /usr/local/cudnn-9.5.1 - -- - CUB path : /usr/local/cuda-12.6.1/include/cub + -- - CUB path : /usr/local/cuda-12.6.2/include/cub -- Configuring done (0.5s) -- Generating done (0.0s) -- Build files have been written to: /usr/hipify/build @@ -826,7 +826,7 @@ Tested configurations: - ``3.30.4`` - ``3.13.0`` * - ``19.1.0 - 19.1.3`` - - ``7.0 - 12.6.1`` + - ``7.0 - 12.6.2`` - ``8.0.5 - 9.5.1`` - ``2019.16.11.40, 2022.17.11.4`` - ``3.30.4`` From 898738cf23cc08d692f9e741d28f65006e7c9e40 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 11 Nov 2024 17:43:54 +0100 Subject: [PATCH 47/51] [HIPIFY][6.3.0][BLAS] Sync with `hipBLAS` and `rocBLAS` - Step 15 + `rocblas_(s|d|c|z)dgmm_64` and `hipblas(S|D|C|Z)dgmm(_v2)?_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 16 ++-- docs/tables/CUBLAS_API_supported_by_HIP.md | 8 +- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 8 +- docs/tables/CUBLAS_API_supported_by_ROC.md | 8 +- src/CUDA2HIP_BLAS_API_functions.cpp | 16 +++- .../synthetic/libraries/cublas2hipblas_v2.cu | 23 +++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 83 ++++++++++++------- 7 files changed, 108 insertions(+), 54 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 07e34948..2f7f923b 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1589,6 +1589,7 @@ sub rocSubstitutions { subst("cublasCcopy_v2", "rocblas_ccopy", "library"); subst("cublasCcopy_v2_64", "rocblas_ccopy_64", "library"); subst("cublasCdgmm", "rocblas_cdgmm", "library"); + subst("cublasCdgmm_64", "rocblas_cdgmm_64", "library"); subst("cublasCdotc", "rocblas_cdotc", "library"); subst("cublasCdotc_64", "rocblas_cdotc_64", "library"); subst("cublasCdotc_v2", "rocblas_cdotc", "library"); @@ -1766,6 +1767,7 @@ sub 
rocSubstitutions { subst("cublasDcopy_v2", "rocblas_dcopy", "library"); subst("cublasDcopy_v2_64", "rocblas_dcopy_64", "library"); subst("cublasDdgmm", "rocblas_ddgmm", "library"); + subst("cublasDdgmm_64", "rocblas_ddgmm_64", "library"); subst("cublasDdot", "rocblas_ddot", "library"); subst("cublasDdot_64", "rocblas_ddot_64", "library"); subst("cublasDdot_v2", "rocblas_ddot", "library"); @@ -1998,6 +2000,7 @@ sub rocSubstitutions { subst("cublasScopy_v2", "rocblas_scopy", "library"); subst("cublasScopy_v2_64", "rocblas_scopy_64", "library"); subst("cublasSdgmm", "rocblas_sdgmm", "library"); + subst("cublasSdgmm_64", "rocblas_sdgmm_64", "library"); subst("cublasSdot", "rocblas_sdot", "library"); subst("cublasSdot_64", "rocblas_sdot_64", "library"); subst("cublasSdot_v2", "rocblas_sdot", "library"); @@ -2155,6 +2158,7 @@ sub rocSubstitutions { subst("cublasZcopy_v2", "rocblas_zcopy", "library"); subst("cublasZcopy_v2_64", "rocblas_zcopy_64", "library"); subst("cublasZdgmm", "rocblas_zdgmm", "library"); + subst("cublasZdgmm_64", "rocblas_zdgmm_64", "library"); subst("cublasZdotc", "rocblas_zdotc", "library"); subst("cublasZdotc_64", "rocblas_zdotc_64", "library"); subst("cublasZdotc_v2", "rocblas_zdotc", "library"); @@ -4383,6 +4387,7 @@ sub simpleSubstitutions { subst("cublasCcopy_v2", "hipblasCcopy_v2", "library"); subst("cublasCcopy_v2_64", "hipblasCcopy_v2_64", "library"); subst("cublasCdgmm", "hipblasCdgmm_v2", "library"); + subst("cublasCdgmm_64", "hipblasCdgmm_v2_64", "library"); subst("cublasCdotc", "hipblasCdotc_v2", "library"); subst("cublasCdotc_64", "hipblasCdotc_v2_64", "library"); subst("cublasCdotc_v2", "hipblasCdotc_v2", "library"); @@ -4564,6 +4569,7 @@ sub simpleSubstitutions { subst("cublasDcopy_v2", "hipblasDcopy", "library"); subst("cublasDcopy_v2_64", "hipblasDcopy_64", "library"); subst("cublasDdgmm", "hipblasDdgmm", "library"); + subst("cublasDdgmm_64", "hipblasDdgmm_64", "library"); subst("cublasDdot", "hipblasDdot", "library"); 
subst("cublasDdot_64", "hipblasDdot_64", "library"); subst("cublasDdot_v2", "hipblasDdot", "library"); @@ -4810,6 +4816,7 @@ sub simpleSubstitutions { subst("cublasScopy_v2", "hipblasScopy", "library"); subst("cublasScopy_v2_64", "hipblasScopy_64", "library"); subst("cublasSdgmm", "hipblasSdgmm", "library"); + subst("cublasSdgmm_64", "hipblasSdgmm_64", "library"); subst("cublasSdot", "hipblasSdot", "library"); subst("cublasSdot_64", "hipblasSdot_64", "library"); subst("cublasSdot_v2", "hipblasSdot", "library"); @@ -4963,6 +4970,7 @@ sub simpleSubstitutions { subst("cublasZcopy_v2", "hipblasZcopy_v2", "library"); subst("cublasZcopy_v2_64", "hipblasZcopy_v2_64", "library"); subst("cublasZdgmm", "hipblasZdgmm_v2", "library"); + subst("cublasZdgmm_64", "hipblasZdgmm_v2_64", "library"); subst("cublasZdotc", "hipblasZdotc_v2", "library"); subst("cublasZdotc_64", "hipblasZdotc_v2_64", "library"); subst("cublasZdotc_v2", "hipblasZdotc_v2", "library"); @@ -11661,7 +11669,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZmatinvBatched", "cublasZgemm3m_64", "cublasZgemm3m", - "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", "cublasTSTgemvStridedBatched_64", @@ -11689,7 +11696,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSetMatrixAsync_64", "cublasSetLoggerCallback", "cublasSetKernelStream", - "cublasSdgmm_64", "cublasRotmgEx", "cublasRotmEx_64", "cublasRotmEx", @@ -11770,7 +11776,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDmatinvBatched", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", - "cublasDdgmm_64", "cublasCtrttp", "cublasCtpttr", "cublasCsyrkEx_64", @@ -11795,7 +11800,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCgemm3mBatched_64", "cublasCgemm3mBatched", "cublasCgemm3m", - "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", "cublasAlloc", @@ -13883,7 +13887,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgemm3m_64", "cublasZgemm3m", "cublasZgelsBatched", - "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", 
"cublasSwapEx_64", @@ -13908,7 +13911,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSetMatrixAsync_64", "cublasSetLoggerCallback", "cublasSetKernelStream", - "cublasSdgmm_64", "cublasRotmgEx", "cublasRotmEx_64", "cublasRotmEx", @@ -14011,7 +14013,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", "cublasDgelsBatched", - "cublasDdgmm_64", "cublasCtrttp", "cublasCtpttr", "cublasCsyrkEx_64", @@ -14040,7 +14041,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgemm3mBatched", "cublasCgemm3m", "cublasCgelsBatched", - "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", "cublasAlloc", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 209b342c..fc9eeafb 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1825,7 +1825,7 @@ |`cublasAxpyEx`|8.0| | | |`hipblasAxpyEx_v2`|6.0.0| | | | | |`cublasAxpyEx_64`|12.0| | | |`hipblasAxpyEx_v2_64`|6.2.0| | | | | |`cublasCdgmm`| | | | |`hipblasCdgmm_v2`|6.0.0| | | | | -|`cublasCdgmm_64`|12.0| | | | | | | | | | +|`cublasCdgmm_64`|12.0| | | |`hipblasCdgmm_v2_64`|6.3.0| | | |6.3.0| |`cublasCgeam`| | | | |`hipblasCgeam_v2`|6.0.0| | | | | |`cublasCgeam_64`|12.0| | | |`hipblasCgeam_v2_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | |`hipblasCgelsBatched_v2`|6.0.0| | | | | @@ -1851,7 +1851,7 @@ |`cublasCtrsmBatched_64`|12.0| | | |`hipblasCtrsmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasCtrttp`| | | | | | | | | | | |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | | -|`cublasDdgmm_64`|12.0| | | | | | | | | | +|`cublasDdgmm_64`|12.0| | | |`hipblasDdgmm_64`|6.3.0| | | |6.3.0| |`cublasDgeam`| | | | |`hipblasDgeam`|1.8.2| | | | | |`cublasDgeam_64`|12.0| | | |`hipblasDgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | |`hipblasDgelsBatched`|5.4.0| | | | | @@ -1887,7 +1887,7 @@ |`cublasScalEx`|8.0| | | |`hipblasScalEx_v2`|6.0.0| | | | | |`cublasScalEx_64`|12.0| | | 
|`hipblasScalEx_v2_64`|6.2.0| | | | | |`cublasSdgmm`| | | | |`hipblasSdgmm`|3.6.0| | | | | -|`cublasSdgmm_64`|12.0| | | | | | | | | | +|`cublasSdgmm_64`|12.0| | | |`hipblasSdgmm_64`|6.3.0| | | |6.3.0| |`cublasSgeam`| | | | |`hipblasSgeam`|1.8.2| | | | | |`cublasSgeam_64`|12.0| | | |`hipblasSgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | |`hipblasSgelsBatched`|5.4.0| | | | | @@ -1906,7 +1906,7 @@ |`cublasSwapEx_64`|12.0| | | | | | | | | | |`cublasUint8gemmBias`|8.0| | | | | | | | | | |`cublasZdgmm`| | | | |`hipblasZdgmm_v2`|6.0.0| | | | | -|`cublasZdgmm_64`|12.0| | | | | | | | | | +|`cublasZdgmm_64`|12.0| | | |`hipblasZdgmm_v2_64`|6.3.0| | | |6.3.0| |`cublasZgeam`| | | | |`hipblasZgeam_v2`|6.0.0| | | | | |`cublasZgeam_64`|12.0| | | |`hipblasZgeam_v2_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | |`hipblasZgelsBatched_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 3e1caf76..0c847d5d 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1825,7 +1825,7 @@ |`cublasAxpyEx`|8.0| | | |`hipblasAxpyEx_v2`|6.0.0| | | | |`rocblas_axpy_ex`|3.9.0| | | | | |`cublasAxpyEx_64`|12.0| | | |`hipblasAxpyEx_v2_64`|6.2.0| | | | |`rocblas_axpy_ex_64`|6.1.0| | | | | |`cublasCdgmm`| | | | |`hipblasCdgmm_v2`|6.0.0| | | | |`rocblas_cdgmm`|3.5.0| | | | | -|`cublasCdgmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCdgmm_64`|12.0| | | |`hipblasCdgmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cdgmm_64`|6.3.0| | | |6.3.0| |`cublasCgeam`| | | | |`hipblasCgeam_v2`|6.0.0| | | | |`rocblas_cgeam`|3.5.0| | | | | |`cublasCgeam_64`|12.0| | | |`hipblasCgeam_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgeam_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | |`hipblasCgelsBatched_v2`|6.0.0| | | | | | | | | | | @@ -1851,7 +1851,7 @@ |`cublasCtrsmBatched_64`|12.0| | | |`hipblasCtrsmBatched_v2_64`|6.3.0| | | 
|6.3.0|`rocblas_ctrsm_batched_64`|6.2.0| | | | | |`cublasCtrttp`| | | | | | | | | | | | | | | | | |`cublasDdgmm`| | | | |`hipblasDdgmm`|3.6.0| | | | |`rocblas_ddgmm`|3.5.0| | | | | -|`cublasDdgmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDdgmm_64`|12.0| | | |`hipblasDdgmm_64`|6.3.0| | | |6.3.0|`rocblas_ddgmm_64`|6.3.0| | | |6.3.0| |`cublasDgeam`| | | | |`hipblasDgeam`|1.8.2| | | | |`rocblas_dgeam`|1.6.4| | | | | |`cublasDgeam_64`|12.0| | | |`hipblasDgeam_64`|6.3.0| | | |6.3.0|`rocblas_dgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | |`hipblasDgelsBatched`|5.4.0| | | | | | | | | | | @@ -1887,7 +1887,7 @@ |`cublasScalEx`|8.0| | | |`hipblasScalEx_v2`|6.0.0| | | | |`rocblas_scal_ex`|4.0.0| | | | | |`cublasScalEx_64`|12.0| | | |`hipblasScalEx_v2_64`|6.2.0| | | | |`rocblas_scal_ex_64`|6.1.0| | | | | |`cublasSdgmm`| | | | |`hipblasSdgmm`|3.6.0| | | | |`rocblas_sdgmm`|3.5.0| | | | | -|`cublasSdgmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSdgmm_64`|12.0| | | |`hipblasSdgmm_64`|6.3.0| | | |6.3.0|`rocblas_sdgmm_64`|6.3.0| | | |6.3.0| |`cublasSgeam`| | | | |`hipblasSgeam`|1.8.2| | | | |`rocblas_sgeam`|1.6.4| | | | | |`cublasSgeam_64`|12.0| | | |`hipblasSgeam_64`|6.3.0| | | |6.3.0|`rocblas_sgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | |`hipblasSgelsBatched`|5.4.0| | | | | | | | | | | @@ -1906,7 +1906,7 @@ |`cublasSwapEx_64`|12.0| | | | | | | | | | | | | | | | |`cublasUint8gemmBias`|8.0| | | | | | | | | | | | | | | | |`cublasZdgmm`| | | | |`hipblasZdgmm_v2`|6.0.0| | | | |`rocblas_zdgmm`|3.5.0| | | | | -|`cublasZdgmm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZdgmm_64`|12.0| | | |`hipblasZdgmm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zdgmm_64`|6.3.0| | | |6.3.0| |`cublasZgeam`| | | | |`hipblasZgeam_v2`|6.0.0| | | | |`rocblas_zgeam`|3.5.0| | | | | |`cublasZgeam_64`|12.0| | | |`hipblasZgeam_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgeam_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | |`hipblasZgelsBatched_v2`|6.0.0| | | | | | | | | | | diff --git 
a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index fdcbc8e2..12a42598 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1825,7 +1825,7 @@ |`cublasAxpyEx`|8.0| | | |`rocblas_axpy_ex`|3.9.0| | | | | |`cublasAxpyEx_64`|12.0| | | |`rocblas_axpy_ex_64`|6.1.0| | | | | |`cublasCdgmm`| | | | |`rocblas_cdgmm`|3.5.0| | | | | -|`cublasCdgmm_64`|12.0| | | | | | | | | | +|`cublasCdgmm_64`|12.0| | | |`rocblas_cdgmm_64`|6.3.0| | | |6.3.0| |`cublasCgeam`| | | | |`rocblas_cgeam`|3.5.0| | | | | |`cublasCgeam_64`|12.0| | | |`rocblas_cgeam_64`|6.3.0| | | |6.3.0| |`cublasCgelsBatched`| | | | | | | | | | | @@ -1851,7 +1851,7 @@ |`cublasCtrsmBatched_64`|12.0| | | |`rocblas_ctrsm_batched_64`|6.2.0| | | | | |`cublasCtrttp`| | | | | | | | | | | |`cublasDdgmm`| | | | |`rocblas_ddgmm`|3.5.0| | | | | -|`cublasDdgmm_64`|12.0| | | | | | | | | | +|`cublasDdgmm_64`|12.0| | | |`rocblas_ddgmm_64`|6.3.0| | | |6.3.0| |`cublasDgeam`| | | | |`rocblas_dgeam`|1.6.4| | | | | |`cublasDgeam_64`|12.0| | | |`rocblas_dgeam_64`|6.3.0| | | |6.3.0| |`cublasDgelsBatched`| | | | | | | | | | | @@ -1887,7 +1887,7 @@ |`cublasScalEx`|8.0| | | |`rocblas_scal_ex`|4.0.0| | | | | |`cublasScalEx_64`|12.0| | | |`rocblas_scal_ex_64`|6.1.0| | | | | |`cublasSdgmm`| | | | |`rocblas_sdgmm`|3.5.0| | | | | -|`cublasSdgmm_64`|12.0| | | | | | | | | | +|`cublasSdgmm_64`|12.0| | | |`rocblas_sdgmm_64`|6.3.0| | | |6.3.0| |`cublasSgeam`| | | | |`rocblas_sgeam`|1.6.4| | | | | |`cublasSgeam_64`|12.0| | | |`rocblas_sgeam_64`|6.3.0| | | |6.3.0| |`cublasSgelsBatched`| | | | | | | | | | | @@ -1906,7 +1906,7 @@ |`cublasSwapEx_64`|12.0| | | | | | | | | | |`cublasUint8gemmBias`|8.0| | | | | | | | | | |`cublasZdgmm`| | | | |`rocblas_zdgmm`|3.5.0| | | | | -|`cublasZdgmm_64`|12.0| | | | | | | | | | +|`cublasZdgmm_64`|12.0| | | |`rocblas_zdgmm_64`|6.3.0| | | |6.3.0| |`cublasZgeam`| | | | |`rocblas_zgeam`|3.5.0| | | | | |`cublasZgeam_64`|12.0| | | 
|`rocblas_zgeam_64`|6.3.0| | | |6.3.0| |`cublasZgelsBatched`| | | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 7ad123f1..8c6d609f 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -618,13 +618,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // DGMM {"cublasSdgmm", {"hipblasSdgmm", "rocblas_sdgmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasSdgmm_64", {"hipblasSdgmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasSdgmm_64", {"hipblasSdgmm_64", "rocblas_sdgmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasDdgmm", {"hipblasDdgmm", "rocblas_ddgmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasDdgmm_64", {"hipblasDdgmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasDdgmm_64", {"hipblasDdgmm_64", "rocblas_ddgmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasCdgmm", {"hipblasCdgmm_v2", "rocblas_cdgmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasCdgmm_64", {"hipblasCdgmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasCdgmm_64", {"hipblasCdgmm_v2_64", "rocblas_cdgmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, {"cublasZdgmm", {"hipblasZdgmm_v2", "rocblas_zdgmm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasZdgmm_64", {"hipblasZdgmm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasZdgmm_64", {"hipblasZdgmm_v2_64", "rocblas_zdgmm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, // TPTTR - Triangular Pack format to Triangular format {"cublasStpttr", {"hipblasStpttr", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, @@ -2078,6 +2078,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDtrsmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCtrsmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZtrsmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSdgmm_64", {HIP_6030, 
HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCdgmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZdgmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2513,6 +2517,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_ctrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_ztrmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_sdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_ddgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 014a8104..6678c6d5 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -231,6 +231,7 @@ int main() { float fy1 = 0.0f; float fc = 0.0f; float fC = 0.0f; + float fCP = 0.0f; float fs = 0.0f; float fd1 = 0.0f; float fd2 = 0.0f; @@ -251,6 +252,7 @@ int main() { double da = 0.0f; double dA = 0.0f; + double dAP = 0.0f; double db = 0.0f; double dB = 0.0f; double dx = 0.0f; @@ -259,6 +261,7 @@ int main() { double dy1 = 0.0f; double dc = 0.0f; double dC = 0.0f; + double dCP = 0.0f; double ds = 0.0f; double dd1 = 0.0f; double dd2 = 0.0f; @@ -3160,6 +3163,26 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* const AP[], int64_t lda, hipDoubleComplex* const BP[], int64_t ldb, 
int64_t batchCount); // CHECK: blasStatus = hipblasZtrsmBatched_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); blasStatus = cublasZtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const float* A, int64_t lda, const float* x, int64_t incx, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSdgmm_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const float* AP, int64_t lda, const float* x, int64_t incx, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSdgmm_64(blasHandle, blasSideMode, m_64, n_64, &fAP, lda_64, &fx, incx_64, &fCP, ldc_64); + blasStatus = cublasSdgmm_64(blasHandle, blasSideMode, m_64, n_64, &fAP, lda_64, &fx, incx_64, &fCP, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const double* A, int64_t lda, const double* x, int64_t incx, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDdgmm_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const double* AP, int64_t lda, const double* x, int64_t incx, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dAP, lda_64, &dx, incx_64, &dCP, ldc_64); + blasStatus = cublasDdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dAP, lda_64, &dx, incx_64, &dCP, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, cuComplex* C, int64_t ldc); + // HIP: 
HIPBLAS_EXPORT hipblasStatus_t hipblasCdgmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const hipComplex* AP, int64_t lda, const hipComplex* x, int64_t incx, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCdgmm_v2_64(blasHandle, blasSideMode, m_64, n_64, &complexA, lda_64, &complexx, incx_64, &complexC, ldc_64); + blasStatus = cublasCdgmm_64(blasHandle, blasSideMode, m_64, n_64, &complexA, lda_64, &complexx, incx_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdgmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZdgmm_v2_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); + blasStatus = cublasZdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 5ad41633..b301e719 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -246,22 +246,23 @@ int main() { // CHECK: blasStatus = rocblas_get_matrix_async(rows, cols, num, image, incx, image_2, incy, stream); blasStatus = cublasGetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); - float fa = 0; - float fA = 0; + float fa = 0.0f; + float fA = 0.0f; float fAP = 0.0f; - float fb = 0; - float fB = 0; - float fx = 0; - float fx1 = 0; - float fy = 0; - float fy1 = 0; - float fc = 0; - float 
fC = 0; - float fs = 0; - float fd1 = 0; - float fd2 = 0; - float fresult = 0; - float fparam = 0; + float fb = 0.0f; + float fB = 0.0f; + float fx = 0.0f; + float fx1 = 0.0f; + float fy = 0.0f; + float fy1 = 0.0f; + float fc = 0.0f; + float fC = 0.0f; + float fCP = 0.0f; + float fs = 0.0f; + float fd1 = 0.0f; + float fd2 = 0.0f; + float fresult = 0.0f; + float fparam = 0.0f; float** fAarray = nullptr; const float** const fAarray_const = const_cast(fAarray); @@ -275,21 +276,23 @@ int main() { const float** const fCarray_const = const_cast(fCarray); float** fTauarray = nullptr; - double da = 0; - double dA = 0; - double db = 0; - double dB = 0; - double dx = 0; - double dx1 = 0; - double dy = 0; - double dy1 = 0; - double dc = 0; - double dC = 0; - double ds = 0; - double dd1 = 0; - double dd2 = 0; - double dresult = 0; - double dparam = 0; + double da = 0.0f; + double dA = 0.0f; + double dAP = 0.0f; + double db = 0.0f; + double dB = 0.0f; + double dx = 0.0f; + double dx1 = 0.0f; + double dy = 0.0f; + double dy1 = 0.0f; + double dc = 0.0f; + double dC = 0.0f; + double dCP = 0.0f; + double ds = 0.0f; + double dd1 = 0.0f; + double dd2 = 0.0f; + double dresult = 0.0f; + double dparam = 0.0f; double** dAarray = nullptr; const double** const dAarray_const = const_cast(dAarray); @@ -3317,6 +3320,26 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_ztrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); blasStatus = cublasZtrmm_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); blasStatus = cublasZtrmm_v2_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexB, ldb_64, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm_64(cublasHandle_t handle, 
cublasSideMode_t mode, int64_t m, int64_t n, const float* A, int64_t lda, const float* x, int64_t incx, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sdgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const float* A, int64_t lda, const float* x, int64_t incx, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_sdgmm_64(blasHandle, blasSideMode, m_64, n_64, &fAP, lda_64, &fx, incx_64, &fCP, ldc_64); + blasStatus = cublasSdgmm_64(blasHandle, blasSideMode, m_64, n_64, &fAP, lda_64, &fx, incx_64, &fCP, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const double* A, int64_t lda, const double* x, int64_t incx, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ddgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const double* A, int64_t lda, const double* x, int64_t incx, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_ddgmm_64(blasHandle, blasSideMode, m_64, n_64, &dAP, lda_64, &dx, incx_64, &dCP, ldc_64); + blasStatus = cublasDdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dAP, lda_64, &dx, incx_64, &dCP, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cdgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* x, int64_t incx, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_cdgmm_64(blasHandle, blasSideMode, m_64, n_64, &complexA, lda_64, &complexx, incx_64, &complexC, ldc_64); + blasStatus = cublasCdgmm_64(blasHandle, blasSideMode, m_64, n_64, &complexA, lda_64, &complexx, incx_64, &complexC, ldc_64); + + // CUDA: CUBLASAPI 
cublasStatus_t CUBLASWINAPI cublasZdgmm_64(cublasHandle_t handle, cublasSideMode_t mode, int64_t m, int64_t n, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* x, int64_t incx, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); + blasStatus = cublasZdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); #endif return 0; From b2265ffe2563be2f880dc503d8f67bae6c1e202c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 11 Nov 2024 19:38:18 +0100 Subject: [PATCH 48/51] [HIPIFY][SWDEV-493184][6.2.0][device][fix] Added missing support for device intrinsics and built-ins that appeared in HIP `6.2.0` + `__all_sync`, `__any_sync`, `__ballot_sync`, `__activemask`, `__match_any_sync`, `__match_all_sync`, `__shfl_sync`, `__shfl_up_sync`, `__shfl_down_sync`, and `__shfl_xor_sync` --- bin/hipify-perl | 16 ++++++--- .../CUDA_Device_API_supported_by_HIP.md | 14 +++++--- src/CUDA2HIP_Device_functions.cpp | 36 ++++++++++++++++--- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 2f7f923b..e327d367 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -8644,8 +8644,12 @@ sub countSupportedDeviceFunctions { "__short2half_ru", "__short2half_rn", "__short2half_rd", + "__shfl_xor_sync", "__shfl_xor", + "__shfl_up_sync", "__shfl_up", + "__shfl_sync", + "__shfl_down_sync", "__shfl_down", "__shfl", "__saturatef", @@ -8657,6 +8661,8 @@ sub countSupportedDeviceFunctions { "__mulhi", "__mul64hi", "__mul24", + "__match_any_sync", + "__match_all_sync", "__lows2half2", "__lowhigh2highlow", 
"__low2half2", @@ -8871,11 +8877,15 @@ sub countSupportedDeviceFunctions { "__byte_perm", "__brevll", "__brev", + "__ballot_sync", "__ballot", "__assertfail", "__assert_fail", + "__any_sync", "__any", - "__all" + "__all_sync", + "__all", + "__activemask" ) { # match device function from the list, except those, which have a namespace prefix (aka somenamespace::umin(...)); @@ -9022,10 +9032,6 @@ sub warnUnsupportedDeviceFunctions { "__short2bfloat16_ru", "__short2bfloat16_rn", "__short2bfloat16_rd", - "__shfl_xor_sync", - "__shfl_up_sync", - "__shfl_sync", - "__shfl_down_sync", "__prof_trigger", "__pm3", "__pm2", diff --git a/docs/tables/CUDA_Device_API_supported_by_HIP.md b/docs/tables/CUDA_Device_API_supported_by_HIP.md index 0a3c62ab..50759924 100644 --- a/docs/tables/CUDA_Device_API_supported_by_HIP.md +++ b/docs/tables/CUDA_Device_API_supported_by_HIP.md @@ -5,11 +5,15 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`_Pow_int`| | | | | | | | | | | +|`__activemask`|9.0| | | |`__activemask`|6.2.0| | | | | |`__all`| | | | |`__all`|1.6.0| | | | | +|`__all_sync`|9.0| | | |`__all_sync`|6.2.0| | | | | |`__any`| | | | |`__any`|1.6.0| | | | | +|`__any_sync`|9.0| | | |`__any_sync`|6.2.0| | | | | |`__assert_fail`| | | | |`__assert_fail`|1.9.0| | | | | |`__assertfail`| | | | |`__assertfail`|1.9.0| | | | | |`__ballot`| | | | |`__ballot`|1.6.0| | | | | +|`__ballot_sync`|9.0| | | |`__ballot_sync`|6.2.0| | | | | |`__bfloat1622float2`|11.0| | | | | | | | | | |`__bfloat162bfloat162`|11.0| | | | | | | | | | |`__bfloat162char_rz`|12.2| | | | | | | | | | @@ -361,6 +365,8 @@ |`__lowhigh2highlow`| | | | |`__lowhigh2highlow`|1.6.0| | | | | |`__lows2bfloat162`|11.0| | | | | | | | | | |`__lows2half2`| | | | |`__lows2half2`|1.6.0| | | | | +|`__match_all_sync`|9.0| | | |`__match_all_sync`|6.2.0| | | | | +|`__match_any_sync`|9.0| | | |`__match_any_sync`|6.2.0| | | | | |`__mul24`| | | | |`__mul24`|1.6.0| | | | | 
|`__mul64hi`| | | | |`__mul64hi`|1.6.0| | | | | |`__mulhi`| | | | |`__mulhi`|1.6.0| | | | | @@ -387,12 +393,12 @@ |`__saturatef`| | | | |`__saturatef`|1.6.0| | | | | |`__shfl`|7.5|9.0| | |`__shfl`|1.6.0| | | | | |`__shfl_down`|7.5|9.0| | |`__shfl_down`|1.6.0| | | | | -|`__shfl_down_sync`| | | | | | | | | | | -|`__shfl_sync`| | | | | | | | | | | +|`__shfl_down_sync`|9.0| | | |`__shfl_down_sync`|6.2.0| | | | | +|`__shfl_sync`|9.0| | | |`__shfl_sync`|6.2.0| | | | | |`__shfl_up`|7.5|9.0| | |`__shfl_up`|1.6.0| | | | | -|`__shfl_up_sync`| | | | | | | | | | | +|`__shfl_up_sync`|9.0| | | |`__shfl_up_sync`|6.2.0| | | | | |`__shfl_xor`|7.5|9.0| | |`__shfl_xor`|1.6.0| | | | | -|`__shfl_xor_sync`| | | | | | | | | | | +|`__shfl_xor_sync`|9.0| | | |`__shfl_xor_sync`|6.2.0| | | | | |`__short2bfloat16_rd`|11.0| | | | | | | | | | |`__short2bfloat16_rn`|11.0| | | | | | | | | | |`__short2bfloat16_ru`|11.0| | | | | | | | | | diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index ed2e4a49..469d1e60 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -672,13 +672,13 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_sync", {"__shfl_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_up_sync", {"__shfl_up_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_down_sync", {"__shfl_down_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, 
{"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_xor_sync", {"__shfl_xor_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, {"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -829,6 +829,14 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__nv_cvt_bfloat16raw2_to_fp8x2", {"__hip_cvt_bfloat16raw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__nv_cvt_fp8_to_halfraw", {"__hip_cvt_fp8_to_halfraw", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__nv_cvt_fp8x2_to_halfraw2", {"__hip_cvt_fp8x2_to_halfraw2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + // intrinsics + {"__all_sync", {"__all_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__any_sync", {"__any_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ballot_sync", {"__ballot_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__activemask", {"__activemask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + // built-ins + {"__match_any_sync", {"__match_any_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__match_all_sync", {"__match_all_sync", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, }; const std::map CUDA_DEVICE_FUNCTION_VER_MAP { @@ -959,6 +967,16 @@ const std::map CUDA_DEVICE_FUNCTION_VER_MAP { {"make_half2", {CUDA_122, CUDA_0, CUDA_0 }}, {"__half2char_rz", {CUDA_122, CUDA_0, CUDA_0 }}, {"__half2uchar_rz", {CUDA_122, CUDA_0, CUDA_0 }}, + {"__all_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__any_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__ballot_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__activemask", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__match_any_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__match_all_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__shfl_sync", {CUDA_90, CUDA_0, 
CUDA_0 }}, + {"__shfl_up_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__shfl_down_sync", {CUDA_90, CUDA_0, CUDA_0 }}, + {"__shfl_xor_sync", {CUDA_90, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_FUNCTION_VER_MAP { @@ -1470,6 +1488,16 @@ const std::map HIP_DEVICE_FUNCTION_VER_MAP { {"__hmax_nan", {HIP_5050, HIP_0, HIP_0 }}, {"__hmin", {HIP_5050, HIP_0, HIP_0 }}, {"__hmin_nan", {HIP_5050, HIP_0, HIP_0 }}, + {"__all_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__any_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__ballot_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__activemask", {HIP_6020, HIP_0, HIP_0 }}, + {"__match_any_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__match_all_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__shfl_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__shfl_up_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__shfl_down_sync", {HIP_6020, HIP_0, HIP_0 }}, + {"__shfl_xor_sync", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map CUDA_DEVICE_FUNCTION_API_SECTION_MAP { From b46b461854f116a6a2834a8c0a3e690562c794f4 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 12 Nov 2024 22:05:12 +0100 Subject: [PATCH 49/51] [HIPIFY][6.3.0][BLAS][fix] Sync with `hipBLAS` and `rocBLAS` - Step 16 - final for `hipBLAS` + `hipblasGemm(Strided(Batched)?)?Ex_v2_64` support + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation + Minor fixes and annotation of `BLAS` synthetic tests + [FIX] `cublasGemm(Strided(Batched)?)?Ex` mark as API-changed since 11.0 + [TODO] [feature] `CUDA VERSION` detection by `HIPIFY` itself to support different CUDA/HIP signatures on hipification based on the CUDA version + [IMP] `rocblas_gemm(_strided(_batched)?)?_ex_64` are not yet supported [REASON] `compute_type` argument of the `rocblas_computetype` type is needed instead of the `rocblas_datatype` type + [TODO] File a ticket for `rocblas_gemm(_strided(_batched)?)?_ex_64` with `compute_type` argument of the `rocblas_computetype` type instead of the `rocblas_datatype` type --- bin/hipify-perl | 6 +- 
docs/tables/CUBLAS_API_supported_by_HIP.md | 12 +- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 12 +- docs/tables/CUBLAS_API_supported_by_ROC.md | 6 +- src/CUDA2HIP_BLAS_API_functions.cpp | 15 +- .../synthetic/libraries/cublas2hipblas.cu | 19 +- .../synthetic/libraries/cublas2hipblas_v2.cu | 36 +- .../synthetic/libraries/cublas2rocblas.cu | 403 ++++++++-------- .../synthetic/libraries/cublas2rocblas_v2.cu | 436 ++++++++++-------- 9 files changed, 512 insertions(+), 433 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index e327d367..3ab7d25a 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -4719,8 +4719,11 @@ sub simpleSubstitutions { subst("cublasDznrm2_v2", "hipblasDznrm2_v2", "library"); subst("cublasDznrm2_v2_64", "hipblasDznrm2_v2_64", "library"); subst("cublasGemmBatchedEx", "hipblasGemmBatchedEx_v2", "library"); + subst("cublasGemmBatchedEx_64", "hipblasGemmBatchedEx_v2_64", "library"); subst("cublasGemmEx", "hipblasGemmEx_v2", "library"); + subst("cublasGemmEx_64", "hipblasGemmEx_v2_64", "library"); subst("cublasGemmStridedBatchedEx", "hipblasGemmStridedBatchedEx_v2", "library"); + subst("cublasGemmStridedBatchedEx_64", "hipblasGemmStridedBatchedEx_v2_64", "library"); subst("cublasGetAtomicsMode", "hipblasGetAtomicsMode", "library"); subst("cublasGetMathMode", "hipblasGetMathMode", "library"); subst("cublasGetMatrix", "hipblasGetMatrix", "library"); @@ -11771,11 +11774,8 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGetLoggerCallback", "cublasGetError", "cublasGetCudartVersion", - "cublasGemmStridedBatchedEx_64", "cublasGemmGroupedBatchedEx_64", "cublasGemmGroupedBatchedEx", - "cublasGemmEx_64", - "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", "cublasDtpttr", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index fc9eeafb..ed8ba957 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1868,12 +1868,12 @@ 
|`cublasDtrsmBatched`| | | | |`hipblasDtrsmBatched`|3.2.0| | | | | |`cublasDtrsmBatched_64`|12.0| | | |`hipblasDtrsmBatched_64`|6.3.0| | | |6.3.0| |`cublasDtrttp`| | | | | | | | | | | -|`cublasGemmBatchedEx`|9.1| | | |`hipblasGemmBatchedEx_v2`|6.0.0| | | | | -|`cublasGemmBatchedEx_64`|12.0| | | | | | | | | | -|`cublasGemmEx`|8.0| | | |`hipblasGemmEx_v2`|6.0.0| | | | | -|`cublasGemmEx_64`|12.0| | | | | | | | | | -|`cublasGemmStridedBatchedEx`|9.1| | | |`hipblasGemmStridedBatchedEx_v2`|6.0.0| | | | | -|`cublasGemmStridedBatchedEx_64`|12.0| | | | | | | | | | +|`cublasGemmBatchedEx`|9.1| |11.0| |`hipblasGemmBatchedEx_v2`|6.0.0| | | | | +|`cublasGemmBatchedEx_64`|12.0| | | |`hipblasGemmBatchedEx_v2_64`|6.3.0| | | |6.3.0| +|`cublasGemmEx`|8.0| |11.0| |`hipblasGemmEx_v2`|6.0.0| | | | | +|`cublasGemmEx_64`|12.0| | | |`hipblasGemmEx_v2_64`|6.3.0| | | |6.3.0| +|`cublasGemmStridedBatchedEx`|9.1| |11.0| |`hipblasGemmStridedBatchedEx_v2`|6.0.0| | | | | +|`cublasGemmStridedBatchedEx_64`|12.0| | | |`hipblasGemmStridedBatchedEx_v2_64`|6.3.0| | | |6.3.0| |`cublasIamaxEx`|10.1| | | | | | | | | | |`cublasIamaxEx_64`|12.0| | | | | | | | | | |`cublasIaminEx`|10.1| | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 0c847d5d..d569d999 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1868,12 +1868,12 @@ |`cublasDtrsmBatched`| | | | |`hipblasDtrsmBatched`|3.2.0| | | | |`rocblas_dtrsm_batched`|3.5.0| | | | | |`cublasDtrsmBatched_64`|12.0| | | |`hipblasDtrsmBatched_64`|6.3.0| | | |6.3.0|`rocblas_dtrsm_batched_64`|6.2.0| | | | | |`cublasDtrttp`| | | | | | | | | | | | | | | | | -|`cublasGemmBatchedEx`|9.1| | | |`hipblasGemmBatchedEx_v2`|6.0.0| | | | |`rocblas_gemm_batched_ex`|3.5.0| | | | | -|`cublasGemmBatchedEx_64`|12.0| | | | | | | | | | | | | | | | -|`cublasGemmEx`|8.0| | | |`hipblasGemmEx_v2`|6.0.0| | | | 
|`rocblas_gemm_ex`|1.8.2| | | | | -|`cublasGemmEx_64`|12.0| | | | | | | | | | | | | | | | -|`cublasGemmStridedBatchedEx`|9.1| | | |`hipblasGemmStridedBatchedEx_v2`|6.0.0| | | | |`rocblas_gemm_strided_batched_ex`|1.9.0| | | | | -|`cublasGemmStridedBatchedEx_64`|12.0| | | | | | | | | | | | | | | | +|`cublasGemmBatchedEx`|9.1| |11.0| |`hipblasGemmBatchedEx_v2`|6.0.0| | | | |`rocblas_gemm_batched_ex`|3.5.0| | | | | +|`cublasGemmBatchedEx_64`|12.0| | | |`hipblasGemmBatchedEx_v2_64`|6.3.0| | | |6.3.0| | | | | | | +|`cublasGemmEx`|8.0| |11.0| |`hipblasGemmEx_v2`|6.0.0| | | | |`rocblas_gemm_ex`|1.8.2| | | | | +|`cublasGemmEx_64`|12.0| | | |`hipblasGemmEx_v2_64`|6.3.0| | | |6.3.0| | | | | | | +|`cublasGemmStridedBatchedEx`|9.1| |11.0| |`hipblasGemmStridedBatchedEx_v2`|6.0.0| | | | |`rocblas_gemm_strided_batched_ex`|1.9.0| | | | | +|`cublasGemmStridedBatchedEx_64`|12.0| | | |`hipblasGemmStridedBatchedEx_v2_64`|6.3.0| | | |6.3.0| | | | | | | |`cublasIamaxEx`|10.1| | | | | | | | | | | | | | | | |`cublasIamaxEx_64`|12.0| | | | | | | | | | | | | | | | |`cublasIaminEx`|10.1| | | | | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 12a42598..c76de30a 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1868,11 +1868,11 @@ |`cublasDtrsmBatched`| | | | |`rocblas_dtrsm_batched`|3.5.0| | | | | |`cublasDtrsmBatched_64`|12.0| | | |`rocblas_dtrsm_batched_64`|6.2.0| | | | | |`cublasDtrttp`| | | | | | | | | | | -|`cublasGemmBatchedEx`|9.1| | | |`rocblas_gemm_batched_ex`|3.5.0| | | | | +|`cublasGemmBatchedEx`|9.1| |11.0| |`rocblas_gemm_batched_ex`|3.5.0| | | | | |`cublasGemmBatchedEx_64`|12.0| | | | | | | | | | -|`cublasGemmEx`|8.0| | | |`rocblas_gemm_ex`|1.8.2| | | | | +|`cublasGemmEx`|8.0| |11.0| |`rocblas_gemm_ex`|1.8.2| | | | | |`cublasGemmEx_64`|12.0| | | | | | | | | | -|`cublasGemmStridedBatchedEx`|9.1| | | |`rocblas_gemm_strided_batched_ex`|1.9.0| | 
| | | +|`cublasGemmStridedBatchedEx`|9.1| |11.0| |`rocblas_gemm_strided_batched_ex`|1.9.0| | | | | |`cublasGemmStridedBatchedEx_64`|12.0| | | | | | | | | | |`cublasIamaxEx`|10.1| | | | | | | | | | |`cublasIamaxEx_64`|12.0| | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 8c6d609f..6dda671f 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -835,11 +835,11 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasSgemmEx", {"hipblasSgemmEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, {"cublasSgemmEx_64", {"hipblasSgemmEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, {"cublasGemmEx", {"hipblasGemmEx_v2", "rocblas_gemm_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasGemmEx_64", {"hipblasGemmEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasGemmEx_64", {"hipblasGemmEx_v2_64", "rocblas_gemm_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, ROC_UNSUPPORTED}}, {"cublasGemmBatchedEx", {"hipblasGemmBatchedEx_v2", "rocblas_gemm_batched_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasGemmBatchedEx_64", {"hipblasGemmBatchedEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasGemmBatchedEx_64", {"hipblasGemmBatchedEx_v2_64", "rocblas_gemm_batched_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, ROC_UNSUPPORTED}}, {"cublasGemmStridedBatchedEx", {"hipblasGemmStridedBatchedEx_v2", "rocblas_gemm_strided_batched_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasGemmStridedBatchedEx_64", {"hipblasGemmStridedBatchedEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, + {"cublasGemmStridedBatchedEx_64", {"hipblasGemmStridedBatchedEx_v2_64", "rocblas_gemm_strided_batched_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, ROC_UNSUPPORTED}}, // IO in Int8 complex/cuComplex, computation in cuComplex {"cublasCgemmEx", {"hipblasCgemmEx", "", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_EXT, UNSUPPORTED}}, {"cublasCgemmEx_64", {"hipblasCgemmEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, UNSUPPORTED}}, @@ -2082,6 +2082,9 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCdgmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZdgmm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasGemmEx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasGemmBatchedEx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasGemmStridedBatchedEx_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2521,6 +2524,9 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_ddgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_cdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zdgmm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_gemm_ex_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_gemm_batched_ex_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_gemm_strided_batched_ex_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { @@ -2534,6 +2540,9 @@ const std::map HIP_BLAS_FUNCTION_CHANGED const std::map CUDA_BLAS_FUNCTION_CHANGED_VER_MAP { {"cublasLtMatmulDescCreate", {CUDA_110}}, + {"cublasGemmEx", {CUDA_110}}, + {"cublasGemmBatchedEx", {CUDA_110}}, + {"cublasGemmStridedBatchedEx", {CUDA_110}}, }; const std::map CUDA_BLAS_API_SECTION_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 034b8c33..9687afb1 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -241,6 +241,7 @@ int main() { void** voidBarray = nullptr; const void** const voidBarray_const = const_cast(voidBarray); void** voidCarray = nullptr; + const void** const 
voidCarray_const = const_cast(voidCarray); // NOTE: float CUBLASWINAPI cublasSnrm2(int n, const float* x, int incx) is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); @@ -1517,6 +1518,9 @@ int main() { // CHECK: hipDataType DataType_2, DataType_3; cudaDataType DataType_2, DataType_3; + // CHECK: hipDataType computeType; + cudaDataType computeType; + // CHECK: hipblasGemmAlgo_t blasGemmAlgo; // CHECK-NEXT: hipblasGemmAlgo_t BLAS_GEMM_DFALT = HIPBLAS_GEMM_DEFAULT; cublasGemmAlgo_t blasGemmAlgo; @@ -1599,9 +1603,7 @@ int main() { #endif #if CUDA_VERSION >= 8000 && CUDA_VERSION < 11000 - // CHECK: hipDataType computeType; - cudaDataType computeType; - + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipDataType aType, int lda, const void* B, hipDataType bType, int ldb, const void* beta, void* C, hipDataType cType, int ldc, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmEx_v2(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo); @@ -1636,12 +1638,16 @@ int main() { #endif #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 - // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, 
const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *const Aarray[], cudaDataType Atype, int lda, const void *const Barray[], cudaDataType Btype, int ldb, const void *beta, void *const Carray[], cudaDataType Ctype, int ldc, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP [Actual] HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipDataType aType, int lda, const void* B[], hipDataType bType, int ldb, const void* beta, void* C[], hipDataType cType, int ldc, int batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmBatchedEx_v2(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, 
computeType, blasGemmAlgo); - // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *A, cudaDataType Atype, int lda, long long int strideA, const void *B, cudaDataType Btype, int ldb, long long int strideB, const void *beta, void *C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP [Actual] HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipDataType aType, int lda, hipblasStride strideA, const void* B, hipDataType bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipDataType cType, int ldc, hipblasStride strideC, int batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t 
algo); // CHECK: blasStatus = hipblasGemmStridedBatchedEx_v2(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); @@ -1687,16 +1693,19 @@ int main() { // CHECK: hipblasComputeType_t blasComputeType; cublasComputeType_t blasComputeType; + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipDataType aType, int lda, const void* B, hipDataType bType, int ldb, const void* beta, void* C, hipDataType cType, int ldc, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmEx_v2(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo); blasStatus = cublasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo); + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, 
int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipDataType aType, int lda, const void* B[], hipDataType bType, int ldb, const void* beta, void* C[], hipDataType cType, int ldc, int batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmBatchedEx_v2(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo); blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo); + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipDataType aType, int lda, hipblasStride strideA, const void* B, hipDataType bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipDataType cType, int ldc, hipblasStride strideC, int batchCount, hipblasComputeType_t 
computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmStridedBatchedEx_v2(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo); diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 6678c6d5..4576ad41 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -284,6 +284,7 @@ int main() { void** voidBarray = nullptr; const void** const voidBarray_const = const_cast(voidBarray); void** voidCarray = nullptr; + const void** const voidCarray_const = const_cast(voidCarray); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result); @@ -1726,6 +1727,9 @@ int main() { // CHECK: hipDataType DataType_2, DataType_3; cudaDataType DataType_2, DataType_3; + // CHECK: hipDataType computeType; + cudaDataType computeType; + // CHECK: hipblasGemmAlgo_t blasGemmAlgo; // CHECK-NEXT: hipblasGemmAlgo_t BLAS_GEMM_DFALT = HIPBLAS_GEMM_DEFAULT; cublasGemmAlgo_t blasGemmAlgo; @@ -1808,9 +1812,7 @@ int main() { #endif #if CUDA_VERSION >= 8000 && CUDA_VERSION < 11000 - // CHECK: hipDataType computeType; - cudaDataType computeType; - + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t 
hipblasGemmEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipDataType aType, int lda, const void* B, hipDataType bType, int ldb, const void* beta, void* C, hipDataType cType, int ldc, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmEx_v2(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo); @@ -1828,12 +1830,16 @@ int main() { #endif #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 - // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *const Aarray[], cudaDataType Atype, int lda, const void *const Barray[], cudaDataType Btype, int ldb, const void *beta, void *const Carray[], cudaDataType Ctype, int ldc, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP [Actual] HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t 
hipblasGemmBatchedEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipDataType aType, int lda, const void* B[], hipDataType bType, int ldb, const void* beta, void* C[], hipDataType cType, int ldc, int batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmBatchedEx_v2(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *A, cudaDataType Atype, int lda, long long int strideA, const void *B, cudaDataType Btype, int ldb, long long int strideB, const void *beta, void *C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP [Actual] HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const 
void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipDataType aType, int lda, hipblasStride strideA, const void* B, hipDataType bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipDataType cType, int ldc, hipblasStride strideC, int batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmStridedBatchedEx_v2(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); @@ -1880,16 +1886,19 @@ int main() { // CHECK: hipblasComputeType_t blasComputeType; cublasComputeType_t blasComputeType; + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipDataType aType, int lda, const void* B, 
hipDataType bType, int ldb, const void* beta, void* C, hipDataType cType, int ldc, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmEx_v2(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo); blasStatus = cublasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo); + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipDataType aType, int lda, const void* B[], hipDataType bType, int ldb, const void* beta, void* C[], hipDataType cType, int ldc, int batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmBatchedEx_v2(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo); blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo); + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, 
int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx_v2(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipDataType aType, int lda, hipblasStride strideA, const void* B, hipDataType bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipDataType cType, int ldc, hipblasStride strideC, int batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); // CHECK: blasStatus = hipblasGemmStridedBatchedEx_v2(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo); @@ -2927,7 +2936,7 @@ int main() { blasStatus = cublasZherk_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &da, &dcomplexA, lda_64, &db, &dcomplexC, ldc_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const float* beta, cuComplex* C, int64_t ldc); - // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherkx_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const float* beta, hipComplex* CP, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherkx_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, 
int64_t lda, const hipComplex* BP, int64_t ldb, const float* beta, hipComplex* CP, int64_t ldc); // CHECK: blasStatus = hipblasCherkx_v2_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); blasStatus = cublasCherkx_64(blasHandle, blasFillMode, blasOperation, n_64, k_64, &complexa, &complexA, lda_64, &complexB, ldb_64, &fb, &complexC, ldc_64); @@ -3183,6 +3192,21 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdgmm_v2_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m, int64_t n, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, hipDoubleComplex* CP, int64_t ldc); // CHECK: blasStatus = hipblasZdgmm_v2_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); blasStatus = cublasZdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const void* alpha, const void* A, cudaDataType Atype, int64_t lda, const void* B, cudaDataType Btype, int64_t ldb, const void* beta, void* C, cudaDataType Ctype, int64_t ldc, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const void* alpha, const void* A, hipDataType aType, int64_t lda, const void* B, hipDataType bType, int64_t ldb, const void* beta, void* C, hipDataType cType, int64_t ldc, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmEx_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, Aptr, Atype, lda_64, Bptr, Btype, ldb_64, bptr, Cptr, Ctype, ldc_64, blasComputeType, blasGemmAlgo); + blasStatus = 
cublasGemmEx_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, Aptr, Atype, lda_64, Bptr, Btype, ldb_64, bptr, Cptr, Ctype, ldc_64, blasComputeType, blasGemmAlgo); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int64_t lda, const void* const Barray[], cudaDataType Btype, int64_t ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int64_t ldc, int64_t batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const void* alpha, const void* A[], hipDataType aType, int64_t lda, const void* B[], hipDataType bType, int64_t ldb, const void* beta, void* C[], hipDataType cType, int64_t ldc, int64_t batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmBatchedEx_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, voidAarray_const, Atype, lda_64, voidBarray_const, Btype, ldb_64, bptr, voidCarray, Ctype, ldc_64, batchCount_64, blasComputeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, voidAarray_const, Atype, lda_64, voidBarray_const, Btype, ldb_64, bptr, voidCarray, Ctype, ldc_64, batchCount_64, blasComputeType, blasGemmAlgo); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const void* alpha, const void* A, cudaDataType Atype, int64_t lda, long long int strideA, const void* B, cudaDataType Btype, int64_t ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int64_t ldc, long long int strideC, 
int64_t batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const void* alpha, const void* A, hipDataType aType, int64_t lda, hipblasStride strideA, const void* B, hipDataType bType, int64_t ldb, hipblasStride strideB, const void* beta, void* C, hipDataType cType, int64_t ldc, hipblasStride strideC, int64_t batchCount, hipblasComputeType_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmStridedBatchedEx_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, Aptr, Atype, lda_64, strideA, Bptr, Btype, ldb_64, strideB, bptr, Cptr, Ctype, ldc_64, strideC, batchCount_64, blasComputeType, blasGemmAlgo); + blasStatus = cublasGemmStridedBatchedEx_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, Aptr, Atype, lda_64, strideA, Bptr, Btype, ldb_64, strideB, bptr, Cptr, Ctype, ldc_64, strideC, batchCount_64, blasComputeType, blasGemmAlgo); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu index 8ff385ee..4fbcbdbd 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu @@ -13,7 +13,7 @@ #endif int main() { - printf("16. cuBLAS API to hipBLAS API synthetic test\n"); + printf("16. 
cuBLAS API to rocBLAS API synthetic test\n"); // CHECK: rocblas_operation blasOperation; // CHECK-NEXT: rocblas_operation BLAS_OP_N = rocblas_operation_none; @@ -168,25 +168,25 @@ int main() { #endif */ - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void* x, int incx, void* devicePtr, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_vector(rocblas_int n, rocblas_int elem_size, const void* x, rocblas_int incx, void* y, rocblas_int incy); // CHECK: blasStatus = rocblas_set_vector(n, num, image, incx, image_2, incy); blasStatus = cublasSetVector(n, num, image, incx, image_2, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void* x, int incx, void* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_vector(rocblas_int n, rocblas_int elem_size, const void* x, rocblas_int incx, void* y, rocblas_int incy); // CHECK: blasStatus = rocblas_get_vector(n, num, image, incx, image_2, incy); blasStatus = cublasGetVector(n, num, image, incx, image_2, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, const void* hostPtr, int incx, void* devicePtr, int incy, cudaStream_t stream); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_vector_async(rocblas_int n, rocblas_int elem_size, const void* x, rocblas_int incx, void* y, rocblas_int incy, hipStream_t stream); // CHECK: blasStatus = rocblas_set_vector_async(n, num, image, incx, image_2, incy, stream); blasStatus = cublasSetVectorAsync(n, num, image, incx, image_2, incy, stream); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, const void* devicePtr, int incx, void* hostPtr, int incy, cudaStream_t stream); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_vector_async(rocblas_int n, rocblas_int 
elem_size, const void* x, rocblas_int incx, void* y, rocblas_int incy, hipStream_t stream); // CHECK: blasStatus = rocblas_get_vector_async(n, num, image, incx, image_2, incy, stream); @@ -195,25 +195,25 @@ int main() { int rows = 0; int cols = 0; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_matrix(rocblas_int rows, rocblas_int cols, rocblas_int elem_size, const void* a, rocblas_int lda, void* b, rocblas_int ldb); // CHECK: blasStatus = rocblas_set_matrix(rows, cols, num, image, incx, image_2, incy); blasStatus = cublasSetMatrix(rows, cols, num, image, incx, image_2, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_matrix(rocblas_int rows, rocblas_int cols, rocblas_int elem_size, const void* a, rocblas_int lda, void* b, rocblas_int ldb); // CHECK: blasStatus = rocblas_get_matrix(rows, cols, num, image, incx, image_2, incy); blasStatus = cublasGetMatrix(rows, cols, num, image, incx, image_2, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_matrix_async(rocblas_int rows, rocblas_int cols, rocblas_int elem_size, const void* a, rocblas_int lda, void* b, rocblas_int ldb, hipStream_t stream); // CHECK: blasStatus = rocblas_set_matrix_async(rows, cols, num, image, incx, image_2, incy, stream); blasStatus = cublasSetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, int 
elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_matrix_async(rocblas_int rows, rocblas_int cols, rocblas_int elem_size, const void* a, rocblas_int lda, void* b, rocblas_int ldb, hipStream_t stream); // CHECK: blasStatus = rocblas_get_matrix_async(rows, cols, num, image, incx, image_2, incy, stream); @@ -269,15 +269,16 @@ int main() { void** voidBarray = nullptr; const void** const voidBarray_const = const_cast(voidBarray); void** voidCarray = nullptr; + const void** const voidCarray_const = const_cast(voidCarray); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: float CUBLASWINAPI cublasScnrm2(int n, const cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_snrm2(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, float* result); // CHECK: blasStatus = rocblas_snrm2(blasHandle, n, &fx, incx, &fresult); blasStatus = cublasSnrm2_v2(blasHandle, n, &fx, incx, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: double CUBLASWINAPI cublasDnrm2(int n, const double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, const double* x, int incx, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dnrm2(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, double* result); @@ -331,308 +332,308 @@ int main() { cuDoubleComplex** dcomplexYarray = 0; const cuDoubleComplex** const dcomplexYarray_const = const_cast(dcomplexYarray); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: float CUBLASWINAPI cublasScnrm2(int n, const cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, 
int incx, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scnrm2(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, float* result); // CHECK: blasStatus = rocblas_scnrm2(blasHandle, n, &complex, incx, &fresult); blasStatus = cublasScnrm2_v2(blasHandle, n, &complex, incx, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: double CUBLASWINAPI cublasDznrm2(int n, const cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dznrm2(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, double* result); // CHECK: blasStatus = rocblas_dznrm2(blasHandle, n, &dcomplex, incx, &dresult); blasStatus = cublasDznrm2_v2(blasHandle, n, &dcomplex, incx, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: float CUBLASWINAPI cublasSdot(int n, const float* x, int incx, const float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sdot(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* result); // CHECK: blasStatus = rocblas_sdot(blasHandle, n, &fx, incx, &fy, incy, &fresult); blasStatus = cublasSdot_v2(blasHandle, n, &fx, incx, &fy, incy, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: double CUBLASWINAPI cublasDdot(int n, const double* x, int incx, const double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_ddot(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* result); // CHECK: blasStatus = rocblas_ddot(blasHandle, n, &dx, incx, &dy, incy, &dresult); blasStatus = cublasDdot_v2(blasHandle, n, &dx, incx, &dy, incy, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: cuComplex CUBLASWINAPI cublasCdotu(int n, const cuComplex* x, int incx, const cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cdotu(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* result); // CHECK: blasStatus = rocblas_cdotu(blasHandle, n, &complexx, incx, &complexy, incy, &complex); blasStatus = cublasCdotu_v2(blasHandle, n, &complexx, incx, &complexy, incy, &complex); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: cuComplex CUBLASWINAPI cublasCdotc(int n, const cuComplex* x, int incx, const cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cdotc(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* result); // CHECK: blasStatus = rocblas_cdotc(blasHandle, n, &complexx, incx, &complexy, incy, &complex); blasStatus = cublasCdotc_v2(blasHandle, n, &complexx, incx, &complexy, incy, &complex); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: cuDoubleComplex CUBLASWINAPI cublasZdotu(int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy); is not 
supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdotu(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* result); // CHECK: blasStatus = rocblas_zdotu(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); blasStatus = cublasZdotu_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: cuDoubleComplex CUBLASWINAPI cublasZdotc(int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdotc(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* result); // CHECK: blasStatus = rocblas_zdotc(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); blasStatus = cublasZdotc_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSscal(int n, float alpha, float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSscal_v2(cublasHandle_t handle, int n, const float* alpha, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sscal(rocblas_handle handle, rocblas_int n, const float* alpha, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_sscal(blasHandle, n, &fy, &fx, incx); blasStatus = cublasSscal_v2(blasHandle, n, &fy, &fx, incx); - // TODO: #1281 + // 
TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDscal(int n, double alpha, double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDscal_v2(cublasHandle_t handle, int n, const double* alpha, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dscal(rocblas_handle handle, rocblas_int n, const double* alpha, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dscal(blasHandle, n, &dx, &dy, incx); blasStatus = cublasDscal_v2(blasHandle, n, &dx, &dy, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCscal(int n, cuComplex alpha, cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCscal_v2(cublasHandle_t handle, int n, const cuComplex* alpha, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cscal(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* alpha, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_cscal(blasHandle, n, &complexa, &complexx, incx); blasStatus = cublasCscal_v2(blasHandle, n, &complexa, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCsscal(int n, float alpha, cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsscal_v2(cublasHandle_t handle, int n, const float* alpha, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csscal(rocblas_handle handle, rocblas_int n, const float* alpha, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_csscal(blasHandle, n, &fx, &complexx, incx); blasStatus = cublasCsscal_v2(blasHandle, n, &fx, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZscal(int n, cuDoubleComplex alpha, cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZscal_v2(cublasHandle_t 
handle, int n, const cuDoubleComplex* alpha, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zscal(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* alpha, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_zscal(blasHandle, n, &dcomplexa, &dcomplexx, incx); blasStatus = cublasZscal_v2(blasHandle, n, &dcomplexa, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZdscal(int n, double alpha, cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdscal_v2(cublasHandle_t handle, int n, const double* alpha, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdscal(rocblas_handle handle, rocblas_int n, const double* alpha, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_zdscal(blasHandle, n, &dx, &dcomplexx, incx); blasStatus = cublasZdscal_v2(blasHandle, n, &dx, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSaxpy(int n, float alpha, const float* x, int incx, float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSaxpy_v2(cublasHandle_t handle, int n, const float* alpha, const float* x, int incx, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_saxpy(rocblas_handle handle, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_saxpy(blasHandle, n, &fa, &fx, incx, &fy, incy); blasStatus = cublasSaxpy_v2(blasHandle, n, &fa, &fx, incx, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDaxpy(int n, double alpha, const double* x, int incx, double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDaxpy_v2(cublasHandle_t handle, int n, const double* alpha, const double* x, int incx, double* 
y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_daxpy(rocblas_handle handle, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_daxpy(blasHandle, n, &da, &dx, incx, &dy, incy); blasStatus = cublasDaxpy_v2(blasHandle, n, &da, &dx, incx, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCaxpy(int n, cuComplex alpha, const cuComplex* x, int incx, cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCaxpy_v2(cublasHandle_t handle, int n, const cuComplex* alpha, const cuComplex* x, int incx, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_caxpy(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_caxpy(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); blasStatus = cublasCaxpy_v2(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZaxpy(int n, cuDoubleComplex alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZaxpy_v2(cublasHandle_t handle, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zaxpy(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zaxpy(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); blasStatus = cublasZaxpy_v2(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI 
cublasScopy(int n, const float* x, int incx, float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n, const float* x, int incx, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scopy(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_scopy(blasHandle, n, &fx, incx, &fy, incy); blasStatus = cublasScopy_v2(blasHandle, n, &fx, incx, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDcopy(int n, const double* x, int incx, double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n, const double* x, int incx, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dcopy(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dcopy(blasHandle, n, &dx, incx, &dy, incy); blasStatus = cublasDcopy_v2(blasHandle, n, &dx, incx, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCcopy(int n, const cuComplex* x, int incx, cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ccopy(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_ccopy(blasHandle, n, &complexx, incx, &complexy, incy); blasStatus = cublasCcopy_v2(blasHandle, n, &complexx, incx, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZcopy(int n, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI 
cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zcopy(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zcopy(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); blasStatus = cublasZcopy_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSswap(int n, float* x, int incx, float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sswap(rocblas_handle handle, rocblas_int n, float* x, rocblas_int incx, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_sswap(blasHandle, n, &fx, incx, &fy, incy); blasStatus = cublasSswap_v2(blasHandle, n, &fx, incx, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDswap(int n, double* x, int incx, double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dswap(rocblas_handle handle, rocblas_int n, double* x, rocblas_int incx, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dswap(blasHandle, n, &dx, incx, &dy, incy); blasStatus = cublasDswap_v2(blasHandle, n, &dx, incx, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCswap(int n, cuComplex* x, int incx, cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_cswap(rocblas_handle handle, rocblas_int n, rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_cswap(blasHandle, n, &complexx, incx, &complexy, incy); blasStatus = cublasCswap_v2(blasHandle, n, &complexx, incx, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZswap(int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zswap(rocblas_handle handle, rocblas_int n, rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zswap(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); blasStatus = cublasZswap_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: int CUBLASWINAPI cublasIsamax(int n, const float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n, const float* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_isamax(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_isamax(blasHandle, n, &fx, incx, &res); blasStatus = cublasIsamax_v2(blasHandle, n, &fx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: int CUBLASWINAPI cublasIdamax(int n, const double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n, const double* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_idamax(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = 
rocblas_idamax(blasHandle, n, &dx, incx, &res); blasStatus = cublasIdamax_v2(blasHandle, n, &dx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: int CUBLASWINAPI cublasIcamax(int n, const cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_icamax(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_icamax(blasHandle, n, &complexx, incx, &res); blasStatus = cublasIcamax_v2(blasHandle, n, &complexx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: int CUBLASWINAPI cublasIzamax(int n, const cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_izamax(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_izamax(blasHandle, n, &dcomplexx, incx, &res); blasStatus = cublasIzamax_v2(blasHandle, n, &dcomplexx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: int CUBLASWINAPI cublasIsamin(int n, const float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n, const float* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_isamin(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_isamin(blasHandle, n, &fx, incx, &res); blasStatus = cublasIsamin_v2(blasHandle, n, &fx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: int CUBLASWINAPI cublasIdamin(int n, const double* x, int incx); is not supported 
by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n, const double* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_idamin(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_idamin(blasHandle, n, &dx, incx, &res); blasStatus = cublasIdamin_v2(blasHandle, n, &dx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: int CUBLASWINAPI cublasIcamin(int n, const cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_icamin(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_icamin(blasHandle, n, &complexx, incx, &res); blasStatus = cublasIcamin_v2(blasHandle, n, &complexx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: int CUBLASWINAPI cublasIzamin(int n, const cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_izamin(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_izamin(blasHandle, n, &dcomplexx, incx, &res); blasStatus = cublasIzamin_v2(blasHandle, n, &dcomplexx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: float CUBLASWINAPI cublasSasum(int n, const float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sasum(rocblas_handle handle, rocblas_int n, const float* 
x, rocblas_int incx, float* result); // CHECK: blasStatus = rocblas_sasum(blasHandle, n, &fx, incx, &fresult); blasStatus = cublasSasum_v2(blasHandle, n, &fx, incx, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: double CUBLASWINAPI cublasDasum(int n, const double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n, const double* x, int incx, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dasum(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, double* result); // CHECK: blasStatus = rocblas_dasum(blasHandle, n, &dx, incx, &dresult); blasStatus = cublasDasum_v2(blasHandle, n, &dx, incx, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: float CUBLASWINAPI cublasScasum(int n, const cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scasum(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, float* result); // CHECK: blasStatus = rocblas_scasum(blasHandle, n, &complexx, incx, &fresult); blasStatus = cublasScasum_v2(blasHandle, n, &complexx, incx, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: double CUBLASWINAPI cublasDzasum(int n, const cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dzasum(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, double* result); // CHECK: blasStatus = rocblas_dzasum(blasHandle, n, &dcomplexx, incx, &dresult); blasStatus = cublasDzasum_v2(blasHandle, n, &dcomplexx, incx, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // 
NOTE: void CUBLASWINAPI cublasSrot(int n, float* x, int incx, float* y, int incy, float sc, float ss); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrot_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* c, const float* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_srot(rocblas_handle handle, rocblas_int n, float* x, rocblas_int incx, float* y, rocblas_int incy, const float* c, const float* s); // CHECK: blasStatus = rocblas_srot(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); blasStatus = cublasSrot_v2(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDrot(int n, double* x, int incx, double* y, int incy, double sc, double ss); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrot_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* c, const double* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_drot(rocblas_handle handle, rocblas_int n, double* x, rocblas_int incx, double* y, rocblas_int incy, const double* c, const double* s); // CHECK: blasStatus = rocblas_drot(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); blasStatus = cublasDrot_v2(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCrot(int n, cuComplex* x, int incx, cuComplex* y, int incy, float c, cuComplex s); is not supported by HIP // CUDA: CUBLASAPI CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCrot_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy, const float* c, const cuComplex* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_crot(rocblas_handle handle, rocblas_int n, rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy, const float* c, const rocblas_float_complex* s); // CHECK: blasStatus = rocblas_crot(blasHandle, n, &complexx, incx, &complexy, incy, 
&fc, &complexs); blasStatus = cublasCrot_v2(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &complexs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCsrot(int n, cuComplex* x, int incx, cuComplex* y, int incy, float c, float s); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsrot_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy, const float* c, const float* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csrot(rocblas_handle handle, rocblas_int n, rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy, const float* c, const float* s); // CHECK: blasStatus = rocblas_csrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); blasStatus = cublasCsrot_v2(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZrot(int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy, double sc, cuDoubleComplex cs); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZrot_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy, const double* c, const cuDoubleComplex* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zrot(rocblas_handle handle, rocblas_int n, rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy, const double* c, const rocblas_double_complex* s); // CHECK: blasStatus = rocblas_zrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); blasStatus = cublasZrot_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZdrot(int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy, double c, double s); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdrot_v2(cublasHandle_t handle, int n, cuDoubleComplex* 
x, int incx, cuDoubleComplex* y, int incy, const double* c, const double* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdrot(rocblas_handle handle, rocblas_int n, rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy, const double* c, const double* s); @@ -663,14 +664,14 @@ int main() { // CHECK: blasStatus = rocblas_zrotg(blasHandle, &dcomplexa, &dcomplexb, &dc, &dcomplexs); blasStatus = cublasZrotg_v2(blasHandle, &dcomplexa, &dcomplexb, &dc, &dcomplexs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSrotm(int n, float* x, int incx, float* y, int incy, const float* sparam); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_srotm(rocblas_handle handle, rocblas_int n, float* x, rocblas_int incx, float* y, rocblas_int incy, const float* param); // CHECK: blasStatus = rocblas_srotm(blasHandle, n, &fx, incx, &fy, incy, &fresult); blasStatus = cublasSrotm_v2(blasHandle, n, &fx, incx, &fy, incy, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDrotm(int n, double* x, int incx, double* y, int incy, const double* sparam); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_drotm(rocblas_handle handle, rocblas_int n, double* x, rocblas_int incx, double* y, rocblas_int incy, const double* param); @@ -689,498 +690,498 @@ int main() { // CHECK: blasStatus = rocblas_drotmg(blasHandle, &dd1, &dd2, &dx1, &dy1, &dresult); blasStatus = cublasDrotmg_v2(blasHandle, &dd1, &dd2, &dx1, &dy1, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSgemv(char trans, int m, int n, float alpha, const float* 
A, int lda, const float* x, int incx, float beta, float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_sgemv(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); blasStatus = cublasSgemv_v2(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha, const double* A, int lda, const double* x, int incx, double beta, double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dgemv(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); blasStatus = cublasDgemv_v2(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha, const double* A, int lda, const double* x, int incx, double beta, double* y, int incy); is not 
supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_cgemv(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasCgemv_v2(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex beta, cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zgemv(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZgemv_v2(blasHandle, 
blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSgbmv(char trans, int m, int n, int kl, int ku, float alpha, const float* A, int lda, const float* x, int incx, float beta, float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgbmv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, rocblas_int kl, rocblas_int ku, const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_sgbmv(blasHandle, blasOperation, m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); blasStatus = cublasSgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDgbmv(char trans, int m, int n, int kl, int ku, double alpha, const double* A, int lda, const double* x, int incx, double beta, double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgbmv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, rocblas_int kl, rocblas_int ku, const double* alpha, const double* A, rocblas_int lda, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dgbmv(blasHandle, 
blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); blasStatus = cublasDgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCgbmv(char trans, int m, int n, int kl, int ku, cuComplex alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, cuComplex beta, cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgbmv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, rocblas_int kl, rocblas_int ku, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_cgbmv(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasCgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZgbmv(char trans, int m, int n, int kl, int ku, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex beta, cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: 
ROCBLAS_EXPORT rocblas_status rocblas_zgbmv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, rocblas_int kl, rocblas_int ku, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zgbmv(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasStrmv(char uplo, char trans, char diag, int n, const float* A, int lda, float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* A, int lda, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const float* A, rocblas_int lda, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_strmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); blasStatus = cublasStrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDtrmv(char uplo, char trans, char diag, int n, const double* A, int lda, double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* A, int lda, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_dtrmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const double* A, rocblas_int lda, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); blasStatus = cublasDtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCtrmv(char uplo, char trans, char diag, int n, const cuComplex* A, int lda, cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* A, int lda, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); blasStatus = cublasCtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZtrmv(char uplo, char trans, char diag, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_double_complex* A, rocblas_int lda, 
rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); blasStatus = cublasZtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasStbmv(char uplo, char trans, char diag, int n, int k, const float* A, int lda, float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const float* A, int lda, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, rocblas_int m, rocblas_int k, const float* A, rocblas_int lda, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_stbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); blasStatus = cublasStbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDtbmv(char uplo, char trans, char diag, int n, int k, const double* A, int lda, double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const double* A, int lda, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, rocblas_int m, rocblas_int k, const double* A, rocblas_int lda, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); blasStatus = cublasDtbmv_v2(blasHandle, 
blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCtbmv(char uplo, char trans, char diag, int n, int k, const cuComplex* A, int lda, cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuComplex* A, int lda, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, rocblas_int m, rocblas_int k, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); blasStatus = cublasCtbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZtbmv(char uplo, char trans, char diag, int n, int k, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, rocblas_int m, rocblas_int k, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); blasStatus = cublasZtbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, 
&dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, const float* AP, float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* AP, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const float* A, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_stpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); blasStatus = cublasStpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, const double* AP, double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* AP, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const double* A, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); blasStatus = cublasDtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, const cuComplex* AP, cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, 
const cuComplex* AP, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_float_complex* A, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); blasStatus = cublasCtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, const cuDoubleComplex* AP, cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* AP, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_double_complex* A, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); blasStatus = cublasZtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, const float* A, int lda, float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* A, int lda, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const 
float* A, rocblas_int lda, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_strsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); blasStatus = cublasStrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, const double* A, int lda, double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* A, int lda, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const double* A, rocblas_int lda, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); blasStatus = cublasDtrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, const cuComplex* A, int lda, cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* A, int lda, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); blasStatus = cublasCtrsv_v2(blasHandle, blasFillMode, 
blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); blasStatus = cublasZtrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, const float* AP, float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* AP, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stpsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, const float* AP, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_stpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); blasStatus = cublasStpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, const double* AP, double* x, int incx); is not 
supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* AP, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtpsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, const double* AP, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); blasStatus = cublasDtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, const cuComplex* AP, cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* AP, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctpsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, const rocblas_float_complex* AP, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); blasStatus = cublasCtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, const cuDoubleComplex* AP, cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* AP, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_ztpsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, const rocblas_double_complex* AP, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); blasStatus = cublasZtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasStbsv(char uplo, char trans, char diag, int n, int k, const float* A, int lda, float* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const float* A, int lda, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stbsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, rocblas_int k, const float* A, rocblas_int lda, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_stbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); blasStatus = cublasStbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDtbsv(char uplo, char trans, char diag, int n, int k, const double* A, int lda, double* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const double* A, int lda, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtbsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, rocblas_int k, const double* A, rocblas_int lda, double* x, rocblas_int incx); // CHECK: 
blasStatus = rocblas_dtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); blasStatus = cublasDtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCtbsv(char uplo, char trans, char diag, int n, int k, const cuComplex* A, int lda, cuComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuComplex* A, int lda, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctbsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, rocblas_int k, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); blasStatus = cublasCtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZtbsv(char uplo, char trans, char diag, int n, int k, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztbsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, rocblas_int k, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, 
k, &dcomplexA, lda, &dcomplexx, incx); blasStatus = cublasZtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSsymv(char uplo, int n, float alpha, const float* A, int lda, const float* x, int incx, float beta, float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssymv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_ssymv(blasHandle, blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); blasStatus = cublasSsymv_v2(blasHandle, blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDsymv(char uplo, int n, double alpha, const double* A, int lda, const double* x, int incx, double beta, double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsymv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dsymv(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); blasStatus = cublasDsymv_v2(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); - // TODO: #1281 
+ // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csymv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_csymv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasCsymv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsymv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zsymv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZsymv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasChemv(char uplo, int n, cuComplex alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, cuComplex beta, 
cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chemv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_chemv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasChemv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZhemv(char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex beta, cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhemv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zhemv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZhemv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, 
&dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSsbmv(char uplo, int n, int k, float alpha, const float* A, int lda, const float* x, int incx, float beta, float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_ssbmv(blasHandle, blasFillMode, n, k, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); blasStatus = cublasSsbmv_v2(blasHandle, blasFillMode, n, k, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDsbmv(char uplo, int n, int k, double alpha, const double* A, int lda, const double* x, int incx, double beta, double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dsbmv(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); blasStatus = cublasDsbmv_v2(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void 
CUBLASWINAPI cublasChbmv(char uplo, int n, int k, cuComplex alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, cuComplex beta, cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_chbmv(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasChbmv_v2(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex beta, cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = 
rocblas_zhbmv(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZhbmv_v2(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha, const float* AP, const float* x, int incx, float beta, float* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* AP, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* A, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_sspmv(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); blasStatus = cublasSspmv_v2(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha, const double* AP, const double* x, int incx, double beta, double* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* AP, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dspmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* A, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dspmv(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); blasStatus = cublasDspmv_v2(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); - // 
TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha, const cuComplex* AP, const cuComplex* x, int incx, cuComplex beta, cuComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* AP, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* AP, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_chpmv(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasChpmv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex* AP, const cuDoubleComplex* x, int incx, cuDoubleComplex beta, cuDoubleComplex* y, int incy); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* AP, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* AP, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zhpmv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, 
&dcomplexb, &dcomplexy, incy); blasStatus = cublasZhpmv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSger(int m, int n, float alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2(cublasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sger(rocblas_handle handle, rocblas_int m, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* A, rocblas_int lda); // CHECK: blasStatus = rocblas_sger(blasHandle, m, n, &fa, &fx, incx, &fy, incy, &fA, lda); blasStatus = cublasSger_v2(blasHandle, m, n, &fa, &fx, incx, &fy, incy, &fA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDger(int m, int n, double alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2(cublasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dger(rocblas_handle handle, rocblas_int m, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* A, rocblas_int lda); // CHECK: blasStatus = rocblas_dger(blasHandle, m, n, &da, &dx, incx, &dy, incy, &dA, lda); blasStatus = cublasDger_v2(blasHandle, m, n, &da, &dx, incx, &dy, incy, &dA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCgeru(int m, int n, cuComplex alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); is not supported by HIP // CUDA: CUBLASAPI 
cublasStatus_t CUBLASWINAPI cublasCgeru_v2(cublasHandle_t handle, int m, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgeru(rocblas_handle handle, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_cgeru(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); blasStatus = cublasCgeru_v2(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCgerc(int m, int n, cuComplex alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2(cublasHandle_t handle, int m, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgerc(rocblas_handle handle, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_cgerc(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); blasStatus = cublasCgerc_v2(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZgeru(int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2(cublasHandle_t handle, int m, int n, const 
cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgeru(rocblas_handle handle, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zgeru(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); blasStatus = cublasZgeru_v2(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZgerc(int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2(cublasHandle_t handle, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgerc(rocblas_handle handle, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zgerc(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); blasStatus = cublasZgerc_v2(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSsyr(char uplo, int n, float alpha, const float* x, int incx, float* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* 
A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, float* A, rocblas_int lda); // CHECK: blasStatus = rocblas_ssyr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); blasStatus = cublasSsyr_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDsyr(char uplo, int n, double alpha, const double* x, int incx, double* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, double* A, rocblas_int lda); // CHECK: blasStatus = rocblas_dsyr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); blasStatus = cublasDsyr_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_csyr(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexA, lda); blasStatus = cublasCsyr_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const 
cuDoubleComplex* x, int incx, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zsyr(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); blasStatus = cublasZsyr_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCher(char uplo, int n, float alpha, const cuComplex* x, int incx, cuComplex* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const cuComplex* x, int incx, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_cher(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); blasStatus = cublasCher_v2(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZher(char uplo, int n, double alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zher(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zher(blasHandle, 
blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); blasStatus = cublasZher_v2(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSspr(char uplo, int n, float alpha, const float* x, int incx, float* AP); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, float* AP); // CHECK: blasStatus = rocblas_sspr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); blasStatus = cublasSspr_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDspr(char uplo, int n, double alpha, const double* x, int incx, double* AP); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dspr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, double* AP); // CHECK: blasStatus = rocblas_dspr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); blasStatus = cublasDspr_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasChpr(char uplo, int n, float alpha, const cuComplex* x, int incx, cuComplex* AP); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const cuComplex* x, int incx, cuComplex* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpr(rocblas_handle handle, rocblas_fill uplo, 
rocblas_int n, const float* alpha, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* AP); // CHECK: blasStatus = rocblas_chpr(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); blasStatus = cublasChpr_v2(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZhpr(char uplo, int n, double alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* AP); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* AP); // CHECK: blasStatus = rocblas_zhpr(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA); blasStatus = cublasZhpr_v2(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSsyr2(char uplo, int n, float alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* A, rocblas_int lda); // CHECK: blasStatus = rocblas_ssyr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA, lda); blasStatus = cublasSsyr2_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void 
CUBLASWINAPI cublasDsyr2(char uplo, int n, double alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* A, rocblas_int lda); // CHECK: blasStatus = rocblas_dsyr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); blasStatus = cublasDsyr2_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_csyr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); blasStatus = cublasCsyr2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, 
const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zsyr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); blasStatus = cublasZsyr2_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCher2(char uplo, int n, cuComplex alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_cher2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); blasStatus = cublasCher2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZher2(char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zher2(rocblas_handle handle, 
rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zher2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); blasStatus = cublasZher2_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSspr2(char uplo, int n, float alpha, const float* x, int incx, const float* y, int incy, float* AP); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* AP); // CHECK: blasStatus = rocblas_sspr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); blasStatus = cublasSspr2_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDspr2(char uplo, int n, double alpha, const double* x, int incx, const double* y, int incy, double* AP); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dspr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* AP); // CHECK: blasStatus = rocblas_dspr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); 
blasStatus = cublasDspr2_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasChpr2(char uplo, int n, cuComplex alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* AP); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* AP); // CHECK: blasStatus = rocblas_chpr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA); blasStatus = cublasChpr2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* AP); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* AP); @@ -1190,343 +1191,343 @@ int main() { // CHECK rocblas_operation transa, transb; cublasOperation_t transa, transb; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSgemm(char 
transa, char transb, int m, int n, int k, float alpha, const float* A, int lda, const float* B, int ldb, float beta, float* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_sgemm(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); blasStatus = cublasSgemm_v2(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDgemm(char transa, char transb, int m, int n, int k, double alpha, const double* A, int lda, const double* B, int ldb, double beta, double* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dgemm(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); blasStatus = cublasDgemm_v2(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, 
&db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCgemm(char transa, char transb, int m, int n, int k, cuComplex alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex beta, cuComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cgemm(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); blasStatus = cublasCgemm_v2(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZgemm(char transa, char transb, int m, int n, int k, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex beta, cuDoubleComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, 
rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zgemm(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZgemm_v2(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* const A[], rocblas_int lda, const float* const B[], rocblas_int ldb, const float* beta, float* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_dgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* const A[], rocblas_int lda, const double* const B[], rocblas_int ldb, const double* beta, double* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const B[], rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, 
cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const B[], rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, const float* A, int lda, float beta, float* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyrk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ssyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); blasStatus = cublasSsyrk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); - // TODO: 
#1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDsyrk(char uplo, char trans, int n, int k, double alpha, const double* A, int lda, double beta, double* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyrk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); blasStatus = cublasDsyrk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCsyrk(char uplo, char trans, int n, int k, cuComplex alpha, const cuComplex* A, int lda, cuComplex beta, cuComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyrk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_csyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); blasStatus = cublasCsyrk_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // 
NOTE: void CUBLASWINAPI cublasZsyrk(char uplo, char trans, int n, int k, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex beta, cuDoubleComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyrk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZsyrk_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCherk(char uplo, char trans, int n, int k, float alpha, const cuComplex* A, int lda, float beta, cuComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const cuComplex* A, int lda, const float* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cherk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const float* alpha, const rocblas_float_complex* A, rocblas_int lda, const float* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); blasStatus = cublasCherk_v2(blasHandle, blasFillMode, transa, n, k, &fa, 
&complexA, lda, &fb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZherk(char uplo, char trans, int n, int k, double alpha, const cuDoubleComplex* A, int lda, double beta, cuDoubleComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const cuDoubleComplex* A, int lda, const double* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zherk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const double* alpha, const rocblas_double_complex* A, rocblas_int lda, const double* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); blasStatus = cublasZherk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSsyr2k(char uplo, char trans, int n, int k, float alpha, const float* A, int lda, const float* B, int ldb, float beta, float* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ssyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); blasStatus = cublasSsyr2k_v2(blasHandle, 
blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDsyr2k(char uplo, char trans, int n, int k, double alpha, const double* A, int lda, const double* B, int ldb, double beta, double* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); blasStatus = cublasDsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCsyr2k(char uplo, char trans, int n, int k, cuComplex alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex beta, cuComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // 
CHECK: blasStatus = rocblas_csyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); blasStatus = cublasCsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZsyr2k(char uplo, char trans, int n, int k, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex beta, cuDoubleComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyrkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, 
const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ssyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); blasStatus = cublasSsyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyrkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); blasStatus = cublasDsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyrkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_csyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, 
&complexC, ldc); blasStatus = cublasCsyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyrkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCher2k(char uplo, char trans, int n, int k, cuComplex alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, float beta, cuComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const float* beta, 
rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); blasStatus = cublasCher2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZher2k(char uplo, char trans, int n, int k, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, double beta, cuDoubleComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zher2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const double* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); blasStatus = cublasZher2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cherkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int 
k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const float* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc); blasStatus = cublasCherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zherkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const double* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc); blasStatus = cublasZherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasSsymm(char side, char uplo, int m, int n, float alpha, const float* A, int lda, const float* B, int ldb, float beta, float* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_ssymm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ssymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); blasStatus = cublasSsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDsymm(char side, char uplo, int m, int n, double alpha, const double* A, int lda, const double* B, int ldb, double beta, double* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsymm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); blasStatus = cublasDsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCsymm(char side, char uplo, int m, int n, cuComplex alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex beta, cuComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const 
cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csymm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_csymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); blasStatus = cublasCsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZsymm(char side, char uplo, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex beta, cuDoubleComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsymm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: 
[rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasChemm(char side, char uplo, int m, int n, cuComplex alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex beta, cuComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chemm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_chemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); blasStatus = cublasChemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZhemm(char side, char uplo, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex beta, cuDoubleComplex* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhemm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const 
rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZhemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasStrsm(char side, char uplo, char transa, char diag, int m, int n, float alpha, const float* A, int lda, float* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, float* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, float* B, rocblas_int ldb); // CHECK: blasStatus = rocblas_strsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); blasStatus = cublasStrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDtrsm(char side, char uplo, char transa, char diag, int m, int n, double alpha, const double* A, int lda, double* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, double* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_dtrsm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, double* B, rocblas_int ldb); // CHECK: blasStatus = rocblas_dtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); blasStatus = cublasDtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCtrsm(char side, char uplo, char transa, char diag, int m, int n, cuComplex alpha, const cuComplex* A, int lda, cuComplex* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, cuComplex* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* B, rocblas_int ldb); // CHECK: blasStatus = rocblas_ctrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); blasStatus = cublasCtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZtrsm(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, 
cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* B, rocblas_int ldb); // CHECK: blasStatus = rocblas_ztrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); blasStatus = cublasZtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag, int m, int n, float alpha, const float* A, int lda, float* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_strmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); blasStatus = cublasStrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag, int 
m, int n, double alpha, const double* A, int lda, double* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); blasStatus = cublasDtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag, int m, int n, cuComplex alpha, const cuComplex* A, int lda, cuComplex* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ctrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, 
lda, &complexB, ldb, &complexC, ldc); blasStatus = cublasCtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // NOTE: void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ztrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); blasStatus = cublasZtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgeam(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const 
float* beta, const float* B, rocblas_int ldb, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_sgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc); blasStatus = cublasSgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgeam(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* beta, const double* B, rocblas_int ldb, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc); blasStatus = cublasDgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, const cuComplex* B, int ldb, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgeam(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* beta, const rocblas_float_complex* B, rocblas_int ldb, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc); blasStatus = cublasCgeam(blasHandle, transa, transb, m, n, 
&complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgeam(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* beta, const rocblas_double_complex* B, rocblas_int ldb, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); blasStatus = cublasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* const A[], rocblas_int lda, float* const B[], rocblas_int ldb, rocblas_int batch_count); // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, 
m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, double* const B[], rocblas_int ldb, rocblas_int batch_count); // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, rocblas_float_complex* const B[], rocblas_int ldb, rocblas_int batch_count); // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); blasStatus = cublasCtrsmBatched(blasHandle, 
blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, rocblas_double_complex* const B[], rocblas_int ldb, rocblas_int batch_count); // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sdgmm(rocblas_handle handle, rocblas_side side, rocblas_int m, rocblas_int n, const float* A, rocblas_int lda, const float* x, rocblas_int incx, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_sdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc); blasStatus = cublasSdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, 
cublasSideMode_t mode, int m, int n, const double* A, int lda, const double* x, int incx, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ddgmm(rocblas_handle handle, rocblas_side side, rocblas_int m, rocblas_int n, const double* A, rocblas_int lda, const double* x, rocblas_int incx, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ddgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc); blasStatus = cublasDdgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuComplex* A, int lda, const cuComplex* x, int incx, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cdgmm(rocblas_handle handle, rocblas_side side, rocblas_int m, rocblas_int n, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc); blasStatus = cublasCdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdgmm(rocblas_handle handle, rocblas_side side, rocblas_int m, rocblas_int n, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zdgmm(blasHandle, blasSideMode, m, n, &dcomplexa, lda, &dcomplexx, incx, &dcomplexC, ldc); @@ -1575,7 +1576,7 @@ int main() { // CHECK: rocblas_half** hyarray = 0; 
__half** hyarray = 0; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* A, int lda, const __half* B, int ldb, const __half* beta, __half* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_half* alpha, const rocblas_half* A, rocblas_int lda, const rocblas_half* B, rocblas_int ldb, const rocblas_half* beta, rocblas_half* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_hgemm(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); @@ -1626,37 +1627,37 @@ int main() { cublasGemmAlgo_t blasGemmAlgo; cublasGemmAlgo_t BLAS_GEMM_DFALT = CUBLAS_GEMM_DFALT; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, void* result, cudaDataType resultType, cudaDataType executionType); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_nrm2_ex(rocblas_handle handle, rocblas_int n, const void* x, rocblas_datatype x_type, rocblas_int incx, void* results, rocblas_datatype result_type, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_nrm2_ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); blasStatus = cublasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, long long int strideA, const float* B, int ldb, long long int strideB, const float* beta, float* C, int ldc, long long int strideC, int 
batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, rocblas_stride stride_a, const float* B, rocblas_int ldb, rocblas_stride stride_b, const float* beta, float* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_sgemm_strided_batched(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, strideA, &fB, ldb, strideB, &fb, &fC, ldc, strideC, batchCount); blasStatus = cublasSgemmStridedBatched(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, strideA, &fB, ldb, strideB, &fb, &fC, ldc, strideC, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, long long int strideA, const double* B, int ldb, long long int strideB, const double* beta, double* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, rocblas_stride stride_a, const double* B, rocblas_int ldb, rocblas_stride stride_b, const double* beta, double* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_dgemm_strided_batched(blasHandle, transa, transb, m, n, k, &da, &dA, lda, strideA, &dB, ldb, strideB, &db, &dC, ldc, strideC, batchCount); blasStatus = cublasDgemmStridedBatched(blasHandle, transa, transb, m, n, k, &da, &dA, lda, strideA, &dB, ldb, strideB, &db, &dC, ldc, strideC, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI 
cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, long long int strideA, const cuComplex* B, int ldb, long long int strideB, const cuComplex* beta, cuComplex* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, rocblas_stride stride_a, const rocblas_float_complex* B, rocblas_int ldb, rocblas_stride stride_b, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_cgemm_strided_batched(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, strideA, &complexB, ldb, strideB, &complexb, &complexC, ldc, strideC, batchCount); blasStatus = cublasCgemmStridedBatched(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, strideA, &complexB, ldb, strideB, &complexb, &complexC, ldc, strideC, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, long long int strideA, const cuDoubleComplex* B, int ldb, long long int strideB, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, rocblas_stride 
stride_a, const rocblas_double_complex* B, rocblas_int ldb, rocblas_stride stride_b, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_zgemm_strided_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); blasStatus = cublasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* A, int lda, long long int strideA, const __half* B, int ldb, long long int strideB, const __half* beta, __half* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_half* alpha, const rocblas_half* A, rocblas_int lda, rocblas_stride stride_a, const rocblas_half* B, rocblas_int ldb, rocblas_stride stride_b, const rocblas_half* beta, rocblas_half* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_hgemm_strided_batched(blasHandle, transa, transb, m, n, k, ha, hA, lda, strideA, hB, ldb, strideB, hb, hC, ldc, strideC, batchCount); @@ -1687,25 +1688,25 @@ int main() { cudaDataType CStype; cudaDataType Executiontype; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScalEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, void* x, cudaDataType xType, int incx, cudaDataType executionType); // ROC: 
ROCBLAS_EXPORT rocblas_status rocblas_scal_ex(rocblas_handle handle, rocblas_int n, const void* alpha, rocblas_datatype alpha_type, void* x, rocblas_datatype x_type, rocblas_int incx, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_scal_ex(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype); blasStatus = cublasScalEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAxpyEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, const void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, cudaDataType executiontype); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_axpy_ex(rocblas_handle handle, rocblas_int n, const void* alpha, rocblas_datatype alpha_type, const void* x, rocblas_datatype x_type, rocblas_int incx, void* y, rocblas_datatype y_type, rocblas_int incy, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_axpy_ex(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype); blasStatus = cublasAxpyEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, const void* y, cudaDataType yType, int incy, void* result, cudaDataType resultType, cudaDataType executionType); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dot_ex(rocblas_handle handle, rocblas_int n, const void* x, rocblas_datatype x_type, rocblas_int incx, const void* y, rocblas_datatype y_type, rocblas_int incy, void* result, rocblas_datatype result_type, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_dot_ex(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, image, DataType, Executiontype); blasStatus = cublasDotEx(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, image, 
DataType, Executiontype); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, const void* y, cudaDataType yType, int incy, void* result, cudaDataType resultType, cudaDataType executionType); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dotc_ex(rocblas_handle handle, rocblas_int n, const void* x, rocblas_datatype x_type, rocblas_int incx, const void* y, rocblas_datatype y_type, rocblas_int incy, void* result, rocblas_datatype result_type, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_dotc_ex(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, image, DataType, Executiontype); @@ -1716,7 +1717,9 @@ int main() { // CHECK: rocblas_datatype computeType; cudaDataType computeType; - // TODO: #1281 + // TODO: [rocBLAS][#1281] + // TODO: [rocBLAS] File a ticket for rocblas_gemm_ex_v2 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); // CHECK: 
blasStatus = rocblas_gemm_ex(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo); @@ -1742,7 +1745,7 @@ int main() { // CHECK: blasStatus = rocblas_set_math_mode(blasHandle, blasMath); blasStatus = cublasSetMathMode(blasHandle, blasMath); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* const Aarray[], int lda, const __half* const Barray[], int ldb, const __half* beta, __half* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_half* alpha, const rocblas_half* const A[], rocblas_int lda, const rocblas_half* const B[], rocblas_int ldb, const rocblas_half* beta, rocblas_half* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_hgemm_batched(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, hCarray, ldc, batchCount); @@ -1750,13 +1753,17 @@ int main() { #endif #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 - // TODO: #1281 - // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // TODO: [rocBLAS][#1281] + // TODO: [rocBLAS] File a ticket for rocblas_gemm_batched_ex_v2 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // TODO: 
[HIPIFY] CUDA VERSION detection by HIPIFY itself + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *const Aarray[], cudaDataType Atype, int lda, const void *const Barray[], cudaDataType Btype, int ldb, const void *beta, void *const Carray[], cudaDataType Ctype, int ldc, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - // TODO: #1281 + // TODO: [rocBLAS][#1281] + // TODO: [rocBLAS] File a ticket for rocblas_gemm_strided_batched_ex_v2 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, 
const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_strided_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, rocblas_stride stride_a, const void* b, rocblas_datatype b_type, rocblas_int ldb, rocblas_stride stride_b, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, rocblas_stride stride_c, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_stride stride_d, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); // CHECK: blasStatus = rocblas_gemm_strided_batched_ex(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); @@ -1770,7 +1777,7 @@ int main() { // CHECK: rocblas_fill BLAS_FILL_MODE_FULL = rocblas_fill_full; cublasFillMode_t BLAS_FILL_MODE_FULL = CUBLAS_FILL_MODE_FULL; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotEx(cublasHandle_t handle, int n, void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, const void* c, const void* s, cudaDataType csType, cudaDataType executiontype); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_rot_ex(rocblas_handle handle, rocblas_int n, void* x, rocblas_datatype x_type, rocblas_int incx, void* y, rocblas_datatype y_type, rocblas_int incy, const void* c, const void* s, rocblas_datatype cs_type, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_rot_ex(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, cptr, sptr, CStype, Executiontype); diff --git 
a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index b301e719..440b662b 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -14,7 +14,7 @@ #endif int main() { - printf("16.v2. cuBLAS API to hipBLAS API synthetic test\n"); + printf("16.v2. cuBLAS API to rocBLAS API synthetic test\n"); // CHECK: rocblas_operation blasOperation; // CHECK-NEXT: rocblas_operation BLAS_OP_N = rocblas_operation_none; @@ -195,25 +195,25 @@ int main() { #endif */ - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void* x, int incx, void* devicePtr, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_vector(rocblas_int n, rocblas_int elem_size, const void* x, rocblas_int incx, void* y, rocblas_int incy); // CHECK: blasStatus = rocblas_set_vector(n, num, image, incx, image_2, incy); blasStatus = cublasSetVector(n, num, image, incx, image_2, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void* x, int incx, void* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_vector(rocblas_int n, rocblas_int elem_size, const void* x, rocblas_int incx, void* y, rocblas_int incy); // CHECK: blasStatus = rocblas_get_vector(n, num, image, incx, image_2, incy); blasStatus = cublasGetVector(n, num, image, incx, image_2, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, const void* hostPtr, int incx, void* devicePtr, int incy, cudaStream_t stream); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_vector_async(rocblas_int n, rocblas_int elem_size, const void* x, rocblas_int incx, void* y, rocblas_int incy, hipStream_t stream); // CHECK: blasStatus = rocblas_set_vector_async(n, num, image, incx, 
image_2, incy, stream); blasStatus = cublasSetVectorAsync(n, num, image, incx, image_2, incy, stream); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, const void* devicePtr, int incx, void* hostPtr, int incy, cudaStream_t stream); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_vector_async(rocblas_int n, rocblas_int elem_size, const void* x, rocblas_int incx, void* y, rocblas_int incy, hipStream_t stream); // CHECK: blasStatus = rocblas_get_vector_async(n, num, image, incx, image_2, incy, stream); @@ -222,25 +222,25 @@ int main() { int rows = 0; int cols = 0; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_matrix(rocblas_int rows, rocblas_int cols, rocblas_int elem_size, const void* a, rocblas_int lda, void* b, rocblas_int ldb); // CHECK: blasStatus = rocblas_set_matrix(rows, cols, num, image, incx, image_2, incy); blasStatus = cublasSetMatrix(rows, cols, num, image, incx, image_2, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_matrix(rocblas_int rows, rocblas_int cols, rocblas_int elem_size, const void* a, rocblas_int lda, void* b, rocblas_int ldb); // CHECK: blasStatus = rocblas_get_matrix(rows, cols, num, image, incx, image_2, incy); blasStatus = cublasGetMatrix(rows, cols, num, image, incx, image_2, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_matrix_async(rocblas_int rows, rocblas_int cols, rocblas_int elem_size, const 
void* a, rocblas_int lda, void* b, rocblas_int ldb, hipStream_t stream); // CHECK: blasStatus = rocblas_set_matrix_async(rows, cols, num, image, incx, image_2, incy, stream); blasStatus = cublasSetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_matrix_async(rocblas_int rows, rocblas_int cols, rocblas_int elem_size, const void* a, rocblas_int lda, void* b, rocblas_int ldb, hipStream_t stream); // CHECK: blasStatus = rocblas_get_matrix_async(rows, cols, num, image, incx, image_2, incy, stream); @@ -310,8 +310,9 @@ int main() { void** voidBarray = nullptr; const void** const voidBarray_const = const_cast(voidBarray); void** voidCarray = nullptr; + const void** const voidCarray_const = const_cast(voidCarray); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_snrm2(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, float* result); // CHECK: blasStatus = rocblas_snrm2(blasHandle, n, &fx, incx, &fresult); @@ -319,7 +320,7 @@ int main() { blasStatus = cublasSnrm2(blasHandle, n, &fx, incx, &fresult); blasStatus = cublasSnrm2_v2(blasHandle, n, &fx, incx, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, const double* x, int incx, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dnrm2(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, double* result); // CHECK: blasStatus = rocblas_dnrm2(blasHandle, n, &dx, incx, &dresult); @@ -374,7 +375,7 @@ int main() { cuDoubleComplex** dcomplexCarray 
= 0; cuDoubleComplex** dcomplexTauarray = 0; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scnrm2(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, float* result); // CHECK: blasStatus = rocblas_scnrm2(blasHandle, n, &complex, incx, &fresult); @@ -382,7 +383,7 @@ int main() { blasStatus = cublasScnrm2(blasHandle, n, &complex, incx, &fresult); blasStatus = cublasScnrm2_v2(blasHandle, n, &complex, incx, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dznrm2(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, double* result); // CHECK: blasStatus = rocblas_dznrm2(blasHandle, n, &dcomplex, incx, &dresult); @@ -390,7 +391,7 @@ int main() { blasStatus = cublasDznrm2(blasHandle, n, &dcomplex, incx, &dresult); blasStatus = cublasDznrm2_v2(blasHandle, n, &dcomplex, incx, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sdot(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* result); // CHECK: blasStatus = rocblas_sdot(blasHandle, n, &fx, incx, &fy, incy, &fresult); @@ -398,7 +399,7 @@ int main() { blasStatus = cublasSdot(blasHandle, n, &fx, incx, &fy, incy, &fresult); blasStatus = cublasSdot_v2(blasHandle, n, &fx, incx, &fy, incy, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasDdot_v2(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ddot(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* result); // CHECK: blasStatus = rocblas_ddot(blasHandle, n, &dx, incx, &dy, incy, &dresult); @@ -406,7 +407,7 @@ int main() { blasStatus = cublasDdot(blasHandle, n, &dx, incx, &dy, incy, &dresult); blasStatus = cublasDdot_v2(blasHandle, n, &dx, incx, &dy, incy, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cdotu(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* result); // CHECK: blasStatus = rocblas_cdotu(blasHandle, n, &complexx, incx, &complexy, incy, &complex); @@ -414,7 +415,7 @@ int main() { blasStatus = cublasCdotu(blasHandle, n, &complexx, incx, &complexy, incy, &complex); blasStatus = cublasCdotu_v2(blasHandle, n, &complexx, incx, &complexy, incy, &complex); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cdotc(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* result); // CHECK: blasStatus = rocblas_cdotc(blasHandle, n, &complexx, incx, &complexy, incy, &complex); @@ -422,7 +423,7 @@ int main() { blasStatus = cublasCdotc(blasHandle, n, &complexx, incx, &complexy, incy, &complex); blasStatus = cublasCdotc_v2(blasHandle, n, 
&complexx, incx, &complexy, incy, &complex); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdotu(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* result); // CHECK: blasStatus = rocblas_zdotu(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); @@ -430,7 +431,7 @@ int main() { blasStatus = cublasZdotu(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); blasStatus = cublasZdotu_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdotc(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* result); // CHECK: blasStatus = rocblas_zdotc(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); @@ -438,7 +439,7 @@ int main() { blasStatus = cublasZdotc(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); blasStatus = cublasZdotc_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSscal_v2(cublasHandle_t handle, int n, const float* alpha, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sscal(rocblas_handle handle, rocblas_int n, const float* alpha, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_sscal(blasHandle, n, &fy, &fx, incx); @@ -446,7 +447,7 @@ int 
main() { blasStatus = cublasSscal(blasHandle, n, &fy, &fx, incx); blasStatus = cublasSscal_v2(blasHandle, n, &fy, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDscal_v2(cublasHandle_t handle, int n, const double* alpha, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dscal(rocblas_handle handle, rocblas_int n, const double* alpha, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dscal(blasHandle, n, &dx, &dy, incx); @@ -454,7 +455,7 @@ int main() { blasStatus = cublasDscal(blasHandle, n, &dx, &dy, incx); blasStatus = cublasDscal_v2(blasHandle, n, &dx, &dy, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCscal_v2(cublasHandle_t handle, int n, const cuComplex* alpha, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cscal(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* alpha, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_cscal(blasHandle, n, &complexa, &complexx, incx); @@ -462,7 +463,7 @@ int main() { blasStatus = cublasCscal(blasHandle, n, &complexa, &complexx, incx); blasStatus = cublasCscal_v2(blasHandle, n, &complexa, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsscal_v2(cublasHandle_t handle, int n, const float* alpha, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csscal(rocblas_handle handle, rocblas_int n, const float* alpha, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_csscal(blasHandle, n, &fx, &complexx, incx); @@ -470,7 +471,7 @@ int main() { blasStatus = cublasCsscal(blasHandle, n, &fx, &complexx, incx); blasStatus = cublasCsscal_v2(blasHandle, n, &fx, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZscal_v2(cublasHandle_t handle, int n, const 
cuDoubleComplex* alpha, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zscal(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* alpha, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_zscal(blasHandle, n, &dcomplexa, &dcomplexx, incx); @@ -478,7 +479,7 @@ int main() { blasStatus = cublasZscal(blasHandle, n, &dcomplexa, &dcomplexx, incx); blasStatus = cublasZscal_v2(blasHandle, n, &dcomplexa, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdscal_v2(cublasHandle_t handle, int n, const double* alpha, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdscal(rocblas_handle handle, rocblas_int n, const double* alpha, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_zdscal(blasHandle, n, &dx, &dcomplexx, incx); @@ -486,7 +487,7 @@ int main() { blasStatus = cublasZdscal(blasHandle, n, &dx, &dcomplexx, incx); blasStatus = cublasZdscal_v2(blasHandle, n, &dx, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSaxpy_v2(cublasHandle_t handle, int n, const float* alpha, const float* x, int incx, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_saxpy(rocblas_handle handle, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_saxpy(blasHandle, n, &fa, &fx, incx, &fy, incy); @@ -494,7 +495,7 @@ int main() { blasStatus = cublasSaxpy(blasHandle, n, &fa, &fx, incx, &fy, incy); blasStatus = cublasSaxpy_v2(blasHandle, n, &fa, &fx, incx, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDaxpy_v2(cublasHandle_t handle, int n, const double* alpha, const double* x, int incx, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_daxpy(rocblas_handle handle, 
rocblas_int n, const double* alpha, const double* x, rocblas_int incx, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_daxpy(blasHandle, n, &da, &dx, incx, &dy, incy); @@ -502,7 +503,7 @@ int main() { blasStatus = cublasDaxpy(blasHandle, n, &da, &dx, incx, &dy, incy); blasStatus = cublasDaxpy_v2(blasHandle, n, &da, &dx, incx, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCaxpy_v2(cublasHandle_t handle, int n, const cuComplex* alpha, const cuComplex* x, int incx, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_caxpy(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_caxpy(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); @@ -510,7 +511,7 @@ int main() { blasStatus = cublasCaxpy(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); blasStatus = cublasCaxpy_v2(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZaxpy_v2(cublasHandle_t handle, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zaxpy(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zaxpy(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); @@ -518,7 +519,7 @@ int main() { blasStatus = cublasZaxpy(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); blasStatus = cublasZaxpy_v2(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, 
int n, const float* x, int incx, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scopy(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_scopy(blasHandle, n, &fx, incx, &fy, incy); @@ -526,7 +527,7 @@ int main() { blasStatus = cublasScopy(blasHandle, n, &fx, incx, &fy, incy); blasStatus = cublasScopy_v2(blasHandle, n, &fx, incx, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n, const double* x, int incx, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dcopy(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dcopy(blasHandle, n, &dx, incx, &dy, incy); @@ -534,7 +535,7 @@ int main() { blasStatus = cublasDcopy(blasHandle, n, &dx, incx, &dy, incy); blasStatus = cublasDcopy_v2(blasHandle, n, &dx, incx, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ccopy(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_ccopy(blasHandle, n, &complexx, incx, &complexy, incy); @@ -542,7 +543,7 @@ int main() { blasStatus = cublasCcopy(blasHandle, n, &complexx, incx, &complexy, incy); blasStatus = cublasCcopy_v2(blasHandle, n, &complexx, incx, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zcopy(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, 
rocblas_int incx, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zcopy(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); @@ -550,7 +551,7 @@ int main() { blasStatus = cublasZcopy(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); blasStatus = cublasZcopy_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sswap(rocblas_handle handle, rocblas_int n, float* x, rocblas_int incx, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_sswap(blasHandle, n, &fx, incx, &fy, incy); @@ -558,7 +559,7 @@ int main() { blasStatus = cublasSswap(blasHandle, n, &fx, incx, &fy, incy); blasStatus = cublasSswap_v2(blasHandle, n, &fx, incx, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dswap(rocblas_handle handle, rocblas_int n, double* x, rocblas_int incx, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dswap(blasHandle, n, &dx, incx, &dy, incy); @@ -566,7 +567,7 @@ int main() { blasStatus = cublasDswap(blasHandle, n, &dx, incx, &dy, incy); blasStatus = cublasDswap_v2(blasHandle, n, &dx, incx, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cswap(rocblas_handle handle, rocblas_int n, rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_cswap(blasHandle, n, &complexx, incx, &complexy, incy); @@ -574,7 +575,7 @@ int main() { blasStatus = cublasCswap(blasHandle, n, 
&complexx, incx, &complexy, incy); blasStatus = cublasCswap_v2(blasHandle, n, &complexx, incx, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zswap(rocblas_handle handle, rocblas_int n, rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zswap(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); @@ -582,7 +583,7 @@ int main() { blasStatus = cublasZswap(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); blasStatus = cublasZswap_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n, const float* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_isamax(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_isamax(blasHandle, n, &fx, incx, &res); @@ -590,7 +591,7 @@ int main() { blasStatus = cublasIsamax(blasHandle, n, &fx, incx, &res); blasStatus = cublasIsamax_v2(blasHandle, n, &fx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n, const double* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_idamax(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_idamax(blasHandle, n, &dx, incx, &res); @@ -598,7 +599,7 @@ int main() { blasStatus = cublasIdamax(blasHandle, n, &dx, incx, &res); blasStatus = cublasIdamax_v2(blasHandle, n, &dx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t 
handle, int n, const cuComplex* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_icamax(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_icamax(blasHandle, n, &complexx, incx, &res); @@ -606,7 +607,7 @@ int main() { blasStatus = cublasIcamax(blasHandle, n, &complexx, incx, &res); blasStatus = cublasIcamax_v2(blasHandle, n, &complexx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_izamax(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_izamax(blasHandle, n, &dcomplexx, incx, &res); @@ -614,7 +615,7 @@ int main() { blasStatus = cublasIzamax(blasHandle, n, &dcomplexx, incx, &res); blasStatus = cublasIzamax_v2(blasHandle, n, &dcomplexx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n, const float* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_isamin(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_isamin(blasHandle, n, &fx, incx, &res); @@ -622,7 +623,7 @@ int main() { blasStatus = cublasIsamin(blasHandle, n, &fx, incx, &res); blasStatus = cublasIsamin_v2(blasHandle, n, &fx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n, const double* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_idamin(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_idamin(blasHandle, n, &dx, 
incx, &res); @@ -630,7 +631,7 @@ int main() { blasStatus = cublasIdamin(blasHandle, n, &dx, incx, &res); blasStatus = cublasIdamin_v2(blasHandle, n, &dx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_icamin(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_icamin(blasHandle, n, &complexx, incx, &res); @@ -638,7 +639,7 @@ int main() { blasStatus = cublasIcamin(blasHandle, n, &complexx, incx, &res); blasStatus = cublasIcamin_v2(blasHandle, n, &complexx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, int* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_izamin(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, rocblas_int* result); // CHECK: blasStatus = rocblas_izamin(blasHandle, n, &dcomplexx, incx, &res); @@ -646,7 +647,7 @@ int main() { blasStatus = cublasIzamin(blasHandle, n, &dcomplexx, incx, &res); blasStatus = cublasIzamin_v2(blasHandle, n, &dcomplexx, incx, &res); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sasum(rocblas_handle handle, rocblas_int n, const float* x, rocblas_int incx, float* result); // CHECK: blasStatus = rocblas_sasum(blasHandle, n, &fx, incx, &fresult); @@ -654,7 +655,7 @@ int main() { blasStatus = cublasSasum(blasHandle, n, &fx, incx, &fresult); blasStatus = cublasSasum_v2(blasHandle, n, &fx, incx, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasDasum_v2(cublasHandle_t handle, int n, const double* x, int incx, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dasum(rocblas_handle handle, rocblas_int n, const double* x, rocblas_int incx, double* result); // CHECK: blasStatus = rocblas_dasum(blasHandle, n, &dx, incx, &dresult); @@ -662,7 +663,7 @@ int main() { blasStatus = cublasDasum(blasHandle, n, &dx, incx, &dresult); blasStatus = cublasDasum_v2(blasHandle, n, &dx, incx, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scasum(rocblas_handle handle, rocblas_int n, const rocblas_float_complex* x, rocblas_int incx, float* result); // CHECK: blasStatus = rocblas_scasum(blasHandle, n, &complexx, incx, &fresult); @@ -670,7 +671,7 @@ int main() { blasStatus = cublasScasum(blasHandle, n, &complexx, incx, &fresult); blasStatus = cublasScasum_v2(blasHandle, n, &complexx, incx, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dzasum(rocblas_handle handle, rocblas_int n, const rocblas_double_complex* x, rocblas_int incx, double* result); // CHECK: blasStatus = rocblas_dzasum(blasHandle, n, &dcomplexx, incx, &dresult); @@ -678,7 +679,7 @@ int main() { blasStatus = cublasDzasum(blasHandle, n, &dcomplexx, incx, &dresult); blasStatus = cublasDzasum_v2(blasHandle, n, &dcomplexx, incx, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrot_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* c, const float* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_srot(rocblas_handle handle, rocblas_int n, float* x, rocblas_int incx, 
float* y, rocblas_int incy, const float* c, const float* s); // CHECK: blasStatus = rocblas_srot(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); @@ -686,7 +687,7 @@ int main() { blasStatus = cublasSrot(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); blasStatus = cublasSrot_v2(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrot_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* c, const double* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_drot(rocblas_handle handle, rocblas_int n, double* x, rocblas_int incx, double* y, rocblas_int incy, const double* c, const double* s); // CHECK: blasStatus = rocblas_drot(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); @@ -694,7 +695,7 @@ int main() { blasStatus = cublasDrot(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); blasStatus = cublasDrot_v2(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCrot_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy, const float* c, const cuComplex* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_crot(rocblas_handle handle, rocblas_int n, rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy, const float* c, const rocblas_float_complex* s); // CHECK: blasStatus = rocblas_crot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &complexs); @@ -702,7 +703,7 @@ int main() { blasStatus = cublasCrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &complexs); blasStatus = cublasCrot_v2(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &complexs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsrot_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy, const float* c, const float* s); // ROC: ROCBLAS_EXPORT 
rocblas_status rocblas_csrot(rocblas_handle handle, rocblas_int n, rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* y, rocblas_int incy, const float* c, const float* s); // CHECK: blasStatus = rocblas_csrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); @@ -710,7 +711,7 @@ int main() { blasStatus = cublasCsrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); blasStatus = cublasCsrot_v2(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZrot_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy, const double* c, const cuDoubleComplex* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zrot(rocblas_handle handle, rocblas_int n, rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy, const double* c, const rocblas_double_complex* s); // CHECK: blasStatus = rocblas_zrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); @@ -718,7 +719,7 @@ int main() { blasStatus = cublasZrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); blasStatus = cublasZrot_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdrot_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy, const double* c, const double* s); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdrot(rocblas_handle handle, rocblas_int n, rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* y, rocblas_int incy, const double* c, const double* s); // CHECK: blasStatus = rocblas_zdrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &ds); @@ -754,7 +755,7 @@ int main() { blasStatus = cublasZrotg(blasHandle, &dcomplexa, &dcomplexb, &dc, &dcomplexs); blasStatus = cublasZrotg_v2(blasHandle, &dcomplexa, 
&dcomplexb, &dc, &dcomplexs); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_srotm(rocblas_handle handle, rocblas_int n, float* x, rocblas_int incx, float* y, rocblas_int incy, const float* param); // CHECK: blasStatus = rocblas_srotm(blasHandle, n, &fx, incx, &fy, incy, &fresult); @@ -762,7 +763,7 @@ int main() { blasStatus = cublasSrotm(blasHandle, n, &fx, incx, &fy, incy, &fresult); blasStatus = cublasSrotm_v2(blasHandle, n, &fx, incx, &fy, incy, &fresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_drotm(rocblas_handle handle, rocblas_int n, double* x, rocblas_int incx, double* y, rocblas_int incy, const double* param); // CHECK: blasStatus = rocblas_drotm(blasHandle, n, &dx, incx, &dy, incy, &dresult); @@ -784,7 +785,7 @@ int main() { blasStatus = cublasDrotmg(blasHandle, &dd1, &dd2, &dx1, &dy1, &dresult); blasStatus = cublasDrotmg_v2(blasHandle, &dd1, &dd2, &dx1, &dy1, &dresult); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_sgemv(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); @@ -792,7 +793,7 @@ int main() { 
blasStatus = cublasSgemv(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); blasStatus = cublasSgemv_v2(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dgemv(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); @@ -800,7 +801,7 @@ int main() { blasStatus = cublasDgemv(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); blasStatus = cublasDgemv_v2(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_cgemv(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); @@ -808,7 +809,7 @@ int main() { blasStatus = cublasCgemv(blasHandle, blasOperation, m, n, 
&complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasCgemv_v2(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zgemv(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); @@ -816,7 +817,7 @@ int main() { blasStatus = cublasZgemv(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZgemv_v2(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgbmv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, rocblas_int kl, rocblas_int ku, const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_sgbmv(blasHandle, blasOperation, 
m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); @@ -824,7 +825,7 @@ int main() { blasStatus = cublasSgbmv(blasHandle, blasOperation, m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); blasStatus = cublasSgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgbmv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, rocblas_int kl, rocblas_int ku, const double* alpha, const double* A, rocblas_int lda, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dgbmv(blasHandle, blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); @@ -832,7 +833,7 @@ int main() { blasStatus = cublasDgbmv(blasHandle, blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); blasStatus = cublasDgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgbmv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, rocblas_int kl, rocblas_int ku, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: 
blasStatus = rocblas_cgbmv(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); @@ -840,7 +841,7 @@ int main() { blasStatus = cublasCgbmv(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasCgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgbmv(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, rocblas_int kl, rocblas_int ku, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zgbmv(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); @@ -848,7 +849,7 @@ int main() { blasStatus = cublasZgbmv(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* A, int lda, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmv(rocblas_handle handle, 
rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const float* A, rocblas_int lda, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_strmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); @@ -856,7 +857,7 @@ int main() { blasStatus = cublasStrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); blasStatus = cublasStrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* A, int lda, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const double* A, rocblas_int lda, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); @@ -864,7 +865,7 @@ int main() { blasStatus = cublasDtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); blasStatus = cublasDtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* A, int lda, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); @@ 
-872,7 +873,7 @@ int main() { blasStatus = cublasCtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); blasStatus = cublasCtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); @@ -880,7 +881,7 @@ int main() { blasStatus = cublasZtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); blasStatus = cublasZtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const float* A, int lda, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, rocblas_int m, rocblas_int k, const float* A, rocblas_int lda, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_stbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); @@ -888,7 +889,7 @@ int main() { blasStatus = cublasStbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); blasStatus = cublasStbmv_v2(blasHandle, 
blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const double* A, int lda, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, rocblas_int m, rocblas_int k, const double* A, rocblas_int lda, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); @@ -896,7 +897,7 @@ int main() { blasStatus = cublasDtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); blasStatus = cublasDtbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuComplex* A, int lda, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, rocblas_int m, rocblas_int k, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); @@ -904,7 +905,7 @@ int main() { blasStatus = cublasCtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); blasStatus = cublasCtbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasZtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, rocblas_int m, rocblas_int k, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); @@ -912,7 +913,7 @@ int main() { blasStatus = cublasZtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); blasStatus = cublasZtbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* AP, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const float* A, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_stpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); @@ -920,7 +921,7 @@ int main() { blasStatus = cublasStpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); blasStatus = cublasStpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* AP, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtpmv(rocblas_handle handle, 
rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const double* A, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); @@ -928,7 +929,7 @@ int main() { blasStatus = cublasDtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); blasStatus = cublasDtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* AP, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_float_complex* A, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); @@ -936,7 +937,7 @@ int main() { blasStatus = cublasCtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); blasStatus = cublasCtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* AP, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_double_complex* A, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); @@ -944,7 +945,7 @@ 
int main() { blasStatus = cublasZtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); blasStatus = cublasZtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* A, int lda, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const float* A, rocblas_int lda, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_strsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); @@ -952,7 +953,7 @@ int main() { blasStatus = cublasStrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); blasStatus = cublasStrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* A, int lda, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const double* A, rocblas_int lda, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); @@ -960,7 +961,7 @@ int main() { blasStatus = cublasDtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); blasStatus = cublasDtrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI 
cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* A, int lda, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); @@ -968,7 +969,7 @@ int main() { blasStatus = cublasCtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); blasStatus = cublasCtrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); @@ -976,7 +977,7 @@ int main() { blasStatus = cublasZtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); blasStatus = cublasZtrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const 
float* AP, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stpsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, const float* AP, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_stpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); @@ -984,7 +985,7 @@ int main() { blasStatus = cublasStpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); blasStatus = cublasStpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* AP, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtpsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, const double* AP, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); @@ -992,7 +993,7 @@ int main() { blasStatus = cublasDtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); blasStatus = cublasDtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* AP, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctpsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, const rocblas_float_complex* AP, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); 
@@ -1000,7 +1001,7 @@ int main() { blasStatus = cublasCtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); blasStatus = cublasCtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* AP, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztpsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, const rocblas_double_complex* AP, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); @@ -1008,7 +1009,7 @@ int main() { blasStatus = cublasZtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); blasStatus = cublasZtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const float* A, int lda, float* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stbsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, rocblas_int k, const float* A, rocblas_int lda, float* x, rocblas_int incx); // CHECK: blasStatus = rocblas_stbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); @@ -1016,7 +1017,7 @@ int main() { blasStatus = cublasStbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); blasStatus = cublasStbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, 
n, k, &fA, lda, &fx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const double* A, int lda, double* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtbsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, rocblas_int k, const double* A, rocblas_int lda, double* x, rocblas_int incx); // CHECK: blasStatus = rocblas_dtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); @@ -1024,7 +1025,7 @@ int main() { blasStatus = cublasDtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); blasStatus = cublasDtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuComplex* A, int lda, cuComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctbsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, rocblas_int k, const rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ctbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); @@ -1032,7 +1033,7 @@ int main() { blasStatus = cublasCtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); blasStatus = cublasCtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle, cublasFillMode_t 
uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztbsv(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int n, rocblas_int k, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* x, rocblas_int incx); // CHECK: blasStatus = rocblas_ztbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); @@ -1040,7 +1041,7 @@ int main() { blasStatus = cublasZtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); blasStatus = cublasZtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssymv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_ssymv(blasHandle, blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); @@ -1048,7 +1049,7 @@ int main() { blasStatus = cublasSsymv(blasHandle, blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); blasStatus = cublasSsymv_v2(blasHandle, blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_dsymv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dsymv(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); @@ -1056,7 +1057,7 @@ int main() { blasStatus = cublasDsymv(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); blasStatus = cublasDsymv_v2(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csymv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_csymv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); @@ -1064,7 +1065,7 @@ int main() { blasStatus = cublasCsymv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasCsymv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsymv(rocblas_handle handle, 
rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zsymv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); @@ -1072,7 +1073,7 @@ int main() { blasStatus = cublasZsymv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZsymv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chemv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_chemv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); @@ -1080,7 +1081,7 @@ int main() { blasStatus = cublasChemv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasChemv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, 
int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhemv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zhemv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); @@ -1088,7 +1089,7 @@ int main() { blasStatus = cublasZhemv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZhemv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_ssbmv(blasHandle, blasFillMode, n, k, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); @@ -1096,7 +1097,7 @@ int main() { blasStatus = cublasSsbmv(blasHandle, blasFillMode, n, k, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); blasStatus = cublasSsbmv_v2(blasHandle, blasFillMode, n, k, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, 
double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dsbmv(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); @@ -1104,7 +1105,7 @@ int main() { blasStatus = cublasDsbmv(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); blasStatus = cublasDsbmv_v2(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_chbmv(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); @@ -1112,7 +1113,7 @@ int main() { blasStatus = cublasChbmv(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasChbmv_v2(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const 
cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhbmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zhbmv(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); @@ -1120,7 +1121,7 @@ int main() { blasStatus = cublasZhbmv(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZhbmv_v2(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* AP, const float* x, int incx, const float* beta, float* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* A, const float* x, rocblas_int incx, const float* beta, float* y, rocblas_int incy); // CHECK: blasStatus = rocblas_sspmv(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); @@ -1128,7 +1129,7 @@ int main() { blasStatus = cublasSspmv(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); blasStatus = cublasSspmv_v2(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* AP, const double* x, int incx, const double* beta, double* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_dspmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* A, const double* x, rocblas_int incx, const double* beta, double* y, rocblas_int incy); // CHECK: blasStatus = rocblas_dspmv(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); @@ -1136,7 +1137,7 @@ int main() { blasStatus = cublasDspmv(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); blasStatus = cublasDspmv_v2(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* AP, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* AP, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_chpmv(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, &complexy, incy); @@ -1144,7 +1145,7 @@ int main() { blasStatus = cublasChpmv(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, &complexy, incy); blasStatus = cublasChpmv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, &complexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* AP, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpmv(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const 
rocblas_double_complex* AP, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy); // CHECK: blasStatus = rocblas_zhpmv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); @@ -1152,7 +1153,7 @@ int main() { blasStatus = cublasZhpmv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); blasStatus = cublasZhpmv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2(cublasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sger(rocblas_handle handle, rocblas_int m, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* A, rocblas_int lda); // CHECK: blasStatus = rocblas_sger(blasHandle, m, n, &fa, &fx, incx, &fy, incy, &fA, lda); @@ -1160,7 +1161,7 @@ int main() { blasStatus = cublasSger(blasHandle, m, n, &fa, &fx, incx, &fy, incy, &fA, lda); blasStatus = cublasSger_v2(blasHandle, m, n, &fa, &fx, incx, &fy, incy, &fA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2(cublasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dger(rocblas_handle handle, rocblas_int m, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* A, rocblas_int lda); // CHECK: blasStatus = rocblas_dger(blasHandle, m, n, &da, &dx, incx, &dy, incy, &dA, lda); @@ -1168,7 +1169,7 @@ int main() { blasStatus = cublasDger(blasHandle, m, n, &da, &dx, incx, &dy, incy, 
&dA, lda); blasStatus = cublasDger_v2(blasHandle, m, n, &da, &dx, incx, &dy, incy, &dA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2(cublasHandle_t handle, int m, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgeru(rocblas_handle handle, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_cgeru(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); @@ -1176,7 +1177,7 @@ int main() { blasStatus = cublasCgeru(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); blasStatus = cublasCgeru_v2(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2(cublasHandle_t handle, int m, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgerc(rocblas_handle handle, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_cgerc(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); @@ -1184,7 +1185,7 @@ int main() { blasStatus = cublasCgerc(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); blasStatus = cublasCgerc_v2(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasZgeru_v2(cublasHandle_t handle, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgeru(rocblas_handle handle, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zgeru(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); @@ -1192,7 +1193,7 @@ int main() { blasStatus = cublasZgeru(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); blasStatus = cublasZgeru_v2(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2(cublasHandle_t handle, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgerc(rocblas_handle handle, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zgerc(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); @@ -1200,7 +1201,7 @@ int main() { blasStatus = cublasZgerc(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); blasStatus = cublasZgerc_v2(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, 
float* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, float* A, rocblas_int lda); // CHECK: blasStatus = rocblas_ssyr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); @@ -1208,7 +1209,7 @@ int main() { blasStatus = cublasSsyr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); blasStatus = cublasSsyr_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, double* A, rocblas_int lda); // CHECK: blasStatus = rocblas_dsyr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); @@ -1216,7 +1217,7 @@ int main() { blasStatus = cublasDsyr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); blasStatus = cublasDsyr_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_csyr(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexA, lda); @@ -1224,7 +1225,7 @@ int main() { blasStatus = cublasCsyr(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexA, lda); blasStatus = cublasCsyr_v2(blasHandle, blasFillMode, n, &complexa, 
&complexx, incx, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zsyr(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); @@ -1232,7 +1233,7 @@ int main() { blasStatus = cublasZsyr(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); blasStatus = cublasZsyr_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const cuComplex* x, int incx, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_cher(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); @@ -1240,7 +1241,7 @@ int main() { blasStatus = cublasCher(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); blasStatus = cublasCher_v2(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zher(rocblas_handle handle, rocblas_fill 
uplo, rocblas_int n, const double* alpha, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zher(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); @@ -1248,7 +1249,7 @@ int main() { blasStatus = cublasZher(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); blasStatus = cublasZher_v2(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, float* AP); // CHECK: blasStatus = rocblas_sspr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); @@ -1256,7 +1257,7 @@ int main() { blasStatus = cublasSspr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); blasStatus = cublasSspr_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dspr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, double* AP); // CHECK: blasStatus = rocblas_dspr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); @@ -1264,7 +1265,7 @@ int main() { blasStatus = cublasDspr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); blasStatus = cublasDspr_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const 
cuComplex* x, int incx, cuComplex* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* AP); // CHECK: blasStatus = rocblas_chpr(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); @@ -1272,7 +1273,7 @@ int main() { blasStatus = cublasChpr(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); blasStatus = cublasChpr_v2(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpr(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* AP); // CHECK: blasStatus = rocblas_zhpr(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA); @@ -1280,7 +1281,7 @@ int main() { blasStatus = cublasZhpr(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA); blasStatus = cublasZhpr_v2(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* A, rocblas_int lda); // CHECK: blasStatus = rocblas_ssyr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA, lda); @@ -1288,7 +1289,7 @@ int main() { blasStatus = cublasSsyr2(blasHandle, blasFillMode, n, &fa, 
&fx, incx, &fy, incy, &fA, lda); blasStatus = cublasSsyr2_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* alpha, const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* A, rocblas_int lda); // CHECK: blasStatus = rocblas_dsyr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); @@ -1296,7 +1297,7 @@ int main() { blasStatus = cublasDsyr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); blasStatus = cublasDsyr2_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_csyr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); @@ -1304,7 +1305,7 @@ int main() { blasStatus = cublasCsyr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); blasStatus = cublasCsyr2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zsyr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); @@ -1312,7 +1313,7 @@ int main() { blasStatus = cublasZsyr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); blasStatus = cublasZsyr2_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_cher2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); @@ -1320,7 +1321,7 @@ int main() { blasStatus = cublasCher2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); blasStatus = cublasCher2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2(cublasHandle_t handle, 
cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zher2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* A, rocblas_int lda); // CHECK: blasStatus = rocblas_zher2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); @@ -1328,7 +1329,7 @@ int main() { blasStatus = cublasZher2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); blasStatus = cublasZher2_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const float* alpha, const float* x, rocblas_int incx, const float* y, rocblas_int incy, float* AP); // CHECK: blasStatus = rocblas_sspr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); @@ -1336,7 +1337,7 @@ int main() { blasStatus = cublasSspr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); blasStatus = cublasSspr2_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dspr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const double* 
alpha, const double* x, rocblas_int incx, const double* y, rocblas_int incy, double* AP); // CHECK: blasStatus = rocblas_dspr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); @@ -1344,7 +1345,7 @@ int main() { blasStatus = cublasDspr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); blasStatus = cublasDspr2_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, rocblas_int incx, const rocblas_float_complex* y, rocblas_int incy, rocblas_float_complex* AP); // CHECK: blasStatus = rocblas_chpr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA); @@ -1352,7 +1353,7 @@ int main() { blasStatus = cublasChpr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA); blasStatus = cublasChpr2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* AP); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpr2(rocblas_handle handle, rocblas_fill uplo, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, rocblas_int incx, const rocblas_double_complex* y, rocblas_int incy, rocblas_double_complex* AP); // CHECK: blasStatus = rocblas_zhpr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, 
&dcomplexA); @@ -1363,7 +1364,7 @@ int main() { // CHECK rocblas_operation transa, transb; cublasOperation_t transa, transb; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_sgemm(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); @@ -1371,7 +1372,7 @@ int main() { blasStatus = cublasSgemm(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); blasStatus = cublasSgemm_v2(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dgemm(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); @@ -1379,7 +1380,7 @@ int main() { blasStatus = cublasDgemm(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); blasStatus = 
cublasDgemm_v2(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cgemm(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); @@ -1387,7 +1388,7 @@ int main() { blasStatus = cublasCgemm(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); blasStatus = cublasCgemm_v2(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, 
rocblas_int ldc); // CHECK: blasStatus = rocblas_zgemm(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); @@ -1395,31 +1396,31 @@ int main() { blasStatus = cublasZgemm(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZgemm_v2(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* const A[], rocblas_int lda, const float* const B[], rocblas_int ldb, const float* beta, float* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched(rocblas_handle handle, rocblas_operation 
transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* const A[], rocblas_int lda, const double* const B[], rocblas_int ldb, const double* beta, double* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const B[], rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* 
alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const B[], rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyrk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ssyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); @@ -1427,7 +1428,7 @@ int main() { blasStatus = cublasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); blasStatus = cublasSsyrk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(cublasHandle_t 
handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyrk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); @@ -1435,7 +1436,7 @@ int main() { blasStatus = cublasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); blasStatus = cublasDsyrk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyrk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_csyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); @@ -1443,7 +1444,7 @@ int main() { blasStatus = cublasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); blasStatus = cublasCsyrk_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const 
cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyrk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); @@ -1451,7 +1452,7 @@ int main() { blasStatus = cublasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZsyrk_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const cuComplex* A, int lda, const float* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cherk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const float* alpha, const rocblas_float_complex* A, rocblas_int lda, const float* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); @@ -1459,7 +1460,7 @@ int main() { blasStatus = cublasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); blasStatus = cublasCherk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const 
cuDoubleComplex* A, int lda, const double* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zherk(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_int n, rocblas_int k, const double* alpha, const rocblas_double_complex* A, rocblas_int lda, const double* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); @@ -1467,7 +1468,7 @@ int main() { blasStatus = cublasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); blasStatus = cublasZherk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ssyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); @@ -1475,7 +1476,7 @@ int main() { blasStatus = cublasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); blasStatus = cublasSsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, 
int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); @@ -1483,7 +1484,7 @@ int main() { blasStatus = cublasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); blasStatus = cublasDsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_csyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); @@ -1491,7 +1492,7 @@ int main() { blasStatus = cublasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); blasStatus = cublasCsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int 
n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); @@ -1499,31 +1500,31 @@ int main() { blasStatus = cublasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyrkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ssyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); blasStatus = cublasSsyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, 
int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyrkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); blasStatus = cublasDsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyrkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_csyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); blasStatus = cublasCsyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status 
rocblas_zsyrkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const float* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); @@ -1531,7 +1532,7 @@ int main() { blasStatus = cublasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); blasStatus = cublasCher2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const 
cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zher2k(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const double* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); @@ -1539,19 +1540,19 @@ int main() { blasStatus = cublasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); blasStatus = cublasZher2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cherkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const float* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc); blasStatus = cublasCherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, 
int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zherkx(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const double* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc); blasStatus = cublasZherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssymm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, const float* beta, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ssymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); @@ -1559,7 +1560,7 @@ int main() { blasStatus = cublasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); blasStatus = cublasSsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, 
const double* B, int ldb, const double* beta, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsymm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, const double* beta, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); @@ -1567,7 +1568,7 @@ int main() { blasStatus = cublasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); blasStatus = cublasDsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csymm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_csymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); @@ -1575,7 +1576,7 @@ int main() { blasStatus = cublasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); blasStatus = cublasCsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasZsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsymm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); @@ -1583,7 +1584,7 @@ int main() { blasStatus = cublasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chemm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_chemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); @@ -1591,7 +1592,7 @@ int main() { blasStatus = 
cublasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); blasStatus = cublasChemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhemm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); @@ -1599,7 +1600,7 @@ int main() { blasStatus = cublasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); blasStatus = cublasZhemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, float* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const 
float* A, rocblas_int lda, float* B, rocblas_int ldb); // CHECK: blasStatus = rocblas_strsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); @@ -1607,7 +1608,7 @@ int main() { blasStatus = cublasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); blasStatus = cublasStrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, double* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, double* B, rocblas_int ldb); // CHECK: blasStatus = rocblas_dtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); @@ -1615,7 +1616,7 @@ int main() { blasStatus = cublasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); blasStatus = cublasDtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, cuComplex* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const 
rocblas_float_complex* A, rocblas_int lda, rocblas_float_complex* B, rocblas_int ldb); // CHECK: blasStatus = rocblas_ctrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); @@ -1623,7 +1624,7 @@ int main() { blasStatus = cublasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); blasStatus = cublasCtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, rocblas_double_complex* B, rocblas_int ldb); // CHECK: blasStatus = rocblas_ztrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); @@ -1631,7 +1632,7 @@ int main() { blasStatus = cublasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); blasStatus = cublasZtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, float* C, int 
ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* B, rocblas_int ldb, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_strmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); @@ -1639,7 +1640,7 @@ int main() { blasStatus = cublasStrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); blasStatus = cublasStrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* B, rocblas_int ldb, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); @@ -1647,7 +1648,7 @@ int main() { blasStatus = cublasDtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); blasStatus = cublasDtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t 
uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* B, rocblas_int ldb, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ctrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); @@ -1655,7 +1656,7 @@ int main() { blasStatus = cublasCtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); blasStatus = cublasCtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmm(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* B, rocblas_int ldb, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ztrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); @@ -1663,73 +1664,73 @@ int main() 
{ blasStatus = cublasZtrmm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); blasStatus = cublasZtrmm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgeam(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, const float* alpha, const float* A, rocblas_int lda, const float* beta, const float* B, rocblas_int ldb, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_sgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc); blasStatus = cublasSgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgeam(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, const double* beta, const double* B, rocblas_int ldb, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_dgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc); blasStatus = cublasDgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: 
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, const cuComplex* B, int ldb, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgeam(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* beta, const rocblas_float_complex* B, rocblas_int ldb, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc); blasStatus = cublasCgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgeam(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* beta, const rocblas_double_complex* B, rocblas_int ldb, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); blasStatus = cublasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t 
CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* const A[], rocblas_int lda, float* const B[], rocblas_int ldb, rocblas_int batch_count); // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, double* const B[], rocblas_int ldb, rocblas_int batch_count); // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, rocblas_float_complex* const B[], rocblas_int ldb, rocblas_int batch_count); // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, rocblas_double_complex* const B[], rocblas_int ldb, rocblas_int batch_count); // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, 
blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sdgmm(rocblas_handle handle, rocblas_side side, rocblas_int m, rocblas_int n, const float* A, rocblas_int lda, const float* x, rocblas_int incx, float* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_sdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc); blasStatus = cublasSdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const double* A, int lda, const double* x, int incx, double* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ddgmm(rocblas_handle handle, rocblas_side side, rocblas_int m, rocblas_int n, const double* A, rocblas_int lda, const double* x, rocblas_int incx, double* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_ddgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc); blasStatus = cublasDdgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuComplex* A, int lda, const cuComplex* x, int incx, cuComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cdgmm(rocblas_handle handle, rocblas_side side, rocblas_int m, rocblas_int n, const rocblas_float_complex* A, rocblas_int lda, const rocblas_float_complex* x, rocblas_int incx, rocblas_float_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_cdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, 
incx, &complexC, ldc); blasStatus = cublasCdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdgmm(rocblas_handle handle, rocblas_side side, rocblas_int m, rocblas_int n, const rocblas_double_complex* A, rocblas_int lda, const rocblas_double_complex* x, rocblas_int incx, rocblas_double_complex* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_zdgmm(blasHandle, blasSideMode, m, n, &dcomplexa, lda, &dcomplexx, incx, &dcomplexC, ldc); @@ -1778,7 +1779,7 @@ int main() { // CHECK: rocblas_half** hyarray = 0; __half** hyarray = 0; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* A, int lda, const __half* B, int ldb, const __half* beta, __half* C, int ldc); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_half* alpha, const rocblas_half* A, rocblas_int lda, const rocblas_half* B, rocblas_int ldb, const rocblas_half* beta, rocblas_half* C, rocblas_int ldc); // CHECK: blasStatus = rocblas_hgemm(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); @@ -1821,6 +1822,9 @@ int main() { cublasDataType_t R_32U = CUDA_R_32U; cublasDataType_t C_32U = CUDA_C_32U; + // CHECK: rocblas_datatype computeType; + cudaDataType computeType; + // CHECK: rocblas_datatype DataType_2, DataType_3, alpha_type, cs_type, x_type, y_type, execution_type, result_type; cudaDataType DataType_2, DataType_3, alpha_type, 
cs_type, x_type, y_type, execution_type, result_type; @@ -1829,37 +1833,37 @@ int main() { cublasGemmAlgo_t blasGemmAlgo; cublasGemmAlgo_t BLAS_GEMM_DFALT = CUBLAS_GEMM_DFALT; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, void* result, cudaDataType resultType, cudaDataType executionType); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_nrm2_ex(rocblas_handle handle, rocblas_int n, const void* x, rocblas_datatype x_type, rocblas_int incx, void* results, rocblas_datatype result_type, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_nrm2_ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); blasStatus = cublasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, long long int strideA, const float* B, int ldb, long long int strideB, const float* beta, float* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* A, rocblas_int lda, rocblas_stride stride_a, const float* B, rocblas_int ldb, rocblas_stride stride_b, const float* beta, float* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_sgemm_strided_batched(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, strideA, &fB, ldb, strideB, &fb, &fC, ldc, strideC, batchCount); blasStatus = cublasSgemmStridedBatched(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, strideA, &fB, ldb, strideB, &fb, &fC, ldc, 
strideC, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, long long int strideA, const double* B, int ldb, long long int strideB, const double* beta, double* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* A, rocblas_int lda, rocblas_stride stride_a, const double* B, rocblas_int ldb, rocblas_stride stride_b, const double* beta, double* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_dgemm_strided_batched(blasHandle, transa, transb, m, n, k, &da, &dA, lda, strideA, &dB, ldb, strideB, &db, &dC, ldc, strideC, batchCount); blasStatus = cublasDgemmStridedBatched(blasHandle, transa, transb, m, n, k, &da, &dA, lda, strideA, &dB, ldb, strideB, &db, &dC, ldc, strideC, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, long long int strideA, const cuComplex* B, int ldb, long long int strideB, const cuComplex* beta, cuComplex* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, rocblas_stride stride_a, const rocblas_float_complex* B, rocblas_int ldb, rocblas_stride stride_b, const 
rocblas_float_complex* beta, rocblas_float_complex* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_cgemm_strided_batched(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, strideA, &complexB, ldb, strideB, &complexb, &complexC, ldc, strideC, batchCount); blasStatus = cublasCgemmStridedBatched(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, strideA, &complexB, ldb, strideB, &complexb, &complexC, ldc, strideC, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, long long int strideA, const cuDoubleComplex* B, int ldb, long long int strideB, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, rocblas_stride stride_a, const rocblas_double_complex* B, rocblas_int ldb, rocblas_stride stride_b, const rocblas_double_complex* beta, rocblas_double_complex* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_zgemm_strided_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); blasStatus = cublasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasHgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* A, int lda, long long int strideA, const __half* B, int ldb, long long int strideB, const __half* beta, __half* C, int ldc, long long int strideC, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_half* alpha, const rocblas_half* A, rocblas_int lda, rocblas_stride stride_a, const rocblas_half* B, rocblas_int ldb, rocblas_stride stride_b, const rocblas_half* beta, rocblas_half* C, rocblas_int ldc, rocblas_stride stride_c, rocblas_int batch_count); // CHECK: blasStatus = rocblas_hgemm_strided_batched(blasHandle, transa, transb, m, n, k, ha, hA, lda, strideA, hB, ldb, strideB, hb, hC, ldc, strideC, batchCount); @@ -1890,25 +1894,25 @@ int main() { cudaDataType CStype; cudaDataType Executiontype; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScalEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, void* x, cudaDataType xType, int incx, cudaDataType executionType); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scal_ex(rocblas_handle handle, rocblas_int n, const void* alpha, rocblas_datatype alpha_type, void* x, rocblas_datatype x_type, rocblas_int incx, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_scal_ex(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype); blasStatus = cublasScalEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAxpyEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, const void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, cudaDataType executiontype); // ROC: 
ROCBLAS_EXPORT rocblas_status rocblas_axpy_ex(rocblas_handle handle, rocblas_int n, const void* alpha, rocblas_datatype alpha_type, const void* x, rocblas_datatype x_type, rocblas_int incx, void* y, rocblas_datatype y_type, rocblas_int incy, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_axpy_ex(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype); blasStatus = cublasAxpyEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, const void* y, cudaDataType yType, int incy, void* result, cudaDataType resultType, cudaDataType executionType); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dot_ex(rocblas_handle handle, rocblas_int n, const void* x, rocblas_datatype x_type, rocblas_int incx, const void* y, rocblas_datatype y_type, rocblas_int incy, void* result, rocblas_datatype result_type, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_dot_ex(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, image, DataType, Executiontype); blasStatus = cublasDotEx(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, image, DataType, Executiontype); - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, const void* y, cudaDataType yType, int incy, void* result, cudaDataType resultType, cudaDataType executionType); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dotc_ex(rocblas_handle handle, rocblas_int n, const void* x, rocblas_datatype x_type, rocblas_int incx, const void* y, rocblas_datatype y_type, rocblas_int incy, void* result, rocblas_datatype result_type, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_dotc_ex(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, image, 
DataType, Executiontype); @@ -1916,10 +1920,9 @@ int main() { #endif #if CUDA_VERSION >= 8000 && CUDA_VERSION < 11000 - // CHECK: rocblas_datatype computeType; - cudaDataType computeType; - - // TODO: #1281 + // TODO: [rocBLAS][#1281] + // TODO: [rocBLAS] File a ticket for rocblas_gemm_ex_v2 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); // CHECK: blasStatus = rocblas_gemm_ex(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo); @@ -1930,7 +1933,7 @@ int main() { // CHECK: rocblas_gemm_algo BLAS_GEMM_DEFAULT = rocblas_gemm_algo_standard; cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* const Aarray[], int lda, const __half* const Barray[], int ldb, const 
__half* beta, __half* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_half* alpha, const rocblas_half* const A[], rocblas_int lda, const rocblas_half* const B[], rocblas_int ldb, const rocblas_half* beta, rocblas_half* const C[], rocblas_int ldc, rocblas_int batch_count); // CHECK: blasStatus = rocblas_hgemm_batched(blasHandle, transa, transb, m, n, k, ha, hAarray_const, lda, hBarray_const, ldb, hb, hCarray, ldc, batchCount); @@ -1938,14 +1941,18 @@ int main() { #endif #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 - // TODO: #1281 - // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // TODO: [rocBLAS][#1281] + // TODO: [rocBLAS] File a ticket for rocblas_gemm_batched_ex_v2 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *const Aarray[], cudaDataType Atype, int lda, const void *const Barray[], cudaDataType Btype, int ldb, const void *beta, void *const Carray[], cudaDataType Ctype, int ldc, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, 
rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - // TODO: #1281 - // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // TODO: [rocBLAS][#1281] + // TODO: [rocBLAS] File a ticket for rocblas_gemm_strided_batched_ex_v2 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // TODO: [HIPIFY] CUDA VERSION detection by HIPIFY itself + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *A, cudaDataType Atype, int lda, long long int strideA, const void *B, cudaDataType Btype, int ldb, long long int strideB, const void *beta, void *C, cudaDataType Ctype, int ldc, long long int strideC, int 
batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_strided_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, rocblas_stride stride_a, const void* b, rocblas_datatype b_type, rocblas_int ldb, rocblas_stride stride_b, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, rocblas_stride stride_c, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_stride stride_d, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); // CHECK: blasStatus = rocblas_gemm_strided_batched_ex(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); @@ -1958,7 +1965,7 @@ int main() { // CHECK: rocblas_fill BLAS_FILL_MODE_FULL = rocblas_fill_full; cublasFillMode_t BLAS_FILL_MODE_FULL = CUBLAS_FILL_MODE_FULL; - // TODO: #1281 + // TODO: [rocBLAS][#1281] // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotEx(cublasHandle_t handle, int n, void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, const void* c, const void* s, cudaDataType csType, cudaDataType executiontype); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_rot_ex(rocblas_handle handle, rocblas_int n, void* x, rocblas_datatype x_type, rocblas_int incx, void* y, rocblas_datatype y_type, rocblas_int incy, const void* c, const void* s, rocblas_datatype cs_type, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_rot_ex(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, 
cptr, sptr, CStype, Executiontype); @@ -1971,6 +1978,11 @@ int main() { cublasDataType_t R_16BF = CUDA_R_16BF; cublasDataType_t C_16BF = CUDA_C_16BF; + // CHECK: rocblas_computetype blasComputeType; + // CHECK-NEXT: rocblas_computetype BLAS_COMPUTE_32F = rocblas_compute_type_f32; + cublasComputeType_t blasComputeType; + cublasComputeType_t BLAS_COMPUTE_32F = CUBLAS_COMPUTE_32F; + // CHECK: rocblas_bfloat16* bf16A = nullptr; __nv_bfloat16* bf16A = nullptr; // CHECK: rocblas_bfloat16** bf16Aarray = 0; @@ -2840,7 +2852,7 @@ int main() { blasStatus = cublasStbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); - // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtbmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtbmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); // CHECK: blasStatus = rocblas_dtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); // CHECK-NEXT: blasStatus = rocblas_dtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); blasStatus = cublasDtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); @@ -3340,6 +3352,24 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zdgmm_64(rocblas_handle handle, rocblas_side side, int64_t m, int64_t n, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* x, int64_t incx, 
rocblas_double_complex* C, int64_t ldc); // CHECK: blasStatus = rocblas_zdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); blasStatus = cublasZdgmm_64(blasHandle, blasSideMode, m_64, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexC, ldc_64); + + // TODO: [rocBLAS] File a ticket for rocblas_gemm_ex_v2_64 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const void* alpha, const void* A, cudaDataType Atype, int64_t lda, const void* B, cudaDataType Btype, int64_t ldb, const void* beta, void* C, cudaDataType Ctype, int64_t ldc, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_ex_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const void* alpha, const void* a, rocblas_datatype a_type, int64_t lda, const void* b, rocblas_datatype b_type, int64_t ldb, const void* beta, const void* c, rocblas_datatype c_type, int64_t ldc, void* d, rocblas_datatype d_type, int64_t ldd, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); + // [CHECK] blasStatus = rocblas_gemm_ex_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, Aptr, Atype, lda_64, Bptr, Btype, ldb_64, bptr, Cptr, Ctype, ldc_64, blasComputeType, blasGemmAlgo, 0, rocblas_gemm_flags_none); + blasStatus = cublasGemmEx_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, Aptr, Atype, lda_64, Bptr, Btype, ldb_64, bptr, Cptr, Ctype, ldc_64, blasComputeType, blasGemmAlgo); + + // TODO: [rocBLAS] File a ticket for rocblas_gemm_batched_ex_v2_64 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasGemmBatchedEx_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int64_t lda, const void* const Barray[], cudaDataType Btype, int64_t ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int64_t ldc, int64_t batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const void* alpha, const void* a, rocblas_datatype a_type, int64_t lda, const void* b, rocblas_datatype b_type, int64_t ldb, const void* beta, const void* c, rocblas_datatype c_type, int64_t ldc, void* d, rocblas_datatype d_type, int64_t ldd, int64_t batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); + // [CHECK] blasStatus = rocblas_gemm_batched_ex_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, voidAarray_const, Atype, lda_64, voidBarray_const, Btype, ldb_64, bptr, voidCarray, Ctype, ldc_64, batchCount_64, blasComputeType, blasGemmAlgo, 0, rocblas_gemm_flags_none); + blasStatus = cublasGemmBatchedEx_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, voidAarray_const, Atype, lda_64, voidBarray_const, Btype, ldb_64, bptr, voidCarray, Ctype, ldc_64, batchCount_64, blasComputeType, blasGemmAlgo); + + // TODO: [rocBLAS] File a ticket for rocblas_gemm_strided_batched_ex_v2_64 with compute_type argument of the rocblas_computetype type instead of rocblas_datatype type + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const void* alpha, const void* A, cudaDataType Atype, int64_t lda, long long int strideA, const void* B, cudaDataType Btype, int64_t ldb, long long int strideB, const 
void* beta, void* C, cudaDataType Ctype, int64_t ldc, long long int strideC, int64_t batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_strided_batched_ex_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const void* alpha, const void* a, rocblas_datatype a_type, int64_t lda, rocblas_stride stride_a, const void* b, rocblas_datatype b_type, int64_t ldb, rocblas_stride stride_b, const void* beta, const void* c, rocblas_datatype c_type, int64_t ldc, rocblas_stride stride_c, void* d, rocblas_datatype d_type, int64_t ldd, rocblas_stride stride_d, int64_t batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); + // [CHECK] blasStatus = rocblas_gemm_strided_batched_ex_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, Aptr, Atype, lda_64, strideA, Bptr, Btype, ldb_64, strideB, bptr, Cptr, Ctype, ldc_64, strideC, batchCount_64, blasComputeType, blasGemmAlgo, 0, rocblas_gemm_flags_none); + blasStatus = cublasGemmStridedBatchedEx_64(blasHandle, transa, transb, m_64, n_64, k_64, aptr, Aptr, Atype, lda_64, strideA, Bptr, Btype, ldb_64, strideB, bptr, Cptr, Ctype, ldc_64, strideC, batchCount_64, blasComputeType, blasGemmAlgo); #endif return 0; From ba12a930f3d4e9b2b7c766a6cb75d3a169e09519 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 13 Nov 2024 23:05:01 +0100 Subject: [PATCH 50/51] [HIPIFY][doc][6.3.1] `CHANGELOG.md` update --- CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a2e9344..b94c1348 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,20 @@ Documentation for HIPIFY is available at [https://rocmdocs.amd.com/projects/HIPIFY/en/latest/](https://rocmdocs.amd.com/projects/HIPIFY/en/latest/). 
+## HIPIFY for ROCm 6.3.1 + +### Added + +* CUDA 12.6.2 support +* cuDNN 9.5.1 support +* LLVM 19.1.3 support +* Full `hipBLAS` 64-bit APIs support +* Full `rocBLAS` 64-bit APIs support + +### Resolved issues + +* Added missing support for device intrinsics and built-ins: `__all_sync`, `__any_sync`, `__ballot_sync`, `__activemask`, `__match_any_sync`, `__match_all_sync`, `__shfl_sync`, `__shfl_up_sync`, `__shfl_down_sync`, and `__shfl_xor_sync` + ## HIPIFY for ROCm 6.3.0 ### Added From a40e07e0dbaac5d5be77556b6f0e51ff49650640 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 14 Nov 2024 23:33:32 +0100 Subject: [PATCH 51/51] [HIPIFY][doc] 3rd party SW versions update --- docs/hipify-clang.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/hipify-clang.rst b/docs/hipify-clang.rst index a03cf71e..9897eef9 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -649,7 +649,7 @@ Minimum build system requirements for the above configurations: Recommended build system requirements: -* CMake 3.30.4, GNU C/C++ 13.2, Python 3.13.0. +* CMake 3.31.0, GNU C/C++ 13.2, Python 3.13.0. Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01``: @@ -822,14 +822,14 @@ Tested configurations: * - ``17.0.1`` :sup:`6` - ``18.1.8`` :sup:`7` - ``7.0 - 12.3.2`` - ``8.0.5 - 9.5.1`` - - ``2019.16.11.40, 2022.17.11.4`` - - ``3.30.4`` + - ``2019.16.11.42, 2022.17.11.6`` + - ``3.31.0`` - ``3.13.0`` * - ``19.1.0 - 19.1.3`` - ``7.0 - 12.6.2`` - ``8.0.5 - 9.5.1`` - - ``2019.16.11.40, 2022.17.11.4`` - - ``3.30.4`` + - ``2019.16.11.42, 2022.17.11.6`` + - ``3.31.0`` - ``3.13.0`` :sup:`5` LLVM 14.x.x is the latest major release supporting Visual Studio 2017.