diff --git a/bin/hipify-perl b/bin/hipify-perl index 36e0be46..ab714494 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1402,6 +1402,11 @@ my %experimental_funcs = ( "cudaGraphNodeSetParams" => "6.3.0", "cudaGraphExecNodeSetParams" => "6.3.0", "cudaGraphExecGetFlags" => "6.3.0", + "cublasZgemmStridedBatched_64" => "6.3.0", + "cublasSgemmStridedBatched_64" => "6.3.0", + "cublasHgemmStridedBatched_64" => "6.3.0", + "cublasDgemmStridedBatched_64" => "6.3.0", + "cublasCgemmStridedBatched_64" => "6.3.0", "cuGraphNodeSetParams" => "6.3.0", "cuGraphMemcpyNodeSetParams" => "6.3.0", "cuGraphMemcpyNodeGetParams" => "6.3.0", @@ -1558,6 +1563,11 @@ sub experimentalSubstitutions { subst("cudaGraphExecGetFlags", "hipGraphExecGetFlags", "graph"); subst("cudaGraphExecNodeSetParams", "hipGraphExecNodeSetParams", "graph"); subst("cudaGraphNodeSetParams", "hipGraphNodeSetParams", "graph"); + subst("cublasCgemmStridedBatched_64", "hipblasCgemmStridedBatched_v2_64", "library"); + subst("cublasDgemmStridedBatched_64", "hipblasDgemmStridedBatched_64", "library"); + subst("cublasHgemmStridedBatched_64", "hipblasHgemmStridedBatched_64", "library"); + subst("cublasSgemmStridedBatched_64", "hipblasSgemmStridedBatched_64", "library"); + subst("cublasZgemmStridedBatched_64", "hipblasZgemmStridedBatched_v2_64", "library"); subst("cusolverDnGetDeterministicMode", "hipsolverDnGetDeterministicMode", "library"); subst("cusolverDnSetDeterministicMode", "hipsolverDnSetDeterministicMode", "library"); subst("cusolverDnXgeqrf", "hipsolverDnXgeqrf", "library"); @@ -1596,6 +1606,7 @@ sub rocSubstitutions { subst("cublasCgemmBatched", "rocblas_cgemm_batched", "library"); subst("cublasCgemmBatched_64", "rocblas_cgemm_batched_64", "library"); subst("cublasCgemmStridedBatched", "rocblas_cgemm_strided_batched", "library"); + subst("cublasCgemmStridedBatched_64", "rocblas_cgemm_strided_batched_64", "library"); subst("cublasCgemm_64", "rocblas_cgemm_64", "library"); subst("cublasCgemm_v2", "rocblas_cgemm", "library"); subst("cublasCgemm_v2_64", "rocblas_cgemm_64", "library"); @@ -1753,6 +1764,7 @@ sub rocSubstitutions { subst("cublasDgemmBatched", "rocblas_dgemm_batched", "library"); subst("cublasDgemmBatched_64", "rocblas_dgemm_batched_64", "library"); subst("cublasDgemmStridedBatched", "rocblas_dgemm_strided_batched", "library"); + subst("cublasDgemmStridedBatched_64", "rocblas_dgemm_strided_batched_64", "library"); subst("cublasDgemm_64", "rocblas_dgemm_64", "library"); subst("cublasDgemm_v2", "rocblas_dgemm", "library"); subst("cublasDgemm_v2_64", "rocblas_dgemm_64", "library"); @@ -1897,6 +1909,7 @@ sub rocSubstitutions { subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); subst("cublasHgemmBatched_64", "rocblas_hgemm_batched_64", "library"); subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); + subst("cublasHgemmStridedBatched_64", "rocblas_hgemm_strided_batched_64", "library"); subst("cublasHgemm_64", "rocblas_hgemm_64", "library"); subst("cublasIcamax", "rocblas_icamax", "library"); subst("cublasIcamax_64", "rocblas_icamax_64", "library"); @@ -1981,6 +1994,7 @@ sub rocSubstitutions { subst("cublasSgemmBatched", "rocblas_sgemm_batched", "library"); subst("cublasSgemmBatched_64", "rocblas_sgemm_batched_64", "library"); subst("cublasSgemmStridedBatched", "rocblas_sgemm_strided_batched", "library"); + subst("cublasSgemmStridedBatched_64", "rocblas_sgemm_strided_batched_64", "library"); subst("cublasSgemm_64", "rocblas_sgemm_64", "library"); subst("cublasSgemm_v2", "rocblas_sgemm", "library"); subst("cublasSgemm_v2_64", "rocblas_sgemm_64", "library"); @@ -2129,6 +2143,7 @@ sub rocSubstitutions { subst("cublasZgemmBatched", "rocblas_zgemm_batched", "library"); subst("cublasZgemmBatched_64", "rocblas_zgemm_batched_64", "library"); subst("cublasZgemmStridedBatched", "rocblas_zgemm_strided_batched", "library"); + subst("cublasZgemmStridedBatched_64", "rocblas_zgemm_strided_batched_64", "library"); subst("cublasZgemm_64", "rocblas_zgemm_64", "library"); subst("cublasZgemm_v2", "rocblas_zgemm", "library"); subst("cublasZgemm_v2_64", "rocblas_zgemm_64", "library"); @@ -11543,7 +11558,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZher2k_64", "cublasZhemm_v2_64", "cublasZhemm_64", - "cublasZgemmStridedBatched_64", "cublasZgemm3m_64", "cublasZgemm3m", "cublasZgeam_64", @@ -11576,7 +11590,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSsymm_64", "cublasSmatinvBatched", "cublasShutdown", - "cublasSgemmStridedBatched_64", "cublasSgemmGroupedBatched_64", "cublasSgemmGroupedBatched", "cublasSgemmEx_64", @@ -11638,7 +11651,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx_64", "cublasIamaxEx", - "cublasHgemmStridedBatched_64", "cublasHSSgemvStridedBatched_64", "cublasHSSgemvStridedBatched", "cublasHSSgemvBatched_64", @@ -11681,7 +11693,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDsymm_v2_64", "cublasDsymm_64", "cublasDmatinvBatched", - "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", "cublasDgeam_64", @@ -11719,7 +11730,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCher2k_64", "cublasChemm_v2_64", "cublasChemm_64", - "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", "cublasCgemm3m_64", @@ -13302,7 +13312,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgetriBatched", "cublasZgetrfBatched", "cublasZgeqrfBatched", - "cublasZgemmStridedBatched_64", "cublasZgemm3m_64", "cublasZgemm3m", "cublasZgelsBatched", @@ -13329,7 +13338,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgetriBatched", "cublasSgetrfBatched", "cublasSgeqrfBatched", - "cublasSgemmStridedBatched_64", "cublasSgemmGroupedBatched_64", "cublasSgemmGroupedBatched", "cublasSgemmEx_64", @@ -13418,7 +13426,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx_64", "cublasIamaxEx", - "cublasHgemmStridedBatched_64", "cublasGetVersion_v2", "cublasGetVersion", "cublasGetVector_64", @@ -13453,7 +13460,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgetriBatched", "cublasDgetrfBatched", "cublasDgeqrfBatched", - "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", "cublasDgelsBatched", @@ -13492,7 +13498,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgetriBatched", "cublasCgetrfBatched", "cublasCgeqrfBatched", - "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", "cublasCgemm3m_64", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index dee598fd..ee8ad63c 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1095,7 +1095,7 @@ |`cublasCgemmBatched`| | | | |`hipblasCgemmBatched_v2`|6.0.0| | | | | |`cublasCgemmBatched_64`|12.0| | | |`hipblasCgemmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | | -|`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasCgemmStridedBatched_64`|12.0| | | |`hipblasCgemmStridedBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | | |`cublasCgemm_v2_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| @@ -1145,7 +1145,7 @@ |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | | -|`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasDgemmStridedBatched_64`|12.0| | | |`hipblasDgemmStridedBatched_64`|6.3.0| | | |6.3.0| |`cublasDgemm_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | | |`cublasDgemm_v2_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0| @@ -1189,7 +1189,7 @@ |`cublasHgemmBatched`|9.0| | | |`hipblasHgemmBatched`|3.0.0| | | | | |`cublasHgemmBatched_64`|12.0| | | |`hipblasHgemmBatched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | | -|`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasHgemmStridedBatched_64`|12.0| | | |`hipblasHgemmStridedBatched_64`|6.3.0| | | |6.3.0| |`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | | @@ -1197,7 +1197,7 @@ |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | | -|`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasSgemmStridedBatched_64`|12.0| | | |`hipblasSgemmStridedBatched_64`|6.3.0| | | |6.3.0| |`cublasSgemm_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | | |`cublasSgemm_v2_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0| @@ -1241,7 +1241,7 @@ |`cublasZgemmBatched`| | | | |`hipblasZgemmBatched_v2`|6.0.0| | | | | |`cublasZgemmBatched_64`|12.0| | | |`hipblasZgemmBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | | -|`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasZgemmStridedBatched_64`|12.0| | | |`hipblasZgemmStridedBatched_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | | |`cublasZgemm_v2_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 8b692547..a95fbf83 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1095,7 +1095,7 @@ |`cublasCgemmBatched`| | | | |`hipblasCgemmBatched_v2`|6.0.0| | | | |`rocblas_cgemm_batched`|3.5.0| | | | | |`cublasCgemmBatched_64`|12.0| | | |`hipblasCgemmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | -|`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemmStridedBatched_64`|12.0| | | |`hipblasCgemmStridedBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | |`rocblas_cgemm`|1.5.0| | | | | |`cublasCgemm_v2_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| @@ -1145,7 +1145,7 @@ |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | -|`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemmStridedBatched_64`|12.0| | | |`hipblasDgemmStridedBatched_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasDgemm_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemm_v2_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_64`|6.3.0| | | |6.3.0| @@ -1189,7 +1189,7 @@ |`cublasHgemmBatched`|9.0| | | |`hipblasHgemmBatched`|3.0.0| | | | |`rocblas_hgemm_batched`|3.5.0| | | | | |`cublasHgemmBatched_64`|12.0| | | |`hipblasHgemmBatched_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | -|`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasHgemmStridedBatched_64`|12.0| | | |`hipblasHgemmStridedBatched_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | @@ -1197,7 +1197,7 @@ |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | | | | | | | |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | -|`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemmStridedBatched_64`|12.0| | | |`hipblasSgemmStridedBatched_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasSgemm_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemm_v2_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_64`|6.3.0| | | |6.3.0| @@ -1241,7 +1241,7 @@ |`cublasZgemmBatched`| | | | |`hipblasZgemmBatched_v2`|6.0.0| | | | |`rocblas_zgemm_batched`|3.5.0| | | | | |`cublasZgemmBatched_64`|12.0| | | |`hipblasZgemmBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | -|`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemmStridedBatched_64`|12.0| | | |`hipblasZgemmStridedBatched_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | |`rocblas_zgemm`|1.5.0| | | | | |`cublasZgemm_v2_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index f0a40e7c..a85c584a 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1095,7 +1095,7 @@ |`cublasCgemmBatched`| | | | |`rocblas_cgemm_batched`|3.5.0| | | | | |`cublasCgemmBatched_64`|12.0| | | |`rocblas_cgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemmStridedBatched`|8.0| | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | -|`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasCgemmStridedBatched_64`|12.0| | | |`rocblas_cgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasCgemm_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`rocblas_cgemm`|1.5.0| | | | | |`cublasCgemm_v2_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| @@ -1145,7 +1145,7 @@ |`cublasDgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | -|`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasDgemmStridedBatched_64`|12.0| | | |`rocblas_dgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasDgemm_64`|12.0| | | |`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemm_v2_64`|12.0| | | |`rocblas_dgemm_64`|6.3.0| | | |6.3.0| @@ -1189,7 +1189,7 @@ |`cublasHgemmBatched`|9.0| | | |`rocblas_hgemm_batched`|3.5.0| | | | | |`cublasHgemmBatched_64`|12.0| | | |`rocblas_hgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemmStridedBatched`|8.0| | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | -|`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasHgemmStridedBatched_64`|12.0| | | |`rocblas_hgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasHgemm_64`|12.0| | | |`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | @@ -1197,7 +1197,7 @@ |`cublasSgemmGroupedBatched`|12.4| | | | | | | | | | |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | -|`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasSgemmStridedBatched_64`|12.0| | | |`rocblas_sgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasSgemm_64`|12.0| | | |`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemm_v2_64`|12.0| | | |`rocblas_sgemm_64`|6.3.0| | | |6.3.0| @@ -1241,7 +1241,7 @@ |`cublasZgemmBatched`| | | | |`rocblas_zgemm_batched`|3.5.0| | | | | |`cublasZgemmBatched_64`|12.0| | | |`rocblas_zgemm_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemmStridedBatched`|8.0| | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | -|`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | +|`cublasZgemmStridedBatched_64`|12.0| | | |`rocblas_zgemm_strided_batched_64`|6.3.0| | | |6.3.0| |`cublasZgemm_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`rocblas_zgemm`|1.5.0| | | | | |`cublasZgemm_v2_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index a8fc6cff..568cc17c 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -421,9 +421,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasHgemmBatched", {"hipblasHgemmBatched", "rocblas_hgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasHgemmBatched_64", {"hipblasHgemmBatched_64", "rocblas_hgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasSgemmStridedBatched", {"hipblasSgemmStridedBatched", "rocblas_sgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSgemmStridedBatched_64", {"hipblasSgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSgemmStridedBatched_64", {"hipblasSgemmStridedBatched_64", "rocblas_sgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasDgemmStridedBatched", {"hipblasDgemmStridedBatched", "rocblas_dgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDgemmStridedBatched_64", {"hipblasDgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDgemmStridedBatched_64", {"hipblasDgemmStridedBatched_64", "rocblas_dgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasCgemmBatched", {"hipblasCgemmBatched_v2", "rocblas_cgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemmBatched_64", {"hipblasCgemmBatched_v2_64", "rocblas_cgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm3mBatched", {"hipblasCgemm3mBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, @@ -431,13 +431,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZgemmBatched", {"hipblasZgemmBatched_v2", "rocblas_zgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZgemmBatched_64", {"hipblasZgemmBatched_v2_64", "rocblas_zgemm_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemmStridedBatched", {"hipblasCgemmStridedBatched_v2", "rocblas_cgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCgemmStridedBatched_64", {"hipblasCgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemmStridedBatched_64", {"hipblasCgemmStridedBatched_v2_64", "rocblas_cgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasCgemm3mStridedBatched", {"hipblasCgemm3mStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3mStridedBatched_64", {"hipblasCgemm3mStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasZgemmStridedBatched", {"hipblasZgemmStridedBatched_v2", "rocblas_zgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZgemmStridedBatched_64", {"hipblasZgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemmStridedBatched_64", {"hipblasZgemmStridedBatched_v2_64", "rocblas_zgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasHgemmStridedBatched", {"hipblasHgemmStridedBatched", "rocblas_hgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasHgemmStridedBatched_64", {"hipblasHgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasHgemmStridedBatched_64", {"hipblasHgemmStridedBatched_64", "rocblas_hgemm_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_EXPERIMENTAL}}, {"cublasGemmGroupedBatchedEx", {"hipblasGemmGroupedBatchedEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasGemmGroupedBatchedEx_64", {"hipblasGemmGroupedBatchedEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, @@ -2033,6 +2033,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDgemmBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCgemmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZgemmBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasHgemmStridedBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemmStridedBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgemmStridedBatched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemmStridedBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemmStridedBatched_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2431,6 +2436,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_cgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_zgemm_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_hgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_sgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zgemm_strided_batched_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 2b29009f..18433fa7 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2883,6 +2883,31 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* const AP[], int64_t lda, const hipblasHalf* const BP[], int64_t ldb, const hipblasHalf* beta, hipblasHalf* const CP[], int64_t ldc, int64_t batchCount); // CHECK: blasStatus = hipblasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, long long int strideA, const float* B, int64_t ldb, long long int strideB, const float* beta, float* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, long long strideA, const float* BP, int64_t ldb, long long strideB, const float* beta, float* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasSgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, strideA, &fB, ldb_64, strideB, &fb, &fC, ldc_64, strideC, batchCount_64); + blasStatus = cublasSgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, strideA, &fB, ldb_64, strideB, &fb, &fC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, long long int strideA, const double* B, int64_t ldb, long long int strideB, const double* beta, double* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, long long strideA, const double* BP, int64_t ldb, long long strideB, const double* beta, double* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasDgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, strideA, &dB, ldb_64, strideB, &db, &dC, ldc_64, strideC, batchCount_64); + blasStatus = cublasDgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, strideA, &dB, ldb_64, strideB, &db, &dC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, long long int strideA, const cuComplex* B, int64_t ldb, long long int strideB, const cuComplex* beta, cuComplex* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmStridedBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, long long strideA, const hipComplex* BP, int64_t ldb, long long strideB, const hipComplex* beta, hipComplex* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasCgemmStridedBatched_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, strideA, &complexB, ldb_64, strideB, &complexb, &complexC, ldc_64, strideC, batchCount_64); + blasStatus = cublasCgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, strideA, &complexB, ldb_64, strideB, &complexb, &complexC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, long long int strideA, const cuDoubleComplex* B, int64_t ldb, long long int strideB, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmStridedBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, long long strideA, const hipDoubleComplex* BP, int64_t ldb, long long strideB, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasZgemmStridedBatched_v2_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexB, ldb_64, strideB, &dcomplexb, &dcomplexC, ldc_64, strideC, batchCount_64); + blasStatus = cublasZgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexB, ldb_64, strideB, &dcomplexb, &dcomplexC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, long long int strideA, const __half* B, int64_t ldb, long long int strideB, const __half* beta, __half* C, int64_t ldc, long long int strideC, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* AP, int64_t lda, long long strideA, const hipblasHalf* BP, int64_t ldb, long long strideB, const hipblasHalf* beta, hipblasHalf* CP, int64_t ldc, long long strideC, int64_t batchCount); + // CHECK: blasStatus = hipblasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); + blasStatus = cublasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index dd48b9c9..cc1c1168 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3088,6 +3088,31 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* const A[], int64_t lda, const rocblas_half* const B[], int64_t ldb, const rocblas_half* beta, rocblas_half* const C[], int64_t ldc, int64_t batch_count); // CHECK: blasStatus = rocblas_hgemm_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); blasStatus = cublasHgemmBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hAarray_const, lda_64, hBarray_const, ldb_64, hb, hCarray, ldc_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, long long int strideA, const float* B, int64_t ldb, long long int strideB, const float* beta, float* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, rocblas_stride stride_a, const float* B, int64_t ldb, rocblas_stride stride_b, const float* beta, float* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_sgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, strideA, &fB, ldb_64, strideB, &fb, &fC, ldc_64, strideC, batchCount_64); + blasStatus = cublasSgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &fa, &fA, lda_64, strideA, &fB, ldb_64, strideB, &fb, &fC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, long long int strideA, const double* B, int64_t ldb, long long int strideB, const double* beta, double* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, rocblas_stride stride_a, const double* B, int64_t ldb, rocblas_stride stride_b, const double* beta, double* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_dgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, strideA, &dB, ldb_64, strideB, &db, &dC, ldc_64, strideC, batchCount_64); + blasStatus = cublasDgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &da, &dA, lda_64, strideA, &dB, ldb_64, strideB, &db, &dC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, long long int strideA, const cuComplex* B, int64_t ldb, long long int strideB, const cuComplex* beta, cuComplex* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, rocblas_stride stride_a, const rocblas_float_complex* B, int64_t ldb, rocblas_stride stride_b, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_cgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, strideA, &complexB, ldb_64, strideB, &complexb, &complexC, ldc_64, strideC, batchCount_64); + blasStatus = cublasCgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &complexa, &complexA, lda_64, strideA, &complexB, ldb_64, strideB, &complexb, &complexC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, long long int strideA, const cuDoubleComplex* B, int64_t ldb, long long int strideB, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, rocblas_stride stride_a, const rocblas_double_complex* B, int64_t ldb, rocblas_stride stride_b, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_zgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexB, ldb_64, strideB, &dcomplexb, &dcomplexC, ldc_64, strideC, batchCount_64); + blasStatus = cublasZgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexB, ldb_64, strideB, &dcomplexb, &dcomplexC, ldc_64, strideC, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmStridedBatched_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, long long int strideA, const __half* B, int64_t ldb, long long int strideB, const __half* beta, __half* C, int64_t ldc, long long int strideC, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_strided_batched_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* A, int64_t lda, rocblas_stride stride_a, const rocblas_half* B, int64_t ldb, rocblas_stride stride_b, const rocblas_half* beta, rocblas_half* C, int64_t ldc, rocblas_stride stride_c, int64_t batch_count); + // CHECK: blasStatus = rocblas_hgemm_strided_batched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); + blasStatus = cublasHgemmStridedBatched_64(blasHandle, transa, transb, m_64, n_64, k_64, ha, hA, lda_64, strideA, hB, ldb_64, strideB, hb, hC, ldc_64, strideC, batchCount_64); #endif return 0;