diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e5a8b87..d3ae1564 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,22 @@

 Full documentation for HIPIFY is available at [hipify.readthedocs.io](https://hipify.readthedocs.io/en/latest/).

+## HIPIFY for ROCm 5.7.0
+### Added
+- CUDA 12.2.0 support
+- cuDNN 8.9.2 support
+- LLVM 16.0.6 support
+- Initial rocSPARSE support
+- Initial CUDA2ROC documentation generation for rocBLAS, rocSPARSE, and MIOpen:
+  - in separate files: hipify-clang --md --doc-format=full --doc-roc=separate
+  - in a single file: hipify-clang --md --doc-format=full --doc-roc=joint
+- New options:
+  - --use-hip-data-types (Use 'hipDataType' instead of 'hipblasDatatype_t' or 'rocblas_datatype')
+  - --doc-roc=\<value\> (ROC documentation generation: 'skip' (default), 'separate', or 'joint'; the '--md' or '--csv' option must be specified)
+### Fixed
+- [#822] Add a new function call transformation type "additional const by value arg"
+- [#830] Add a new function call transformation type "move arg from place X to place Y"
+
 ## HIPIFY for ROCm 5.6.0
 ### Added
 - CUDA 12.1.0 support
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9ec50dc6..ce188b75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,11 +5,11 @@ project(hipify-clang)
 include(GNUInstallDirs)

 option(HIPIFY_INCLUDE_IN_HIP_SDK "Include HIPIFY in HIP SDK" OFF)
+
 if(HIPIFY_INCLUDE_IN_HIP_SDK)
   if(NOT WIN32)
     message(FATAL_ERROR "HIPIFY_INCLUDE_IN_HIP_SDK is only supported on Windows")
-  endif()
-  if(CMAKE_GENERATOR MATCHES "Visual Studio")
+  elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
     message(FATAL_ERROR "HIPIFY_INCLUDE_IN_HIP_SDK is not targeting Visual Studio")
   endif()
 else()
@@ -28,48 +28,47 @@ list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR})
 include(AddLLVM)

 if (NOT HIPIFY_CLANG_TESTS_ONLY)
-
-if(MSVC AND MSVC_VERSION VERSION_LESS "1900")
+  if(MSVC AND MSVC_VERSION VERSION_LESS "1900")
   message(SEND_ERROR "hipify-clang could be built by Visual Studio 14 2015 or higher.")
   return()
-endif()
+  endif()

-include_directories(${LLVM_INCLUDE_DIRS})
-add_definitions(${LLVM_DEFINITIONS})
+  include_directories(${LLVM_INCLUDE_DIRS})
+  add_definitions(${LLVM_DEFINITIONS})

-file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp)
-file(GLOB_RECURSE HIPIFY_HEADERS src/*.h)
-add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS})
-target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS})
+  file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp)
+  file(GLOB_RECURSE HIPIFY_HEADERS src/*.h)

-if(HIPIFY_INCLUDE_IN_HIP_SDK)
-  if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
-    message(FATAL_ERROR "In order to include HIPIFY in HIP SDK, HIPIFY needs to be built with LLVM_EXTERNAL_PROJECTS")
-  endif()
+  add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS})
+  target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS})

-  # Need to add clang include directories explicitly if
-  # building as part of llvm.
-  if(LLVM_EXTERNAL_CLANG_SOURCE_DIR)
-    target_include_directories(hipify-clang
-      PRIVATE
-      ${LLVM_BINARY_DIR}/tools/clang/include
-      ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/include)
-  endif()
-  # Need to add lld include directories explicitly if
-  # building as part of llvm.
-  if(LLVM_EXTERNAL_LLD_SOURCE_DIR)
-    target_include_directories(hipify-clang
-      PRIVATE
-      ${LLVM_BINARY_DIR}/tools/lld/include
-      ${LLVM_EXTERNAL_LLD_SOURCE_DIR}/include)
+  if(HIPIFY_INCLUDE_IN_HIP_SDK)
+    if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+      message(FATAL_ERROR "In order to include HIPIFY in HIP SDK, HIPIFY needs to be built with LLVM_EXTERNAL_PROJECTS")
+    endif()
+
+    # Need to add clang include directories explicitly if building as part of llvm.
+    if(LLVM_EXTERNAL_CLANG_SOURCE_DIR)
+      target_include_directories(hipify-clang
+        PRIVATE
+        ${LLVM_BINARY_DIR}/tools/clang/include
+        ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/include)
+    endif()
+
+    # Need to add lld include directories explicitly if building as part of llvm.
+    if(LLVM_EXTERNAL_LLD_SOURCE_DIR)
+      target_include_directories(hipify-clang
+        PRIVATE
+        ${LLVM_BINARY_DIR}/tools/lld/include
+        ${LLVM_EXTERNAL_LLD_SOURCE_DIR}/include)
+    endif()
+  else()
+    set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++)
+    set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang)
   endif()
-else()
-  set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++)
-  set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang)
-endif()

-# Link against LLVM and CLANG libraries
-target_link_libraries(hipify-clang PRIVATE
+  # Link against LLVM and CLANG libraries.
+  target_link_libraries(hipify-clang PRIVATE
   clangASTMatchers
   clangFrontend
   clangTooling
@@ -93,87 +92,80 @@ target_link_libraries(hipify-clang PRIVATE
   LLVMOption
   LLVMCore)

-if(LLVM_PACKAGE_VERSION VERSION_GREATER "6.0.1")
+  if(LLVM_PACKAGE_VERSION VERSION_GREATER "6.0.1")
   target_link_libraries(hipify-clang PRIVATE clangToolingInclusions)
-endif()
+  endif()

-if(LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1")
+  if(LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1")
   target_link_libraries(hipify-clang PRIVATE LLVMFrontendOpenMP)
-endif()
+  endif()

-if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "15.0.0")
+  if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "15.0.0")
   target_link_libraries(hipify-clang PRIVATE LLVMWindowsDriver clangSupport)
-endif()
+  endif()

-if(LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0")
+  if(LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0")
   if(MSVC)
-    set(STD "/std:c++17")
+      set(STD "/std:c++17")
   else()
-    set(STD "-std=c++17")
+      set(STD "-std=c++17")
   endif()
-else()
+  else()
   if(MSVC)
-    set(STD "/std:c++14")
+      set(STD "/std:c++14")
   else()
-    set(STD "-std=c++14")
+      set(STD "-std=c++14")
   endif()
-endif()
-
-# [ToDo] Remove SWDEV_375013 related guards from CMakeLists.txt and HipifyAction.cpp along with the LLVM 16.0.0 official release
-option (SWDEV_375013 "Enables SWDEV-375013 blocker workaround for the clang's change https://reviews.llvm.org/D140332" OFF)
-if(SWDEV_375013)
-  add_definitions(-DSWDEV_375013)
-endif()
+  endif()

-if(MSVC)
+  if(MSVC)
   target_link_libraries(hipify-clang PRIVATE version)
   target_compile_options(hipify-clang PRIVATE ${STD} /Od /GR- /EHs- /EHc-)
   set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} /SUBSYSTEM:WINDOWS")
-else()
+  else()
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${STD} -pthread -fno-rtti -fvisibility-inlines-hidden")
-endif()
+  endif()

-# Address Sanitize Flag
-if(ADDRESS_SANITIZER)
+  # Address Sanitizer flag.
+  if(ADDRESS_SANITIZER)
   set(addr_var -fsanitize=address)
-else()
+  else()
   set(addr_var )
-endif()
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} ${addr_var}")
+  endif()
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} ${addr_var}")

-# [ToDo] Remove D125860 related guards from CMakeLists.txt with the LLVM 16.0.0 official release
-option (D125860 "Enables treating clang's resource dir as lib/clang/X.Y.Z, as it was before clang's change D125860, merged as e1b88c8a09be25b86b13f98755a9bd744b4dbf14" OFF)
-if(D125860)
+  # [ToDo] Remove D125860 related guards from CMakeLists.txt with the LLVM 16.0.0 official release.
+  option (D125860 "Enables treating clang's resource dir as lib/clang/X.Y.Z, as it was before clang's change D125860, merged as e1b88c8a09be25b86b13f98755a9bd744b4dbf14" OFF)
+  if(D125860)
   add_definitions(-D125860)
-endif()
-if((LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") AND (NOT D125860))
+  endif()
+  if((LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") AND (NOT D125860))
   set(LIB_CLANG_RES ${LLVM_VERSION_MAJOR})
-else()
+  else()
   set(LIB_CLANG_RES ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH})
-endif()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LIB_CLANG_RES}\\\" ${addr_var}")
+  endif()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LIB_CLANG_RES}\\\" ${addr_var}")

-set(INSTALL_PATH_DOC_STRING "hipify-clang Installation Path")
-if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  set(INSTALL_PATH_DOC_STRING "hipify-clang Installation Path")
+  if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
   set(CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/dist" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE)
-endif()
+  endif()

-set(HIPIFY_BIN_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}/bin")
+  set(HIPIFY_BIN_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}/bin")

-install(TARGETS hipify-clang DESTINATION bin)
-# install bin directory in CMAKE_INSTALL_PREFIX path
-install(
+  install(TARGETS hipify-clang DESTINATION bin)
+  # Install bin directory in CMAKE_INSTALL_PREFIX path.
+  install(
   DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin
   DESTINATION .
   USE_SOURCE_PERMISSIONS
   PATTERN "hipify-perl"
   PATTERN "*.sh")

-# Headers are already included in HIP SDK, so skip those if including
-# HIPIFY in HIP SDK.
-if(NOT HIPIFY_INCLUDE_IN_HIP_SDK)
-  # install all folders under clang/version/ in CMAKE_INSTALL_PREFIX path
-  install(
+  # Headers are already included in HIP SDK, so skip those if including HIPIFY in HIP SDK.
+  if(NOT HIPIFY_INCLUDE_IN_HIP_SDK)
+    # Install all folders under clang/version/ in CMAKE_INSTALL_PREFIX path.
+    install(
   DIRECTORY ${LLVM_DIR}/../../clang/${LIB_CLANG_RES}/
   DESTINATION .
   COMPONENT clang-resource-headers
@@ -185,7 +177,7 @@ if(NOT HIPIFY_INCLUDE_IN_HIP_SDK)
   PATTERN "new"
   PATTERN "ppc_wrappers" EXCLUDE
   PATTERN "openmp_wrappers" EXCLUDE)
-endif()
+  endif()

 # install all folders under clang/version/ in CMAKE_INSTALL_PREFIX path
 install(
@@ -201,97 +193,99 @@ install(
   PATTERN "ppc_wrappers" EXCLUDE
   PATTERN "openmp_wrappers" EXCLUDE)

-option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
-
-if(UNIX)
-
-  #get rid of any RPATH definations already
+  if(UNIX)
+    # Get rid of any RPATH definitions already.
set_target_properties(hipify-clang PROPERTIES INSTALL_RPATH "") - #set RPATH for the binary - set_target_properties(hipify-clang PROPERTIES LINK_FLAGS "-Wl,--disable-new-dtags -Wl,--rpath,$ORIGIN/../lib" ) + # Set RPATH for the binary. + set_target_properties(hipify-clang PROPERTIES LINK_FLAGS "-Wl,--enable-new-dtags -Wl,--rpath,$ORIGIN/../lib" ) + + option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON) if(FILE_REORG_BACKWARD_COMPATIBILITY) - include(hipify-backward-compat.cmake) + include(hipify-backward-compat.cmake) endif() + set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm" CACHE PATH "HIP Package Installation Path") set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hipify-clang) + configure_file(packaging/hipify-clang.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) configure_file(${CMAKE_SOURCE_DIR}/LICENSE.txt ${BUILD_DIR}/LICENSE.txt @ONLY) - add_custom_target(package_hipify-clang COMMAND ${CMAKE_COMMAND} . - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND rm -rf *.deb *.rpm *.tar.gz - COMMAND make package - COMMAND cp *.deb ${PROJECT_BINARY_DIR} - COMMAND cp *.rpm ${PROJECT_BINARY_DIR} - COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} - WORKING_DIRECTORY ${BUILD_DIR}) -endif() + add_custom_target(package_hipify-clang COMMAND ${CMAKE_COMMAND} . + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND rm -rf *.deb *.rpm *.tar.gz + COMMAND make package + COMMAND cp *.deb ${PROJECT_BINARY_DIR} + COMMAND cp *.rpm ${PROJECT_BINARY_DIR} + COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} + WORKING_DIRECTORY ${BUILD_DIR}) + endif() -endif() +endif() # if (NOT HIPIFY_CLANG_TESTS_ONLY) if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY) - find_package(PythonInterp 2.7 REQUIRED) - - function (require_program PROGRAM_NAME) - find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME}) - if(FOUND_${PROGRAM_NAME}) - message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}") - else() - message(SEND_ERROR "Can't find ${PROGRAM_NAME}. 
Either set HIPIFY_CLANG_TESTS(_ONLY) to OFF to disable HIPIFY tests, or install the missing program.") - endif() - endfunction() - - require_program(lit) - require_program(FileCheck) - - find_package(CUDA REQUIRED) - if((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR - (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR - (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR - (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR - (CUDA_VERSION VERSION_GREATER "9.2" AND LLVM_PACKAGE_VERSION VERSION_LESS "8.0") OR - (CUDA_VERSION VERSION_GREATER "10.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "9.0") OR - (CUDA_VERSION VERSION_GREATER "10.1" AND LLVM_PACKAGE_VERSION VERSION_LESS "10.0")) - message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by LLVM ${LLVM_PACKAGE_VERSION}.") - if(CUDA_VERSION_MAJOR VERSION_LESS "7") - message(STATUS "Please install CUDA 7.0 or higher.") - elseif(CUDA_VERSION_MAJOR VERSION_LESS "8") - message(STATUS "Please install LLVM + clang 3.8 or higher.") - elseif(CUDA_VERSION_MAJOR VERSION_LESS "9") - message(STATUS "Please install LLVM + clang 4.0 or higher.") - elseif(CUDA_VERSION VERSION_EQUAL "9.0") - message(STATUS "Please install LLVM + clang 6.0 or higher.") - elseif(CUDA_VERSION_MAJOR VERSION_LESS "10") - message(STATUS "Please install LLVM + clang 7.0 or higher.") - elseif(CUDA_VERSION VERSION_EQUAL "10.0") - message(STATUS "Please install LLVM + clang 8.0 or higher.") - elseif(CUDA_VERSION VERSION_EQUAL "10.1") - message(STATUS "Please install LLVM + clang 9.0 or higher.") - elseif(CUDA_VERSION VERSION_EQUAL "10.2" OR CUDA_VERSION VERSION_EQUAL "11.0") - message(STATUS "Please install LLVM + clang 10.0 or higher.") - endif() + find_package(PythonInterp 2.7 REQUIRED) + + function (require_program PROGRAM_NAME) + find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME}) + if(FOUND_${PROGRAM_NAME}) + message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}") + else() + message(SEND_ERROR "Can't find ${PROGRAM_NAME}. 
Either set HIPIFY_CLANG_TESTS(_ONLY) to OFF to disable HIPIFY tests, or install the missing program.") endif() + endfunction() + + require_program(lit) + require_program(FileCheck) + + find_package(CUDA REQUIRED) + if((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR + (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR + (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR + (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR + (CUDA_VERSION VERSION_GREATER "9.2" AND LLVM_PACKAGE_VERSION VERSION_LESS "8.0") OR + (CUDA_VERSION VERSION_GREATER "10.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "9.0") OR + (CUDA_VERSION VERSION_GREATER "10.1" AND LLVM_PACKAGE_VERSION VERSION_LESS "10.0")) + message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by LLVM ${LLVM_PACKAGE_VERSION}.") + if(CUDA_VERSION_MAJOR VERSION_LESS "7") + message(STATUS "Please install CUDA 7.0 or higher.") + elseif(CUDA_VERSION_MAJOR VERSION_LESS "8") + message(STATUS "Please install LLVM + clang 3.8 or higher.") + elseif(CUDA_VERSION_MAJOR VERSION_LESS "9") + message(STATUS "Please install LLVM + clang 4.0 or higher.") + elseif(CUDA_VERSION VERSION_EQUAL "9.0") + message(STATUS "Please install LLVM + clang 6.0 or higher.") + elseif(CUDA_VERSION_MAJOR VERSION_LESS "10") + message(STATUS "Please install LLVM + clang 7.0 or higher.") + elseif(CUDA_VERSION VERSION_EQUAL "10.0") + message(STATUS "Please install LLVM + clang 8.0 or higher.") + elseif(CUDA_VERSION VERSION_EQUAL "10.1") + message(STATUS "Please install LLVM + clang 9.0 or higher.") + elseif(CUDA_VERSION VERSION_EQUAL "10.2" OR CUDA_VERSION VERSION_EQUAL "11.0") + message(STATUS "Please install LLVM + clang 10.0 or higher.") + endif() + endif() - configure_file( - ${CMAKE_CURRENT_LIST_DIR}/tests/lit.site.cfg.in - ${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg - @ONLY) + configure_file( + ${CMAKE_CURRENT_LIST_DIR}/tests/lit.site.cfg.in + ${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg + @ONLY) -if(HIPIFY_CLANG_TESTS_ONLY) + if(HIPIFY_CLANG_TESTS_ONLY) add_lit_testsuite(test-hipify "Running HIPIFY regression tests" - ${CMAKE_CURRENT_LIST_DIR}/tests - PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg - ARGS -v) -else() + ${CMAKE_CURRENT_LIST_DIR}/tests + PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg + ARGS -v) + else() add_lit_testsuite(test-hipify "Running HIPIFY regression tests" - ${CMAKE_CURRENT_LIST_DIR}/tests - PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg - ARGS -v - DEPENDS hipify-clang) -endif() + ${CMAKE_CURRENT_LIST_DIR}/tests + PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg + ARGS -v + DEPENDS hipify-clang) + endif() - add_custom_target(test-hipify-clang) - add_dependencies(test-hipify-clang test-hipify) - set_target_properties(test-hipify-clang PROPERTIES FOLDER "Tests") -endif() + add_custom_target(test-hipify-clang) + add_dependencies(test-hipify-clang test-hipify) + set_target_properties(test-hipify-clang PROPERTIES FOLDER "Tests") + +endif() # if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY) diff --git a/bin/hipify-perl b/bin/hipify-perl index a066031b..b090469f 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1173,6 +1173,7 @@ sub rocSubstitutions { subst("cublasGemmEx", "rocblas_gemm_ex", "library"); subst("cublasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", "library"); subst("cublasGetAtomicsMode", "rocblas_get_atomics_mode", 
"library"); + subst("cublasGetMathMode", "rocblas_get_math_mode", "library"); subst("cublasGetMatrix", "rocblas_get_matrix", "library"); subst("cublasGetMatrixAsync", "rocblas_get_matrix_async", "library"); subst("cublasGetPointerMode", "rocblas_get_pointer_mode", "library"); @@ -1219,6 +1220,7 @@ sub rocSubstitutions { subst("cublasSdot", "rocblas_sdot", "library"); subst("cublasSdot_v2", "rocblas_sdot", "library"); subst("cublasSetAtomicsMode", "rocblas_set_atomics_mode", "library"); + subst("cublasSetMathMode", "rocblas_set_math_mode", "library"); subst("cublasSetMatrix", "rocblas_set_matrix", "library"); subst("cublasSetMatrixAsync", "rocblas_set_matrix_async", "library"); subst("cublasSetPointerMode", "rocblas_set_pointer_mode", "library"); @@ -1476,6 +1478,7 @@ sub rocSubstitutions { subst("cusparseCreateDnMat", "rocsparse_create_dnmat_descr", "library"); subst("cusparseCreateDnVec", "rocsparse_create_dnvec_descr", "library"); subst("cusparseCreateHybMat", "rocsparse_create_hyb_mat", "library"); + subst("cusparseCreateIdentityPermutation", "rocsparse_create_identity_permutation", "library"); subst("cusparseCreateMatDescr", "rocsparse_create_mat_descr", "library"); subst("cusparseCreateSpVec", "rocsparse_create_spvec_descr", "library"); subst("cusparseCscSetPointers", "rocsparse_csc_set_pointers", "library"); @@ -1503,6 +1506,7 @@ sub rocSubstitutions { subst("cusparseDnVecGet", "rocsparse_dnvec_get", "library"); subst("cusparseDnVecGetValues", "rocsparse_dnvec_get_values", "library"); subst("cusparseDnVecSetValues", "rocsparse_dnvec_set_values", "library"); + subst("cusparseDpruneCsr2csrByPercentage", "rocsparse_dprune_csr2csr_by_percentage", "library"); subst("cusparseGather", "rocsparse_gather", "library"); subst("cusparseGetMatDiagType", "rocsparse_get_mat_diag_type", "library"); subst("cusparseGetMatFillMode", "rocsparse_get_mat_fill_mode", "library"); @@ -1541,8 +1545,14 @@ sub rocSubstitutions { subst("cusparseSpVecGetIndexBase", "rocsparse_spvec_get_index_base", "library"); subst("cusparseSpVecGetValues", "rocsparse_spvec_get_values", "library"); subst("cusparseSpVecSetValues", "rocsparse_spvec_set_values", "library"); + subst("cusparseXcoo2csr", "rocsparse_coo2csr", "library"); subst("cusparseXcoosortByColumn", "rocsparse_coosort_by_column", "library"); subst("cusparseXcoosortByRow", "rocsparse_coosort_by_row", "library"); + subst("cusparseXcoosort_bufferSizeExt", "rocsparse_coosort_buffer_size", "library"); + subst("cusparseXcscsort", "rocsparse_cscsort", "library"); + subst("cusparseXcscsort_bufferSizeExt", "rocsparse_cscsort_buffer_size", "library"); + subst("cusparseXcsrsort", "rocsparse_csrsort", "library"); + subst("cusparseXcsrsort_bufferSizeExt", "rocsparse_csrsort_buffer_size", "library"); subst("cusparseXgebsr2gebsrNnz", "rocsparse_gebsr2gebsr_nnz", "library"); subst("cusparseZbsr2csr", "rocsparse_zbsr2csr", "library"); subst("cusparseZcsrcolor", "rocsparse_zcsrcolor", "library"); @@ -1555,12 +1565,14 @@ sub rocSubstitutions { subst("cuDoubleComplex", "rocblas_double_complex", "type"); subst("cuFloatComplex", "rocblas_float_complex", "type"); subst("cublasAtomicsMode_t", "rocblas_atomics_mode", "type"); + subst("cublasComputeType_t", "rocblas_computetype", "type"); subst("cublasContext", "_rocblas_handle", "type"); subst("cublasDataType_t", "rocblas_datatype", "type"); subst("cublasDiagType_t", "rocblas_diagonal", "type"); subst("cublasFillMode_t", "rocblas_fill", "type"); subst("cublasGemmAlgo_t", "rocblas_gemm_algo", "type"); subst("cublasHandle_t", 
"rocblas_handle", "type"); + subst("cublasMath_t", "rocblas_math_mode", "type"); subst("cublasOperation_t", "rocblas_operation", "type"); subst("cublasPointerMode_t", "rocblas_pointer_mode", "type"); subst("cublasSideMode_t", "rocblas_side", "type"); @@ -1644,8 +1656,12 @@ sub rocSubstitutions { subst("cusparseSpVecDescr_t", "rocsparse_spvec_descr", "type"); subst("cusparseSparseToDenseAlg_t", "rocsparse_sparse_to_dense_alg", "type"); subst("cusparseStatus_t", "rocsparse_status", "type"); + subst("pruneInfo", "_rocsparse_mat_info", "type"); + subst("pruneInfo_t", "rocsparse_mat_info", "type"); subst("CUBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", "numeric_literal"); subst("CUBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", "numeric_literal"); + subst("CUBLAS_COMPUTE_32F", "rocblas_compute_type_f32", "numeric_literal"); + subst("CUBLAS_DEFAULT_MATH", "rocblas_default_math", "numeric_literal"); subst("CUBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", "numeric_literal"); subst("CUBLAS_DIAG_UNIT", "rocblas_diagonal_unit", "numeric_literal"); subst("CUBLAS_FILL_MODE_FULL", "rocblas_fill_full", "numeric_literal"); @@ -1670,6 +1686,7 @@ sub rocSubstitutions { subst("CUBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", "numeric_literal"); subst("CUBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_perf_degraded", "numeric_literal"); subst("CUBLAS_STATUS_SUCCESS", "rocblas_status_success", "numeric_literal"); + subst("CUBLAS_TF32_TENSOR_OP_MATH", "rocblas_xf32_xdl_math_op", "numeric_literal"); subst("CUDA_C_16BF", "rocblas_datatype_bf16_c", "numeric_literal"); subst("CUDA_C_16F", "rocblas_datatype_f16_c", "numeric_literal"); subst("CUDA_C_32F", "rocblas_datatype_f32_c", "numeric_literal"); @@ -4059,6 +4076,7 @@ sub simpleSubstitutions { subst("cusparseStatus_t", "hipsparseStatus_t", "type"); subst("nvrtcProgram", "hiprtcProgram", "type"); subst("nvrtcResult", "hiprtcResult", "type"); + subst("pruneInfo", "pruneInfo", "type"); subst("pruneInfo_t", "pruneInfo_t", "type"); subst("surfaceReference", "surfaceReference", "type"); subst("texture", "texture", "type"); @@ -5880,6 +5898,8 @@ sub warnUnsupportedDeviceFunctions { "mulhi", "mul64hi", "mul24", + "make_half2", + "make_bfloat162", "llmin", "llmax", "int_as_float", @@ -6061,6 +6081,8 @@ sub warnUnsupportedDeviceFunctions { "__heq2_mask", "__hcmadd", "__halves2bfloat162", + "__half2uchar_rz", + "__half2char_rz", "__hadd_rn", "__hadd2_rn", "__fsub_rz", @@ -6088,6 +6110,7 @@ sub warnUnsupportedDeviceFunctions { "__float2bfloat16_rd", "__float2bfloat162_rn", "__float2bfloat16", + "__float22bfloat162_rn", "__finitel", "__finitef", "__finite", @@ -6132,6 +6155,7 @@ sub warnUnsupportedDeviceFunctions { "__bfloat162uint_ru", "__bfloat162uint_rn", "__bfloat162uint_rd", + "__bfloat162uchar_rz", "__bfloat162short_rz", "__bfloat162short_ru", "__bfloat162short_rn", @@ -6145,6 +6169,7 @@ sub warnUnsupportedDeviceFunctions { "__bfloat162int_rn", "__bfloat162int_rd", "__bfloat162float", + "__bfloat162char_rz", "__bfloat162bfloat162", "__bfloat1622float2", "_Pow_int" @@ -6218,7 +6243,6 @@ sub warnUnsupportedFunctions { my $line_num = shift; my $k = 0; foreach $func ( - "pruneInfo", "nvrtcGetSupportedArchs", "nvrtcGetOptiXIRSize", "nvrtcGetOptiXIR", @@ -6764,14 +6788,27 @@ sub warnUnsupportedFunctions { "cudaOccupancyAvailableDynamicSMemPerBlock", "cudaNvSciSyncAttrWait", "cudaNvSciSyncAttrSignal", + "cudaMemsetParamsV2", "cudaMemoryTypeUnregistered", "cudaMemcpyToArrayAsync", + "cudaMemcpyNodeParams", "cudaMemcpyFromArrayAsync", 
"cudaMemcpyArrayToArray", "cudaMemcpy3DPeerParms", "cudaMemcpy3DPeerAsync", "cudaMemcpy3DPeer", "cudaMemcpy2DArrayToArray", + "cudaMemRangeAttributePreferredLocationType", + "cudaMemRangeAttributePreferredLocationId", + "cudaMemRangeAttributeLastPrefetchLocationType", + "cudaMemRangeAttributeLastPrefetchLocationId", + "cudaMemPrefetchAsync_v2", + "cudaMemLocationTypeHostNumaCurrent", + "cudaMemLocationTypeHostNuma", + "cudaMemLocationTypeHost", + "cudaMemFreeNodeParams", + "cudaMemAllocNodeParamsV2", + "cudaMemAdvise_v2", "cudaLimitPersistingL2CacheSize", "cudaLimitMaxL2FetchGranularity", "cudaLimitDevRuntimeSyncDepth", @@ -6801,6 +6838,7 @@ sub warnUnsupportedFunctions { "cudaLaunchAttribute", "cudaKeyValuePair", "cudaKernel_t", + "cudaKernelNodeParamsV2", "cudaKernelNodeAttributePriority", "cudaKernelNodeAttributeMemSyncDomainMap", "cudaKernelNodeAttributeMemSyncDomain", @@ -6808,6 +6846,7 @@ sub warnUnsupportedFunctions { "cudaKernelNodeAttributeClusterDimension", "cudaInitDeviceFlagsAreValid", "cudaInitDevice", + "cudaHostNodeParamsV2", "cudaGraphicsVDPAURegisterVideoSurface", "cudaGraphicsVDPAURegisterOutputSurface", "cudaGraphicsResourceSetMapFlags", @@ -6828,6 +6867,8 @@ sub warnUnsupportedFunctions { "cudaGraphicsCubeFaceNegativeY", "cudaGraphicsCubeFaceNegativeX", "cudaGraphicsCubeFace", + "cudaGraphNodeSetParams", + "cudaGraphNodeParams", "cudaGraphInstantiateWithParams", "cudaGraphInstantiateSuccess", "cudaGraphInstantiateResult", @@ -6844,9 +6885,11 @@ sub warnUnsupportedFunctions { "cudaGraphExecUpdateResultInfo_st", "cudaGraphExecUpdateResultInfo", "cudaGraphExecUpdateErrorAttributesChanged", + "cudaGraphExecNodeSetParams", "cudaGraphExecGetFlags", "cudaGraphExecExternalSemaphoresWaitNodeSetParams", "cudaGraphExecExternalSemaphoresSignalNodeSetParams", + "cudaGraphAddNode", "cudaGraphAddExternalSemaphoresWaitNode", "cudaGraphAddExternalSemaphoresSignalNode", "cudaGetTextureObjectTextureDesc_v2", @@ -6891,8 +6934,10 @@ sub warnUnsupportedFunctions { "cudaFlushGPUDirectRDMAWritesOptionMemOps", "cudaFlushGPUDirectRDMAWritesOptionHost", "cudaExternalSemaphoreWaitSkipNvSciBufMemSync", + "cudaExternalSemaphoreWaitNodeParamsV2", "cudaExternalSemaphoreWaitNodeParams", "cudaExternalSemaphoreSignalSkipNvSciBufMemSync", + "cudaExternalSemaphoreSignalNodeParamsV2", "cudaExternalSemaphoreSignalNodeParams", "cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32", "cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd", @@ -6903,9 +6948,11 @@ sub warnUnsupportedFunctions { "cudaExternalMemoryMipmappedArrayDesc", "cudaExternalMemoryHandleTypeNvSciBuf", "cudaExternalMemoryGetMappedMipmappedArray", + "cudaEventWaitNodeParams", "cudaEventWaitExternal", "cudaEventWaitDefault", "cudaEventRecordWithFlags", + "cudaEventRecordNodeParams", "cudaEventRecordExternal", "cudaEventRecordDefault", "cudaEventCreateFromEGLSync", @@ -7069,6 +7116,9 @@ sub warnUnsupportedFunctions { "cudaDriverEntryPointQueryResult", "cudaDeviceSyncMemops", "cudaDevicePropDontCare", + "cudaDeviceNumaConfigNumaNode", + "cudaDeviceNumaConfigNone", + "cudaDeviceNumaConfig", "cudaDeviceMask", "cudaDeviceGetTexture1DLinearMaxWidth", "cudaDeviceGetNvSciSyncAttributes", @@ -7085,6 +7135,8 @@ sub warnUnsupportedFunctions { "cudaDevAttrReserved124", "cudaDevAttrReserved123", "cudaDevAttrReserved122", + "cudaDevAttrNumaId", + "cudaDevAttrNumaConfig", "cudaDevAttrMemoryPoolSupportedHandleTypes", "cudaDevAttrMemSyncDomainCount", "cudaDevAttrMaxTimelineSemaphoreInteropSupported", @@ -7100,6 +7152,7 @@ sub warnUnsupportedFunctions { 
"cudaDevAttrIpcEventSupport", "cudaDevAttrHostRegisterSupported", "cudaDevAttrHostRegisterReadOnlySupported", + "cudaDevAttrHostNumaId", "cudaDevAttrGPUDirectRDMAWritesOrdering", "cudaDevAttrGPUDirectRDMASupported", "cudaDevAttrGPUDirectRDMAFlushWritesOptions", @@ -7170,6 +7223,7 @@ sub warnUnsupportedFunctions { "cudaClusterSchedulingPolicyLoadBalancing", "cudaClusterSchedulingPolicyDefault", "cudaClusterSchedulingPolicy", + "cudaChildGraphNodeParams", "cudaChannelFormatKindUnsignedNormalized8X4", "cudaChannelFormatKindUnsignedNormalized8X2", "cudaChannelFormatKindUnsignedNormalized8X1", @@ -7278,7 +7332,9 @@ sub warnUnsupportedFunctions { "cuMemcpy3DPeerAsync", "cuMemcpy3DPeer", "cuMemcpy", + "cuMemPrefetchAsync_v2", "cuMemGetHandleForAddressRange", + "cuMemAdvise_v2", "cuLibraryUnload", "cuLibraryLoadFromFile", "cuLibraryLoadData", @@ -7305,11 +7361,13 @@ sub warnUnsupportedFunctions { "cuGraphicsD3D9RegisterResource", "cuGraphicsD3D11RegisterResource", "cuGraphicsD3D10RegisterResource", + "cuGraphNodeSetParams", "cuGraphInstantiateWithParams", "cuGraphExternalSemaphoresWaitNodeSetParams", "cuGraphExternalSemaphoresWaitNodeGetParams", "cuGraphExternalSemaphoresSignalNodeSetParams", "cuGraphExternalSemaphoresSignalNodeGetParams", + "cuGraphExecNodeSetParams", "cuGraphExecMemsetNodeSetParams", "cuGraphExecMemcpyNodeSetParams", "cuGraphExecGetFlags", @@ -7318,6 +7376,7 @@ sub warnUnsupportedFunctions { "cuGraphExecBatchMemOpNodeSetParams", "cuGraphBatchMemOpNodeSetParams", "cuGraphBatchMemOpNodeGetParams", + "cuGraphAddNode", "cuGraphAddMemsetNode", "cuGraphAddMemcpyNode", "cuGraphAddExternalSemaphoresWaitNode", @@ -7882,6 +7941,8 @@ sub warnUnsupportedFunctions { "CU_EGL_COLOR_FORMAT_ARGB", "CU_EGL_COLOR_FORMAT_ABGR", "CU_EGL_COLOR_FORMAT_A", + "CU_DEVICE_NUMA_CONFIG_NUMA_NODE", + "CU_DEVICE_NUMA_CONFIG_NONE", "CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED", "CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS", "CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED", @@ -9524,7 +9585,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSetSmCountTarget", "cublasSetMatrix_64", "cublasSetMatrixAsync_64", - "cublasSetMathMode", "cublasSetLoggerCallback", "cublasSetKernelStream", "cublasSdot_v2_64", @@ -9548,7 +9608,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasRotEx_64", "cublasNrm2Ex_64", "cublasMigrateComputeType", - "cublasMath_t", "cublasLoggerConfigure", "cublasLogCallback", "cublasIzamin_v2_64", @@ -9591,7 +9650,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGetProperty", "cublasGetMatrix_64", "cublasGetMatrixAsync_64", - "cublasGetMathMode", "cublasGetLoggerCallback", "cublasGetError", "cublasGetCudartVersion", @@ -9818,7 +9876,6 @@ sub warnRocOnlyUnsupportedFunctions { "CUDA_C_4I", "CUDA_C_16U", "CUDA_C_16I", - "CUBLAS_TF32_TENSOR_OP_MATH", "CUBLAS_TENSOR_OP_MATH", "CUBLAS_STATUS_LICENSE_ERROR", "CUBLAS_PEDANTIC_MATH", @@ -9866,7 +9923,6 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLAS_GEMM_ALGO1", "CUBLAS_GEMM_ALGO0_TENSOR_OP", "CUBLAS_GEMM_ALGO0", - "CUBLAS_DEFAULT_MATH", "CUBLAS_COMPUTE_64F_PEDANTIC", "CUBLAS_COMPUTE_64F", "CUBLAS_COMPUTE_32I_PEDANTIC", @@ -9875,7 +9931,6 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLAS_COMPUTE_32F_FAST_TF32", "CUBLAS_COMPUTE_32F_FAST_16F", "CUBLAS_COMPUTE_32F_FAST_16BF", - "CUBLAS_COMPUTE_32F", "CUBLAS_COMPUTE_16F_PEDANTIC", "CUBLAS_COMPUTE_16F" ) diff --git a/docs/.sphinx/requirements.in b/docs/.sphinx/requirements.in index a4f5c674..e53f3df1 100644 --- a/docs/.sphinx/requirements.in +++ b/docs/.sphinx/requirements.in @@ -1 +1 @@ 
-rocm-docs-core==0.18.4
+rocm-docs-core==0.19.0
diff --git a/docs/.sphinx/requirements.txt b/docs/.sphinx/requirements.txt
index 0a276bca..ee9448ee 100644
--- a/docs/.sphinx/requirements.txt
+++ b/docs/.sphinx/requirements.txt
@@ -92,7 +92,7 @@ requests==2.28.2
     # via
     #   pygithub
     #   sphinx
-rocm-docs-core==0.18.4
+rocm-docs-core==0.19.0
     # via -r requirements.in
 smmap==5.0.0
     # via gitdb
diff --git a/docs/hipify-clang.md b/docs/hipify-clang.md
index eb213a33..4e437667 100644
--- a/docs/hipify-clang.md
+++ b/docs/hipify-clang.md
@@ -23,7 +23,7 @@ After applying all the matchers, the output HIP source is produced.

 1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**16.0.6**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-16.0.6).

-2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**12.1.1**](https://developer.nvidia.com/cuda-downloads).
+2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**12.2.0**](https://developer.nvidia.com/cuda-downloads).
@@ -169,12 +169,12 @@
 16.0.4, 16.0.5,
 16.0.6
[4 lines of raw HTML table markup were lost in rendering here; per this hunk, the table's latest supported CUDA version cells change from 12.1.1 to 12.2.0]
@@ -199,14 +199,14 @@ To process a file, `hipify-clang` needs access to the same headers that would be

 For example:

 ```shell
-./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.1 -I /usr/local/cuda-12.1/samples/common/inc
+./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.2 -I /usr/local/cuda-12.2/samples/common/inc
 ```

 `hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you were compiling the input file. For example:

 ```bash
-./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-12.1 -- -std=c++17
+./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-12.2 -- -std=c++17
 ```

 The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful.
@@ -327,9 +327,9 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro

   - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/include`

-  - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1"`
+  - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2"`

-    `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.1"`
+    `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.2"`

 4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed.
@@ -389,7 +389,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5

 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5

-Ubuntu 20-21: LLVM 9.0.0 - 16.0.6, CUDA 8.0 - 12.1.1, cuDNN 5.1.10 - 8.9.2
+Ubuntu 20-21: LLVM 9.0.0 - 16.0.6, CUDA 8.0 - 12.2.0, cuDNN 5.1.10 - 8.9.2

 Minimum build system requirements for the above configurations:
@@ -443,7 +443,7 @@ cmake
 -- Performing Test CMAKE_HAVE_LIBC_PTHREAD
 -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
 -- Found Threads: TRUE
--- Found CUDA: /usr/local/cuda (found version "12.1")
+-- Found CUDA: /usr/local/cuda (found version "12.2")
 -- Configuring done
 -- Generating done
 -- Build files have been written to: /usr/hipify/build
@@ -457,7 +457,7 @@ make test-hipify
 ```shell
 Running HIPify regression tests
 ========================================
-CUDA 12.1 - will be used for testing
+CUDA 12.2 - will be used for testing
 LLVM 16.0.6 - will be used for testing
 x86_64 - Platform architecture
 Linux 5.13.0-21-generic - Platform OS
@@ -576,8 +576,8 @@ Testing Time: 7.90s
 | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 |
 | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 |
 | 15.0.0 - 15.0.7 | 7.0 - 11.8.0 | 8.0.5 - 8.8.1 | 2017.15.9.53, 2019.16.11.25, 2022.17.5.2 | 3.26.0 | 3.11.2 |
-| 16.0.0 - 16.0.6 | 7.0 - 12.1.1 | 8.0.5 - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4 | 3.11.4 |
-| 17.0.0git | 7.0 - 12.1.1 | 8.0.5 - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4 | 3.11.4 |
+| 16.0.0 - 16.0.6 | 7.0 - 12.2.0 | 8.0.5 - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4 | 3.11.4 |
+| 17.0.0git | 7.0 - 12.2.0 | 8.0.5 - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4 | 3.11.4 |

 *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:*

@@ -590,9 +590,9 @@ cmake
 -DCMAKE_BUILD_TYPE=Release \
 -DCMAKE_INSTALL_PREFIX=../dist \
 -DCMAKE_PREFIX_PATH=d:/LLVM/16.0.6/dist \
-   -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing
Toolkit/CUDA/v12.1" \ - -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.1" \ - -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-12.1-windows-x64-v8.9.2 \ + -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2" \ + -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.2" \ + -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-12.2-windows-x64-v8.9.2 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ -DLLVM_EXTERNAL_LIT=d:/LLVM/16.0.6/build/Release/bin/llvm-lit.py \ ../hipify @@ -606,7 +606,7 @@ cmake -- Found PythonInterp: c:/Program Files/Python311/python.exe (found suitable version "3.11.4", minimum required is "3.6") -- Found lit: c:/Program Files/Python311/Scripts/lit.exe -- Found FileCheck: d:/LLVM/16.0.6/dist/bin/FileCheck.exe --- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1 (found version "12.1") +-- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2 (found version "12.2") -- Configuring done -- Generating done -- Build files have been written to: d:/hipify/build diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 2c350b76..b0850b1d 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -182,7 +182,7 @@ |`cublasSetAtomicsMode`| | | |`hipblasSetAtomicsMode`|3.10.0| | | | |`cublasSetKernelStream`| | | | | | | | | |`cublasSetLoggerCallback`|9.2| | | | | | | | -|`cublasSetMathMode`| | | | | | | | | +|`cublasSetMathMode`|9.0| | | | | | | | |`cublasSetMatrix`| | | |`hipblasSetMatrix`|1.8.2| | | | |`cublasSetMatrixAsync`| | | |`hipblasSetMatrixAsync`|3.7.0| | | | |`cublasSetMatrixAsync_64`|12.0| | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 8c232a5c..282b2eca 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -8,7 +8,7 @@ |`CUBLAS_ATOMICS_NOT_ALLOWED`| | | |`HIPBLAS_ATOMICS_NOT_ALLOWED`|3.10.0| | | |`rocblas_atomics_not_allowed`|3.8.0| | | | |`CUBLAS_COMPUTE_16F`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_16F_PEDANTIC`|11.0| | | | | | | | | | | | | -|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | | | | | | | +|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | |`rocblas_compute_type_f32`|5.7.0| | | | |`CUBLAS_COMPUTE_32F_FAST_16BF`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_32F_FAST_16F`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_32F_FAST_TF32`|11.0| | | | | | | | | | | | | @@ -17,7 +17,7 @@ |`CUBLAS_COMPUTE_32I_PEDANTIC`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_64F`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_64F_PEDANTIC`|11.0| | | | | | | | | | | | | -|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | | | | | | | +|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | |`rocblas_default_math`|5.7.0| | | | |`CUBLAS_DIAG_NON_UNIT`| | | |`HIPBLAS_DIAG_NON_UNIT`|1.8.2| | | |`rocblas_diagonal_non_unit`|1.5.0| | | | |`CUBLAS_DIAG_UNIT`| | | |`HIPBLAS_DIAG_UNIT`|1.8.2| | | |`rocblas_diagonal_unit`|1.5.0| | | | |`CUBLAS_FILL_MODE_FULL`|10.1| | |`HIPBLAS_FILL_MODE_FULL`|1.8.2| | | |`rocblas_fill_full`|1.5.0| | | | @@ -89,16 +89,16 @@ |`CUBLAS_STATUS_NOT_SUPPORTED`| | | |`HIPBLAS_STATUS_NOT_SUPPORTED`|1.8.2| | | |`rocblas_status_perf_degraded`|3.5.0| | | | |`CUBLAS_STATUS_SUCCESS`| | | |`HIPBLAS_STATUS_SUCCESS`|1.8.2| | | |`rocblas_status_success`|1.5.0| | | | |`CUBLAS_TENSOR_OP_MATH`|9.0|11.0| | | | | | | | | | | | -|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | | | | | | | 
+|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | |`rocblas_xf32_xdl_math_op`|5.7.0| | | | |`cublasAtomicsMode_t`| | | |`hipblasAtomicsMode_t`|3.10.0| | | |`rocblas_atomics_mode`|3.8.0| | | | -|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | | | | | | | +|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | |`rocblas_computetype`|5.7.0| | | | |`cublasContext`| | | | | | | | |`_rocblas_handle`|1.5.0| | | | |`cublasDataType_t`|7.5| | |`hipblasDatatype_t`|1.8.2| | | |`rocblas_datatype`|1.8.2| | | | |`cublasDiagType_t`| | | |`hipblasDiagType_t`|1.8.2| | | |`rocblas_diagonal`|1.5.0| | | | |`cublasFillMode_t`| | | |`hipblasFillMode_t`|1.8.2| | | |`rocblas_fill`|1.5.0| | | | |`cublasGemmAlgo_t`|8.0| | |`hipblasGemmAlgo_t`|1.8.2| | | |`rocblas_gemm_algo`|1.8.2| | | | |`cublasHandle_t`| | | |`hipblasHandle_t`|3.0.0| | | |`rocblas_handle`|1.5.0| | | | -|`cublasMath_t`|9.0| | | | | | | | | | | | | +|`cublasMath_t`|9.0| | | | | | | |`rocblas_math_mode`|5.7.0| | | | |`cublasOperation_t`| | | |`hipblasOperation_t`|1.8.2| | | |`rocblas_operation`|1.5.0| | | | |`cublasPointerMode_t`| | | |`hipblasPointerMode_t`|1.8.2| | | |`rocblas_pointer_mode`|1.6.0| | | | |`cublasSideMode_t`| | | |`hipblasSideMode_t`|1.8.2| | | |`rocblas_side`|1.5.0| | | | @@ -156,7 +156,7 @@ |`cublasGetCudartVersion`|10.1| | | | | | | | | | | | | |`cublasGetError`| | | | | | | | | | | | | | |`cublasGetLoggerCallback`|9.2| | | | | | | | | | | | | -|`cublasGetMathMode`|9.0| | | | | | | | | | | | | +|`cublasGetMathMode`|9.0| | | | | | | |`rocblas_get_math_mode`|5.7.0| | | | |`cublasGetMatrix`| | | |`hipblasGetMatrix`|1.8.2| | | |`rocblas_get_matrix`|1.6.0| | | | |`cublasGetMatrixAsync`| | | |`hipblasGetMatrixAsync`|3.7.0| | | |`rocblas_get_matrix_async`|3.5.0| | | | |`cublasGetMatrixAsync_64`|12.0| | | | | | | | | | | | | @@ -182,7 +182,7 @@ |`cublasSetAtomicsMode`| | | |`hipblasSetAtomicsMode`|3.10.0| | | |`rocblas_set_atomics_mode`|3.8.0| | | | |`cublasSetKernelStream`| | | | | | | | | | | | | | |`cublasSetLoggerCallback`|9.2| | | | | | | | | | | | | -|`cublasSetMathMode`| | | | | | | | | | | | | | +|`cublasSetMathMode`|9.0| | | | | | | |`rocblas_set_math_mode`|5.7.0| | | | |`cublasSetMatrix`| | | |`hipblasSetMatrix`|1.8.2| | | |`rocblas_set_matrix`|1.6.0| | | | |`cublasSetMatrixAsync`| | | |`hipblasSetMatrixAsync`|3.7.0| | | |`rocblas_set_matrix_async`|3.5.0| | | | |`cublasSetMatrixAsync_64`|12.0| | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index c1ab92b9..8d89c7ca 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -8,7 +8,7 @@ |`CUBLAS_ATOMICS_NOT_ALLOWED`| | | |`rocblas_atomics_not_allowed`|3.8.0| | | | |`CUBLAS_COMPUTE_16F`|11.0| | | | | | | | |`CUBLAS_COMPUTE_16F_PEDANTIC`|11.0| | | | | | | | -|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | | +|`CUBLAS_COMPUTE_32F`|11.0| | |`rocblas_compute_type_f32`|5.7.0| | | | |`CUBLAS_COMPUTE_32F_FAST_16BF`|11.0| | | | | | | | |`CUBLAS_COMPUTE_32F_FAST_16F`|11.0| | | | | | | | |`CUBLAS_COMPUTE_32F_FAST_TF32`|11.0| | | | | | | | @@ -17,7 +17,7 @@ |`CUBLAS_COMPUTE_32I_PEDANTIC`|11.0| | | | | | | | |`CUBLAS_COMPUTE_64F`|11.0| | | | | | | | |`CUBLAS_COMPUTE_64F_PEDANTIC`|11.0| | | | | | | | -|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | | +|`CUBLAS_DEFAULT_MATH`|9.0| | |`rocblas_default_math`|5.7.0| | | | |`CUBLAS_DIAG_NON_UNIT`| | | |`rocblas_diagonal_non_unit`|1.5.0| | | | |`CUBLAS_DIAG_UNIT`| | | |`rocblas_diagonal_unit`|1.5.0| | | | 
|`CUBLAS_FILL_MODE_FULL`|10.1| | |`rocblas_fill_full`|1.5.0| | | | @@ -89,16 +89,16 @@ |`CUBLAS_STATUS_NOT_SUPPORTED`| | | |`rocblas_status_perf_degraded`|3.5.0| | | | |`CUBLAS_STATUS_SUCCESS`| | | |`rocblas_status_success`|1.5.0| | | | |`CUBLAS_TENSOR_OP_MATH`|9.0|11.0| | | | | | | -|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | | +|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | |`rocblas_xf32_xdl_math_op`|5.7.0| | | | |`cublasAtomicsMode_t`| | | |`rocblas_atomics_mode`|3.8.0| | | | -|`cublasComputeType_t`|11.0| | | | | | | | +|`cublasComputeType_t`|11.0| | |`rocblas_computetype`|5.7.0| | | | |`cublasContext`| | | |`_rocblas_handle`|1.5.0| | | | |`cublasDataType_t`|7.5| | |`rocblas_datatype`|1.8.2| | | | |`cublasDiagType_t`| | | |`rocblas_diagonal`|1.5.0| | | | |`cublasFillMode_t`| | | |`rocblas_fill`|1.5.0| | | | |`cublasGemmAlgo_t`|8.0| | |`rocblas_gemm_algo`|1.8.2| | | | |`cublasHandle_t`| | | |`rocblas_handle`|1.5.0| | | | -|`cublasMath_t`|9.0| | | | | | | | +|`cublasMath_t`|9.0| | |`rocblas_math_mode`|5.7.0| | | | |`cublasOperation_t`| | | |`rocblas_operation`|1.5.0| | | | |`cublasPointerMode_t`| | | |`rocblas_pointer_mode`|1.6.0| | | | |`cublasSideMode_t`| | | |`rocblas_side`|1.5.0| | | | @@ -156,7 +156,7 @@ |`cublasGetCudartVersion`|10.1| | | | | | | | |`cublasGetError`| | | | | | | | | |`cublasGetLoggerCallback`|9.2| | | | | | | | -|`cublasGetMathMode`|9.0| | | | | | | | +|`cublasGetMathMode`|9.0| | |`rocblas_get_math_mode`|5.7.0| | | | |`cublasGetMatrix`| | | |`rocblas_get_matrix`|1.6.0| | | | |`cublasGetMatrixAsync`| | | |`rocblas_get_matrix_async`|3.5.0| | | | |`cublasGetMatrixAsync_64`|12.0| | | | | | | | @@ -182,7 +182,7 @@ |`cublasSetAtomicsMode`| | | |`rocblas_set_atomics_mode`|3.8.0| | | | |`cublasSetKernelStream`| | | | | | | | | |`cublasSetLoggerCallback`|9.2| | | | | | | | -|`cublasSetMathMode`| | | | | | | | | +|`cublasSetMathMode`|9.0| | |`rocblas_set_math_mode`|5.7.0| | | | |`cublasSetMatrix`| | | |`rocblas_set_matrix`|1.6.0| | | | |`cublasSetMatrixAsync`| | | |`rocblas_set_matrix_async`|3.5.0| | | | |`cublasSetMatrixAsync_64`|12.0| | | | | | | | diff --git a/docs/tables/CUDA_Device_API_supported_by_HIP.md b/docs/tables/CUDA_Device_API_supported_by_HIP.md index a44ed3a9..a38bc6b4 100644 --- a/docs/tables/CUDA_Device_API_supported_by_HIP.md +++ b/docs/tables/CUDA_Device_API_supported_by_HIP.md @@ -12,6 +12,7 @@ |`__ballot`| | | |`__ballot`|1.6.0| | | | |`__bfloat1622float2`|11.0| | | | | | | | |`__bfloat162bfloat162`|11.0| | | | | | | | +|`__bfloat162char_rz`|12.2| | | | | | | | |`__bfloat162float`|11.0| | | | | | | | |`__bfloat162int_rd`|11.0| | | | | | | | |`__bfloat162int_rn`|11.0| | | | | | | | @@ -25,6 +26,7 @@ |`__bfloat162short_rn`|11.0| | | | | | | | |`__bfloat162short_ru`|11.0| | | | | | | | |`__bfloat162short_rz`|11.0| | | | | | | | +|`__bfloat162uchar_rz`|12.2| | | | | | | | |`__bfloat162uint_rd`|11.0| | | | | | | | |`__bfloat162uint_rn`|11.0| | | | | | | | |`__bfloat162uint_ru`|11.0| | | | | | | | @@ -111,6 +113,7 @@ |`__finite`| | | | | | | | | |`__finitef`| | | | | | | | | |`__finitel`| | | | | | | | | +|`__float22bfloat162_rn`|11.0| | | | | | | | |`__float22half2_rn`| | | |`__float22half2_rn`|1.6.0| | | | |`__float2bfloat16`|11.0| | | | | | | | |`__float2bfloat162_rn`|11.0| | | | | | | | @@ -183,6 +186,7 @@ |`__hadd_rn`|11.6| | | | | | | | |`__hadd_sat`| | | |`__hadd_sat`|1.6.0| | | | |`__half22float2`| | | |`__half22float2`|1.6.0| | | | +|`__half2char_rz`|12.2| | | | | | | | |`__half2float`| | | |`__half2float`|1.6.0| | | | |`__half2half2`| | | 
|`__half2half2`|1.9.0| | | | |`__half2int_rd`| | | |`__half2int_rd`|1.6.0| | | | @@ -197,6 +201,7 @@ |`__half2short_rn`| | | |`__half2short_rn`|1.6.0| | | | |`__half2short_ru`| | | |`__half2short_ru`|1.6.0| | | | |`__half2short_rz`| | | |`__half2short_rz`|1.6.0| | | | +|`__half2uchar_rz`|12.2| | | | | | | | |`__half2uint_rd`| | | |`__half2uint_rd`|1.6.0| | | | |`__half2uint_rn`| | | |`__half2uint_rn`|1.6.0| | | | |`__half2uint_ru`| | | |`__half2uint_ru`|1.6.0| | | | @@ -707,6 +712,8 @@ |`lrintf`| | | |`lrintf`|1.6.0| | | | |`lround`| | | |`lround`|1.6.0| | | | |`lroundf`| | | |`lroundf`|1.6.0| | | | +|`make_bfloat162`|12.2| | | | | | | | +|`make_half2`|12.2| | | | | | | | |`max`| | | |`max`|1.6.0| | | | |`min`| | | |`min`|1.6.0| | | | |`modf`| | | |`modf`|1.9.0| | | | diff --git a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md index b43fa72e..dca42564 100644 --- a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md @@ -479,6 +479,8 @@ |`CU_DEVICE_ATTRIBUTE_WARP_SIZE`| | | |`hipDeviceAttributeWarpSize`|1.6.0| | | | |`CU_DEVICE_CPU`|8.0| | |`hipCpuDeviceId`|3.7.0| | | | |`CU_DEVICE_INVALID`|8.0| | |`hipInvalidDeviceId`|3.7.0| | | | +|`CU_DEVICE_NUMA_CONFIG_NONE`|12.2| | | | | | | | +|`CU_DEVICE_NUMA_CONFIG_NUMA_NODE`|12.2| | | | | | | | |`CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED`|10.1|10.1| |`hipDevP2PAttrHipArrayAccessSupported`|3.8.0| | | | |`CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED`|8.0| | |`hipDevP2PAttrAccessSupported`|3.8.0| | | | |`CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED`|9.2|10.0|10.1|`hipDevP2PAttrHipArrayAccessSupported`|3.8.0| | | | @@ -1588,9 +1590,9 @@ |`cuMemsetD8`| | | |`hipMemsetD8`|1.6.0| | | | |`cuMemsetD8Async`| | | |`hipMemsetD8Async`|3.0.0| | | | |`cuMemsetD8_v2`| | | |`hipMemsetD8`|1.6.0| | | | -|`cuMipmappedArrayCreate`| | | |`hipMipmappedArrayCreate`|3.5.0| | | | -|`cuMipmappedArrayDestroy`| | | |`hipMipmappedArrayDestroy`|3.5.0| | | | -|`cuMipmappedArrayGetLevel`| | | |`hipMipmappedArrayGetLevel`|3.5.0| | | | +|`cuMipmappedArrayCreate`| | | |`hipMipmappedArrayCreate`|3.5.0|5.7.0| | | +|`cuMipmappedArrayDestroy`| | | |`hipMipmappedArrayDestroy`|3.5.0|5.7.0| | | +|`cuMipmappedArrayGetLevel`| | | |`hipMipmappedArrayGetLevel`|3.5.0|5.7.0| | | |`cuMipmappedArrayGetMemoryRequirements`|11.6| | | | | | | | ## **14. 
Virtual Memory Management** @@ -1647,7 +1649,9 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| |`cuMemAdvise`|8.0| | |`hipMemAdvise`|3.7.0| | | | +|`cuMemAdvise_v2`|12.2| | | | | | | | |`cuMemPrefetchAsync`|8.0| | |`hipMemPrefetchAsync`|3.7.0| | | | +|`cuMemPrefetchAsync_v2`|12.2| | | | | | | | |`cuMemRangeGetAttribute`|8.0| | |`hipMemRangeGetAttribute`|3.7.0| | | | |`cuMemRangeGetAttributes`|8.0| | |`hipMemRangeGetAttributes`|3.7.0| | | | |`cuPointerGetAttribute`| | | |`hipPointerGetAttribute`|5.0.0| | | | @@ -1776,6 +1780,7 @@ |`cuGraphAddMemFreeNode`|11.4| | |`hipGraphAddMemFreeNode`|5.5.0| | | | |`cuGraphAddMemcpyNode`|10.0| | | | | | | | |`cuGraphAddMemsetNode`|10.0| | | | | | | | +|`cuGraphAddNode`|12.2| | | | | | | | |`cuGraphBatchMemOpNodeGetParams`|11.7| | | | | | | | |`cuGraphBatchMemOpNodeSetParams`|11.7| | | | | | | | |`cuGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | | @@ -1800,6 +1805,7 @@ |`cuGraphExecKernelNodeSetParams`|10.1| | |`hipGraphExecKernelNodeSetParams`|4.5.0| | | | |`cuGraphExecMemcpyNodeSetParams`|10.2| | | | | | | | |`cuGraphExecMemsetNodeSetParams`|10.2| | | | | | | | +|`cuGraphExecNodeSetParams`|12.2| | | | | | | | |`cuGraphExecUpdate`|10.2| | |`hipGraphExecUpdate`|5.0.0| | | | |`cuGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | | | | | | |`cuGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | | | | | | @@ -1832,6 +1838,7 @@ |`cuGraphNodeGetEnabled`|11.6| | |`hipGraphNodeGetEnabled`|5.5.0| | | | |`cuGraphNodeGetType`|10.0| | |`hipGraphNodeGetType`|5.0.0| | | | |`cuGraphNodeSetEnabled`|11.6| | |`hipGraphNodeSetEnabled`|5.5.0| | | | +|`cuGraphNodeSetParams`|12.2| | | | | | | | |`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | | |`cuGraphRemoveDependencies`|10.0| | |`hipGraphRemoveDependencies`|5.0.0| | | | |`cuGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | | diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md index eb93726b..4f8a4eb3 100644 --- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -171,8 +171,10 @@ |`cudaMallocMipmappedArray`| | | |`hipMallocMipmappedArray`|3.5.0| | | | |`cudaMallocPitch`| | | |`hipMallocPitch`|1.6.0| | | | |`cudaMemAdvise`|8.0| | |`hipMemAdvise`|3.7.0| | | | +|`cudaMemAdvise_v2`|12.2| | | | | | | | |`cudaMemGetInfo`| | | |`hipMemGetInfo`|1.6.0| | | | |`cudaMemPrefetchAsync`|8.0| | |`hipMemPrefetchAsync`|3.7.0| | | | +|`cudaMemPrefetchAsync_v2`|12.2| | | | | | | | |`cudaMemRangeGetAttribute`|8.0| | |`hipMemRangeGetAttribute`|3.7.0| | | | |`cudaMemRangeGetAttributes`|8.0| | |`hipMemRangeGetAttributes`|3.7.0| | | | |`cudaMemcpy`| | | |`hipMemcpy`|1.5.0| | | | @@ -424,6 +426,7 @@ |`cudaGraphAddMemcpyNodeFromSymbol`|11.1| | |`hipGraphAddMemcpyNodeFromSymbol`|5.0.0| | | | |`cudaGraphAddMemcpyNodeToSymbol`|11.1| | |`hipGraphAddMemcpyNodeToSymbol`|5.0.0| | | | |`cudaGraphAddMemsetNode`|10.0| | |`hipGraphAddMemsetNode`|4.3.0| | | | +|`cudaGraphAddNode`|12.2| | | | | | | | |`cudaGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | | |`cudaGraphClone`|10.0| | |`hipGraphClone`|5.0.0| | | | |`cudaGraphCreate`|10.0| | |`hipGraphCreate`|4.3.0| | | | @@ -448,6 +451,7 @@ |`cudaGraphExecMemcpyNodeSetParamsFromSymbol`|11.1| | |`hipGraphExecMemcpyNodeSetParamsFromSymbol`|5.0.0| | | | 
|`cudaGraphExecMemcpyNodeSetParamsToSymbol`|11.1| | |`hipGraphExecMemcpyNodeSetParamsToSymbol`|5.0.0| | | | |`cudaGraphExecMemsetNodeSetParams`|11.0| | |`hipGraphExecMemsetNodeSetParams`|5.0.0| | | | +|`cudaGraphExecNodeSetParams`|12.2| | | | | | | | |`cudaGraphExecUpdate`|11.0| | |`hipGraphExecUpdate`|5.0.0| | | | |`cudaGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | | | | | | |`cudaGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | | | | | | @@ -482,6 +486,7 @@ |`cudaGraphNodeGetEnabled`|11.6| | |`hipGraphNodeGetEnabled`|5.5.0| | | | |`cudaGraphNodeGetType`|11.0| | |`hipGraphNodeGetType`|5.0.0| | | | |`cudaGraphNodeSetEnabled`|11.6| | |`hipGraphNodeSetEnabled`|5.5.0| | | | +|`cudaGraphNodeSetParams`|12.2| | | | | | | | |`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | | |`cudaGraphRemoveDependencies`|11.0| | |`hipGraphRemoveDependencies`|5.0.0| | | | |`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | | @@ -595,6 +600,7 @@ |`cudaChannelFormatKindUnsignedNormalized8X1`|11.5| | | | | | | | |`cudaChannelFormatKindUnsignedNormalized8X2`|11.5| | | | | | | | |`cudaChannelFormatKindUnsignedNormalized8X4`|11.5| | | | | | | | +|`cudaChildGraphNodeParams`|12.2| | | | | | | | |`cudaClusterSchedulingPolicy`|11.8| | | | | | | | |`cudaClusterSchedulingPolicyDefault`|11.8| | | | | | | | |`cudaClusterSchedulingPolicyLoadBalancing`|11.8| | | | | | | | @@ -657,6 +663,7 @@ |`cudaDevAttrGlobalMemoryBusWidth`| | | |`hipDeviceAttributeMemoryBusWidth`|1.6.0| | | | |`cudaDevAttrGpuOverlap`| | | |`hipDeviceAttributeAsyncEngineCount`|4.3.0| | | | |`cudaDevAttrHostNativeAtomicSupported`|8.0| | |`hipDeviceAttributeHostNativeAtomicSupported`|4.3.0| | | | +|`cudaDevAttrHostNumaId`|12.2| | | | | | | | |`cudaDevAttrHostRegisterReadOnlySupported`|11.1| | | | | | | | |`cudaDevAttrHostRegisterSupported`|9.2| | | | | | | | |`cudaDevAttrIntegrated`| | | |`hipDeviceAttributeIntegrated`|1.9.0| | | | @@ -731,6 +738,8 @@ |`cudaDevAttrMemoryPoolsSupported`|11.2| | |`hipDeviceAttributeMemoryPoolsSupported`|5.2.0| | | | |`cudaDevAttrMultiGpuBoardGroupID`| | | |`hipDeviceAttributeMultiGpuBoardGroupID`|5.0.0| | | | |`cudaDevAttrMultiProcessorCount`| | | |`hipDeviceAttributeMultiprocessorCount`|1.6.0| | | | +|`cudaDevAttrNumaConfig`|12.2| | | | | | | | +|`cudaDevAttrNumaId`|12.2| | | | | | | | |`cudaDevAttrPageableMemoryAccess`|8.0| | |`hipDeviceAttributePageableMemoryAccess`|3.10.0| | | | |`cudaDevAttrPageableMemoryAccessUsesHostPageTables`|9.2| | |`hipDeviceAttributePageableMemoryAccessUsesHostPageTables`|3.10.0| | | | |`cudaDevAttrPciBusId`| | | |`hipDeviceAttributePciBusId`|1.6.0| | | | @@ -767,6 +776,9 @@ |`cudaDeviceLmemResizeToMax`| | | |`hipDeviceLmemResizeToMax`|1.6.0| | | | |`cudaDeviceMapHost`| | | |`hipDeviceMapHost`|1.6.0| | | | |`cudaDeviceMask`| | | | | | | | | +|`cudaDeviceNumaConfig`|12.2| | | | | | | | +|`cudaDeviceNumaConfigNone`|12.2| | | | | | | | +|`cudaDeviceNumaConfigNumaNode`|12.2| | | | | | | | |`cudaDeviceP2PAttr`|8.0| | |`hipDeviceP2PAttr`|3.8.0| | | | |`cudaDeviceProp`| | | |`hipDeviceProp_t`|1.6.0| | | | |`cudaDevicePropDontCare`| | |12.0| | | | | | @@ -1002,8 +1014,10 @@ |`cudaEventInterprocess`| | | |`hipEventInterprocess`|1.6.0| | | | |`cudaEventRecordDefault`|11.1| | | | | | | | |`cudaEventRecordExternal`|11.1| | | | | | | | +|`cudaEventRecordNodeParams`|12.2| | | | | | | | |`cudaEventWaitDefault`|11.1| | | | | | | | |`cudaEventWaitExternal`| | | | | | | | | +|`cudaEventWaitNodeParams`|12.2| | | | | | | | |`cudaEvent_t`| | | 
|`hipEvent_t`|1.6.0| | | | |`cudaExtent`| | | |`hipExtent`|1.7.0| | | | |`cudaExternalMemoryBufferDesc`|10.0| | |`hipExternalMemoryBufferDesc`|4.3.0| | | | @@ -1033,10 +1047,12 @@ |`cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`|11.2| | | | | | | | |`cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32`|11.2| | | | | | | | |`cudaExternalSemaphoreSignalNodeParams`|11.2| | | | | | | | +|`cudaExternalSemaphoreSignalNodeParamsV2`|12.2| | | | | | | | |`cudaExternalSemaphoreSignalParams`|10.0| | |`hipExternalSemaphoreSignalParams`|4.4.0| | | | |`cudaExternalSemaphoreSignalParams_v1`|11.2| | |`hipExternalSemaphoreSignalParams`|4.4.0| | | | |`cudaExternalSemaphoreSignalSkipNvSciBufMemSync`|10.2| | | | | | | | |`cudaExternalSemaphoreWaitNodeParams`|11.2| | | | | | | | +|`cudaExternalSemaphoreWaitNodeParamsV2`|12.2| | | | | | | | |`cudaExternalSemaphoreWaitParams`|10.0| | |`hipExternalSemaphoreWaitParams`|4.4.0| | | | |`cudaExternalSemaphoreWaitParams_v1`|11.2| | |`hipExternalSemaphoreWaitParams`|4.4.0| | | | |`cudaExternalSemaphoreWaitSkipNvSciBufMemSync`|10.2| | | | | | | | @@ -1125,6 +1141,7 @@ |`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | | |`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | | |`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | | +|`cudaGraphNodeParams`|12.2| | | | | | | | |`cudaGraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | | |`cudaGraphNodeTypeCount`|10.0| | |`hipGraphNodeTypeCount`|4.3.0| | | | |`cudaGraphNodeTypeEmpty`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | | @@ -1167,6 +1184,7 @@ |`cudaHostAllocWriteCombined`| | | |`hipHostMallocWriteCombined`|1.6.0| | | | |`cudaHostFn_t`|10.0| | |`hipHostFn_t`|4.3.0| | | | |`cudaHostNodeParams`|10.0| | |`hipHostNodeParams`|4.3.0| | | | +|`cudaHostNodeParamsV2`|12.2| | | | | | | | |`cudaHostRegisterDefault`| | | |`hipHostRegisterDefault`|1.6.0| | | | |`cudaHostRegisterIoMemory`|7.5| | |`hipHostRegisterIoMemory`|1.6.0| | | | |`cudaHostRegisterMapped`| | | |`hipHostRegisterMapped`|1.6.0| | | | @@ -1189,6 +1207,7 @@ |`cudaKernelNodeAttributeMemSyncDomainMap`|12.0| | | | | | | | |`cudaKernelNodeAttributePriority`|11.7| | | | | | | | |`cudaKernelNodeParams`|10.0| | |`hipKernelNodeParams`|4.3.0| | | | +|`cudaKernelNodeParamsV2`|12.2| | | | | | | | |`cudaKernel_t`|12.1| | | | | | | | |`cudaKeyValuePair`| | |12.0| | | | | | |`cudaLaunchAttribute`|11.8| | | | | | | | @@ -1234,6 +1253,7 @@ |`cudaMemAdviseUnsetPreferredLocation`|8.0| | |`hipMemAdviseUnsetPreferredLocation`|3.7.0| | | | |`cudaMemAdviseUnsetReadMostly`|8.0| | |`hipMemAdviseUnsetReadMostly`|3.7.0| | | | |`cudaMemAllocNodeParams`|11.4| | |`hipMemAllocNodeParams`|5.5.0| | | | +|`cudaMemAllocNodeParamsV2`|12.2| | | | | | | | |`cudaMemAllocationHandleType`|11.2| | |`hipMemAllocationHandleType`|5.2.0| | | | |`cudaMemAllocationType`|11.2| | |`hipMemAllocationType`|5.2.0| | | | |`cudaMemAllocationTypeInvalid`|11.2| | |`hipMemAllocationTypeInvalid`|5.2.0| | | | @@ -1242,6 +1262,7 @@ |`cudaMemAttachGlobal`| | | |`hipMemAttachGlobal`|2.5.0| | | | |`cudaMemAttachHost`| | | |`hipMemAttachHost`|2.5.0| | | | |`cudaMemAttachSingle`| | | |`hipMemAttachSingle`|3.7.0| | | | +|`cudaMemFreeNodeParams`|12.2| | | | | | | | |`cudaMemHandleTypeNone`|11.2| | |`hipMemHandleTypeNone`|5.2.0| | | | |`cudaMemHandleTypePosixFileDescriptor`|11.2| | |`hipMemHandleTypePosixFileDescriptor`|5.2.0| | | | |`cudaMemHandleTypeWin32`|11.2| | |`hipMemHandleTypeWin32`|5.2.0| | | | @@ -1249,6 +1270,9 @@ 
|`cudaMemLocation`|11.2| | |`hipMemLocation`|5.2.0| | | | |`cudaMemLocationType`|11.2| | |`hipMemLocationType`|5.2.0| | | | |`cudaMemLocationTypeDevice`|11.2| | |`hipMemLocationTypeDevice`|5.2.0| | | | +|`cudaMemLocationTypeHost`|12.2| | | | | | | | +|`cudaMemLocationTypeHostNuma`|12.2| | | | | | | | +|`cudaMemLocationTypeHostNumaCurrent`|12.2| | | | | | | | |`cudaMemLocationTypeInvalid`|11.2| | |`hipMemLocationTypeInvalid`|5.2.0| | | | |`cudaMemPoolAttr`|11.2| | |`hipMemPoolAttr`|5.2.0| | | | |`cudaMemPoolAttrReleaseThreshold`|11.2| | |`hipMemPoolAttrReleaseThreshold`|5.2.0| | | | @@ -1265,7 +1289,11 @@ |`cudaMemRangeAttribute`|8.0| | |`hipMemRangeAttribute`|3.7.0| | | | |`cudaMemRangeAttributeAccessedBy`|8.0| | |`hipMemRangeAttributeAccessedBy`|3.7.0| | | | |`cudaMemRangeAttributeLastPrefetchLocation`|8.0| | |`hipMemRangeAttributeLastPrefetchLocation`|3.7.0| | | | +|`cudaMemRangeAttributeLastPrefetchLocationId`|12.2| | | | | | | | +|`cudaMemRangeAttributeLastPrefetchLocationType`|12.2| | | | | | | | |`cudaMemRangeAttributePreferredLocation`|8.0| | |`hipMemRangeAttributePreferredLocation`|3.7.0| | | | +|`cudaMemRangeAttributePreferredLocationId`|12.2| | | | | | | | +|`cudaMemRangeAttributePreferredLocationType`|12.2| | | | | | | | |`cudaMemRangeAttributeReadMostly`|8.0| | |`hipMemRangeAttributeReadMostly`|3.7.0| | | | |`cudaMemcpy3DParms`| | | |`hipMemcpy3DParms`|1.7.0| | | | |`cudaMemcpy3DPeerParms`| | | | | | | | | @@ -1275,6 +1303,7 @@ |`cudaMemcpyHostToDevice`| | | |`hipMemcpyHostToDevice`|1.5.0| | | | |`cudaMemcpyHostToHost`| | | |`hipMemcpyHostToHost`|1.5.0| | | | |`cudaMemcpyKind`| | | |`hipMemcpyKind`|1.5.0| | | | +|`cudaMemcpyNodeParams`|12.2| | | | | | | | |`cudaMemoryAdvise`|8.0| | |`hipMemoryAdvise`|3.7.0| | | | |`cudaMemoryType`| | | |`hipMemoryType`|1.6.0| | | | |`cudaMemoryTypeDevice`| | | |`hipMemoryTypeDevice`|1.6.0| | | | @@ -1282,6 +1311,7 @@ |`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | | | |`cudaMemoryTypeUnregistered`| | | | | | | | | |`cudaMemsetParams`|10.0| | |`hipMemsetParams`|4.3.0| | | | +|`cudaMemsetParamsV2`|12.2| | | | | | | | |`cudaMipmappedArray`| | | |`hipMipmappedArray`|1.7.0| | | | |`cudaMipmappedArray_const_t`| | | |`hipMipmappedArray_const_t`|1.6.0| | | | |`cudaMipmappedArray_t`| | | |`hipMipmappedArray_t`|1.7.0| | | | @@ -1418,7 +1448,7 @@ |`cudaBindTexture`| |11.0|12.0|`hipBindTexture`|1.6.0|3.8.0| | | |`cudaBindTexture2D`| |11.0|12.0|`hipBindTexture2D`|1.7.0|3.8.0| | | |`cudaBindTextureToArray`| |11.0|12.0|`hipBindTextureToArray`|1.6.0|3.8.0| | | -|`cudaBindTextureToMipmappedArray`| |11.0|12.0|`hipBindTextureToMipmappedArray`|1.7.0| | | | +|`cudaBindTextureToMipmappedArray`| |11.0|12.0|`hipBindTextureToMipmappedArray`|1.7.0|5.7.0| | | |`cudaGetTextureAlignmentOffset`| |11.0|12.0|`hipGetTextureAlignmentOffset`|1.9.0|3.8.0| | | |`cudaGetTextureReference`| |11.0|12.0|`hipGetTextureReference`|1.7.0|5.3.0| | | |`cudaUnbindTexture`| |11.0|12.0|`hipUnbindTexture`|1.6.0|3.8.0| | | diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP.md b/docs/tables/CUSPARSE_API_supported_by_HIP.md index 496269da..089f9327 100644 --- a/docs/tables/CUSPARSE_API_supported_by_HIP.md +++ b/docs/tables/CUSPARSE_API_supported_by_HIP.md @@ -183,7 +183,7 @@ |`cusparseSpVecDescr_t`|10.2| | |`hipsparseSpVecDescr_t`|4.1.0| | | | |`cusparseSparseToDenseAlg_t`|11.1| | |`hipsparseSparseToDenseAlg_t`|4.2.0| | | | |`cusparseStatus_t`| | | |`hipsparseStatus_t`|1.9.2| | | | -|`pruneInfo`|9.0| | | | | | | | +|`pruneInfo`|9.0| | |`pruneInfo`|3.9.0| | | | 
|`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | | ## **5. CUSPARSE Management Function Reference** diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md index bbc87ed2..07efeb46 100644 --- a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md @@ -183,8 +183,8 @@ |`cusparseSpVecDescr_t`|10.2| | |`hipsparseSpVecDescr_t`|4.1.0| | | |`rocsparse_spvec_descr`|4.1.0| | | | |`cusparseSparseToDenseAlg_t`|11.1| | |`hipsparseSparseToDenseAlg_t`|4.2.0| | | |`rocsparse_sparse_to_dense_alg`|4.1.0| | | | |`cusparseStatus_t`| | | |`hipsparseStatus_t`|1.9.2| | | |`rocsparse_status`|1.9.0| | | | -|`pruneInfo`|9.0| | | | | | | | | | | | | -|`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | | | | | | | +|`pruneInfo`|9.0| | |`pruneInfo`|3.9.0| | | |`_rocsparse_mat_info`|1.9.0| | | | +|`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | |`rocsparse_mat_info`|1.9.0| | | | ## **5. CUSPARSE Management Function Reference** @@ -645,7 +645,7 @@ |`cusparseCnnz`| | | |`hipsparseCnnz`|3.2.0| | | | | | | | | |`cusparseCnnz_compress`|8.0| | |`hipsparseCnnz_compress`|3.5.0| | | | | | | | | |`cusparseCreateCsru2csrInfo`| | | |`hipsparseCreateCsru2csrInfo`|4.2.0| | | | | | | | | -|`cusparseCreateIdentityPermutation`| | | |`hipsparseCreateIdentityPermutation`|1.9.2| | | | | | | | | +|`cusparseCreateIdentityPermutation`| | | |`hipsparseCreateIdentityPermutation`|1.9.2| | | |`rocsparse_create_identity_permutation`|1.9.0| | | | |`cusparseCsr2cscEx`|8.0|10.2|11.0| | | | | | | | | | | |`cusparseCsr2cscEx2`|10.1| | |`hipsparseCsr2cscEx2`|5.4.0| | | | | | | | | |`cusparseCsr2cscEx2_bufferSize`|10.1| | |`hipsparseCsr2cscEx2_bufferSize`|5.4.0| | | | | | | | | @@ -680,7 +680,7 @@ |`cusparseDnnz`| | | |`hipsparseDnnz`|3.2.0| | | | | | | | | |`cusparseDnnz_compress`|8.0| | |`hipsparseDnnz_compress`|3.5.0| | | | | | | | | |`cusparseDpruneCsr2csr`|9.0| | |`hipsparseDpruneCsr2csr`|3.9.0| | | | | | | | | -|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`hipsparseDpruneCsr2csrByPercentage`|3.9.0| | | | | | | | | +|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`hipsparseDpruneCsr2csrByPercentage`|3.9.0| | | |`rocsparse_dprune_csr2csr_by_percentage`|3.9.0| | | | |`cusparseDpruneCsr2csrByPercentage_bufferSizeExt`|9.0| | |`hipsparseDpruneCsr2csrByPercentage_bufferSizeExt`|3.9.0| | | | | | | | | |`cusparseDpruneCsr2csrNnz`|9.0| | |`hipsparseDpruneCsr2csrNnz`|3.9.0| | | | | | | | | |`cusparseDpruneCsr2csrNnzByPercentage`|9.0| | |`hipsparseDpruneCsr2csrNnzByPercentage`|3.9.0| | | | | | | | | @@ -744,17 +744,17 @@ |`cusparseSpruneDense2csrNnz`|9.0| | |`hipsparseSpruneDense2csrNnz`|3.9.0| | | | | | | | | |`cusparseSpruneDense2csrNnzByPercentage`|9.0| | |`hipsparseSpruneDense2csrNnzByPercentage`|3.9.0| | | | | | | | | |`cusparseSpruneDense2csr_bufferSizeExt`|9.0| | |`hipsparseSpruneDense2csr_bufferSizeExt`|3.9.0| | | | | | | | | -|`cusparseXcoo2csr`| | | |`hipsparseXcoo2csr`|1.9.2| | | | | | | | | +|`cusparseXcoo2csr`| | | |`hipsparseXcoo2csr`|1.9.2| | | |`rocsparse_coo2csr`|1.9.0| | | | |`cusparseXcoosortByColumn`| | | |`hipsparseXcoosortByColumn`|1.9.2| | | |`rocsparse_coosort_by_column`|1.9.0| | | | |`cusparseXcoosortByRow`| | | |`hipsparseXcoosortByRow`|1.9.2| | | |`rocsparse_coosort_by_row`|1.9.0| | | | -|`cusparseXcoosort_bufferSizeExt`| | | |`hipsparseXcoosort_bufferSizeExt`|1.9.2| | | | | | | | | -|`cusparseXcscsort`| | | |`hipsparseXcscsort`|2.10.0| | | | | | | | | -|`cusparseXcscsort_bufferSizeExt`| | | 
|`hipsparseXcscsort_bufferSizeExt`|2.10.0| | | | | | | | | +|`cusparseXcoosort_bufferSizeExt`| | | |`hipsparseXcoosort_bufferSizeExt`|1.9.2| | | |`rocsparse_coosort_buffer_size`|1.9.0| | | | +|`cusparseXcscsort`| | | |`hipsparseXcscsort`|2.10.0| | | |`rocsparse_cscsort`|2.10.0| | | | +|`cusparseXcscsort_bufferSizeExt`| | | |`hipsparseXcscsort_bufferSizeExt`|2.10.0| | | |`rocsparse_cscsort_buffer_size`|2.10.0| | | | |`cusparseXcsr2bsrNnz`| | | |`hipsparseXcsr2bsrNnz`|3.5.0| | | | | | | | | |`cusparseXcsr2coo`| | | |`hipsparseXcsr2coo`|1.9.2| | | | | | | | | |`cusparseXcsr2gebsrNnz`| | | |`hipsparseXcsr2gebsrNnz`|4.1.0| | | | | | | | | -|`cusparseXcsrsort`| | | |`hipsparseXcsrsort`|1.9.2| | | | | | | | | -|`cusparseXcsrsort_bufferSizeExt`| | | |`hipsparseXcsrsort_bufferSizeExt`|1.9.2| | | | | | | | | +|`cusparseXcsrsort`| | | |`hipsparseXcsrsort`|1.9.2| | | |`rocsparse_csrsort`|1.9.0| | | | +|`cusparseXcsrsort_bufferSizeExt`| | | |`hipsparseXcsrsort_bufferSizeExt`|1.9.2| | | |`rocsparse_csrsort_buffer_size`|1.9.0| | | | |`cusparseXgebsr2csr`| | | | | | | | | | | | | | |`cusparseXgebsr2gebsrNnz`| | | |`hipsparseXgebsr2gebsrNnz`|4.1.0| | | |`rocsparse_gebsr2gebsr_nnz`|4.1.0| | | | |`cusparseZbsr2csr`| | | |`hipsparseZbsr2csr`|3.5.0| | | |`rocsparse_zbsr2csr`|3.10.0| | | | diff --git a/docs/tables/CUSPARSE_API_supported_by_ROC.md b/docs/tables/CUSPARSE_API_supported_by_ROC.md index 2dabb8b1..aa15c8da 100644 --- a/docs/tables/CUSPARSE_API_supported_by_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_ROC.md @@ -183,8 +183,8 @@ |`cusparseSpVecDescr_t`|10.2| | |`rocsparse_spvec_descr`|4.1.0| | | | |`cusparseSparseToDenseAlg_t`|11.1| | |`rocsparse_sparse_to_dense_alg`|4.1.0| | | | |`cusparseStatus_t`| | | |`rocsparse_status`|1.9.0| | | | -|`pruneInfo`|9.0| | | | | | | | -|`pruneInfo_t`|9.0| | | | | | | | +|`pruneInfo`|9.0| | |`_rocsparse_mat_info`|1.9.0| | | | +|`pruneInfo_t`|9.0| | |`rocsparse_mat_info`|1.9.0| | | | ## **5. 
CUSPARSE Management Function Reference** @@ -645,7 +645,7 @@ |`cusparseCnnz`| | | | | | | | | |`cusparseCnnz_compress`|8.0| | | | | | | | |`cusparseCreateCsru2csrInfo`| | | | | | | | | -|`cusparseCreateIdentityPermutation`| | | | | | | | | +|`cusparseCreateIdentityPermutation`| | | |`rocsparse_create_identity_permutation`|1.9.0| | | | |`cusparseCsr2cscEx`|8.0|10.2|11.0| | | | | | |`cusparseCsr2cscEx2`|10.1| | | | | | | | |`cusparseCsr2cscEx2_bufferSize`|10.1| | | | | | | | @@ -680,7 +680,7 @@ |`cusparseDnnz`| | | | | | | | | |`cusparseDnnz_compress`|8.0| | | | | | | | |`cusparseDpruneCsr2csr`|9.0| | | | | | | | -|`cusparseDpruneCsr2csrByPercentage`|9.0| | | | | | | | +|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`rocsparse_dprune_csr2csr_by_percentage`|3.9.0| | | | |`cusparseDpruneCsr2csrByPercentage_bufferSizeExt`|9.0| | | | | | | | |`cusparseDpruneCsr2csrNnz`|9.0| | | | | | | | |`cusparseDpruneCsr2csrNnzByPercentage`|9.0| | | | | | | | @@ -744,17 +744,17 @@ |`cusparseSpruneDense2csrNnz`|9.0| | | | | | | | |`cusparseSpruneDense2csrNnzByPercentage`|9.0| | | | | | | | |`cusparseSpruneDense2csr_bufferSizeExt`|9.0| | | | | | | | -|`cusparseXcoo2csr`| | | | | | | | | +|`cusparseXcoo2csr`| | | |`rocsparse_coo2csr`|1.9.0| | | | |`cusparseXcoosortByColumn`| | | |`rocsparse_coosort_by_column`|1.9.0| | | | |`cusparseXcoosortByRow`| | | |`rocsparse_coosort_by_row`|1.9.0| | | | -|`cusparseXcoosort_bufferSizeExt`| | | | | | | | | -|`cusparseXcscsort`| | | | | | | | | -|`cusparseXcscsort_bufferSizeExt`| | | | | | | | | +|`cusparseXcoosort_bufferSizeExt`| | | |`rocsparse_coosort_buffer_size`|1.9.0| | | | +|`cusparseXcscsort`| | | |`rocsparse_cscsort`|2.10.0| | | | +|`cusparseXcscsort_bufferSizeExt`| | | |`rocsparse_cscsort_buffer_size`|2.10.0| | | | |`cusparseXcsr2bsrNnz`| | | | | | | | | |`cusparseXcsr2coo`| | | | | | | | | |`cusparseXcsr2gebsrNnz`| | | | | | | | | -|`cusparseXcsrsort`| | | | | | | | | -|`cusparseXcsrsort_bufferSizeExt`| | | | | | | | | +|`cusparseXcsrsort`| | | |`rocsparse_csrsort`|1.9.0| | | | +|`cusparseXcsrsort_bufferSizeExt`| | | |`rocsparse_csrsort_buffer_size`|1.9.0| | | | |`cusparseXgebsr2csr`| | | | | | | | | |`cusparseXgebsr2gebsrNnz`| | | |`rocsparse_gebsr2gebsr_nnz`|4.1.0| | | | |`cusparseZbsr2csr`| | | |`rocsparse_zbsr2csr`|3.10.0| | | | diff --git a/src/ArgParse.cpp b/src/ArgParse.cpp index 904b5c2e..acead48e 100644 --- a/src/ArgParse.cpp +++ b/src/ArgParse.cpp @@ -172,7 +172,7 @@ cl::opt DocFormat("doc-format", cl::cat(ToolTemplateCategory)); cl::opt DocRoc("doc-roc", - cl::desc("ROC cocumentation generation: 'skip' (default), 'separate', or 'joint'; the '--md' or '--csv' option must be specified"), + cl::desc("ROC documentation generation: 'skip' (default), 'separate', or 'joint'; the '--md' or '--csv' option must be specified"), cl::value_desc("value"), cl::cat(ToolTemplateCategory)); diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index fcc8fb66..c1cffdef 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -35,8 +35,8 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasSetKernelStream", {"hipblasSetKernelStream", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "rocblas_get_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}}, {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "rocblas_set_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}}, - {"cublasGetMathMode", {"hipblasGetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, - {"cublasSetMathMode", 
{"hipblasSetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, + {"cublasGetMathMode", {"hipblasGetMathMode", "rocblas_get_math_mode", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, + {"cublasSetMathMode", {"hipblasSetMathMode", "rocblas_set_math_mode", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, {"cublasMigrateComputeType", {"hipblasMigrateComputeType", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetSmCountTarget", {"hipblasGetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasSetSmCountTarget", {"hipblasSetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, @@ -1075,6 +1075,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { const std::map CUDA_BLAS_FUNCTION_VER_MAP { {"cublasGetMathMode", {CUDA_90, CUDA_0, CUDA_0}}, + {"cublasSetMathMode", {CUDA_90, CUDA_0, CUDA_0}}, {"cublasMigrateComputeType", {CUDA_110, CUDA_0, CUDA_0}}, {"cublasLogCallback", {CUDA_92, CUDA_0, CUDA_0}}, {"cublasLoggerConfigure", {CUDA_92, CUDA_0, CUDA_0}}, @@ -1962,6 +1963,8 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, {"hipblasCgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, {"hipblasZgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, + {"rocblas_get_math_mode", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_set_math_mode", {HIP_5070, HIP_0, HIP_0 }}, }; const std::map CUDA_BLAS_API_SECTION_MAP { diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index fdaccdb0..d7b31796 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -73,12 +73,12 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLAS_ATOMICS_ALLOWED", {"HIPBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", CONV_NUMERIC_LITERAL, API_BLAS, 2}}, // Blas Math mode/tensor operation - {"cublasMath_t", {"hipblasMath_t", "", CONV_TYPE, API_BLAS, 2, UNSUPPORTED}}, - {"CUBLAS_DEFAULT_MATH", {"HIPBLAS_DEFAULT_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 0 - {"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED | CUDA_DEPRECATED}}, // 1 - {"CUBLAS_PEDANTIC_MATH", {"HIPBLAS_PEDANTIC_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 2 - {"CUBLAS_TF32_TENSOR_OP_MATH", {"HIPBLAS_TF32_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 3 - {"CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", {"HIPBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 16 + {"cublasMath_t", {"hipblasMath_t", "rocblas_math_mode", CONV_TYPE, API_BLAS, 2, HIP_UNSUPPORTED}}, + {"CUBLAS_DEFAULT_MATH", {"HIPBLAS_DEFAULT_MATH", "rocblas_default_math", CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 0 + {"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED | CUDA_DEPRECATED}}, // 1 + {"CUBLAS_PEDANTIC_MATH", {"HIPBLAS_PEDANTIC_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 2 + {"CUBLAS_TF32_TENSOR_OP_MATH", {"HIPBLAS_TF32_TENSOR_OP_MATH", "rocblas_xf32_xdl_math_op", CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 3 + {"CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", {"HIPBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 16 // Blass different GEMM algorithms {"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", "rocblas_gemm_algo", CONV_TYPE, API_BLAS, 2}}, @@ -170,10 +170,10 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { // NOTE: renamed UNSUPPORTED hipblasComputeType_t to the HIP supported 
hipblasDatatype_t (workaround) // TODO: change the type to the correct one after fixing https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529 - {"cublasComputeType_t", {"hipblasDatatype_t", "", CONV_TYPE, API_BLAS, 2}}, + {"cublasComputeType_t", {"hipblasDatatype_t", "rocblas_computetype", CONV_TYPE, API_BLAS, 2}}, {"CUBLAS_COMPUTE_16F", {"HIPBLAS_COMPUTE_16F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 64 {"CUBLAS_COMPUTE_16F_PEDANTIC", {"HIPBLAS_COMPUTE_16F_PEDANTIC", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 65 - {"CUBLAS_COMPUTE_32F", {"HIPBLAS_COMPUTE_32F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 68 + {"CUBLAS_COMPUTE_32F", {"HIPBLAS_COMPUTE_32F", "rocblas_compute_type_f32", CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 68 {"CUBLAS_COMPUTE_32F_PEDANTIC", {"HIPBLAS_COMPUTE_32F_PEDANTIC", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 69 {"CUBLAS_COMPUTE_32F_FAST_16F", {"HIPBLAS_COMPUTE_32F_FAST_16F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 74 {"CUBLAS_COMPUTE_32F_FAST_16BF", {"HIPBLAS_COMPUTE_32F_FAST_16BF", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 75 @@ -387,4 +387,9 @@ const std::map HIP_BLAS_TYPE_NAME_VER_MAP { {"rocblas_atomics_allowed", {HIP_3080, HIP_0, HIP_0 }}, {"rocblas_gemm_algo", {HIP_1082, HIP_0, HIP_0 }}, {"rocblas_gemm_algo_standard", {HIP_1082, HIP_0, HIP_0 }}, + {"rocblas_math_mode", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_default_math", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_xf32_xdl_math_op", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_computetype", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_compute_type_f32", {HIP_5070, HIP_0, HIP_0 }}, }; diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index 9d9511e4..1b3c83ae 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -710,6 +710,9 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__hgeu2_mask", {"__hgeu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hltu2_mask", {"__hltu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hgtu2_mask", {"__hgtu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"make_half2", {"make_half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__half2char_rz", {"__half2char_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__half2uchar_rz", {"__half2uchar_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // bfp16 functions {"__double2bfloat16", {"__double2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__float2bfloat16", {"__float2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, @@ -775,12 +778,16 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__low2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__short_as_bfloat16", 
{"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__ushort_as_bfloat16", {"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float22bfloat162_rn", {"__float22bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162char_rz", {"__bfloat162char_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uchar_rz", {"__bfloat162uchar_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"make_bfloat162", {"make_bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // atomic functions {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -945,6 +952,13 @@ const std::map CUDA_DEVICE_FUNCTION_VER_MAP { {"__hgeu2_mask", {CUDA_120, CUDA_0, CUDA_0 }}, {"__hltu2_mask", {CUDA_120, CUDA_0, CUDA_0 }}, {"__hgtu2_mask", {CUDA_120, CUDA_0, CUDA_0 }}, + {"__float22bfloat162_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162char_rz", {CUDA_122, CUDA_0, CUDA_0 }}, + {"__bfloat162uchar_rz", {CUDA_122, CUDA_0, CUDA_0 }}, + {"make_bfloat162", {CUDA_122, CUDA_0, CUDA_0 }}, + {"make_half2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"__half2char_rz", {CUDA_122, CUDA_0, CUDA_0 }}, + {"__half2uchar_rz", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_FUNCTION_VER_MAP { diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 0e7ed78c..1711d102 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -353,13 +353,13 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, // no analogue // NOTE: Not equal to cudaMallocMipmappedArray due to different signatures - {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, + {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}}, // no analogue // NOTE: Not equal to cudaFreeMipmappedArray due to different signatures - {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, + {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}}, // no analogue // NOTE: Not equal to cudaGetMipmappedArrayLevel due to different signatures - {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, + {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}}, // cudaArrayGetSparseProperties {"cuArrayGetSparseProperties", {"hipArrayGetSparseProperties", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, // cudaArrayGetPlane @@ -431,8 +431,12 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 17. 
Unified Addressing // cudaMemAdvise {"cuMemAdvise", {"hipMemAdvise", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, - // TODO: double check cudaMemPrefetchAsync + // cudaMemAdvise_v2 + {"cuMemAdvise_v2", {"hipMemAdvise_v2", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED, HIP_UNSUPPORTED}}, + // cudaMemPrefetchAsync {"cuMemPrefetchAsync", {"hipMemPrefetchAsync", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, + // cudaMemPrefetchAsync_v2 + {"cuMemPrefetchAsync_v2", {"hipMemPrefetchAsync_v2", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED, HIP_UNSUPPORTED}}, // cudaMemRangeGetAttribute {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, // cudaMemRangeGetAttributes @@ -782,6 +786,12 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphExecGetFlags {"cuGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cudaGraphAddNode + {"cuGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cudaGraphNodeSetParams + {"cuGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cudaGraphExecNodeSetParams + {"cuGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // 25. Occupancy // cudaOccupancyAvailableDynamicSMemPerBlock @@ -1384,6 +1394,11 @@ const std::map CUDA_DRIVER_FUNCTION_VER_MAP { {"cuCoredumpGetAttributeGlobal", {CUDA_121, CUDA_0, CUDA_0 }}, {"cuCoredumpSetAttribute", {CUDA_121, CUDA_0, CUDA_0 }}, {"cuCoredumpSetAttributeGlobal", {CUDA_121, CUDA_0, CUDA_0 }}, + {"cuMemPrefetchAsync_v2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cuMemAdvise_v2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cuGraphAddNode", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cuGraphNodeSetParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cuGraphExecNodeSetParams", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_FUNCTION_VER_MAP { @@ -1443,9 +1458,9 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hipMemsetD32Async", {HIP_2030, HIP_0, HIP_0 }}, {"hipMemsetD8", {HIP_1060, HIP_0, HIP_0 }}, {"hipMemsetD8Async", {HIP_3000, HIP_0, HIP_0 }}, - {"hipMipmappedArrayCreate", {HIP_3050, HIP_0, HIP_0 }}, - {"hipMipmappedArrayDestroy", {HIP_3050, HIP_0, HIP_0 }}, - {"hipMipmappedArrayGetLevel", {HIP_3050, HIP_0, HIP_0 }}, + {"hipMipmappedArrayCreate", {HIP_3050, HIP_5070, HIP_0 }}, + {"hipMipmappedArrayDestroy", {HIP_3050, HIP_5070, HIP_0 }}, + {"hipMipmappedArrayGetLevel", {HIP_3050, HIP_5070, HIP_0 }}, {"hipFuncGetAttribute", {HIP_2080, HIP_0, HIP_0 }}, {"hipModuleLaunchKernel", {HIP_1060, HIP_0, HIP_0 }}, {"hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", {HIP_3050, HIP_0, HIP_0 }}, diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index 479db2c0..15e8748f 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -74,12 +74,14 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_HOST_NODE_PARAMS", {"hipHostNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, {"CUDA_HOST_NODE_PARAMS_v1", {"hipHostNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, {"CUDA_HOST_NODE_PARAMS_v2_st", {"hipHostNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaHostNodeParamsV2 {"CUDA_HOST_NODE_PARAMS_v2", {"hipHostNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, 
HIP_UNSUPPORTED}}, // cudaKernelNodeParams {"CUDA_KERNEL_NODE_PARAMS_st", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, {"CUDA_KERNEL_NODE_PARAMS", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, {"CUDA_KERNEL_NODE_PARAMS_v1", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, + // cudaKernelNodeParamsV2 {"CUDA_KERNEL_NODE_PARAMS_v2_st", {"hipKernelNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_KERNEL_NODE_PARAMS_v2", {"hipKernelNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_KERNEL_NODE_PARAMS_v3_st", {"hipKernelNodeParams_v3", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -109,13 +111,15 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_MEMCPY3D_PEER_v1", {"hip_Memcpy3D_Peer", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_MEMCPY_NODE_PARAMS_st", {"hiMemcpyNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaMemcpyNodeParams {"CUDA_MEMCPY_NODE_PARAMS", {"hiMemcpyNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // cudaMemsetParams {"CUDA_MEMSET_NODE_PARAMS_st", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, + // cudaMemsetParams {"CUDA_MEMSET_NODE_PARAMS", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, {"CUDA_MEMSET_NODE_PARAMS_v1", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, {"CUDA_MEMSET_NODE_PARAMS_v2_st", {"hipMemsetParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaMemsetParamsV2 {"CUDA_MEMSET_NODE_PARAMS_v2", {"hipMemsetParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st", {"HIP_POINTER_ATTRIBUTE_P2P_TOKENS", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -277,6 +281,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st", {"hipExternalSemaphoreSignalNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS", {"hipExternalSemaphoreSignalNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1", {"hipExternalSemaphoreSignalNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaExternalSemaphoreSignalNodeParamsV2 {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st", {"hipExternalSemaphoreSignalNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2", {"hipExternalSemaphoreSignalNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -284,6 +289,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_st", {"hipExternalSemaphoreWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_EXT_SEM_WAIT_NODE_PARAMS", {"hipExternalSemaphoreWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1", {"hipExternalSemaphoreWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaExternalSemaphoreWaitNodeParamsV2 {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st", {"hipExternalSemaphoreWaitNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2", {"hipExternalSemaphoreWaitNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -315,35 
+321,32 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // cudaMemAllocNodeParams {"CUDA_MEM_ALLOC_NODE_PARAMS_st", {"hipMemAllocNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, CUDA_REMOVED}}, {"CUDA_MEM_ALLOC_NODE_PARAMS_v1_st", {"hipMemAllocNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, + // cudaMemAllocNodeParamsV2 {"CUDA_MEM_ALLOC_NODE_PARAMS_v2_st", {"hipMemAllocNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_MEM_ALLOC_NODE_PARAMS", {"hipMemAllocNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, {"CUDA_MEM_ALLOC_NODE_PARAMS_v1", {"hipMemAllocNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, {"CUDA_MEM_ALLOC_NODE_PARAMS_v2", {"hipMemAllocNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // + // cudaMemFreeNodeParams {"CUDA_MEM_FREE_NODE_PARAMS_st", {"hipMemFreeNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_MEM_FREE_NODE_PARAMS", {"hipMemFreeNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // + // cudaChildGraphNodeParams {"CUDA_CHILD_GRAPH_NODE_PARAMS_st", {"hipChildGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_CHILD_GRAPH_NODE_PARAMS", {"hipChildGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // + // cudaEventRecordNodeParams {"CUDA_EVENT_RECORD_NODE_PARAMS_st", {"hipEventRecordNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_EVENT_RECORD_NODE_PARAMS", {"hipEventRecordNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // + // cudaEventWaitNodeParams {"CUDA_EVENT_WAIT_NODE_PARAMS_st", {"hipEventWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_EVENT_WAIT_NODE_PARAMS", {"hipEventWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // + // cudaGraphNodeParams {"CUgraphNodeParams_st", {"hipGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUgraphNodeParams", {"hipGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // - {"CUdeviceNumaConfig_enum", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - {"CUdeviceNumaConfig", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // cudaArrayMemoryRequirements {"CUDA_ARRAY_MEMORY_REQUIREMENTS_st", {"hipArrayMemoryRequirements", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_ARRAY_MEMORY_REQUIREMENTS_v1", {"hipArrayMemoryRequirements", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -859,27 +862,27 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 120 // cudaDevAttrDeferredMappingCudaArraySupported {"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 121 - // + // cudaDevAttrReserved122 {"CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2", {"hipDeviceAttributeCanUse64BitStreamMemOpsV2", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}}, // 122 - // + // cudaDevAttrReserved123 {"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2", {"hipDeviceAttributeCanUseStreamWaitValueNorV2", 
"", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}}, // 123 - // + // cudaDevAttrReserved124 {"CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED", {"hipDeviceAttributeDmaBufSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 124 // cudaDevAttrIpcEventSupport {"CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED", {"hipDeviceAttributeIpcEventSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 125 // cudaDevAttrMemSyncDomainCount {"CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT", {"hipDeviceAttributeMemSyncDomainCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 126 - // + // cudaDevAttrReserved127 {"CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED", {"hipDeviceAttributeTensorMapAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 127 - // + // cudaDevAttrReserved129 {"CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS", {"hipDeviceAttributeUnifiedFunctionPointers", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 129 - // + // cudaDevAttrNumaConfig {"CU_DEVICE_ATTRIBUTE_NUMA_CONFIG", {"hipDeviceAttributeNumaConfig", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 130 - // + // cudaDevAttrNumaId {"CU_DEVICE_ATTRIBUTE_NUMA_ID", {"hipDeviceAttributeNumaId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 131 // {"CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED", {"hipDeviceAttributeMulticastSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 132 - // + // cudaDevAttrHostNumaId {"CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID", {"hipDeviceAttributeHostNumaId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 134 // cudaDevAttrMax {"CU_DEVICE_ATTRIBUTE_MAX", {"hipDeviceAttributeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -1457,14 +1460,14 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY", {"hipMemRangeAttributeAccessedBy", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 3 // cudaMemRangeAttributeLastPrefetchLocation {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION", {"hipMemRangeAttributeLastPrefetchLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 4 - // + // cudaMemRangeAttributePreferredLocationType {"CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE", {"hipMemRangeAttributePreferredLocationType", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 5 - // + // cudaMemRangeAttributePreferredLocationId {"CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID", {"hipMemRangeAttributePreferredLocationId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 6 - // - {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE", {"hipMemRangeAttributeLastPreferredLocationType", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7 - // - {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID", {"hipMemRangeAttributeLastPreferredLocationId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8 + // cudaMemRangeAttributeLastPrefetchLocationType + {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE", {"hipMemRangeAttributeLastPrefetchLocationType", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7 + // cudaMemRangeAttributeLastPrefetchLocationId + {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID", 
{"hipMemRangeAttributeLastPrefetchLocationId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8 // no analogue {"CUoccupancy_flags", {"hipOccupancyFlags", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -1998,11 +2001,11 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_MEM_LOCATION_TYPE_INVALID", {"hipMemLocationTypeInvalid", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 0x0 // cudaMemLocationTypeDevice {"CU_MEM_LOCATION_TYPE_DEVICE", {"hipMemLocationTypeDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 0x1 - // + // cudaMemLocationTypeHost {"CU_MEM_LOCATION_TYPE_HOST", {"hipMemLocationTypeHost", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x2 - // + // cudaMemLocationTypeHostNuma {"CU_MEM_LOCATION_TYPE_HOST_NUMA", {"hipMemLocationTypeHostNuma", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x3 - // + // cudaMemLocationTypeHostNumaCurrent {"CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT", {"hipMemLocationTypeHostNumaCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x4 // no analogue {"CU_MEM_LOCATION_TYPE_MAX", {"hipMemLocationTypeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x7FFFFFFF @@ -2534,6 +2537,15 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // {"CU_COREDUMP_MAX", {"HIP_COREDUMP_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaDeviceNumaConfig + {"CUdeviceNumaConfig", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + {"CUdeviceNumaConfig_enum", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUdeviceNumaConfig enum values + // cudaDeviceNumaConfigNone + {"CU_DEVICE_NUMA_CONFIG_NONE", {"hipDeviceNumaConfigNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaDeviceNumaConfigNumaNode + {"CU_DEVICE_NUMA_CONFIG_NUMA_NODE", {"hipDeviceNumaConfigNumaNode", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // 4. 
Typedefs // no analogue @@ -3512,6 +3524,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUgraphNodeParams", {CUDA_122, CUDA_0, CUDA_0 }}, {"CUdeviceNumaConfig_enum", {CUDA_122, CUDA_0, CUDA_0 }}, {"CUdeviceNumaConfig", {CUDA_122, CUDA_0, CUDA_0 }}, + {"CU_DEVICE_NUMA_CONFIG_NONE", {CUDA_122, CUDA_0, CUDA_0 }}, + {"CU_DEVICE_NUMA_CONFIG_NUMA_NODE", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index 3616b64f..7ab124ac 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -308,6 +308,8 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaMallocPitch", {"hipMallocPitch", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, // cuMemAdvise {"cudaMemAdvise", {"hipMemAdvise", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, + // cuMemAdvise_v2 + {"cudaMemAdvise_v2", {"hipMemAdvise_v2", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_UNSUPPORTED}}, // no analogue // NOTE: Not equal to cuMemcpy due to different signatures {"cudaMemcpy", {"hipMemcpy", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, @@ -358,8 +360,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaMemcpyToSymbolAsync", {"hipMemcpyToSymbolAsync", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, // cuMemGetInfo {"cudaMemGetInfo", {"hipMemGetInfo", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, - // TODO: double check cuMemPrefetchAsync + // cuMemPrefetchAsync {"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, + // cuMemPrefetchAsync_v2 + {"cudaMemPrefetchAsync_v2", {"hipMemPrefetchAsync_v2", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_UNSUPPORTED}}, // cuMemRangeGetAttribute {"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, // cuMemRangeGetAttributes @@ -836,6 +840,12 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // cuGraphExecGetFlags {"cudaGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cuGraphAddNode + {"cudaGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cuGraphNodeSetParams + {"cudaGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cuGraphExecNodeSetParams + {"cudaGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // 29. 
Driver Entry Point Access // cuGetProcAddress @@ -875,7 +885,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaBindTextureToArray", {"hipBindTextureToArray", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}}, // no analogue - {"cudaBindTextureToMipmappedArray", {"hipBindTextureToMipmappedArray", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, CUDA_REMOVED}}, + {"cudaBindTextureToMipmappedArray", {"hipBindTextureToMipmappedArray", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}}, // no analogue {"cudaGetTextureAlignmentOffset", {"hipGetTextureAlignmentOffset", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}}, // no analogue @@ -1110,6 +1120,11 @@ const std::map CUDA_RUNTIME_FUNCTION_VER_MAP { {"cudaGraphInstantiateWithParams", {CUDA_120, CUDA_0, CUDA_0 }}, {"cudaGraphExecGetFlags", {CUDA_120, CUDA_0, CUDA_0 }}, {"cudaGetKernel", {CUDA_121, CUDA_0, CUDA_0 }}, + {"cudaMemPrefetchAsync_v2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemAdvise_v2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaGraphAddNode", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaGraphNodeSetParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaGraphExecNodeSetParams", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_RUNTIME_FUNCTION_VER_MAP { @@ -1230,7 +1245,7 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP { {"hipBindTexture", {HIP_1060, HIP_3080, HIP_0 }}, {"hipBindTexture2D", {HIP_1070, HIP_3080, HIP_0 }}, {"hipBindTextureToArray", {HIP_1060, HIP_3080, HIP_0 }}, - {"hipBindTextureToMipmappedArray", {HIP_1070, HIP_0, HIP_0 }}, + {"hipBindTextureToMipmappedArray", {HIP_1070, HIP_5070, HIP_0 }}, {"hipCreateChannelDesc", {HIP_1060, HIP_0, HIP_0 }}, {"hipGetChannelDesc", {HIP_1070, HIP_0, HIP_0 }}, {"hipGetTextureAlignmentOffset", {HIP_1090, HIP_3080, HIP_0 }}, diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp index d66849e6..9428a1eb 100644 --- a/src/CUDA2HIP_Runtime_API_types.cpp +++ b/src/CUDA2HIP_Runtime_API_types.cpp @@ -70,6 +70,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUDA_HOST_NODE_PARAMS {"cudaHostNodeParams", {"hipHostNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, + // CUDA_HOST_NODE_PARAMS_v2 + {"cudaHostNodeParamsV2", {"hipHostNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // CUipcEventHandle {"cudaIpcEventHandle_t", {"hipIpcEventHandle_t", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, @@ -83,6 +85,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUDA_KERNEL_NODE_PARAMS {"cudaKernelNodeParams", {"hipKernelNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, + // CUDA_KERNEL_NODE_PARAMS_v2_st + {"cudaKernelNodeParamsV2", {"hipKernelNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // no analogue // CUDA_LAUNCH_PARAMS struct differs @@ -97,6 +101,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUDA_MEMSET_NODE_PARAMS {"cudaMemsetParams", {"hipMemsetParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, + // CUDA_MEMSET_NODE_PARAMS_v2 + {"cudaMemsetParamsV2", {"hipMemsetParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // no analogue {"cudaPitchedPtr", {"hipPitchedPtr", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, @@ -210,12 +216,33 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st {"cudaExternalSemaphoreSignalNodeParams", {"hipExternalSemaphoreSignalNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, 
HIP_UNSUPPORTED}}, + // CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st + {"cudaExternalSemaphoreSignalNodeParamsV2", {"hipExternalSemaphoreSignalNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // CUDA_EXT_SEM_WAIT_NODE_PARAMS_st {"cudaExternalSemaphoreWaitNodeParams", {"hipExternalSemaphoreWaitNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st + {"cudaExternalSemaphoreWaitNodeParamsV2", {"hipExternalSemaphoreWaitNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // CUDA_MEM_ALLOC_NODE_PARAMS_st {"cudaMemAllocNodeParams", {"hipMemAllocNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, + // CUDA_MEM_ALLOC_NODE_PARAMS_v2_st + {"cudaMemAllocNodeParamsV2", {"hipMemAllocNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + + // CUDA_MEM_FREE_NODE_PARAMS_st + {"cudaMemFreeNodeParams", {"hipMemFreeNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + + // CUDA_CHILD_GRAPH_NODE_PARAMS_st + {"cudaChildGraphNodeParams", {"hipChildGraphNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + + // CUDA_EVENT_RECORD_NODE_PARAMS_st + {"cudaEventRecordNodeParams", {"hipEventRecordNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + + // CUDA_EVENT_WAIT_NODE_PARAMS_st + {"cudaEventWaitNodeParams", {"hipEventWaitNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + + // CUgraphNodeParams_st + {"cudaGraphNodeParams", {"hipGraphNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // CUDA_ARRAY_MEMORY_REQUIREMENTS_st {"cudaArrayMemoryRequirements", {"hipArrayMemoryRequirements", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -230,6 +257,9 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUkernel {"cudaKernel_t", {"hipKernel", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUDA_MEMCPY_NODE_PARAMS + {"cudaMemcpyNodeParams", {"hiMemcpyNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // 2. 
Unions // CUstreamAttrValue @@ -586,11 +616,11 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaDevAttrClusterLaunch", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 120 // CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED {"cudaDevAttrDeferredMappingCudaArraySupported", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 121 - // + // CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2 {"cudaDevAttrReserved122", {"hipDevAttrReserved122", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 122 - // + // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2 {"cudaDevAttrReserved123", {"hipDevAttrReserved123", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 123 - // + // CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED {"cudaDevAttrReserved124", {"hipDevAttrReserved124", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 124 // CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED {"cudaDevAttrIpcEventSupport", {"hipDevAttrIpcEventSupport", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 125 @@ -598,12 +628,18 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaDevAttrMemSyncDomainCount", {"hipDevAttrMemSyncDomainCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 126 // CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED {"cudaDevAttrReserved127", {"hipDeviceAttributeTensorMapAccessSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 127 - // + // CUDA only {"cudaDevAttrReserved128", {"hipDevAttrReserved128", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 128 // CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS {"cudaDevAttrReserved129", {"hipDeviceAttributeUnifiedFunctionPointers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 129 + // CU_DEVICE_ATTRIBUTE_NUMA_CONFIG + {"cudaDevAttrNumaConfig", {"hipDeviceAttributeNumaConfig", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 130 + // CU_DEVICE_ATTRIBUTE_NUMA_ID + {"cudaDevAttrNumaId", {"hipDeviceAttributeNumaId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 131 // CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED {"cudaDevAttrReserved132", {"hipDeviceAttributeMulticastSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 132 + // CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID + {"cudaDevAttrHostNumaId", {"hipDeviceAttributeHostNumaId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 134 // CU_DEVICE_ATTRIBUTE_MAX {"cudaDevAttrMax", {"hipDeviceAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -1292,6 +1328,14 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaMemRangeAttributeAccessedBy", {"hipMemRangeAttributeAccessedBy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 3 // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION {"cudaMemRangeAttributeLastPrefetchLocation", {"hipMemRangeAttributeLastPrefetchLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 4 + // CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE + {"cudaMemRangeAttributePreferredLocationType", {"hipMemRangeAttributePreferredLocationType", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 5 + // 
CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID + {"cudaMemRangeAttributePreferredLocationId", {"hipMemRangeAttributePreferredLocationId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 6 + // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE + {"cudaMemRangeAttributeLastPrefetchLocationType", {"hipMemRangeAttributeLastPrefetchLocationType", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7 + // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID + {"cudaMemRangeAttributeLastPrefetchLocationId", {"hipMemRangeAttributeLastPrefetchLocationId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8 // no analogue {"cudaOutputMode", {"hipOutputMode", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}}, @@ -1611,6 +1655,12 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaMemLocationTypeInvalid", {"hipMemLocationTypeInvalid", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 0 // CU_MEM_LOCATION_TYPE_DEVICE {"cudaMemLocationTypeDevice", {"hipMemLocationTypeDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 1 + // CU_MEM_LOCATION_TYPE_HOST + {"cudaMemLocationTypeHost", {"hipMemLocationTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 2 + // CU_MEM_LOCATION_TYPE_HOST_NUMA + {"cudaMemLocationTypeHostNuma", {"hipMemLocationTypeHostNuma", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 3 + // CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT + {"cudaMemLocationTypeHostNumaCurrent", {"hipMemLocationTypeHostNumaCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 4 // CUmemAllocationType {"cudaMemAllocationType", {"hipMemAllocationType", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, @@ -1819,6 +1869,14 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE {"cudaLaunchMemSyncDomainRemote", {"hipLaunchMemSyncDomainRemote", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUdeviceNumaConfig + {"cudaDeviceNumaConfig", {"hipDeviceNumaConfig", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaDeviceNumaConfig enum values + // CU_DEVICE_NUMA_CONFIG_NONE + {"cudaDeviceNumaConfigNone", {"hipDeviceNumaConfigNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CU_DEVICE_NUMA_CONFIG_NUMA_NODE + {"cudaDeviceNumaConfigNumaNode", {"hipDeviceNumaConfigNumaNode", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // 4. 
Typedefs // CUhostFn @@ -2508,6 +2566,31 @@ const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP {"cudaDevAttrReserved132", {CUDA_121, CUDA_0, CUDA_0 }}, {"CUkern_st", {CUDA_121, CUDA_0, CUDA_0 }}, {"cudaKernel_t", {CUDA_121, CUDA_0, CUDA_0 }}, + {"cudaMemcpyNodeParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemsetParamsV2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaHostNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemRangeAttributePreferredLocationType", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemRangeAttributePreferredLocationId", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemRangeAttributeLastPrefetchLocationType", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemRangeAttributeLastPrefetchLocationId", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaDevAttrNumaConfig", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaDevAttrNumaId", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaDevAttrHostNumaId", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemLocationTypeHost", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemLocationTypeHostNuma", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemLocationTypeHostNumaCurrent", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemAllocNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemFreeNodeParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaKernelNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaExternalSemaphoreSignalNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaExternalSemaphoreWaitNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaChildGraphNodeParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaEventRecordNodeParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaEventWaitNodeParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaGraphNodeParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaDeviceNumaConfig", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaDeviceNumaConfigNone", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaDeviceNumaConfigNumaNode", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP { diff --git a/src/CUDA2HIP_SPARSE_API_functions.cpp b/src/CUDA2HIP_SPARSE_API_functions.cpp index b2a24b97..70562a19 100644 --- a/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -551,7 +551,7 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseCcsr2gebsr", {"hipsparseCcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, {"cusparseZcsr2gebsr", {"hipsparseZcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseXcoo2csr", {"hipsparseXcoo2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseXcoo2csr", {"hipsparseXcoo2csr", "rocsparse_coo2csr", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseScsc2dense", {"hipsparseScsc2dense", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseDcsc2dense", {"hipsparseDcsc2dense", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, @@ -615,10 +615,14 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseChyb2csc", {"hipsparseChyb2csc", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseZhyb2csc", {"hipsparseZhyb2csc", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, - {"cusparseShyb2csr", {"hipsparseShyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}}, - {"cusparseDhyb2csr", {"hipsparseDhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}}, - {"cusparseChyb2csr", {"hipsparseChyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}}, - 
{"cusparseZhyb2csr", {"hipsparseZhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_shyb2csr has one additioanl attribute void* temp_buffer; see hipsparseShyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_shyb2csr + {"cusparseShyb2csr", {"hipsparseShyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_dhyb2csr has one additioanl attribute void* temp_buffer; see hipsparseDhyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_dhyb2csr + {"cusparseDhyb2csr", {"hipsparseDhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_chyb2csr has one additioanl attribute void* temp_buffer; see hipsparseChyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_chyb2csr + {"cusparseChyb2csr", {"hipsparseChyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_zhyb2csr has one additioanl attribute void* temp_buffer; see hipsparseZhyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_zhyb2csr + {"cusparseZhyb2csr", {"hipsparseZhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseShyb2dense", {"hipsparseShyb2dense", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseDhyb2dense", {"hipsparseDhyb2dense", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, @@ -630,17 +634,17 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseCnnz", {"hipsparseCnnz", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, {"cusparseZnnz", {"hipsparseZnnz", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", "rocsparse_create_identity_permutation", CONV_LIB_FUNC, API_SPARSE, 14}}, - {"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", "rocsparse_coosort_buffer_size", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseXcoosortByRow", {"hipsparseXcoosortByRow", "rocsparse_coosort_by_row", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseXcoosortByColumn", {"hipsparseXcoosortByColumn", "rocsparse_coosort_by_column", CONV_LIB_FUNC, API_SPARSE, 14}}, - {"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseXcsrsort", {"hipsparseXcsrsort", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", "rocsparse_csrsort_buffer_size", CONV_LIB_FUNC, API_SPARSE, 14}}, + {"cusparseXcsrsort", {"hipsparseXcsrsort", "rocsparse_csrsort", CONV_LIB_FUNC, API_SPARSE, 14}}, - {"cusparseXcscsort_bufferSizeExt", {"hipsparseXcscsort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseXcscsort", {"hipsparseXcscsort", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseXcscsort_bufferSizeExt", {"hipsparseXcscsort_bufferSizeExt", "rocsparse_cscsort_buffer_size", CONV_LIB_FUNC, API_SPARSE, 14}}, + {"cusparseXcscsort", {"hipsparseXcscsort", 
"rocsparse_cscsort", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseCreateCsru2csrInfo", {"hipsparseCreateCsru2csrInfo", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, {"cusparseDestroyCsru2csrInfo", {"hipsparseDestroyCsru2csrInfo", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, @@ -698,7 +702,7 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseHpruneCsr2csrByPercentage", {"hipsparseHpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED}}, {"cusparseSpruneCsr2csrByPercentage", {"hipsparseSpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrByPercentage", {"hipsparseDpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseDpruneCsr2csrByPercentage", {"hipsparseDpruneCsr2csrByPercentage", "rocsparse_dprune_csr2csr_by_percentage", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseHpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseHpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED}}, {"cusparseSpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseSpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, @@ -1895,6 +1899,14 @@ const std::map HIP_SPARSE_FUNCTION_VER_MAP { {"rocsparse_sbsr2csr", {HIP_3100, HIP_0, HIP_0 }}, {"rocsparse_coosort_by_column", {HIP_1090, HIP_0, HIP_0 }}, {"rocsparse_coosort_by_row", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_coosort_buffer_size", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_cscsort", {HIP_2100, HIP_0, HIP_0 }}, + {"rocsparse_cscsort_buffer_size", {HIP_2100, HIP_0, HIP_0 }}, + {"rocsparse_csrsort", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_csrsort_buffer_size", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_create_identity_permutation", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_coo2csr", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_dprune_csr2csr_by_percentage", {HIP_3090, HIP_0, HIP_0 }}, }; const std::map CUDA_SPARSE_API_SECTION_MAP { diff --git a/src/CUDA2HIP_SPARSE_API_types.cpp b/src/CUDA2HIP_SPARSE_API_types.cpp index 3ae22cf2..233912cd 100644 --- a/src/CUDA2HIP_SPARSE_API_types.cpp +++ b/src/CUDA2HIP_SPARSE_API_types.cpp @@ -68,8 +68,8 @@ const std::map CUDA_SPARSE_TYPE_NAME_MAP { {"cusparseColorInfo", {"hipsparseColorInfo", "_rocsparse_color_info", CONV_TYPE, API_SPARSE, 4, HIP_UNSUPPORTED}}, {"cusparseColorInfo_t", {"hipsparseColorInfo_t", "rocsparse_color_info", CONV_TYPE, API_SPARSE, 4}}, - {"pruneInfo", {"pruneInfo", "", CONV_TYPE, API_SPARSE, 4, UNSUPPORTED}}, - {"pruneInfo_t", {"pruneInfo_t", "", CONV_TYPE, API_SPARSE, 4, ROC_UNSUPPORTED}}, + {"pruneInfo", {"pruneInfo", "_rocsparse_mat_info", CONV_TYPE, API_SPARSE, 4}}, + {"pruneInfo_t", {"pruneInfo_t", "rocsparse_mat_info", CONV_TYPE, API_SPARSE, 4}}, {"cusparseSpMatDescr", {"hipsparseSpMatDescr", "_rocsparse_spmat_descr", CONV_TYPE, API_SPARSE, 4, HIP_UNSUPPORTED}}, {"cusparseSpMatDescr_t", {"hipsparseSpMatDescr_t", "rocsparse_spmat_descr", CONV_TYPE, API_SPARSE, 4}}, @@ -401,6 +401,7 @@ const std::map HIP_SPARSE_TYPE_NAME_VER_MAP { {"csrilu02Info_t", {HIP_1092, HIP_0, HIP_0 }}, {"bsrilu02Info_t", {HIP_3090, HIP_0, HIP_0 }}, {"csrgemm2Info_t", {HIP_2080, HIP_0, HIP_0 }}, + {"pruneInfo", {HIP_3090, HIP_0, HIP_0 }}, {"pruneInfo_t", {HIP_3090, HIP_0, HIP_0 }}, {"hipsparseAction_t", {HIP_1092, HIP_0, HIP_0 }}, {"HIPSPARSE_ACTION_SYMBOLIC", {HIP_1092, HIP_0, HIP_0 }}, @@ -631,4 +632,6 @@ const std::map HIP_SPARSE_TYPE_NAME_VER_MAP { {"rocsparse_dense_to_sparse_alg_default", {HIP_4010, HIP_0, HIP_0 }}, 
{"rocsparse_spgemm_alg", {HIP_4010, HIP_0, HIP_0 }}, {"rocsparse_spgemm_alg_default", {HIP_4010, HIP_0, HIP_0 }}, + {"_rocsparse_mat_info", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_mat_info", {HIP_1090, HIP_0, HIP_0 }}, }; diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp index 7e252a9b..2c635d58 100644 --- a/src/HipifyAction.cpp +++ b/src/HipifyAction.cpp @@ -1199,12 +1199,11 @@ class PPCallbackProxy : public clang::PPCallbacks { public: explicit PPCallbackProxy(HipifyAction &action): hipifyAction(action) {} - // [ToDo] Remove SWDEV_375013 related guards from CMakeLists.txt and HipifyAction.cpp along with the LLVM 16.0.0 official release void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token, StringRef file_name, bool is_angled, clang::CharSourceRange filename_range, #if LLVM_VERSION_MAJOR < 15 const clang::FileEntry *file, -#elif (LLVM_VERSION_MAJOR == 15) || (LLVM_VERSION_MAJOR == 16 && SWDEV_375013) +#elif LLVM_VERSION_MAJOR == 15 Optional file, #else clang::OptionalFileEntryRef file, diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index fa917ec3..982f1e74 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -208,7 +208,9 @@ int main() { float fresult = 0; float** fAarray = 0; + const float** const fAarray_const = const_cast(fAarray); float** fBarray = 0; + const float** const fBarray_const = const_cast(fBarray); float** fCarray = 0; float** fTauarray = 0; @@ -228,12 +230,16 @@ int main() { double dresult = 0; double** dAarray = 0; + const double** const dAarray_const = const_cast(dAarray); double** dBarray = 0; + const double** const dBarray_const = const_cast(dBarray); double** dCarray = 0; double** dTauarray = 0; void** voidAarray = nullptr; + const void** const voidAarray_const = const_cast(voidAarray); void** voidBarray = nullptr; + const void** const voidBarray_const = const_cast(voidBarray); void** voidCarray = nullptr; // NOTE: float CUBLASWINAPI cublasSnrm2(int n, const float* x, int incx) is not supported by HIP @@ -254,20 +260,28 @@ int main() { cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb; // CHECK: hipComplex** complexAarray = 0; + // CHECK: const hipComplex** const complexAarray_const = const_cast(complexAarray); // CHECK-NEXT: hipComplex** complexBarray = 0; + // CHECK: const hipComplex** const complexBarray_const = const_cast(complexBarray); // CHECK-NEXT: hipComplex** complexCarray = 0; // CHECK-NEXT: hipComplex** complexTauarray = 0; cuComplex** complexAarray = 0; + const cuComplex** const complexAarray_const = const_cast(complexAarray); cuComplex** complexBarray = 0; + const cuComplex** const complexBarray_const = const_cast(complexBarray); cuComplex** complexCarray = 0; cuComplex** complexTauarray = 0; // CHECK: hipDoubleComplex** dcomplexAarray = 0; + // CHECK: const hipDoubleComplex** const dcomplexAarray_const = const_cast(dcomplexAarray); // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; + // CHECK: const hipDoubleComplex** const dcomplexBarray_const = const_cast(dcomplexBarray); // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0; cuDoubleComplex** dcomplexAarray = 0; + const cuDoubleComplex** const dcomplexAarray_const = const_cast(dcomplexAarray); cuDoubleComplex** dcomplexBarray = 0; + const cuDoubleComplex** const dcomplexBarray_const 
 cuDoubleComplex** dcomplexCarray = 0;
 cuDoubleComplex** dcomplexTauarray = 0;
@@ -1041,13 +1055,13 @@ int main() {
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount);
 // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const AP[], int lda, const float* const BP[], int ldb, const float* beta, float* const CP[], int ldc, int batchCount);
- // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
- blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
+ // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
+ blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount);
 // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const AP[], int lda, const double* const BP[], int ldb, const double* beta, double* const CP[], int ldc, int batchCount);
- // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
- blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
+ // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
+ blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
 // TODO: __half -> hipblasHalf
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* const Aarray[], int lda, const __half* const Barray[], int ldb, const __half* beta, __half* const Carray[], int ldc, int batchCount);
@@ -1055,13 +1069,13 @@ int main() {
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount);
 // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const
hipblasComplex* const AP[], int lda, const hipblasComplex* const BP[], int ldb, const hipblasComplex* beta, hipblasComplex* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); - blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], int lda, const hipblasDoubleComplex* const BP[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); // NOTE: void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, const float* A, int lda, float beta, float* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); @@ -1267,63 +1281,63 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); - blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount); + blasStatus 
= cublasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); - blasStatus = cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount); + blasStatus = cublasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); - blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount); + blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); - blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount); + blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); - blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, 
ldb, &info, batchCount); + // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount); + blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); - blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount); + blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); - blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount); + blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); - blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount); + blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); - blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); - blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); - blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, 
blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); - blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount); @@ -1528,8 +1542,8 @@ int main() { #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); - // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, 
voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
 // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu
index dde02d90..21a984d3 100644
--- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu
+++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu
@@ -219,7 +219,9 @@ int main() {
 float fresult = 0;
 float** fAarray = 0;
+ const float** const fAarray_const = const_cast<const float**>(fAarray);
 float** fBarray = 0;
+ const float** const fBarray_const = const_cast<const float**>(fBarray);
 float** fCarray = 0;
 float** fTauarray = 0;
@@ -239,12 +241,16 @@ int main() {
 double dresult = 0;
 double** dAarray = 0;
+ const double** const dAarray_const = const_cast<const double**>(dAarray);
 double** dBarray = 0;
+ const double** const dBarray_const = const_cast<const double**>(dBarray);
 double** dCarray = 0;
 double** dTauarray = 0;
 void** voidAarray = nullptr;
+ const void** const voidAarray_const = const_cast<const void**>(voidAarray);
 void** voidBarray = nullptr;
+ const void** const voidBarray_const = const_cast<const void**>(voidBarray);
 void** voidCarray = nullptr;
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result);
@@ -267,20 +273,28 @@ int main() {
 cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb;
 // CHECK: hipComplex** complexAarray = 0;
+ // CHECK: const hipComplex** const complexAarray_const = const_cast<const hipComplex**>(complexAarray);
 // CHECK-NEXT: hipComplex** complexBarray = 0;
+ // CHECK: const hipComplex** const complexBarray_const = const_cast<const hipComplex**>(complexBarray);
 // CHECK-NEXT: hipComplex** complexCarray = 0;
 // CHECK-NEXT: hipComplex** complexTauarray = 0;
 cuComplex** complexAarray = 0;
+ const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray);
 cuComplex** complexBarray = 0;
+ const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray);
 cuComplex** complexCarray = 0;
 cuComplex** complexTauarray = 0;
 // CHECK: hipDoubleComplex** dcomplexAarray = 0;
+ // CHECK: const hipDoubleComplex** const dcomplexAarray_const = const_cast<const hipDoubleComplex**>(dcomplexAarray);
 // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0;
+ // CHECK: const hipDoubleComplex** const dcomplexBarray_const = const_cast<const hipDoubleComplex**>(dcomplexBarray);
 // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0;
 // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0;
 cuDoubleComplex** dcomplexAarray = 0;
+ const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray);
 cuDoubleComplex** dcomplexBarray = 0;
+ const cuDoubleComplex** const dcomplexBarray_const = const_cast<const cuDoubleComplex**>(dcomplexBarray);
 cuDoubleComplex** dcomplexCarray = 0;
 cuDoubleComplex** dcomplexTauarray = 0;
@@ -1188,13 +1202,13 @@ int main() {
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount);
 // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const AP[], int lda, const float* const BP[], int ldb, const float* beta, float* const CP[], int ldc, int batchCount);
- // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
- blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
+ // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
+ blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount);
 // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const AP[], int lda, const double* const BP[], int ldb, const double* beta, double* const CP[], int ldc, int batchCount);
- // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
- blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
+ // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
+ blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
 // TODO: __half -> hipblasHalf
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* const Aarray[], int lda, const __half* const Barray[], int ldb, const __half* beta, __half* const Carray[], int ldc, int batchCount);
@@ -1202,13 +1216,13 @@ int main() {
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount);
 // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t
transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const AP[], int lda, const hipblasComplex* const BP[], int ldb, const hipblasComplex* beta, hipblasComplex* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); - blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], int lda, const hipblasDoubleComplex* const BP[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* beta, float* CP, int ldc); @@ -1436,63 +1450,63 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); - blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); + // CHECK: 
blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount); + blasStatus = cublasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); - blasStatus = cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount); + blasStatus = cublasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); - blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount); + blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); - blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount); + blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, 
fBarray, ldb, &info, batchCount); - blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount); + blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); - blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount); + blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); - blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount); + blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); - blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount); + blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, 
dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); - blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); - blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); - blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, 
n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); - blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount); @@ -1697,8 +1711,8 @@ int main() { #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); - // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + 
blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
 // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu
index 7a73c9b2..7589371d 100644
--- a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu
+++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu
@@ -235,7 +235,9 @@ int main() {
 float fresult = 0;
 float** fAarray = 0;
+ const float** const fAarray_const = const_cast<const float**>(fAarray);
 float** fBarray = 0;
+ const float** const fBarray_const = const_cast<const float**>(fBarray);
 float** fCarray = 0;
 float** fTauarray = 0;
@@ -255,12 +257,16 @@ int main() {
 double dresult = 0;
 double** dAarray = 0;
+ const double** const dAarray_const = const_cast<const double**>(dAarray);
 double** dBarray = 0;
+ const double** const dBarray_const = const_cast<const double**>(dBarray);
 double** dCarray = 0;
 double** dTauarray = 0;
 void** voidAarray = nullptr;
+ const void** const voidAarray_const = const_cast<const void**>(voidAarray);
 void** voidBarray = nullptr;
+ const void** const voidBarray_const = const_cast<const void**>(voidBarray);
 void** voidCarray = nullptr;
 // TODO: #1281
@@ -283,20 +289,28 @@ int main() {
 cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb;
 // CHECK: rocblas_float_complex** complexAarray = 0;
+ // CHECK: const rocblas_float_complex** const complexAarray_const = const_cast<const rocblas_float_complex**>(complexAarray);
 // CHECK-NEXT: rocblas_float_complex** complexBarray = 0;
+ // CHECK: const rocblas_float_complex** const complexBarray_const = const_cast<const rocblas_float_complex**>(complexBarray);
 // CHECK-NEXT: rocblas_float_complex** complexCarray = 0;
 // CHECK-NEXT: rocblas_float_complex** complexTauarray = 0;
 cuComplex** complexAarray = 0;
+ const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray);
 cuComplex** complexBarray = 0;
+ const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray);
 cuComplex** complexCarray = 0;
 cuComplex** complexTauarray = 0;
 // CHECK: rocblas_double_complex** dcomplexAarray = 0;
+ // CHECK: const rocblas_double_complex** const dcomplexAarray_const = const_cast<const rocblas_double_complex**>(dcomplexAarray);
 // CHECK-NEXT: rocblas_double_complex** dcomplexBarray = 0;
+ // CHECK: const rocblas_double_complex** const dcomplexBarray_const = const_cast<const rocblas_double_complex**>(dcomplexBarray);
 // CHECK-NEXT: rocblas_double_complex** dcomplexCarray = 0;
 // CHECK-NEXT: rocblas_double_complex** dcomplexTauarray = 0;
 cuDoubleComplex** dcomplexAarray = 0;
+ const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray);
dcomplexAarray_const = const_cast(dcomplexAarray); cuDoubleComplex** dcomplexBarray = 0; + const cuDoubleComplex** const dcomplexBarray_const = const_cast(dcomplexBarray); cuDoubleComplex** dcomplexCarray = 0; cuDoubleComplex** dcomplexTauarray = 0; @@ -1194,14 +1208,14 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* const A[], rocblas_int lda, const float* const B[], rocblas_int ldb, const float* beta, float* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); - blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); + blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* const A[], rocblas_int lda, const double* const B[], rocblas_int ldb, const double* beta, double* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); - blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); + blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); // TODO: #1281 // TODO: __half -> rocblas_half @@ -1211,14 +1225,14 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, 
const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const B[], rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); - blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const B[], rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); // TODO: #1281 // NOTE: void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, const float* A, int lda, float beta, float* C, int ldc); is not supported by HIP @@ -1465,26 +1479,26 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* const A[], rocblas_int lda, float* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); - blasStatus = 
cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, double* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); - blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, rocblas_float_complex* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); - blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched(rocblas_handle handle, rocblas_side side, 
rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, rocblas_double_complex* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); - blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc); @@ -1651,14 +1665,31 @@ int main() { #if CUDA_VERSION >= 9000 // CHECK: rocblas_gemm_algo BLAS_GEMM_DEFAULT = rocblas_gemm_algo_standard; cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT; + + // CHECK: rocblas_math_mode blasMath; + // CHECK-NEXT: rocblas_math_mode BLAS_DEFAULT_MATH = rocblas_default_math; + // CHECK-NEXT: rocblas_math_mode BLAS_TF32_TENSOR_OP_MATH = rocblas_xf32_xdl_math_op; + cublasMath_t blasMath; + cublasMath_t BLAS_DEFAULT_MATH = CUBLAS_DEFAULT_MATH; + cublasMath_t BLAS_TF32_TENSOR_OP_MATH = CUBLAS_TF32_TENSOR_OP_MATH; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, cublasMath_t* mode); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_math_mode(rocblas_handle handle, rocblas_math_mode* math_mode); + // CHECK: blasStatus = rocblas_get_math_mode(blasHandle, &blasMath); + blasStatus = cublasGetMathMode(blasHandle, &blasMath); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, cublasMath_t mode); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_math_mode(rocblas_handle handle, rocblas_math_mode math_mode); + // CHECK: blasStatus = rocblas_set_math_mode(blasHandle, blasMath); + blasStatus = cublasSetMathMode(blasHandle, blasMath); #endif #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); - // CHECK: blasStatus = 
rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
- blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+ // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+ blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
// TODO: #1281
// CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
@@ -1686,6 +1717,11 @@ int main() {
// CHECK-NEXT: rocblas_datatype C_16BF = rocblas_datatype_bf16_c;
cublasDataType_t R_16BF = CUDA_R_16BF;
cublasDataType_t C_16BF = CUDA_C_16BF;
+
+ // CHECK: rocblas_computetype blasComputeType;
+ // CHECK-NEXT: rocblas_computetype BLAS_COMPUTE_32F = rocblas_compute_type_f32;
+ cublasComputeType_t blasComputeType;
+ cublasComputeType_t BLAS_COMPUTE_32F = CUBLAS_COMPUTE_32F;
#endif
#if CUDA_VERSION >= 11040 && CUBLAS_VERSION >= 11600
diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu
index eab649d8..c03a1f0f 100644
--- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu
+++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu
@@ -243,7 +243,9 @@ int main() {
float fresult = 0;
float** fAarray = 0;
+ const float** const fAarray_const = const_cast<const float**>(fAarray);
float** fBarray = 0;
+ const float** const fBarray_const = const_cast<const float**>(fBarray);
float** fCarray = 0;
float** fTauarray = 0;
@@ -263,12 +265,16 @@ int main() {
double dresult = 0;
double** dAarray = 0;
+ const double** const dAarray_const = const_cast<const double**>(dAarray);
double** dBarray = 0;
+ const double** const dBarray_const = const_cast<const double**>(dBarray);
double** dCarray = 0;
double** dTauarray = 0;
void** voidAarray = nullptr;
+ const void** const voidAarray_const = const_cast<const void**>(voidAarray);
void** voidBarray = nullptr;
+ const void** const voidBarray_const = const_cast<const void**>(voidBarray);
void** voidCarray = nullptr;
// TODO: #1281
@@ -293,20 +299,28 @@ int main() {
cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb;
// CHECK: rocblas_float_complex** complexAarray = 0;
+ // CHECK: const rocblas_float_complex** const complexAarray_const = const_cast<const rocblas_float_complex**>(complexAarray);
// CHECK-NEXT: rocblas_float_complex** complexBarray = 0;
+ // CHECK: const rocblas_float_complex** const complexBarray_const = const_cast<const rocblas_float_complex**>(complexBarray);
// CHECK-NEXT: rocblas_float_complex** complexCarray = 0;
// CHECK-NEXT: rocblas_float_complex** complexTauarray = 0;
cuComplex** complexAarray = 0;
+ const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray);
cuComplex** complexBarray = 0;
+ const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray);
cuComplex** complexCarray = 0;
cuComplex** complexTauarray = 0;
// CHECK: rocblas_double_complex** dcomplexAarray = 0;
+ // CHECK: const rocblas_double_complex** const dcomplexAarray_const = const_cast<const rocblas_double_complex**>(dcomplexAarray);
// CHECK-NEXT: rocblas_double_complex** dcomplexBarray = 0;
+ // CHECK: const rocblas_double_complex** const dcomplexBarray_const = const_cast<const rocblas_double_complex**>(dcomplexBarray);
// CHECK-NEXT: rocblas_double_complex** dcomplexCarray = 0;
// CHECK-NEXT: rocblas_double_complex** dcomplexTauarray = 0;
cuDoubleComplex** dcomplexAarray = 0;
+ const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray);
cuDoubleComplex** dcomplexBarray = 0;
+ const cuDoubleComplex** const dcomplexBarray_const = const_cast<const cuDoubleComplex**>(dcomplexBarray);
cuDoubleComplex** dcomplexCarray = 0;
cuDoubleComplex** dcomplexTauarray = 0;
@@ -1338,14 +1352,14 @@ int main() {
// TODO: #1281
// CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount);
// ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* const A[], rocblas_int lda, const float* const B[], rocblas_int ldb, const float* beta, float* const C[], rocblas_int ldc, rocblas_int batch_count);
- // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
- blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
+ // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
+ blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
// TODO: #1281
// CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount);
// ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* const A[], rocblas_int lda, const double* const B[], rocblas_int ldb, const double* beta, double* const C[], rocblas_int ldc, rocblas_int batch_count);
- // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
- blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
+ // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
+ blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
// TODO: #1281
// TODO: __half -> 
rocblas_half @@ -1355,14 +1369,14 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const B[], rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); - blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const B[], rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); @@ -1635,26 +1649,26 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, 
cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* const A[], rocblas_int lda, float* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); - blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, double* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); - blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, rocblas_float_complex* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); - blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, 
&complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, rocblas_double_complex* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); - blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc); @@ -1827,8 +1841,8 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); - // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + 
blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); diff --git a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu index b59e9adc..382bd209 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu @@ -125,6 +125,8 @@ int main() { int nnzb = 0; int innz = 0; int blockDim = 0; + int cscRowIndA = 0; + int cscColPtrA = 0; int csrRowPtrA = 0; int csrColIndA = 0; int ncolors = 0; @@ -157,6 +159,7 @@ int main() { void *indices = nullptr; void *values = nullptr; void *cooRowInd = nullptr; + int icooRowInd = 0; void *cscRowInd = nullptr; void *csrColInd = nullptr; void *cooColInd = nullptr; @@ -185,6 +188,7 @@ int main() { float ffractionToColor = 0.f; double bsrValA = 0.f; double csrValA = 0.f; + float fcsrValA = 0.f; double csrValC = 0.f; float csrSortedValA = 0.f; double dbsrSortedValA = 0.f; @@ -192,6 +196,9 @@ int main() { float fbsrSortedValA = 0.f; float fbsrSortedValC = 0.f; float fcsrSortedValC = 0.f; + double percentage = 0.f; + + pruneInfo_t prune_info; // CHECK: hipDoubleComplex dcomplex, dComplexbsrSortedValA, dComplexbsrSortedValC; cuDoubleComplex dcomplex, dComplexbsrSortedValA, dComplexbsrSortedValC; @@ -412,6 +419,41 @@ int main() { // CHECK: status_t = hipsparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer); status_t = cusparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer); + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cooRowsA, const int* cooColsA, size_t* pBufferSizeInBytes); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcoosort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* cooRows, const int* cooCols, size_t* pBufferSizeInBytes); + // CHECK: status_t = hipsparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize); + status_t = cusparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* cscColPtrA, int* cscRowIndA, int* P, void* pBuffer); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcscsort(hipsparseHandle_t handle, int m, int n, int nnz, const hipsparseMatDescr_t descrA, const int* cscColPtr, int* cscRowInd, int* P, void* pBuffer); + // CHECK: status_t = hipsparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer); + status_t = cusparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cscColPtrA, const int* cscRowIndA, 
size_t* pBufferSizeInBytes); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcscsort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* cscColPtr, const int* cscRowInd, size_t* pBufferSizeInBytes); + // CHECK: status_t = hipsparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize); + status_t = cusparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* csrRowPtrA, int* csrColIndA, int* P, void* pBuffer); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcsrsort(hipsparseHandle_t handle, int m, int n, int nnz, const hipsparseMatDescr_t descrA, const int* csrRowPtr, int* csrColInd, int* P, void* pBuffer); + // CHECK: status_t = hipsparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer); + status_t = cusparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtrA, const int* csrColIndA, size_t* pBufferSizeInBytes); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcsrsort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtr, const int* csrColInd, size_t* pBufferSizeInBytes); + // CHECK: status_t = hipsparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize); + status_t = cusparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int* p); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCreateIdentityPermutation(hipsparseHandle_t handle, int n, int* p); + // CHECK: status_t = hipsparseCreateIdentityPermutation(handle_t, n, P); + status_t = cusparseCreateIdentityPermutation(handle_t, n, P); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrSortedRowPtr, cusparseIndexBase_t idxBase); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcoo2csr(hipsparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrRowPtr, hipsparseIndexBase_t idxBase); + // CHECK: status_t = hipsparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t); + status_t = cusparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t); + #if CUDA_VERSION >= 8000 // CHECK: hipDataType dataType_t; // CHECK-NEXT: hipDataType dataType; @@ -419,6 +461,13 @@ int main() { cudaDataType dataType; #endif +#if CUDA_VERSION >= 9000 + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage(cusparseHandle_t handle, int m, int n, int nnzA, const cusparseMatDescr_t descrA, const double* csrSortedValA, const int* csrSortedRowPtrA, const int* csrSortedColIndA, float percentage, const cusparseMatDescr_t descrC, double* csrSortedValC, const int* csrSortedRowPtrC, int* csrSortedColIndC, pruneInfo_t info, void* pBuffer); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDpruneCsr2csrByPercentage(hipsparseHandle_t handle, int m, int n, int nnzA, const hipsparseMatDescr_t descrA, const double* csrValA, const int* csrRowPtrA, const int* csrColIndA, double percentage, const hipsparseMatDescr_t descrC, double* csrValC, const int* csrRowPtrC, int* csrColIndC, 
pruneInfo_t info, void* buffer); + // CHECK: status_t = hipsparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer); + status_t = cusparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer); +#endif + #if CUDA_VERSION >= 8000 && CUDA_VERSION < 12000 // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src); // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCopyMatDescr(hipsparseMatDescr_t dest, const hipsparseMatDescr_t src); @@ -426,7 +475,7 @@ int main() { status_t = cusparseCopyMatDescr(matDescr_t, matDescr_t_2); #endif -#if CUDA_VERSION >= 10010 +#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: hipsparseSpMatDescr_t spMatDescr_t, matC; cusparseSpMatDescr_t spMatDescr_t, matC; @@ -471,11 +520,6 @@ int main() { // CHECK: hipsparseSpMMAlg_t spMMAlg_t; cusparseSpMMAlg_t spMMAlg_t; - // CHECK: hipsparseCsr2CscAlg_t Csr2CscAlg_t; - // CHECK-NEXT: hipsparseCsr2CscAlg_t CSR2CSC_ALG1 = HIPSPARSE_CSR2CSC_ALG1; - cusparseCsr2CscAlg_t Csr2CscAlg_t; - cusparseCsr2CscAlg_t CSR2CSC_ALG1 = CUSPARSE_CSR2CSC_ALG1; - // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateCoo(cusparseSpMatDescr_t* spMatDescr, int64_t ows, int64_t cols, int64_t nnz, void* cooRowInd, void* cooColInd, void* cooValues, cusparseIndexType_t cooIdxType, cusparseIndexBase_t idxBase, cudaDataType valueType); // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCreateCoo(hipsparseSpMatDescr_t* spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void* cooRowInd, void* cooColInd, void* cooValues, hipsparseIndexType_t cooIdxType, hipsparseIndexBase_t idxBase, hipDataType valueType); // CHECK: status_t = hipsparseCreateCoo(&spMatDescr_t, rows, cols, nnz, cooRowInd, cooColInd, cooValues, indexType_t, indexBase_t, dataType); @@ -527,28 +571,34 @@ int main() { status_t = cusparseDnMatSetStridedBatch(dnMatDescr_t, batchCount, batchStride); #endif -#if CUDA_VERSION >= 10010 && CUDA_VERSION < 12000 +#if CUDA_VERSION >= 10010 + // CHECK: hipsparseCsr2CscAlg_t Csr2CscAlg_t; + // CHECK-NEXT: hipsparseCsr2CscAlg_t CSR2CSC_ALG1 = HIPSPARSE_CSR2CSC_ALG1; + cusparseCsr2CscAlg_t Csr2CscAlg_t; + cusparseCsr2CscAlg_t CSR2CSC_ALG1 = CUSPARSE_CSR2CSC_ALG1; +#endif + +#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) // CHECK: hipsparseSpMMAlg_t COOMM_ALG1 = HIPSPARSE_COOMM_ALG1; // CHECK-NEXT: hipsparseSpMMAlg_t COOMM_ALG2 = HIPSPARSE_COOMM_ALG2; // CHECK-NEXT: hipsparseSpMMAlg_t COOMM_ALG3 = HIPSPARSE_COOMM_ALG3; cusparseSpMMAlg_t COOMM_ALG1 = CUSPARSE_COOMM_ALG1; cusparseSpMMAlg_t COOMM_ALG2 = CUSPARSE_COOMM_ALG2; cusparseSpMMAlg_t COOMM_ALG3 = CUSPARSE_COOMM_ALG3; +#endif +#if CUDA_VERSION >= 10010 && CUDA_VERSION < 12000 // CHECK: hipsparseCsr2CscAlg_t CSR2CSC_ALG2 = HIPSPARSE_CSR2CSC_ALG2; cusparseCsr2CscAlg_t CSR2CSC_ALG2 = CUSPARSE_CSR2CSC_ALG2; #endif -#if CUDA_VERSION >= 10020 +#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: hipsparseSpVecDescr_t spVecDescr_t; cusparseSpVecDescr_t spVecDescr_t; // CHECK: hipsparseDnVecDescr_t dnVecDescr_t, vecX, vecY; cusparseDnVecDescr_t dnVecDescr_t, vecX, vecY; - // CHECK: hipsparseStatus_t STATUS_NOT_SUPPORTED = 
HIPSPARSE_STATUS_NOT_SUPPORTED; - cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED; - // CHECK: hipsparseSpMVAlg_t spMVAlg_t; cusparseSpMVAlg_t spMVAlg_t; @@ -648,7 +698,12 @@ int main() { status_t = cusparseSpMV(handle_t, opA, alpha, spMatDescr_t, vecX, beta, vecY, dataType, spMVAlg_t, tempBuffer); #endif -#if CUDA_VERSION >= 10020 && CUDA_VERSION < 12000 +#if CUDA_VERSION >= 10020 + // CHECK: hipsparseStatus_t STATUS_NOT_SUPPORTED = HIPSPARSE_STATUS_NOT_SUPPORTED; + cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED; +#endif + +#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) // CHECK: hipsparseFormat_t FORMAT_COO_AOS = HIPSPARSE_FORMAT_COO_AOS; cusparseFormat_t FORMAT_COO_AOS = CUSPARSE_FORMAT_COO_AOS; @@ -772,6 +827,26 @@ int main() { // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDestroyHybMat(hipsparseHybMat_t hybA); // CHECK: status_t = hipsparseDestroyHybMat(hybMat_t); status_t = cusparseDestroyHybMat(hybMat_t); + + // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseZhyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, cuDoubleComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseZhyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, hipDoubleComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // CHECK: status_t = hipsparseZhyb2csr(handle_t, matDescr_t, hybMat_t, &dComplexbsrSortedValA, &csrRowPtrA, &csrColIndA); + status_t = cusparseZhyb2csr(handle_t, matDescr_t, hybMat_t, &dComplexbsrSortedValA, &csrRowPtrA, &csrColIndA); + + // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseChyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, cuComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseChyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, hipComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // CHECK: status_t = hipsparseChyb2csr(handle_t, matDescr_t, hybMat_t, &complex, &csrRowPtrA, &csrColIndA); + status_t = cusparseChyb2csr(handle_t, matDescr_t, hybMat_t, &complex, &csrRowPtrA, &csrColIndA); + + // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseDhyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, double* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDhyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, double* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // CHECK: status_t = hipsparseDhyb2csr(handle_t, matDescr_t, hybMat_t, &csrValA, &csrRowPtrA, &csrColIndA); + status_t = cusparseDhyb2csr(handle_t, matDescr_t, hybMat_t, &csrValA, &csrRowPtrA, &csrColIndA); + + // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseShyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, float* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseShyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, float* csrSortedValA, int* csrSortedRowPtrA, int* 
csrSortedColIndA); + // CHECK: status_t = hipsparseShyb2csr(handle_t, matDescr_t, hybMat_t, &fcsrValA, &csrRowPtrA, &csrColIndA); + status_t = cusparseShyb2csr(handle_t, matDescr_t, hybMat_t, &fcsrValA, &csrRowPtrA, &csrColIndA); #endif #if CUDA_VERSION >= 11010 && CUSPARSE_VERSION >= 11300 diff --git a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu index 14a3dc10..9c484183 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu @@ -125,6 +125,8 @@ int main() { int nnzb = 0; int innz = 0; int blockDim = 0; + int cscRowIndA = 0; + int cscColPtrA = 0; int csrRowPtrA = 0; int csrColIndA = 0; int ncolors = 0; @@ -157,6 +159,7 @@ int main() { void *indices = nullptr; void *values = nullptr; void *cooRowInd = nullptr; + int icooRowInd = 0; void *cscRowInd = nullptr; void *csrColInd = nullptr; void *cooColInd = nullptr; @@ -192,6 +195,10 @@ int main() { float fbsrSortedValA = 0.f; float fbsrSortedValC = 0.f; float fcsrSortedValC = 0.f; + double percentage = 0.f; + + // CHECK: rocsparse_mat_info prune_info; + pruneInfo_t prune_info; // TODO: should be rocsparse_double_complex // TODO: add to TypeOverloads cuDoubleComplex -> rocsparse_double_complex under a new option --sparse @@ -416,6 +423,41 @@ int main() { // CHECK: status_t = rocsparse_coosort_by_row(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer); status_t = cusparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer); + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cooRowsA, const int* cooColsA, size_t* pBufferSizeInBytes); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_coosort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* coo_row_ind, const rocsparse_int* coo_col_ind, size_t* buffer_size); + // CHECK: status_t = rocsparse_coosort_buffer_size(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize); + status_t = cusparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* cscColPtrA, int* cscRowIndA, int* P, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cscsort(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_mat_descr descr, const rocsparse_int* csc_col_ptr, rocsparse_int* csc_row_ind, rocsparse_int* perm, void* temp_buffer); + // CHECK: status_t = rocsparse_cscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer); + status_t = cusparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cscColPtrA, const int* cscRowIndA, size_t* pBufferSizeInBytes); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cscsort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* csc_col_ptr, const rocsparse_int* csc_row_ind, size_t* buffer_size); + // CHECK: status_t = rocsparse_cscsort_buffer_size(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize); + status_t = cusparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, 
&cscRowIndA, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* csrRowPtrA, int* csrColIndA, int* P, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_csrsort(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_mat_descr descr, const rocsparse_int* csr_row_ptr, rocsparse_int* csr_col_ind, rocsparse_int* perm, void* temp_buffer); + // CHECK: status_t = rocsparse_csrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer); + status_t = cusparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtrA, const int* csrColIndA, size_t* pBufferSizeInBytes); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_csrsort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, size_t* buffer_size); + // CHECK: status_t = rocsparse_csrsort_buffer_size(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize); + status_t = cusparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int* p); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_create_identity_permutation(rocsparse_handle handle, rocsparse_int n, rocsparse_int* p); + // CHECK: status_t = rocsparse_create_identity_permutation(handle_t, n, P); + status_t = cusparseCreateIdentityPermutation(handle_t, n, P); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrSortedRowPtr, cusparseIndexBase_t idxBase); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_coo2csr(rocsparse_handle handle, const rocsparse_int* coo_row_ind, rocsparse_int nnz, rocsparse_int m, rocsparse_int* csr_row_ptr, rocsparse_index_base idx_base); + // CHECK: status_t = rocsparse_coo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t); + status_t = cusparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t); + #if CUDA_VERSION >= 8000 // CHECK: hipDataType dataType_t; // TODO: [#899] There should be rocsparse_datatype @@ -424,6 +466,13 @@ int main() { cudaDataType dataType; #endif +#if CUDA_VERSION >= 9000 + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage(cusparseHandle_t handle, int m, int n, int nnzA, const cusparseMatDescr_t descrA, const double* csrSortedValA, const int* csrSortedRowPtrA, const int* csrSortedColIndA, float percentage, const cusparseMatDescr_t descrC, double* csrSortedValC, const int* csrSortedRowPtrC, int* csrSortedColIndC, pruneInfo_t info, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_dprune_csr2csr_by_percentage(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz_A, const rocsparse_mat_descr csr_descr_A, const double* csr_val_A, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, double percentage, const rocsparse_mat_descr csr_descr_C, double* csr_val_C, const rocsparse_int* csr_row_ptr_C, rocsparse_int* csr_col_ind_C, rocsparse_mat_info info, void* temp_buffer); + // CHECK: status_t = rocsparse_dprune_csr2csr_by_percentage(handle_t, m, n, nnz, matDescr_A, 
&csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer); + status_t = cusparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer); +#endif + #if CUDA_VERSION >= 8000 && CUDA_VERSION < 12000 // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src); // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_copy_mat_descr(rocsparse_mat_descr dest, const rocsparse_mat_descr src); @@ -431,7 +480,7 @@ int main() { status_t = cusparseCopyMatDescr(matDescr_t, matDescr_t_2); #endif -#if CUDA_VERSION >= 10010 +#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: _rocsparse_spmat_descr *spMatDescr = nullptr; // CHECK-NEXT: rocsparse_spmat_descr spMatDescr_t, matC; cusparseSpMatDescr *spMatDescr = nullptr; @@ -531,7 +580,7 @@ int main() { status_t = cusparseDnMatSetStridedBatch(dnMatDescr_t, batchCount, batchStride); #endif -#if CUDA_VERSION >= 10020 +#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: _rocsparse_spvec_descr *spVecDescr = nullptr; // CHECK-NEXT: rocsparse_spvec_descr spVecDescr_t; cusparseSpVecDescr *spVecDescr = nullptr; @@ -542,9 +591,6 @@ int main() { cusparseDnVecDescr *dnVecDescr = nullptr; cusparseDnVecDescr_t dnVecDescr_t, vecX, vecY; - // CHECK: rocsparse_status STATUS_NOT_SUPPORTED = rocsparse_status_not_implemented; - cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED; - // CHECK: rocsparse_spmv_alg spMVAlg_t; cusparseSpMVAlg_t spMVAlg_t; @@ -644,7 +690,12 @@ int main() { status_t = cusparseSpMV(handle_t, opA, alpha, spMatDescr_t, vecX, beta, vecY, dataType, spMVAlg_t, tempBuffer); #endif -#if CUDA_VERSION >= 10020 && CUDA_VERSION < 12000 +#if CUDA_VERSION >= 10020 + // CHECK: rocsparse_status STATUS_NOT_SUPPORTED = rocsparse_status_not_implemented; + cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED; +#endif + +#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) // CHECK: rocsparse_format FORMAT_COO_AOS = rocsparse_format_coo_aos; cusparseFormat_t FORMAT_COO_AOS = CUSPARSE_FORMAT_COO_AOS;
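A note on the reshuffled preprocessor guards in the two cuSPARSE tests above: the new condition `(CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000` keeps the generic sparse descriptors compiled for CUDA 10.1/10.2 on non-Windows hosts while skipping them for the Windows 10.x toolkits, which the guards suggest did not ship those declarations; the same shape with an added `CUDA_VERSION < 12000` bound covers symbols that CUDA 12 removed. A minimal sketch of the pattern, assuming a hypothetical probe function (`generic_api_probe` is not a name from the tests):

#include <cuda.h>
#include <cusparse.h>

void generic_api_probe() {
#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000
  // Generic sparse API is available here: CUDA 10.x on non-Windows, or any CUDA 11.0+.
  cusparseSpMatDescr_t spMatDescr = nullptr;
  (void)spMatDescr;
#endif
}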
[Table residue from the flattened supported-configurations matrix in the docs: the latest-supported CUDA column was updated from 12.1.1 to 12.2.0, for both the row marked LATEST STABLE CONFIG and the LLVM 17.0.0git row.]
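The bulk of the BLAS test changes above replace mutable pointer arrays (`fAarray`, `dAarray`, `complexAarray`, `voidAarray`, and so on) with new `*_const` counterparts at call sites whose parameters are const pointer arrays such as `const float* const A[]`. A short self-contained sketch of why those helper variables need an explicit cast (illustrative only, not a line from the test suite): C++ allows `float**` to convert implicitly to `const float* const*`, but not to `const float**`, so the tests materialize the const-qualified view with `const_cast`:

// Illustrative sketch: building a const-qualified pointer array for a batched
// BLAS call such as cublasSgemmBatched / rocblas_sgemm_batched.
float** fAarray = 0;  // mutable array of device pointers, as declared in the tests
// float** -> const float** is not an implicit conversion in C++,
// so the const-qualified alias must be created explicitly:
const float** const fAarray_const = const_cast<const float**>(fAarray);
// A parameter declared as 'const float* const A[]' then accepts fAarray_const
// directly, which is what the updated CHECK lines verify.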