diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e5a8b87..d3ae1564 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,22 @@
 
 Full documentation for HIPIFY is available at [hipify.readthedocs.io](https://hipify.readthedocs.io/en/latest/).
 
+## HIPIFY for ROCm 5.7.0
+### Added
+- CUDA 12.2.0 support
+- cuDNN 8.9.2 support
+- LLVM 16.0.6 support
+- Initial rocSPARSE support
+- Initial CUDA2ROC documentation generation for rocBLAS, rocSPARSE, and MIOpen:
+  - in separate files: `hipify-clang --md --doc-format=full --doc-roc=separate`
+  - in a single file: `hipify-clang --md --doc-format=full --doc-roc=joint`
+- New options:
+  - `--use-hip-data-types` (use `hipDataType` instead of `hipblasDatatype_t` or `rocblas_datatype`)
+  - `--doc-roc=<value>` (ROC documentation generation: `skip` (default), `separate`, or `joint`; the `--md` or `--csv` option must be specified)
+### Fixed
+- [#822] Added a new function call transformation type "additional const by value arg"
+- [#830] Added a new function call transformation type "move arg from place X to place Y"
+
 ## HIPIFY for ROCm 5.6.0
 ### Added
 - CUDA 12.1.0 support
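As a quick illustration of the new options above (a minimal sketch; the input file name and CUDA path are assumptions, not part of the patch):

```shell
# Generate the CUDA->ROC documentation (rocBLAS, rocSPARSE, MIOpen) in
# separate Markdown files; '--md' (or '--csv') must be specified.
./hipify-clang --md --doc-format=full --doc-roc=separate

# Prefer the unified 'hipDataType' over 'hipblasDatatype_t'/'rocblas_datatype'
# in the hipified output of a (hypothetical) square.cu.
./hipify-clang square.cu --use-hip-data-types --cuda-path=/usr/local/cuda-12.2
```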
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9ec50dc6..ce188b75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,11 +5,11 @@ project(hipify-clang)
 include(GNUInstallDirs)
 
 option(HIPIFY_INCLUDE_IN_HIP_SDK "Include HIPIFY in HIP SDK" OFF)
+
 if(HIPIFY_INCLUDE_IN_HIP_SDK)
   if(NOT WIN32)
     message(FATAL_ERROR "HIPIFY_INCLUDE_IN_HIP_SDK is only supported on Windows")
-  endif()
-  if(CMAKE_GENERATOR MATCHES "Visual Studio")
+  elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
     message(FATAL_ERROR "HIPIFY_INCLUDE_IN_HIP_SDK is not targeting Visual Studio")
   endif()
 else()
@@ -28,48 +28,47 @@ list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR})
 include(AddLLVM)
 
 if (NOT HIPIFY_CLANG_TESTS_ONLY)
-
-if(MSVC AND MSVC_VERSION VERSION_LESS "1900")
+  if(MSVC AND MSVC_VERSION VERSION_LESS "1900")
     message(SEND_ERROR "hipify-clang could be built by Visual Studio 14 2015 or higher.")
     return()
-endif()
+  endif()
 
-include_directories(${LLVM_INCLUDE_DIRS})
-add_definitions(${LLVM_DEFINITIONS})
+  include_directories(${LLVM_INCLUDE_DIRS})
+  add_definitions(${LLVM_DEFINITIONS})
 
-file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp)
-file(GLOB_RECURSE HIPIFY_HEADERS src/*.h)
-add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS})
-target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS})
+  file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp)
+  file(GLOB_RECURSE HIPIFY_HEADERS src/*.h)
 
-if(HIPIFY_INCLUDE_IN_HIP_SDK)
-  if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
-    message(FATAL_ERROR "In order to include HIPIFY in HIP SDK, HIPIFY needs to be built with LLVM_EXTERNAL_PROJECTS")
-  endif()
+  add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS})
+  target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS})
 
-  # Need to add clang include directories explicitly if
-  # building as part of llvm.
-  if(LLVM_EXTERNAL_CLANG_SOURCE_DIR)
-    target_include_directories(hipify-clang
-      PRIVATE
-        ${LLVM_BINARY_DIR}/tools/clang/include
-        ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/include)
-  endif()
-  # Need to add lld include directories explicitly if
-  # building as part of llvm.
-  if(LLVM_EXTERNAL_LLD_SOURCE_DIR)
-    target_include_directories(hipify-clang
-      PRIVATE
-        ${LLVM_BINARY_DIR}/tools/lld/include
-        ${LLVM_EXTERNAL_LLD_SOURCE_DIR}/include)
+  if(HIPIFY_INCLUDE_IN_HIP_SDK)
+    if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+      message(FATAL_ERROR "In order to include HIPIFY in HIP SDK, HIPIFY needs to be built with LLVM_EXTERNAL_PROJECTS")
+    endif()
+
+    # Need to add clang include directories explicitly if building as part of llvm.
+    if(LLVM_EXTERNAL_CLANG_SOURCE_DIR)
+      target_include_directories(hipify-clang
+        PRIVATE
+          ${LLVM_BINARY_DIR}/tools/clang/include
+          ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/include)
+    endif()
+
+    # Need to add lld include directories explicitly if building as part of llvm.
+    if(LLVM_EXTERNAL_LLD_SOURCE_DIR)
+      target_include_directories(hipify-clang
+        PRIVATE
+          ${LLVM_BINARY_DIR}/tools/lld/include
+          ${LLVM_EXTERNAL_LLD_SOURCE_DIR}/include)
+    endif()
+  else()
+    set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++)
+    set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang)
   endif()
-else()
-  set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++)
-  set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang)
-endif()
 
-# Link against LLVM and CLANG libraries
-target_link_libraries(hipify-clang PRIVATE
+  # Link against LLVM and CLANG libraries.
+  target_link_libraries(hipify-clang PRIVATE
     clangASTMatchers
     clangFrontend
     clangTooling
@@ -93,87 +92,80 @@ target_link_libraries(hipify-clang PRIVATE
     LLVMOption
     LLVMCore)
 
-if(LLVM_PACKAGE_VERSION VERSION_GREATER "6.0.1")
+  if(LLVM_PACKAGE_VERSION VERSION_GREATER "6.0.1")
     target_link_libraries(hipify-clang PRIVATE clangToolingInclusions)
-endif()
+  endif()
 
-if(LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1")
+  if(LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1")
     target_link_libraries(hipify-clang PRIVATE LLVMFrontendOpenMP)
-endif()
+  endif()
 
-if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "15.0.0")
+  if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "15.0.0")
     target_link_libraries(hipify-clang PRIVATE LLVMWindowsDriver clangSupport)
-endif()
+  endif()
 
-if(LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0")
+  if(LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0")
     if(MSVC)
-        set(STD "/std:c++17")
+      set(STD "/std:c++17")
     else()
-        set(STD "-std=c++17")
+      set(STD "-std=c++17")
     endif()
-else()
+  else()
     if(MSVC)
-        set(STD "/std:c++14")
+      set(STD "/std:c++14")
     else()
-        set(STD "-std=c++14")
+      set(STD "-std=c++14")
     endif()
-endif()
-
-# [ToDo] Remove SWDEV_375013 related guards from CMakeLists.txt and HipifyAction.cpp along with the LLVM 16.0.0 official release
-option (SWDEV_375013 "Enables SWDEV-375013 blocker workaround for the clang's change https://reviews.llvm.org/D140332" OFF)
-if(SWDEV_375013)
-    add_definitions(-DSWDEV_375013)
-endif()
+  endif()
 
-if(MSVC)
+  if(MSVC)
     target_link_libraries(hipify-clang PRIVATE version)
     target_compile_options(hipify-clang PRIVATE ${STD} /Od /GR- /EHs- /EHc-)
     set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} /SUBSYSTEM:WINDOWS")
-else()
+  else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${STD} -pthread -fno-rtti -fvisibility-inlines-hidden")
-endif()
+  endif()
 
-# Address Sanitize Flag
-if(ADDRESS_SANITIZER)
+  # Address Sanitizer flag.
+  if(ADDRESS_SANITIZER)
     set(addr_var -fsanitize=address)
-else()
+  else()
     set(addr_var )
-endif()
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} ${addr_var}")
+  endif()
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} ${addr_var}")
 
-# [ToDo] Remove D125860 related guards from CMakeLists.txt with the LLVM 16.0.0 official release
-option (D125860 "Enables treating clang's resource dir as lib/clang/X.Y.Z, as it was before clang's change D125860, merged as e1b88c8a09be25b86b13f98755a9bd744b4dbf14" OFF)
-if(D125860)
+  # [ToDo] Remove D125860 related guards from CMakeLists.txt with the LLVM 16.0.0 official release.
+  option (D125860 "Enables treating clang's resource dir as lib/clang/X.Y.Z, as it was before clang's change D125860, merged as e1b88c8a09be25b86b13f98755a9bd744b4dbf14" OFF)
+  if(D125860)
     add_definitions(-D125860)
-endif()
-if((LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") AND (NOT D125860))
+  endif()
+  if((LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") AND (NOT D125860))
     set(LIB_CLANG_RES ${LLVM_VERSION_MAJOR})
-else()
+  else()
     set(LIB_CLANG_RES ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH})
-endif()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LIB_CLANG_RES}\\\" ${addr_var}")
+  endif()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LIB_CLANG_RES}\\\" ${addr_var}")
 
-set(INSTALL_PATH_DOC_STRING "hipify-clang Installation Path")
-if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  set(INSTALL_PATH_DOC_STRING "hipify-clang Installation Path")
+  if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
     set(CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/dist" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE)
-endif()
+  endif()
 
-set(HIPIFY_BIN_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}/bin")
+  set(HIPIFY_BIN_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}/bin")
 
-install(TARGETS hipify-clang DESTINATION bin)
-# install bin directory in CMAKE_INSTALL_PREFIX path
-install(
+  install(TARGETS hipify-clang DESTINATION bin)
+  # Install bin directory in CMAKE_INSTALL_PREFIX path.
+  install(
     DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin
     DESTINATION .
     USE_SOURCE_PERMISSIONS
     PATTERN "hipify-perl"
     PATTERN "*.sh")
 
-# Headers are already included in HIP SDK, so skip those if including
-# HIPIFY in HIP SDK.
-if(NOT HIPIFY_INCLUDE_IN_HIP_SDK)
-  # install all folders under clang/version/ in CMAKE_INSTALL_PREFIX path
-  install(
+  # Headers are already included in HIP SDK, so skip those if including HIPIFY in HIP SDK.
+  if(NOT HIPIFY_INCLUDE_IN_HIP_SDK)
+    # Install all folders under clang/version/ in CMAKE_INSTALL_PREFIX path.
+    install(
       DIRECTORY ${LLVM_DIR}/../../clang/${LIB_CLANG_RES}/
       DESTINATION .
       COMPONENT clang-resource-headers
@@ -185,7 +177,7 @@ if(NOT HIPIFY_INCLUDE_IN_HIP_SDK)
       PATTERN "new"
       PATTERN "ppc_wrappers" EXCLUDE
       PATTERN "openmp_wrappers" EXCLUDE)
-endif()
+  endif()
 
 # install all folders under clang/version/ in CMAKE_INSTALL_PREFIX path
 install(
@@ -201,97 +193,99 @@ install(
     PATTERN "ppc_wrappers" EXCLUDE
     PATTERN "openmp_wrappers" EXCLUDE)
 
-option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
-
-if(UNIX)
-
-    #get rid of any RPATH definations already
+  if(UNIX)
+    # Get rid of any existing RPATH definitions.
     set_target_properties(hipify-clang PROPERTIES INSTALL_RPATH "")
-    #set RPATH for the binary
-    set_target_properties(hipify-clang PROPERTIES LINK_FLAGS "-Wl,--disable-new-dtags -Wl,--rpath,$ORIGIN/../lib" )
+    # Set RPATH for the binary.
+    set_target_properties(hipify-clang PROPERTIES LINK_FLAGS "-Wl,--enable-new-dtags -Wl,--rpath,$ORIGIN/../lib" )
+
+    option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
 
     if(FILE_REORG_BACKWARD_COMPATIBILITY)
-        include(hipify-backward-compat.cmake)
+      include(hipify-backward-compat.cmake)
     endif()
+
     set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm" CACHE PATH "HIP Package Installation Path")
     set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hipify-clang)
+
     configure_file(packaging/hipify-clang.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
     configure_file(${CMAKE_SOURCE_DIR}/LICENSE.txt ${BUILD_DIR}/LICENSE.txt @ONLY)
 
-    add_custom_target(package_hipify-clang  COMMAND ${CMAKE_COMMAND} .
-        WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
-        COMMAND rm -rf *.deb *.rpm *.tar.gz
-        COMMAND make package
-        COMMAND cp *.deb ${PROJECT_BINARY_DIR}
-        COMMAND cp *.rpm ${PROJECT_BINARY_DIR}
-        COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
-        WORKING_DIRECTORY ${BUILD_DIR})
-endif()
+    add_custom_target(package_hipify-clang COMMAND ${CMAKE_COMMAND} .
+      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+      COMMAND rm -rf *.deb *.rpm *.tar.gz
+      COMMAND make package
+      COMMAND cp *.deb ${PROJECT_BINARY_DIR}
+      COMMAND cp *.rpm ${PROJECT_BINARY_DIR}
+      COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
+      WORKING_DIRECTORY ${BUILD_DIR})
+  endif()
 
-endif()
+endif() # if (NOT HIPIFY_CLANG_TESTS_ONLY)
 
 if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY)
-    find_package(PythonInterp 2.7 REQUIRED)
-
-    function (require_program PROGRAM_NAME)
-        find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME})
-        if(FOUND_${PROGRAM_NAME})
-            message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}")
-        else()
-            message(SEND_ERROR "Can't find ${PROGRAM_NAME}. Either set HIPIFY_CLANG_TESTS(_ONLY) to OFF to disable HIPIFY tests, or install the missing program.")
-        endif()
-    endfunction()
-
-    require_program(lit)
-    require_program(FileCheck)
-
-    find_package(CUDA REQUIRED)
-    if((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR
-       (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR
-       (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR
-       (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR
-       (CUDA_VERSION VERSION_GREATER "9.2" AND LLVM_PACKAGE_VERSION VERSION_LESS "8.0") OR
-       (CUDA_VERSION VERSION_GREATER "10.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "9.0") OR
-       (CUDA_VERSION VERSION_GREATER "10.1" AND LLVM_PACKAGE_VERSION VERSION_LESS "10.0"))
-        message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by LLVM ${LLVM_PACKAGE_VERSION}.")
-        if(CUDA_VERSION_MAJOR VERSION_LESS "7")
-            message(STATUS "Please install CUDA 7.0 or higher.")
-        elseif(CUDA_VERSION_MAJOR VERSION_LESS "8")
-            message(STATUS "Please install LLVM + clang 3.8 or higher.")
-        elseif(CUDA_VERSION_MAJOR VERSION_LESS "9")
-            message(STATUS "Please install LLVM + clang 4.0 or higher.")
-        elseif(CUDA_VERSION VERSION_EQUAL "9.0")
-            message(STATUS "Please install LLVM + clang 6.0 or higher.")
-        elseif(CUDA_VERSION_MAJOR VERSION_LESS "10")
-            message(STATUS "Please install LLVM + clang 7.0 or higher.")
-        elseif(CUDA_VERSION VERSION_EQUAL "10.0")
-            message(STATUS "Please install LLVM + clang 8.0 or higher.")
-        elseif(CUDA_VERSION VERSION_EQUAL "10.1")
-            message(STATUS "Please install LLVM + clang 9.0 or higher.")
-        elseif(CUDA_VERSION VERSION_EQUAL "10.2" OR CUDA_VERSION VERSION_EQUAL "11.0")
-            message(STATUS "Please install LLVM + clang 10.0 or higher.")
-        endif()
+  find_package(PythonInterp 2.7 REQUIRED)
+
+  function (require_program PROGRAM_NAME)
+    find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME})
+    if(FOUND_${PROGRAM_NAME})
+      message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}")
+    else()
+      message(SEND_ERROR "Can't find ${PROGRAM_NAME}. Either set HIPIFY_CLANG_TESTS(_ONLY) to OFF to disable HIPIFY tests, or install the missing program.")
     endif()
+  endfunction()
+
+  require_program(lit)
+  require_program(FileCheck)
+
+  find_package(CUDA REQUIRED)
+  if((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR
+     (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR
+     (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR
+     (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR
+     (CUDA_VERSION VERSION_GREATER "9.2" AND LLVM_PACKAGE_VERSION VERSION_LESS "8.0") OR
+     (CUDA_VERSION VERSION_GREATER "10.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "9.0") OR
+     (CUDA_VERSION VERSION_GREATER "10.1" AND LLVM_PACKAGE_VERSION VERSION_LESS "10.0"))
+    message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by LLVM ${LLVM_PACKAGE_VERSION}.")
+    if(CUDA_VERSION_MAJOR VERSION_LESS "7")
+      message(STATUS "Please install CUDA 7.0 or higher.")
+    elseif(CUDA_VERSION_MAJOR VERSION_LESS "8")
+      message(STATUS "Please install LLVM + clang 3.8 or higher.")
+    elseif(CUDA_VERSION_MAJOR VERSION_LESS "9")
+      message(STATUS "Please install LLVM + clang 4.0 or higher.")
+    elseif(CUDA_VERSION VERSION_EQUAL "9.0")
+      message(STATUS "Please install LLVM + clang 6.0 or higher.")
+    elseif(CUDA_VERSION_MAJOR VERSION_LESS "10")
+      message(STATUS "Please install LLVM + clang 7.0 or higher.")
+    elseif(CUDA_VERSION VERSION_EQUAL "10.0")
+      message(STATUS "Please install LLVM + clang 8.0 or higher.")
+    elseif(CUDA_VERSION VERSION_EQUAL "10.1")
+      message(STATUS "Please install LLVM + clang 9.0 or higher.")
+    elseif(CUDA_VERSION VERSION_EQUAL "10.2" OR CUDA_VERSION VERSION_EQUAL "11.0")
+      message(STATUS "Please install LLVM + clang 10.0 or higher.")
+    endif()
+  endif()
 
-    configure_file(
-        ${CMAKE_CURRENT_LIST_DIR}/tests/lit.site.cfg.in
-        ${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
-        @ONLY)
+  configure_file(
+    ${CMAKE_CURRENT_LIST_DIR}/tests/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
+    @ONLY)
 
-if(HIPIFY_CLANG_TESTS_ONLY)
+  if(HIPIFY_CLANG_TESTS_ONLY)
     add_lit_testsuite(test-hipify "Running HIPIFY regression tests"
-        ${CMAKE_CURRENT_LIST_DIR}/tests
-        PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
-        ARGS -v)
-else()
+      ${CMAKE_CURRENT_LIST_DIR}/tests
+      PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
+      ARGS -v)
+  else()
     add_lit_testsuite(test-hipify "Running HIPIFY regression tests"
-        ${CMAKE_CURRENT_LIST_DIR}/tests
-        PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
-        ARGS -v
-        DEPENDS hipify-clang)
-endif()
+      ${CMAKE_CURRENT_LIST_DIR}/tests
+      PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
+      ARGS -v
+      DEPENDS hipify-clang)
+  endif()
 
-    add_custom_target(test-hipify-clang)
-    add_dependencies(test-hipify-clang test-hipify)
-    set_target_properties(test-hipify-clang PROPERTIES FOLDER "Tests")
-endif()
+  add_custom_target(test-hipify-clang)
+  add_dependencies(test-hipify-clang test-hipify)
+  set_target_properties(test-hipify-clang PROPERTIES FOLDER "Tests")
+
+endif() # if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY)
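The re-indented test gating above still pairs CUDA and LLVM versions; a configure line that satisfies the compatibility check might look like this (a sketch only — install paths are assumptions):

```shell
# LLVM 16.0.6 with CUDA 12.2 passes the CUDA/LLVM version check above.
cmake -DHIPIFY_CLANG_TESTS=ON \
      -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_PREFIX_PATH=/opt/llvm-16.0.6/dist \
      -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.2 \
      ../hipify
make test-hipify
```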
diff --git a/bin/hipify-perl b/bin/hipify-perl
index a066031b..b090469f 100755
--- a/bin/hipify-perl
+++ b/bin/hipify-perl
@@ -1173,6 +1173,7 @@ sub rocSubstitutions {
     subst("cublasGemmEx", "rocblas_gemm_ex", "library");
     subst("cublasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", "library");
     subst("cublasGetAtomicsMode", "rocblas_get_atomics_mode", "library");
+    subst("cublasGetMathMode", "rocblas_get_math_mode", "library");
     subst("cublasGetMatrix", "rocblas_get_matrix", "library");
     subst("cublasGetMatrixAsync", "rocblas_get_matrix_async", "library");
     subst("cublasGetPointerMode", "rocblas_get_pointer_mode", "library");
@@ -1219,6 +1220,7 @@ sub rocSubstitutions {
     subst("cublasSdot", "rocblas_sdot", "library");
     subst("cublasSdot_v2", "rocblas_sdot", "library");
     subst("cublasSetAtomicsMode", "rocblas_set_atomics_mode", "library");
+    subst("cublasSetMathMode", "rocblas_set_math_mode", "library");
     subst("cublasSetMatrix", "rocblas_set_matrix", "library");
     subst("cublasSetMatrixAsync", "rocblas_set_matrix_async", "library");
     subst("cublasSetPointerMode", "rocblas_set_pointer_mode", "library");
@@ -1476,6 +1478,7 @@ sub rocSubstitutions {
     subst("cusparseCreateDnMat", "rocsparse_create_dnmat_descr", "library");
     subst("cusparseCreateDnVec", "rocsparse_create_dnvec_descr", "library");
     subst("cusparseCreateHybMat", "rocsparse_create_hyb_mat", "library");
+    subst("cusparseCreateIdentityPermutation", "rocsparse_create_identity_permutation", "library");
     subst("cusparseCreateMatDescr", "rocsparse_create_mat_descr", "library");
     subst("cusparseCreateSpVec", "rocsparse_create_spvec_descr", "library");
     subst("cusparseCscSetPointers", "rocsparse_csc_set_pointers", "library");
@@ -1503,6 +1506,7 @@ sub rocSubstitutions {
     subst("cusparseDnVecGet", "rocsparse_dnvec_get", "library");
     subst("cusparseDnVecGetValues", "rocsparse_dnvec_get_values", "library");
     subst("cusparseDnVecSetValues", "rocsparse_dnvec_set_values", "library");
+    subst("cusparseDpruneCsr2csrByPercentage", "rocsparse_dprune_csr2csr_by_percentage", "library");
     subst("cusparseGather", "rocsparse_gather", "library");
     subst("cusparseGetMatDiagType", "rocsparse_get_mat_diag_type", "library");
     subst("cusparseGetMatFillMode", "rocsparse_get_mat_fill_mode", "library");
@@ -1541,8 +1545,14 @@ sub rocSubstitutions {
     subst("cusparseSpVecGetIndexBase", "rocsparse_spvec_get_index_base", "library");
     subst("cusparseSpVecGetValues", "rocsparse_spvec_get_values", "library");
     subst("cusparseSpVecSetValues", "rocsparse_spvec_set_values", "library");
+    subst("cusparseXcoo2csr", "rocsparse_coo2csr", "library");
     subst("cusparseXcoosortByColumn", "rocsparse_coosort_by_column", "library");
     subst("cusparseXcoosortByRow", "rocsparse_coosort_by_row", "library");
+    subst("cusparseXcoosort_bufferSizeExt", "rocsparse_coosort_buffer_size", "library");
+    subst("cusparseXcscsort", "rocsparse_cscsort", "library");
+    subst("cusparseXcscsort_bufferSizeExt", "rocsparse_cscsort_buffer_size", "library");
+    subst("cusparseXcsrsort", "rocsparse_csrsort", "library");
+    subst("cusparseXcsrsort_bufferSizeExt", "rocsparse_csrsort_buffer_size", "library");
     subst("cusparseXgebsr2gebsrNnz", "rocsparse_gebsr2gebsr_nnz", "library");
     subst("cusparseZbsr2csr", "rocsparse_zbsr2csr", "library");
     subst("cusparseZcsrcolor", "rocsparse_zcsrcolor", "library");
@@ -1555,12 +1565,14 @@ sub rocSubstitutions {
     subst("cuDoubleComplex", "rocblas_double_complex", "type");
     subst("cuFloatComplex", "rocblas_float_complex", "type");
     subst("cublasAtomicsMode_t", "rocblas_atomics_mode", "type");
+    subst("cublasComputeType_t", "rocblas_computetype", "type");
     subst("cublasContext", "_rocblas_handle", "type");
     subst("cublasDataType_t", "rocblas_datatype", "type");
     subst("cublasDiagType_t", "rocblas_diagonal", "type");
     subst("cublasFillMode_t", "rocblas_fill", "type");
     subst("cublasGemmAlgo_t", "rocblas_gemm_algo", "type");
     subst("cublasHandle_t", "rocblas_handle", "type");
+    subst("cublasMath_t", "rocblas_math_mode", "type");
     subst("cublasOperation_t", "rocblas_operation", "type");
     subst("cublasPointerMode_t", "rocblas_pointer_mode", "type");
     subst("cublasSideMode_t", "rocblas_side", "type");
@@ -1644,8 +1656,12 @@ sub rocSubstitutions {
     subst("cusparseSpVecDescr_t", "rocsparse_spvec_descr", "type");
     subst("cusparseSparseToDenseAlg_t", "rocsparse_sparse_to_dense_alg", "type");
     subst("cusparseStatus_t", "rocsparse_status", "type");
+    subst("pruneInfo", "_rocsparse_mat_info", "type");
+    subst("pruneInfo_t", "rocsparse_mat_info", "type");
     subst("CUBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", "numeric_literal");
     subst("CUBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", "numeric_literal");
+    subst("CUBLAS_COMPUTE_32F", "rocblas_compute_type_f32", "numeric_literal");
+    subst("CUBLAS_DEFAULT_MATH", "rocblas_default_math", "numeric_literal");
     subst("CUBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", "numeric_literal");
     subst("CUBLAS_DIAG_UNIT", "rocblas_diagonal_unit", "numeric_literal");
     subst("CUBLAS_FILL_MODE_FULL", "rocblas_fill_full", "numeric_literal");
@@ -1670,6 +1686,7 @@ sub rocSubstitutions {
     subst("CUBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", "numeric_literal");
     subst("CUBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_perf_degraded", "numeric_literal");
     subst("CUBLAS_STATUS_SUCCESS", "rocblas_status_success", "numeric_literal");
+    subst("CUBLAS_TF32_TENSOR_OP_MATH", "rocblas_xf32_xdl_math_op", "numeric_literal");
     subst("CUDA_C_16BF", "rocblas_datatype_bf16_c", "numeric_literal");
     subst("CUDA_C_16F", "rocblas_datatype_f16_c", "numeric_literal");
     subst("CUDA_C_32F", "rocblas_datatype_f32_c", "numeric_literal");
@@ -4059,6 +4076,7 @@ sub simpleSubstitutions {
     subst("cusparseStatus_t", "hipsparseStatus_t", "type");
     subst("nvrtcProgram", "hiprtcProgram", "type");
     subst("nvrtcResult", "hiprtcResult", "type");
+    subst("pruneInfo", "pruneInfo", "type");
     subst("pruneInfo_t", "pruneInfo_t", "type");
     subst("surfaceReference", "surfaceReference", "type");
     subst("texture", "texture", "type");
@@ -5880,6 +5898,8 @@ sub warnUnsupportedDeviceFunctions {
         "mulhi",
         "mul64hi",
         "mul24",
+        "make_half2",
+        "make_bfloat162",
         "llmin",
         "llmax",
         "int_as_float",
@@ -6061,6 +6081,8 @@ sub warnUnsupportedDeviceFunctions {
         "__heq2_mask",
         "__hcmadd",
         "__halves2bfloat162",
+        "__half2uchar_rz",
+        "__half2char_rz",
         "__hadd_rn",
         "__hadd2_rn",
         "__fsub_rz",
@@ -6088,6 +6110,7 @@ sub warnUnsupportedDeviceFunctions {
         "__float2bfloat16_rd",
         "__float2bfloat162_rn",
         "__float2bfloat16",
+        "__float22bfloat162_rn",
         "__finitel",
         "__finitef",
         "__finite",
@@ -6132,6 +6155,7 @@ sub warnUnsupportedDeviceFunctions {
         "__bfloat162uint_ru",
         "__bfloat162uint_rn",
         "__bfloat162uint_rd",
+        "__bfloat162uchar_rz",
         "__bfloat162short_rz",
         "__bfloat162short_ru",
         "__bfloat162short_rn",
@@ -6145,6 +6169,7 @@ sub warnUnsupportedDeviceFunctions {
         "__bfloat162int_rn",
         "__bfloat162int_rd",
         "__bfloat162float",
+        "__bfloat162char_rz",
         "__bfloat162bfloat162",
         "__bfloat1622float2",
         "_Pow_int"
@@ -6218,7 +6243,6 @@ sub warnUnsupportedFunctions {
     my $line_num = shift;
     my $k = 0;
     foreach $func (
-        "pruneInfo",
         "nvrtcGetSupportedArchs",
         "nvrtcGetOptiXIRSize",
         "nvrtcGetOptiXIR",
@@ -6764,14 +6788,27 @@ sub warnUnsupportedFunctions {
         "cudaOccupancyAvailableDynamicSMemPerBlock",
         "cudaNvSciSyncAttrWait",
         "cudaNvSciSyncAttrSignal",
+        "cudaMemsetParamsV2",
         "cudaMemoryTypeUnregistered",
         "cudaMemcpyToArrayAsync",
+        "cudaMemcpyNodeParams",
         "cudaMemcpyFromArrayAsync",
         "cudaMemcpyArrayToArray",
         "cudaMemcpy3DPeerParms",
         "cudaMemcpy3DPeerAsync",
         "cudaMemcpy3DPeer",
         "cudaMemcpy2DArrayToArray",
+        "cudaMemRangeAttributePreferredLocationType",
+        "cudaMemRangeAttributePreferredLocationId",
+        "cudaMemRangeAttributeLastPrefetchLocationType",
+        "cudaMemRangeAttributeLastPrefetchLocationId",
+        "cudaMemPrefetchAsync_v2",
+        "cudaMemLocationTypeHostNumaCurrent",
+        "cudaMemLocationTypeHostNuma",
+        "cudaMemLocationTypeHost",
+        "cudaMemFreeNodeParams",
+        "cudaMemAllocNodeParamsV2",
+        "cudaMemAdvise_v2",
         "cudaLimitPersistingL2CacheSize",
         "cudaLimitMaxL2FetchGranularity",
         "cudaLimitDevRuntimeSyncDepth",
@@ -6801,6 +6838,7 @@ sub warnUnsupportedFunctions {
         "cudaLaunchAttribute",
         "cudaKeyValuePair",
         "cudaKernel_t",
+        "cudaKernelNodeParamsV2",
         "cudaKernelNodeAttributePriority",
         "cudaKernelNodeAttributeMemSyncDomainMap",
         "cudaKernelNodeAttributeMemSyncDomain",
@@ -6808,6 +6846,7 @@ sub warnUnsupportedFunctions {
         "cudaKernelNodeAttributeClusterDimension",
         "cudaInitDeviceFlagsAreValid",
         "cudaInitDevice",
+        "cudaHostNodeParamsV2",
         "cudaGraphicsVDPAURegisterVideoSurface",
         "cudaGraphicsVDPAURegisterOutputSurface",
         "cudaGraphicsResourceSetMapFlags",
@@ -6828,6 +6867,8 @@ sub warnUnsupportedFunctions {
         "cudaGraphicsCubeFaceNegativeY",
         "cudaGraphicsCubeFaceNegativeX",
         "cudaGraphicsCubeFace",
+        "cudaGraphNodeSetParams",
+        "cudaGraphNodeParams",
         "cudaGraphInstantiateWithParams",
         "cudaGraphInstantiateSuccess",
         "cudaGraphInstantiateResult",
@@ -6844,9 +6885,11 @@ sub warnUnsupportedFunctions {
         "cudaGraphExecUpdateResultInfo_st",
         "cudaGraphExecUpdateResultInfo",
         "cudaGraphExecUpdateErrorAttributesChanged",
+        "cudaGraphExecNodeSetParams",
         "cudaGraphExecGetFlags",
         "cudaGraphExecExternalSemaphoresWaitNodeSetParams",
         "cudaGraphExecExternalSemaphoresSignalNodeSetParams",
+        "cudaGraphAddNode",
         "cudaGraphAddExternalSemaphoresWaitNode",
         "cudaGraphAddExternalSemaphoresSignalNode",
         "cudaGetTextureObjectTextureDesc_v2",
@@ -6891,8 +6934,10 @@ sub warnUnsupportedFunctions {
         "cudaFlushGPUDirectRDMAWritesOptionMemOps",
         "cudaFlushGPUDirectRDMAWritesOptionHost",
         "cudaExternalSemaphoreWaitSkipNvSciBufMemSync",
+        "cudaExternalSemaphoreWaitNodeParamsV2",
         "cudaExternalSemaphoreWaitNodeParams",
         "cudaExternalSemaphoreSignalSkipNvSciBufMemSync",
+        "cudaExternalSemaphoreSignalNodeParamsV2",
         "cudaExternalSemaphoreSignalNodeParams",
         "cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32",
         "cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd",
@@ -6903,9 +6948,11 @@ sub warnUnsupportedFunctions {
         "cudaExternalMemoryMipmappedArrayDesc",
         "cudaExternalMemoryHandleTypeNvSciBuf",
         "cudaExternalMemoryGetMappedMipmappedArray",
+        "cudaEventWaitNodeParams",
         "cudaEventWaitExternal",
         "cudaEventWaitDefault",
         "cudaEventRecordWithFlags",
+        "cudaEventRecordNodeParams",
         "cudaEventRecordExternal",
         "cudaEventRecordDefault",
         "cudaEventCreateFromEGLSync",
@@ -7069,6 +7116,9 @@ sub warnUnsupportedFunctions {
         "cudaDriverEntryPointQueryResult",
         "cudaDeviceSyncMemops",
         "cudaDevicePropDontCare",
+        "cudaDeviceNumaConfigNumaNode",
+        "cudaDeviceNumaConfigNone",
+        "cudaDeviceNumaConfig",
         "cudaDeviceMask",
         "cudaDeviceGetTexture1DLinearMaxWidth",
         "cudaDeviceGetNvSciSyncAttributes",
@@ -7085,6 +7135,8 @@ sub warnUnsupportedFunctions {
         "cudaDevAttrReserved124",
         "cudaDevAttrReserved123",
         "cudaDevAttrReserved122",
+        "cudaDevAttrNumaId",
+        "cudaDevAttrNumaConfig",
         "cudaDevAttrMemoryPoolSupportedHandleTypes",
         "cudaDevAttrMemSyncDomainCount",
         "cudaDevAttrMaxTimelineSemaphoreInteropSupported",
@@ -7100,6 +7152,7 @@ sub warnUnsupportedFunctions {
         "cudaDevAttrIpcEventSupport",
         "cudaDevAttrHostRegisterSupported",
         "cudaDevAttrHostRegisterReadOnlySupported",
+        "cudaDevAttrHostNumaId",
         "cudaDevAttrGPUDirectRDMAWritesOrdering",
         "cudaDevAttrGPUDirectRDMASupported",
         "cudaDevAttrGPUDirectRDMAFlushWritesOptions",
@@ -7170,6 +7223,7 @@ sub warnUnsupportedFunctions {
         "cudaClusterSchedulingPolicyLoadBalancing",
         "cudaClusterSchedulingPolicyDefault",
         "cudaClusterSchedulingPolicy",
+        "cudaChildGraphNodeParams",
         "cudaChannelFormatKindUnsignedNormalized8X4",
         "cudaChannelFormatKindUnsignedNormalized8X2",
         "cudaChannelFormatKindUnsignedNormalized8X1",
@@ -7278,7 +7332,9 @@ sub warnUnsupportedFunctions {
         "cuMemcpy3DPeerAsync",
         "cuMemcpy3DPeer",
         "cuMemcpy",
+        "cuMemPrefetchAsync_v2",
         "cuMemGetHandleForAddressRange",
+        "cuMemAdvise_v2",
         "cuLibraryUnload",
         "cuLibraryLoadFromFile",
         "cuLibraryLoadData",
@@ -7305,11 +7361,13 @@ sub warnUnsupportedFunctions {
         "cuGraphicsD3D9RegisterResource",
         "cuGraphicsD3D11RegisterResource",
         "cuGraphicsD3D10RegisterResource",
+        "cuGraphNodeSetParams",
         "cuGraphInstantiateWithParams",
         "cuGraphExternalSemaphoresWaitNodeSetParams",
         "cuGraphExternalSemaphoresWaitNodeGetParams",
         "cuGraphExternalSemaphoresSignalNodeSetParams",
         "cuGraphExternalSemaphoresSignalNodeGetParams",
+        "cuGraphExecNodeSetParams",
         "cuGraphExecMemsetNodeSetParams",
         "cuGraphExecMemcpyNodeSetParams",
         "cuGraphExecGetFlags",
@@ -7318,6 +7376,7 @@ sub warnUnsupportedFunctions {
         "cuGraphExecBatchMemOpNodeSetParams",
         "cuGraphBatchMemOpNodeSetParams",
         "cuGraphBatchMemOpNodeGetParams",
+        "cuGraphAddNode",
         "cuGraphAddMemsetNode",
         "cuGraphAddMemcpyNode",
         "cuGraphAddExternalSemaphoresWaitNode",
@@ -7882,6 +7941,8 @@ sub warnUnsupportedFunctions {
         "CU_EGL_COLOR_FORMAT_ARGB",
         "CU_EGL_COLOR_FORMAT_ABGR",
         "CU_EGL_COLOR_FORMAT_A",
+        "CU_DEVICE_NUMA_CONFIG_NUMA_NODE",
+        "CU_DEVICE_NUMA_CONFIG_NONE",
         "CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED",
         "CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS",
         "CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED",
@@ -9524,7 +9585,6 @@ sub warnRocOnlyUnsupportedFunctions {
         "cublasSetSmCountTarget",
         "cublasSetMatrix_64",
         "cublasSetMatrixAsync_64",
-        "cublasSetMathMode",
         "cublasSetLoggerCallback",
         "cublasSetKernelStream",
         "cublasSdot_v2_64",
@@ -9548,7 +9608,6 @@ sub warnRocOnlyUnsupportedFunctions {
         "cublasRotEx_64",
         "cublasNrm2Ex_64",
         "cublasMigrateComputeType",
-        "cublasMath_t",
         "cublasLoggerConfigure",
         "cublasLogCallback",
         "cublasIzamin_v2_64",
@@ -9591,7 +9650,6 @@ sub warnRocOnlyUnsupportedFunctions {
         "cublasGetProperty",
         "cublasGetMatrix_64",
         "cublasGetMatrixAsync_64",
-        "cublasGetMathMode",
         "cublasGetLoggerCallback",
         "cublasGetError",
         "cublasGetCudartVersion",
@@ -9818,7 +9876,6 @@ sub warnRocOnlyUnsupportedFunctions {
         "CUDA_C_4I",
         "CUDA_C_16U",
         "CUDA_C_16I",
-        "CUBLAS_TF32_TENSOR_OP_MATH",
         "CUBLAS_TENSOR_OP_MATH",
         "CUBLAS_STATUS_LICENSE_ERROR",
         "CUBLAS_PEDANTIC_MATH",
@@ -9866,7 +9923,6 @@ sub warnRocOnlyUnsupportedFunctions {
         "CUBLAS_GEMM_ALGO1",
         "CUBLAS_GEMM_ALGO0_TENSOR_OP",
         "CUBLAS_GEMM_ALGO0",
-        "CUBLAS_DEFAULT_MATH",
         "CUBLAS_COMPUTE_64F_PEDANTIC",
         "CUBLAS_COMPUTE_64F",
         "CUBLAS_COMPUTE_32I_PEDANTIC",
@@ -9875,7 +9931,6 @@ sub warnRocOnlyUnsupportedFunctions {
         "CUBLAS_COMPUTE_32F_FAST_TF32",
         "CUBLAS_COMPUTE_32F_FAST_16F",
         "CUBLAS_COMPUTE_32F_FAST_16BF",
-        "CUBLAS_COMPUTE_32F",
         "CUBLAS_COMPUTE_16F_PEDANTIC",
         "CUBLAS_COMPUTE_16F"
     )
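The new math-mode mappings in `rocSubstitutions` can be exercised end to end; a small sketch (the file name is illustrative, and `-roc` is assumed to select the roc substitution path, as the subroutine name suggests):

```shell
cat > math_mode.cu <<'EOF'
cublasMath_t mode = CUBLAS_TF32_TENSOR_OP_MATH;
cublasSetMathMode(handle, mode);
EOF
./bin/hipify-perl -roc math_mode.cu
# Expected translation, per the substitutions added above:
#   rocblas_math_mode mode = rocblas_xf32_xdl_math_op;
#   rocblas_set_math_mode(handle, mode);
```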
diff --git a/docs/.sphinx/requirements.in b/docs/.sphinx/requirements.in
index a4f5c674..e53f3df1 100644
--- a/docs/.sphinx/requirements.in
+++ b/docs/.sphinx/requirements.in
@@ -1 +1 @@
-rocm-docs-core==0.18.4
+rocm-docs-core==0.19.0
diff --git a/docs/.sphinx/requirements.txt b/docs/.sphinx/requirements.txt
index 0a276bca..ee9448ee 100644
--- a/docs/.sphinx/requirements.txt
+++ b/docs/.sphinx/requirements.txt
@@ -92,7 +92,7 @@ requests==2.28.2
     # via
     #   pygithub
     #   sphinx
-rocm-docs-core==0.18.4
+rocm-docs-core==0.19.0
     # via -r requirements.in
 smmap==5.0.0
     # via gitdb
diff --git a/docs/hipify-clang.md b/docs/hipify-clang.md
index eb213a33..4e437667 100644
--- a/docs/hipify-clang.md
+++ b/docs/hipify-clang.md
@@ -23,7 +23,7 @@ After applying all the matchers, the output HIP source is produced.
 
 1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**16.0.6**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-16.0.6).
 
-2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**12.1.1**](https://developer.nvidia.com/cuda-downloads).
+2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive); the latest supported version is [**12.2.0**](https://developer.nvidia.com/cuda-downloads).
 
 <table align="center">
   <thead>
@@ -169,12 +169,12 @@ After applying all the matchers, the output HIP source is produced.
                            <a href="https://github.com/llvm/llvm-project/releases/tag/llvmorg-16.0.4">16.0.4</a>,
                            <a href="https://github.com/llvm/llvm-project/releases/tag/llvmorg-16.0.5">16.0.5</a>,<br>
                            <a href="https://github.com/llvm/llvm-project/releases/tag/llvmorg-16.0.6"><b>16.0.6</b></a></td>
-      <td bgcolor="eefaeb"><a href="https://developer.nvidia.com/cuda-downloads"><b>12.1.1</b></a></td>
+      <td bgcolor="eefaeb"><a href="https://developer.nvidia.com/cuda-downloads"><b>12.2.0</b></a></td>
       <td colspan=2 bgcolor="eefaeb"><font color="green"><b>LATEST STABLE CONFIG</b></font></td>
     </tr>
     <tr align="center">
       <td><a href="https://github.com/llvm/llvm-project">17.0.0git</a></td>
-      <td><a href="https://developer.nvidia.com/cuda-downloads">12.1.1</a></td>
+      <td><a href="https://developer.nvidia.com/cuda-downloads">12.2.0</a></td>
       <td>+</td>
       <td>+</td>
     </tr>
@@ -199,14 +199,14 @@ To process a file, `hipify-clang` needs access to the same headers that would be
 For example:
 
 ```shell
-./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.1 -I /usr/local/cuda-12.1/samples/common/inc
+./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.2 -I /usr/local/cuda-12.2/samples/common/inc
 ```
 
 `hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you
 were compiling the input file. For example:
 
 ```bash
-./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-12.1 -- -std=c++17
+./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-12.2 -- -std=c++17
 ```
 
 The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful.
@@ -327,9 +327,9 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro
 
         - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/include`
 
-        - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1"`
+        - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2"`
 
-          `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.1"`
+          `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.2"`
 
 4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed.
 
@@ -389,7 +389,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5
 
 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5
 
-Ubuntu 20-21: LLVM 9.0.0 - 16.0.6, CUDA 8.0 - 12.1.1, cuDNN 5.1.10 - 8.9.2
+Ubuntu 20-21: LLVM 9.0.0 - 16.0.6, CUDA 8.0 - 12.2.0, cuDNN 5.1.10 - 8.9.2
 
 Minimum build system requirements for the above configurations:
 
@@ -443,7 +443,7 @@ cmake
 -- Performing Test CMAKE_HAVE_LIBC_PTHREAD
 -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
 -- Found Threads: TRUE
--- Found CUDA: /usr/local/cuda (found version "12.1")
+-- Found CUDA: /usr/local/cuda (found version "12.2")
 -- Configuring done
 -- Generating done
 -- Build files have been written to: /usr/hipify/build
@@ -457,7 +457,7 @@ make test-hipify
 ```shell
 Running HIPify regression tests
 ========================================
-CUDA 12.1 - will be used for testing
+CUDA 12.2 - will be used for testing
 LLVM 16.0.6 - will be used for testing
 x86_64 - Platform architecture
 Linux 5.13.0-21-generic - Platform OS
@@ -576,8 +576,8 @@ Testing Time: 7.90s
 | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5  - 8.3.2 | 2017.15.9.43, 2019.16.11.9               | 3.22.2          | 3.10.2        |
 | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5  - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0          | 3.10.6        |
 | 15.0.0 - 15.0.7 | 7.0 - 11.8.0 | 8.0.5  - 8.8.1 | 2017.15.9.53, 2019.16.11.25, 2022.17.5.2 | 3.26.0          | 3.11.2        |
-| 16.0.0 - 16.0.6 | 7.0 - 12.1.1 | 8.0.5  - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4          | 3.11.4        |
-| 17.0.0git       | 7.0 - 12.1.1 | 8.0.5  - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4          | 3.11.4        |
+| 16.0.0 - 16.0.6 | 7.0 - 12.2.0 | 8.0.5  - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4          | 3.11.4        |
+| 17.0.0git       | 7.0 - 12.2.0 | 8.0.5  - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4          | 3.11.4        |
 
 *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:*
 
@@ -590,9 +590,9 @@ cmake
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX=../dist \
  -DCMAKE_PREFIX_PATH=d:/LLVM/16.0.6/dist \
- -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1" \
- -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.1" \
- -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-12.1-windows-x64-v8.9.2 \
+ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2" \
+ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.2" \
+ -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-12.2-windows-x64-v8.9.2 \
  -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \
  -DLLVM_EXTERNAL_LIT=d:/LLVM/16.0.6/build/Release/bin/llvm-lit.py \
  ../hipify
@@ -606,7 +606,7 @@ cmake
 -- Found PythonInterp: c:/Program Files/Python311/python.exe (found suitable version "3.11.4", minimum required is "3.6")
 -- Found lit: c:/Program Files/Python311/Scripts/lit.exe
 -- Found FileCheck: d:/LLVM/16.0.6/dist/bin/FileCheck.exe
--- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1 (found version "12.1")
+-- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2 (found version "12.2")
 -- Configuring done
 -- Generating done
 -- Build files have been written to: d:/hipify/build
diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md
index 2c350b76..b0850b1d 100644
--- a/docs/tables/CUBLAS_API_supported_by_HIP.md
+++ b/docs/tables/CUBLAS_API_supported_by_HIP.md
@@ -182,7 +182,7 @@
 |`cublasSetAtomicsMode`| | | |`hipblasSetAtomicsMode`|3.10.0| | | |
 |`cublasSetKernelStream`| | | | | | | | |
 |`cublasSetLoggerCallback`|9.2| | | | | | | |
-|`cublasSetMathMode`| | | | | | | | |
+|`cublasSetMathMode`|9.0| | | | | | | |
 |`cublasSetMatrix`| | | |`hipblasSetMatrix`|1.8.2| | | |
 |`cublasSetMatrixAsync`| | | |`hipblasSetMatrixAsync`|3.7.0| | | |
 |`cublasSetMatrixAsync_64`|12.0| | | | | | | |
diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md
index 8c232a5c..282b2eca 100644
--- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md
+++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md
@@ -8,7 +8,7 @@
 |`CUBLAS_ATOMICS_NOT_ALLOWED`| | | |`HIPBLAS_ATOMICS_NOT_ALLOWED`|3.10.0| | | |`rocblas_atomics_not_allowed`|3.8.0| | | |
 |`CUBLAS_COMPUTE_16F`|11.0| | | | | | | | | | | | |
 |`CUBLAS_COMPUTE_16F_PEDANTIC`|11.0| | | | | | | | | | | | |
-|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | | | | | | |
+|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | |`rocblas_compute_type_f32`|5.7.0| | | |
 |`CUBLAS_COMPUTE_32F_FAST_16BF`|11.0| | | | | | | | | | | | |
 |`CUBLAS_COMPUTE_32F_FAST_16F`|11.0| | | | | | | | | | | | |
 |`CUBLAS_COMPUTE_32F_FAST_TF32`|11.0| | | | | | | | | | | | |
@@ -17,7 +17,7 @@
 |`CUBLAS_COMPUTE_32I_PEDANTIC`|11.0| | | | | | | | | | | | |
 |`CUBLAS_COMPUTE_64F`|11.0| | | | | | | | | | | | |
 |`CUBLAS_COMPUTE_64F_PEDANTIC`|11.0| | | | | | | | | | | | |
-|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | | | | | | |
+|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | |`rocblas_default_math`|5.7.0| | | |
 |`CUBLAS_DIAG_NON_UNIT`| | | |`HIPBLAS_DIAG_NON_UNIT`|1.8.2| | | |`rocblas_diagonal_non_unit`|1.5.0| | | |
 |`CUBLAS_DIAG_UNIT`| | | |`HIPBLAS_DIAG_UNIT`|1.8.2| | | |`rocblas_diagonal_unit`|1.5.0| | | |
 |`CUBLAS_FILL_MODE_FULL`|10.1| | |`HIPBLAS_FILL_MODE_FULL`|1.8.2| | | |`rocblas_fill_full`|1.5.0| | | |
@@ -89,16 +89,16 @@
 |`CUBLAS_STATUS_NOT_SUPPORTED`| | | |`HIPBLAS_STATUS_NOT_SUPPORTED`|1.8.2| | | |`rocblas_status_perf_degraded`|3.5.0| | | |
 |`CUBLAS_STATUS_SUCCESS`| | | |`HIPBLAS_STATUS_SUCCESS`|1.8.2| | | |`rocblas_status_success`|1.5.0| | | |
 |`CUBLAS_TENSOR_OP_MATH`|9.0|11.0| | | | | | | | | | | |
-|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | | | | | | |
+|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | |`rocblas_xf32_xdl_math_op`|5.7.0| | | |
 |`cublasAtomicsMode_t`| | | |`hipblasAtomicsMode_t`|3.10.0| | | |`rocblas_atomics_mode`|3.8.0| | | |
-|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | | | | | | |
+|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | |`rocblas_computetype`|5.7.0| | | |
 |`cublasContext`| | | | | | | | |`_rocblas_handle`|1.5.0| | | |
 |`cublasDataType_t`|7.5| | |`hipblasDatatype_t`|1.8.2| | | |`rocblas_datatype`|1.8.2| | | |
 |`cublasDiagType_t`| | | |`hipblasDiagType_t`|1.8.2| | | |`rocblas_diagonal`|1.5.0| | | |
 |`cublasFillMode_t`| | | |`hipblasFillMode_t`|1.8.2| | | |`rocblas_fill`|1.5.0| | | |
 |`cublasGemmAlgo_t`|8.0| | |`hipblasGemmAlgo_t`|1.8.2| | | |`rocblas_gemm_algo`|1.8.2| | | |
 |`cublasHandle_t`| | | |`hipblasHandle_t`|3.0.0| | | |`rocblas_handle`|1.5.0| | | |
-|`cublasMath_t`|9.0| | | | | | | | | | | | |
+|`cublasMath_t`|9.0| | | | | | | |`rocblas_math_mode`|5.7.0| | | |
 |`cublasOperation_t`| | | |`hipblasOperation_t`|1.8.2| | | |`rocblas_operation`|1.5.0| | | |
 |`cublasPointerMode_t`| | | |`hipblasPointerMode_t`|1.8.2| | | |`rocblas_pointer_mode`|1.6.0| | | |
 |`cublasSideMode_t`| | | |`hipblasSideMode_t`|1.8.2| | | |`rocblas_side`|1.5.0| | | |
@@ -156,7 +156,7 @@
 |`cublasGetCudartVersion`|10.1| | | | | | | | | | | | |
 |`cublasGetError`| | | | | | | | | | | | | |
 |`cublasGetLoggerCallback`|9.2| | | | | | | | | | | | |
-|`cublasGetMathMode`|9.0| | | | | | | | | | | | |
+|`cublasGetMathMode`|9.0| | | | | | | |`rocblas_get_math_mode`|5.7.0| | | |
 |`cublasGetMatrix`| | | |`hipblasGetMatrix`|1.8.2| | | |`rocblas_get_matrix`|1.6.0| | | |
 |`cublasGetMatrixAsync`| | | |`hipblasGetMatrixAsync`|3.7.0| | | |`rocblas_get_matrix_async`|3.5.0| | | |
 |`cublasGetMatrixAsync_64`|12.0| | | | | | | | | | | | |
@@ -182,7 +182,7 @@
 |`cublasSetAtomicsMode`| | | |`hipblasSetAtomicsMode`|3.10.0| | | |`rocblas_set_atomics_mode`|3.8.0| | | |
 |`cublasSetKernelStream`| | | | | | | | | | | | | |
 |`cublasSetLoggerCallback`|9.2| | | | | | | | | | | | |
-|`cublasSetMathMode`| | | | | | | | | | | | | |
+|`cublasSetMathMode`|9.0| | | | | | | |`rocblas_set_math_mode`|5.7.0| | | |
 |`cublasSetMatrix`| | | |`hipblasSetMatrix`|1.8.2| | | |`rocblas_set_matrix`|1.6.0| | | |
 |`cublasSetMatrixAsync`| | | |`hipblasSetMatrixAsync`|3.7.0| | | |`rocblas_set_matrix_async`|3.5.0| | | |
 |`cublasSetMatrixAsync_64`|12.0| | | | | | | | | | | | |
diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md
index c1ab92b9..8d89c7ca 100644
--- a/docs/tables/CUBLAS_API_supported_by_ROC.md
+++ b/docs/tables/CUBLAS_API_supported_by_ROC.md
@@ -8,7 +8,7 @@
 |`CUBLAS_ATOMICS_NOT_ALLOWED`| | | |`rocblas_atomics_not_allowed`|3.8.0| | | |
 |`CUBLAS_COMPUTE_16F`|11.0| | | | | | | |
 |`CUBLAS_COMPUTE_16F_PEDANTIC`|11.0| | | | | | | |
-|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | |
+|`CUBLAS_COMPUTE_32F`|11.0| | |`rocblas_compute_type_f32`|5.7.0| | | |
 |`CUBLAS_COMPUTE_32F_FAST_16BF`|11.0| | | | | | | |
 |`CUBLAS_COMPUTE_32F_FAST_16F`|11.0| | | | | | | |
 |`CUBLAS_COMPUTE_32F_FAST_TF32`|11.0| | | | | | | |
@@ -17,7 +17,7 @@
 |`CUBLAS_COMPUTE_32I_PEDANTIC`|11.0| | | | | | | |
 |`CUBLAS_COMPUTE_64F`|11.0| | | | | | | |
 |`CUBLAS_COMPUTE_64F_PEDANTIC`|11.0| | | | | | | |
-|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | |
+|`CUBLAS_DEFAULT_MATH`|9.0| | |`rocblas_default_math`|5.7.0| | | |
 |`CUBLAS_DIAG_NON_UNIT`| | | |`rocblas_diagonal_non_unit`|1.5.0| | | |
 |`CUBLAS_DIAG_UNIT`| | | |`rocblas_diagonal_unit`|1.5.0| | | |
 |`CUBLAS_FILL_MODE_FULL`|10.1| | |`rocblas_fill_full`|1.5.0| | | |
@@ -89,16 +89,16 @@
 |`CUBLAS_STATUS_NOT_SUPPORTED`| | | |`rocblas_status_perf_degraded`|3.5.0| | | |
 |`CUBLAS_STATUS_SUCCESS`| | | |`rocblas_status_success`|1.5.0| | | |
 |`CUBLAS_TENSOR_OP_MATH`|9.0|11.0| | | | | | |
-|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | |
+|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | |`rocblas_xf32_xdl_math_op`|5.7.0| | | |
 |`cublasAtomicsMode_t`| | | |`rocblas_atomics_mode`|3.8.0| | | |
-|`cublasComputeType_t`|11.0| | | | | | | |
+|`cublasComputeType_t`|11.0| | |`rocblas_computetype`|5.7.0| | | |
 |`cublasContext`| | | |`_rocblas_handle`|1.5.0| | | |
 |`cublasDataType_t`|7.5| | |`rocblas_datatype`|1.8.2| | | |
 |`cublasDiagType_t`| | | |`rocblas_diagonal`|1.5.0| | | |
 |`cublasFillMode_t`| | | |`rocblas_fill`|1.5.0| | | |
 |`cublasGemmAlgo_t`|8.0| | |`rocblas_gemm_algo`|1.8.2| | | |
 |`cublasHandle_t`| | | |`rocblas_handle`|1.5.0| | | |
-|`cublasMath_t`|9.0| | | | | | | |
+|`cublasMath_t`|9.0| | |`rocblas_math_mode`|5.7.0| | | |
 |`cublasOperation_t`| | | |`rocblas_operation`|1.5.0| | | |
 |`cublasPointerMode_t`| | | |`rocblas_pointer_mode`|1.6.0| | | |
 |`cublasSideMode_t`| | | |`rocblas_side`|1.5.0| | | |
@@ -156,7 +156,7 @@
 |`cublasGetCudartVersion`|10.1| | | | | | | |
 |`cublasGetError`| | | | | | | | |
 |`cublasGetLoggerCallback`|9.2| | | | | | | |
-|`cublasGetMathMode`|9.0| | | | | | | |
+|`cublasGetMathMode`|9.0| | |`rocblas_get_math_mode`|5.7.0| | | |
 |`cublasGetMatrix`| | | |`rocblas_get_matrix`|1.6.0| | | |
 |`cublasGetMatrixAsync`| | | |`rocblas_get_matrix_async`|3.5.0| | | |
 |`cublasGetMatrixAsync_64`|12.0| | | | | | | |
@@ -182,7 +182,7 @@
 |`cublasSetAtomicsMode`| | | |`rocblas_set_atomics_mode`|3.8.0| | | |
 |`cublasSetKernelStream`| | | | | | | | |
 |`cublasSetLoggerCallback`|9.2| | | | | | | |
-|`cublasSetMathMode`| | | | | | | | |
+|`cublasSetMathMode`|9.0| | |`rocblas_set_math_mode`|5.7.0| | | |
 |`cublasSetMatrix`| | | |`rocblas_set_matrix`|1.6.0| | | |
 |`cublasSetMatrixAsync`| | | |`rocblas_set_matrix_async`|3.5.0| | | |
 |`cublasSetMatrixAsync_64`|12.0| | | | | | | |
diff --git a/docs/tables/CUDA_Device_API_supported_by_HIP.md b/docs/tables/CUDA_Device_API_supported_by_HIP.md
index a44ed3a9..a38bc6b4 100644
--- a/docs/tables/CUDA_Device_API_supported_by_HIP.md
+++ b/docs/tables/CUDA_Device_API_supported_by_HIP.md
@@ -12,6 +12,7 @@
 |`__ballot`| | | |`__ballot`|1.6.0| | | |
 |`__bfloat1622float2`|11.0| | | | | | | |
 |`__bfloat162bfloat162`|11.0| | | | | | | |
+|`__bfloat162char_rz`|12.2| | | | | | | |
 |`__bfloat162float`|11.0| | | | | | | |
 |`__bfloat162int_rd`|11.0| | | | | | | |
 |`__bfloat162int_rn`|11.0| | | | | | | |
@@ -25,6 +26,7 @@
 |`__bfloat162short_rn`|11.0| | | | | | | |
 |`__bfloat162short_ru`|11.0| | | | | | | |
 |`__bfloat162short_rz`|11.0| | | | | | | |
+|`__bfloat162uchar_rz`|12.2| | | | | | | |
 |`__bfloat162uint_rd`|11.0| | | | | | | |
 |`__bfloat162uint_rn`|11.0| | | | | | | |
 |`__bfloat162uint_ru`|11.0| | | | | | | |
@@ -111,6 +113,7 @@
 |`__finite`| | | | | | | | |
 |`__finitef`| | | | | | | | |
 |`__finitel`| | | | | | | | |
+|`__float22bfloat162_rn`|11.0| | | | | | | |
 |`__float22half2_rn`| | | |`__float22half2_rn`|1.6.0| | | |
 |`__float2bfloat16`|11.0| | | | | | | |
 |`__float2bfloat162_rn`|11.0| | | | | | | |
@@ -183,6 +186,7 @@
 |`__hadd_rn`|11.6| | | | | | | |
 |`__hadd_sat`| | | |`__hadd_sat`|1.6.0| | | |
 |`__half22float2`| | | |`__half22float2`|1.6.0| | | |
+|`__half2char_rz`|12.2| | | | | | | |
 |`__half2float`| | | |`__half2float`|1.6.0| | | |
 |`__half2half2`| | | |`__half2half2`|1.9.0| | | |
 |`__half2int_rd`| | | |`__half2int_rd`|1.6.0| | | |
@@ -197,6 +201,7 @@
 |`__half2short_rn`| | | |`__half2short_rn`|1.6.0| | | |
 |`__half2short_ru`| | | |`__half2short_ru`|1.6.0| | | |
 |`__half2short_rz`| | | |`__half2short_rz`|1.6.0| | | |
+|`__half2uchar_rz`|12.2| | | | | | | |
 |`__half2uint_rd`| | | |`__half2uint_rd`|1.6.0| | | |
 |`__half2uint_rn`| | | |`__half2uint_rn`|1.6.0| | | |
 |`__half2uint_ru`| | | |`__half2uint_ru`|1.6.0| | | |
@@ -707,6 +712,8 @@
 |`lrintf`| | | |`lrintf`|1.6.0| | | |
 |`lround`| | | |`lround`|1.6.0| | | |
 |`lroundf`| | | |`lroundf`|1.6.0| | | |
+|`make_bfloat162`|12.2| | | | | | | |
+|`make_half2`|12.2| | | | | | | |
 |`max`| | | |`max`|1.6.0| | | |
 |`min`| | | |`min`|1.6.0| | | |
 |`modf`| | | |`modf`|1.9.0| | | |
diff --git a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md
index b43fa72e..dca42564 100644
--- a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md
+++ b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md
@@ -479,6 +479,8 @@
 |`CU_DEVICE_ATTRIBUTE_WARP_SIZE`| | | |`hipDeviceAttributeWarpSize`|1.6.0| | | |
 |`CU_DEVICE_CPU`|8.0| | |`hipCpuDeviceId`|3.7.0| | | |
 |`CU_DEVICE_INVALID`|8.0| | |`hipInvalidDeviceId`|3.7.0| | | |
+|`CU_DEVICE_NUMA_CONFIG_NONE`|12.2| | | | | | | |
+|`CU_DEVICE_NUMA_CONFIG_NUMA_NODE`|12.2| | | | | | | |
 |`CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED`|10.1|10.1| |`hipDevP2PAttrHipArrayAccessSupported`|3.8.0| | | |
 |`CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED`|8.0| | |`hipDevP2PAttrAccessSupported`|3.8.0| | | |
 |`CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED`|9.2|10.0|10.1|`hipDevP2PAttrHipArrayAccessSupported`|3.8.0| | | |
@@ -1588,9 +1590,9 @@
 |`cuMemsetD8`| | | |`hipMemsetD8`|1.6.0| | | |
 |`cuMemsetD8Async`| | | |`hipMemsetD8Async`|3.0.0| | | |
 |`cuMemsetD8_v2`| | | |`hipMemsetD8`|1.6.0| | | |
-|`cuMipmappedArrayCreate`| | | |`hipMipmappedArrayCreate`|3.5.0| | | |
-|`cuMipmappedArrayDestroy`| | | |`hipMipmappedArrayDestroy`|3.5.0| | | |
-|`cuMipmappedArrayGetLevel`| | | |`hipMipmappedArrayGetLevel`|3.5.0| | | |
+|`cuMipmappedArrayCreate`| | | |`hipMipmappedArrayCreate`|3.5.0|5.7.0| | |
+|`cuMipmappedArrayDestroy`| | | |`hipMipmappedArrayDestroy`|3.5.0|5.7.0| | |
+|`cuMipmappedArrayGetLevel`| | | |`hipMipmappedArrayGetLevel`|3.5.0|5.7.0| | |
 |`cuMipmappedArrayGetMemoryRequirements`|11.6| | | | | | | |
 
 ## **14. Virtual Memory Management**
@@ -1647,7 +1649,9 @@
 |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
 |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
 |`cuMemAdvise`|8.0| | |`hipMemAdvise`|3.7.0| | | |
+|`cuMemAdvise_v2`|12.2| | | | | | | |
 |`cuMemPrefetchAsync`|8.0| | |`hipMemPrefetchAsync`|3.7.0| | | |
+|`cuMemPrefetchAsync_v2`|12.2| | | | | | | |
 |`cuMemRangeGetAttribute`|8.0| | |`hipMemRangeGetAttribute`|3.7.0| | | |
 |`cuMemRangeGetAttributes`|8.0| | |`hipMemRangeGetAttributes`|3.7.0| | | |
 |`cuPointerGetAttribute`| | | |`hipPointerGetAttribute`|5.0.0| | | |
@@ -1776,6 +1780,7 @@
 |`cuGraphAddMemFreeNode`|11.4| | |`hipGraphAddMemFreeNode`|5.5.0| | | |
 |`cuGraphAddMemcpyNode`|10.0| | | | | | | |
 |`cuGraphAddMemsetNode`|10.0| | | | | | | |
+|`cuGraphAddNode`|12.2| | | | | | | |
 |`cuGraphBatchMemOpNodeGetParams`|11.7| | | | | | | |
 |`cuGraphBatchMemOpNodeSetParams`|11.7| | | | | | | |
 |`cuGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | |
@@ -1800,6 +1805,7 @@
 |`cuGraphExecKernelNodeSetParams`|10.1| | |`hipGraphExecKernelNodeSetParams`|4.5.0| | | |
 |`cuGraphExecMemcpyNodeSetParams`|10.2| | | | | | | |
 |`cuGraphExecMemsetNodeSetParams`|10.2| | | | | | | |
+|`cuGraphExecNodeSetParams`|12.2| | | | | | | |
 |`cuGraphExecUpdate`|10.2| | |`hipGraphExecUpdate`|5.0.0| | | |
 |`cuGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | | | | | |
 |`cuGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | | | | | |
@@ -1832,6 +1838,7 @@
 |`cuGraphNodeGetEnabled`|11.6| | |`hipGraphNodeGetEnabled`|5.5.0| | | |
 |`cuGraphNodeGetType`|10.0| | |`hipGraphNodeGetType`|5.0.0| | | |
 |`cuGraphNodeSetEnabled`|11.6| | |`hipGraphNodeSetEnabled`|5.5.0| | | |
+|`cuGraphNodeSetParams`|12.2| | | | | | | |
 |`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | |
 |`cuGraphRemoveDependencies`|10.0| | |`hipGraphRemoveDependencies`|5.0.0| | | |
 |`cuGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | |
diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
index eb93726b..4f8a4eb3 100644
--- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
+++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
@@ -171,8 +171,10 @@
 |`cudaMallocMipmappedArray`| | | |`hipMallocMipmappedArray`|3.5.0| | | |
 |`cudaMallocPitch`| | | |`hipMallocPitch`|1.6.0| | | |
 |`cudaMemAdvise`|8.0| | |`hipMemAdvise`|3.7.0| | | |
+|`cudaMemAdvise_v2`|12.2| | | | | | | |
 |`cudaMemGetInfo`| | | |`hipMemGetInfo`|1.6.0| | | |
 |`cudaMemPrefetchAsync`|8.0| | |`hipMemPrefetchAsync`|3.7.0| | | |
+|`cudaMemPrefetchAsync_v2`|12.2| | | | | | | |
 |`cudaMemRangeGetAttribute`|8.0| | |`hipMemRangeGetAttribute`|3.7.0| | | |
 |`cudaMemRangeGetAttributes`|8.0| | |`hipMemRangeGetAttributes`|3.7.0| | | |
 |`cudaMemcpy`| | | |`hipMemcpy`|1.5.0| | | |
@@ -424,6 +426,7 @@
 |`cudaGraphAddMemcpyNodeFromSymbol`|11.1| | |`hipGraphAddMemcpyNodeFromSymbol`|5.0.0| | | |
 |`cudaGraphAddMemcpyNodeToSymbol`|11.1| | |`hipGraphAddMemcpyNodeToSymbol`|5.0.0| | | |
 |`cudaGraphAddMemsetNode`|10.0| | |`hipGraphAddMemsetNode`|4.3.0| | | |
+|`cudaGraphAddNode`|12.2| | | | | | | |
 |`cudaGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | |
 |`cudaGraphClone`|10.0| | |`hipGraphClone`|5.0.0| | | |
 |`cudaGraphCreate`|10.0| | |`hipGraphCreate`|4.3.0| | | |
@@ -448,6 +451,7 @@
 |`cudaGraphExecMemcpyNodeSetParamsFromSymbol`|11.1| | |`hipGraphExecMemcpyNodeSetParamsFromSymbol`|5.0.0| | | |
 |`cudaGraphExecMemcpyNodeSetParamsToSymbol`|11.1| | |`hipGraphExecMemcpyNodeSetParamsToSymbol`|5.0.0| | | |
 |`cudaGraphExecMemsetNodeSetParams`|11.0| | |`hipGraphExecMemsetNodeSetParams`|5.0.0| | | |
+|`cudaGraphExecNodeSetParams`|12.2| | | | | | | |
 |`cudaGraphExecUpdate`|11.0| | |`hipGraphExecUpdate`|5.0.0| | | |
 |`cudaGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | | | | | |
 |`cudaGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | | | | | |
@@ -482,6 +486,7 @@
 |`cudaGraphNodeGetEnabled`|11.6| | |`hipGraphNodeGetEnabled`|5.5.0| | | |
 |`cudaGraphNodeGetType`|11.0| | |`hipGraphNodeGetType`|5.0.0| | | |
 |`cudaGraphNodeSetEnabled`|11.6| | |`hipGraphNodeSetEnabled`|5.5.0| | | |
+|`cudaGraphNodeSetParams`|12.2| | | | | | | |
 |`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | |
 |`cudaGraphRemoveDependencies`|11.0| | |`hipGraphRemoveDependencies`|5.0.0| | | |
 |`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | |
@@ -595,6 +600,7 @@
 |`cudaChannelFormatKindUnsignedNormalized8X1`|11.5| | | | | | | |
 |`cudaChannelFormatKindUnsignedNormalized8X2`|11.5| | | | | | | |
 |`cudaChannelFormatKindUnsignedNormalized8X4`|11.5| | | | | | | |
+|`cudaChildGraphNodeParams`|12.2| | | | | | | |
 |`cudaClusterSchedulingPolicy`|11.8| | | | | | | |
 |`cudaClusterSchedulingPolicyDefault`|11.8| | | | | | | |
 |`cudaClusterSchedulingPolicyLoadBalancing`|11.8| | | | | | | |
@@ -657,6 +663,7 @@
 |`cudaDevAttrGlobalMemoryBusWidth`| | | |`hipDeviceAttributeMemoryBusWidth`|1.6.0| | | |
 |`cudaDevAttrGpuOverlap`| | | |`hipDeviceAttributeAsyncEngineCount`|4.3.0| | | |
 |`cudaDevAttrHostNativeAtomicSupported`|8.0| | |`hipDeviceAttributeHostNativeAtomicSupported`|4.3.0| | | |
+|`cudaDevAttrHostNumaId`|12.2| | | | | | | |
 |`cudaDevAttrHostRegisterReadOnlySupported`|11.1| | | | | | | |
 |`cudaDevAttrHostRegisterSupported`|9.2| | | | | | | |
 |`cudaDevAttrIntegrated`| | | |`hipDeviceAttributeIntegrated`|1.9.0| | | |
@@ -731,6 +738,8 @@
 |`cudaDevAttrMemoryPoolsSupported`|11.2| | |`hipDeviceAttributeMemoryPoolsSupported`|5.2.0| | | |
 |`cudaDevAttrMultiGpuBoardGroupID`| | | |`hipDeviceAttributeMultiGpuBoardGroupID`|5.0.0| | | |
 |`cudaDevAttrMultiProcessorCount`| | | |`hipDeviceAttributeMultiprocessorCount`|1.6.0| | | |
+|`cudaDevAttrNumaConfig`|12.2| | | | | | | |
+|`cudaDevAttrNumaId`|12.2| | | | | | | |
 |`cudaDevAttrPageableMemoryAccess`|8.0| | |`hipDeviceAttributePageableMemoryAccess`|3.10.0| | | |
 |`cudaDevAttrPageableMemoryAccessUsesHostPageTables`|9.2| | |`hipDeviceAttributePageableMemoryAccessUsesHostPageTables`|3.10.0| | | |
 |`cudaDevAttrPciBusId`| | | |`hipDeviceAttributePciBusId`|1.6.0| | | |
@@ -767,6 +776,9 @@
 |`cudaDeviceLmemResizeToMax`| | | |`hipDeviceLmemResizeToMax`|1.6.0| | | |
 |`cudaDeviceMapHost`| | | |`hipDeviceMapHost`|1.6.0| | | |
 |`cudaDeviceMask`| | | | | | | | |
+|`cudaDeviceNumaConfig`|12.2| | | | | | | |
+|`cudaDeviceNumaConfigNone`|12.2| | | | | | | |
+|`cudaDeviceNumaConfigNumaNode`|12.2| | | | | | | |
 |`cudaDeviceP2PAttr`|8.0| | |`hipDeviceP2PAttr`|3.8.0| | | |
 |`cudaDeviceProp`| | | |`hipDeviceProp_t`|1.6.0| | | |
 |`cudaDevicePropDontCare`| | |12.0| | | | | |
@@ -1002,8 +1014,10 @@
 |`cudaEventInterprocess`| | | |`hipEventInterprocess`|1.6.0| | | |
 |`cudaEventRecordDefault`|11.1| | | | | | | |
 |`cudaEventRecordExternal`|11.1| | | | | | | |
+|`cudaEventRecordNodeParams`|12.2| | | | | | | |
 |`cudaEventWaitDefault`|11.1| | | | | | | |
 |`cudaEventWaitExternal`| | | | | | | | |
+|`cudaEventWaitNodeParams`|12.2| | | | | | | |
 |`cudaEvent_t`| | | |`hipEvent_t`|1.6.0| | | |
 |`cudaExtent`| | | |`hipExtent`|1.7.0| | | |
 |`cudaExternalMemoryBufferDesc`|10.0| | |`hipExternalMemoryBufferDesc`|4.3.0| | | |
@@ -1033,10 +1047,12 @@
 |`cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`|11.2| | | | | | | |
 |`cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32`|11.2| | | | | | | |
 |`cudaExternalSemaphoreSignalNodeParams`|11.2| | | | | | | |
+|`cudaExternalSemaphoreSignalNodeParamsV2`|12.2| | | | | | | |
 |`cudaExternalSemaphoreSignalParams`|10.0| | |`hipExternalSemaphoreSignalParams`|4.4.0| | | |
 |`cudaExternalSemaphoreSignalParams_v1`|11.2| | |`hipExternalSemaphoreSignalParams`|4.4.0| | | |
 |`cudaExternalSemaphoreSignalSkipNvSciBufMemSync`|10.2| | | | | | | |
 |`cudaExternalSemaphoreWaitNodeParams`|11.2| | | | | | | |
+|`cudaExternalSemaphoreWaitNodeParamsV2`|12.2| | | | | | | |
 |`cudaExternalSemaphoreWaitParams`|10.0| | |`hipExternalSemaphoreWaitParams`|4.4.0| | | |
 |`cudaExternalSemaphoreWaitParams_v1`|11.2| | |`hipExternalSemaphoreWaitParams`|4.4.0| | | |
 |`cudaExternalSemaphoreWaitSkipNvSciBufMemSync`|10.2| | | | | | | |
@@ -1125,6 +1141,7 @@
 |`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | |
 |`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | |
 |`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | |
+|`cudaGraphNodeParams`|12.2| | | | | | | |
 |`cudaGraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | |
 |`cudaGraphNodeTypeCount`|10.0| | |`hipGraphNodeTypeCount`|4.3.0| | | |
 |`cudaGraphNodeTypeEmpty`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | |
@@ -1167,6 +1184,7 @@
 |`cudaHostAllocWriteCombined`| | | |`hipHostMallocWriteCombined`|1.6.0| | | |
 |`cudaHostFn_t`|10.0| | |`hipHostFn_t`|4.3.0| | | |
 |`cudaHostNodeParams`|10.0| | |`hipHostNodeParams`|4.3.0| | | |
+|`cudaHostNodeParamsV2`|12.2| | | | | | | |
 |`cudaHostRegisterDefault`| | | |`hipHostRegisterDefault`|1.6.0| | | |
 |`cudaHostRegisterIoMemory`|7.5| | |`hipHostRegisterIoMemory`|1.6.0| | | |
 |`cudaHostRegisterMapped`| | | |`hipHostRegisterMapped`|1.6.0| | | |
@@ -1189,6 +1207,7 @@
 |`cudaKernelNodeAttributeMemSyncDomainMap`|12.0| | | | | | | |
 |`cudaKernelNodeAttributePriority`|11.7| | | | | | | |
 |`cudaKernelNodeParams`|10.0| | |`hipKernelNodeParams`|4.3.0| | | |
+|`cudaKernelNodeParamsV2`|12.2| | | | | | | |
 |`cudaKernel_t`|12.1| | | | | | | |
 |`cudaKeyValuePair`| | |12.0| | | | | |
 |`cudaLaunchAttribute`|11.8| | | | | | | |
@@ -1234,6 +1253,7 @@
 |`cudaMemAdviseUnsetPreferredLocation`|8.0| | |`hipMemAdviseUnsetPreferredLocation`|3.7.0| | | |
 |`cudaMemAdviseUnsetReadMostly`|8.0| | |`hipMemAdviseUnsetReadMostly`|3.7.0| | | |
 |`cudaMemAllocNodeParams`|11.4| | |`hipMemAllocNodeParams`|5.5.0| | | |
+|`cudaMemAllocNodeParamsV2`|12.2| | | | | | | |
 |`cudaMemAllocationHandleType`|11.2| | |`hipMemAllocationHandleType`|5.2.0| | | |
 |`cudaMemAllocationType`|11.2| | |`hipMemAllocationType`|5.2.0| | | |
 |`cudaMemAllocationTypeInvalid`|11.2| | |`hipMemAllocationTypeInvalid`|5.2.0| | | |
@@ -1242,6 +1262,7 @@
 |`cudaMemAttachGlobal`| | | |`hipMemAttachGlobal`|2.5.0| | | |
 |`cudaMemAttachHost`| | | |`hipMemAttachHost`|2.5.0| | | |
 |`cudaMemAttachSingle`| | | |`hipMemAttachSingle`|3.7.0| | | |
+|`cudaMemFreeNodeParams`|12.2| | | | | | | |
 |`cudaMemHandleTypeNone`|11.2| | |`hipMemHandleTypeNone`|5.2.0| | | |
 |`cudaMemHandleTypePosixFileDescriptor`|11.2| | |`hipMemHandleTypePosixFileDescriptor`|5.2.0| | | |
 |`cudaMemHandleTypeWin32`|11.2| | |`hipMemHandleTypeWin32`|5.2.0| | | |
@@ -1249,6 +1270,9 @@
 |`cudaMemLocation`|11.2| | |`hipMemLocation`|5.2.0| | | |
 |`cudaMemLocationType`|11.2| | |`hipMemLocationType`|5.2.0| | | |
 |`cudaMemLocationTypeDevice`|11.2| | |`hipMemLocationTypeDevice`|5.2.0| | | |
+|`cudaMemLocationTypeHost`|12.2| | | | | | | |
+|`cudaMemLocationTypeHostNuma`|12.2| | | | | | | |
+|`cudaMemLocationTypeHostNumaCurrent`|12.2| | | | | | | |
 |`cudaMemLocationTypeInvalid`|11.2| | |`hipMemLocationTypeInvalid`|5.2.0| | | |
 |`cudaMemPoolAttr`|11.2| | |`hipMemPoolAttr`|5.2.0| | | |
 |`cudaMemPoolAttrReleaseThreshold`|11.2| | |`hipMemPoolAttrReleaseThreshold`|5.2.0| | | |
@@ -1265,7 +1289,11 @@
 |`cudaMemRangeAttribute`|8.0| | |`hipMemRangeAttribute`|3.7.0| | | |
 |`cudaMemRangeAttributeAccessedBy`|8.0| | |`hipMemRangeAttributeAccessedBy`|3.7.0| | | |
 |`cudaMemRangeAttributeLastPrefetchLocation`|8.0| | |`hipMemRangeAttributeLastPrefetchLocation`|3.7.0| | | |
+|`cudaMemRangeAttributeLastPrefetchLocationId`|12.2| | | | | | | |
+|`cudaMemRangeAttributeLastPrefetchLocationType`|12.2| | | | | | | |
 |`cudaMemRangeAttributePreferredLocation`|8.0| | |`hipMemRangeAttributePreferredLocation`|3.7.0| | | |
+|`cudaMemRangeAttributePreferredLocationId`|12.2| | | | | | | |
+|`cudaMemRangeAttributePreferredLocationType`|12.2| | | | | | | |
 |`cudaMemRangeAttributeReadMostly`|8.0| | |`hipMemRangeAttributeReadMostly`|3.7.0| | | |
 |`cudaMemcpy3DParms`| | | |`hipMemcpy3DParms`|1.7.0| | | |
 |`cudaMemcpy3DPeerParms`| | | | | | | | |
@@ -1275,6 +1303,7 @@
 |`cudaMemcpyHostToDevice`| | | |`hipMemcpyHostToDevice`|1.5.0| | | |
 |`cudaMemcpyHostToHost`| | | |`hipMemcpyHostToHost`|1.5.0| | | |
 |`cudaMemcpyKind`| | | |`hipMemcpyKind`|1.5.0| | | |
+|`cudaMemcpyNodeParams`|12.2| | | | | | | |
 |`cudaMemoryAdvise`|8.0| | |`hipMemoryAdvise`|3.7.0| | | |
 |`cudaMemoryType`| | | |`hipMemoryType`|1.6.0| | | |
 |`cudaMemoryTypeDevice`| | | |`hipMemoryTypeDevice`|1.6.0| | | |
@@ -1282,6 +1311,7 @@
 |`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | | |
 |`cudaMemoryTypeUnregistered`| | | | | | | | |
 |`cudaMemsetParams`|10.0| | |`hipMemsetParams`|4.3.0| | | |
+|`cudaMemsetParamsV2`|12.2| | | | | | | |
 |`cudaMipmappedArray`| | | |`hipMipmappedArray`|1.7.0| | | |
 |`cudaMipmappedArray_const_t`| | | |`hipMipmappedArray_const_t`|1.6.0| | | |
 |`cudaMipmappedArray_t`| | | |`hipMipmappedArray_t`|1.7.0| | | |
@@ -1418,7 +1448,7 @@
 |`cudaBindTexture`| |11.0|12.0|`hipBindTexture`|1.6.0|3.8.0| | |
 |`cudaBindTexture2D`| |11.0|12.0|`hipBindTexture2D`|1.7.0|3.8.0| | |
 |`cudaBindTextureToArray`| |11.0|12.0|`hipBindTextureToArray`|1.6.0|3.8.0| | |
-|`cudaBindTextureToMipmappedArray`| |11.0|12.0|`hipBindTextureToMipmappedArray`|1.7.0| | | |
+|`cudaBindTextureToMipmappedArray`| |11.0|12.0|`hipBindTextureToMipmappedArray`|1.7.0|5.7.0| | |
 |`cudaGetTextureAlignmentOffset`| |11.0|12.0|`hipGetTextureAlignmentOffset`|1.9.0|3.8.0| | |
 |`cudaGetTextureReference`| |11.0|12.0|`hipGetTextureReference`|1.7.0|5.3.0| | |
 |`cudaUnbindTexture`| |11.0|12.0|`hipUnbindTexture`|1.6.0|3.8.0| | |
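
The `_v2` unified-memory entries above are recorded without a HIP column because CUDA 12.2 reworked their signatures around `cudaMemLocation`. A minimal sketch of the two runtime forms (signatures as of CUDA 12.2, error handling omitted); only the first call has a HIP analogue, `hipMemPrefetchAsync`:

    #include <cuda_runtime.h>

    // Classic prefetch (maps to hipMemPrefetchAsync) versus the CUDA 12.2
    // _v2 form, whose cudaMemLocation parameter has no HIP counterpart yet.
    void prefetchBoth(float *managed, size_t bytes, int device, cudaStream_t s) {
      cudaMemPrefetchAsync(managed, bytes, device, s);

      cudaMemLocation loc{};                 // CUDA 12.2 location descriptor
      loc.type = cudaMemLocationTypeDevice;
      loc.id   = device;
      cudaMemPrefetchAsync_v2(managed, bytes, loc, 0 /*flags*/, s);
    }
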
diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP.md b/docs/tables/CUSPARSE_API_supported_by_HIP.md
index 496269da..089f9327 100644
--- a/docs/tables/CUSPARSE_API_supported_by_HIP.md
+++ b/docs/tables/CUSPARSE_API_supported_by_HIP.md
@@ -183,7 +183,7 @@
 |`cusparseSpVecDescr_t`|10.2| | |`hipsparseSpVecDescr_t`|4.1.0| | | |
 |`cusparseSparseToDenseAlg_t`|11.1| | |`hipsparseSparseToDenseAlg_t`|4.2.0| | | |
 |`cusparseStatus_t`| | | |`hipsparseStatus_t`|1.9.2| | | |
-|`pruneInfo`|9.0| | | | | | | |
+|`pruneInfo`|9.0| | |`pruneInfo`|3.9.0| | | |
 |`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | |
 
 ## **5. CUSPARSE Management Function Reference**
diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md
index bbc87ed2..07efeb46 100644
--- a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md
+++ b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md
@@ -183,8 +183,8 @@
 |`cusparseSpVecDescr_t`|10.2| | |`hipsparseSpVecDescr_t`|4.1.0| | | |`rocsparse_spvec_descr`|4.1.0| | | |
 |`cusparseSparseToDenseAlg_t`|11.1| | |`hipsparseSparseToDenseAlg_t`|4.2.0| | | |`rocsparse_sparse_to_dense_alg`|4.1.0| | | |
 |`cusparseStatus_t`| | | |`hipsparseStatus_t`|1.9.2| | | |`rocsparse_status`|1.9.0| | | |
-|`pruneInfo`|9.0| | | | | | | | | | | | |
-|`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | | | | | | |
+|`pruneInfo`|9.0| | |`pruneInfo`|3.9.0| | | |`_rocsparse_mat_info`|1.9.0| | | |
+|`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | |`rocsparse_mat_info`|1.9.0| | | |
 
 ## **5. CUSPARSE Management Function Reference**
 
@@ -645,7 +645,7 @@
 |`cusparseCnnz`| | | |`hipsparseCnnz`|3.2.0| | | | | | | | |
 |`cusparseCnnz_compress`|8.0| | |`hipsparseCnnz_compress`|3.5.0| | | | | | | | |
 |`cusparseCreateCsru2csrInfo`| | | |`hipsparseCreateCsru2csrInfo`|4.2.0| | | | | | | | |
-|`cusparseCreateIdentityPermutation`| | | |`hipsparseCreateIdentityPermutation`|1.9.2| | | | | | | | |
+|`cusparseCreateIdentityPermutation`| | | |`hipsparseCreateIdentityPermutation`|1.9.2| | | |`rocsparse_create_identity_permutation`|1.9.0| | | |
 |`cusparseCsr2cscEx`|8.0|10.2|11.0| | | | | | | | | | |
 |`cusparseCsr2cscEx2`|10.1| | |`hipsparseCsr2cscEx2`|5.4.0| | | | | | | | |
 |`cusparseCsr2cscEx2_bufferSize`|10.1| | |`hipsparseCsr2cscEx2_bufferSize`|5.4.0| | | | | | | | |
@@ -680,7 +680,7 @@
 |`cusparseDnnz`| | | |`hipsparseDnnz`|3.2.0| | | | | | | | |
 |`cusparseDnnz_compress`|8.0| | |`hipsparseDnnz_compress`|3.5.0| | | | | | | | |
 |`cusparseDpruneCsr2csr`|9.0| | |`hipsparseDpruneCsr2csr`|3.9.0| | | | | | | | |
-|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`hipsparseDpruneCsr2csrByPercentage`|3.9.0| | | | | | | | |
+|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`hipsparseDpruneCsr2csrByPercentage`|3.9.0| | | |`rocsparse_dprune_csr2csr_by_percentage`|3.9.0| | | |
 |`cusparseDpruneCsr2csrByPercentage_bufferSizeExt`|9.0| | |`hipsparseDpruneCsr2csrByPercentage_bufferSizeExt`|3.9.0| | | | | | | | |
 |`cusparseDpruneCsr2csrNnz`|9.0| | |`hipsparseDpruneCsr2csrNnz`|3.9.0| | | | | | | | |
 |`cusparseDpruneCsr2csrNnzByPercentage`|9.0| | |`hipsparseDpruneCsr2csrNnzByPercentage`|3.9.0| | | | | | | | |
@@ -744,17 +744,17 @@
 |`cusparseSpruneDense2csrNnz`|9.0| | |`hipsparseSpruneDense2csrNnz`|3.9.0| | | | | | | | |
 |`cusparseSpruneDense2csrNnzByPercentage`|9.0| | |`hipsparseSpruneDense2csrNnzByPercentage`|3.9.0| | | | | | | | |
 |`cusparseSpruneDense2csr_bufferSizeExt`|9.0| | |`hipsparseSpruneDense2csr_bufferSizeExt`|3.9.0| | | | | | | | |
-|`cusparseXcoo2csr`| | | |`hipsparseXcoo2csr`|1.9.2| | | | | | | | |
+|`cusparseXcoo2csr`| | | |`hipsparseXcoo2csr`|1.9.2| | | |`rocsparse_coo2csr`|1.9.0| | | |
 |`cusparseXcoosortByColumn`| | | |`hipsparseXcoosortByColumn`|1.9.2| | | |`rocsparse_coosort_by_column`|1.9.0| | | |
 |`cusparseXcoosortByRow`| | | |`hipsparseXcoosortByRow`|1.9.2| | | |`rocsparse_coosort_by_row`|1.9.0| | | |
-|`cusparseXcoosort_bufferSizeExt`| | | |`hipsparseXcoosort_bufferSizeExt`|1.9.2| | | | | | | | |
-|`cusparseXcscsort`| | | |`hipsparseXcscsort`|2.10.0| | | | | | | | |
-|`cusparseXcscsort_bufferSizeExt`| | | |`hipsparseXcscsort_bufferSizeExt`|2.10.0| | | | | | | | |
+|`cusparseXcoosort_bufferSizeExt`| | | |`hipsparseXcoosort_bufferSizeExt`|1.9.2| | | |`rocsparse_coosort_buffer_size`|1.9.0| | | |
+|`cusparseXcscsort`| | | |`hipsparseXcscsort`|2.10.0| | | |`rocsparse_cscsort`|2.10.0| | | |
+|`cusparseXcscsort_bufferSizeExt`| | | |`hipsparseXcscsort_bufferSizeExt`|2.10.0| | | |`rocsparse_cscsort_buffer_size`|2.10.0| | | |
 |`cusparseXcsr2bsrNnz`| | | |`hipsparseXcsr2bsrNnz`|3.5.0| | | | | | | | |
 |`cusparseXcsr2coo`| | | |`hipsparseXcsr2coo`|1.9.2| | | | | | | | |
 |`cusparseXcsr2gebsrNnz`| | | |`hipsparseXcsr2gebsrNnz`|4.1.0| | | | | | | | |
-|`cusparseXcsrsort`| | | |`hipsparseXcsrsort`|1.9.2| | | | | | | | |
-|`cusparseXcsrsort_bufferSizeExt`| | | |`hipsparseXcsrsort_bufferSizeExt`|1.9.2| | | | | | | | |
+|`cusparseXcsrsort`| | | |`hipsparseXcsrsort`|1.9.2| | | |`rocsparse_csrsort`|1.9.0| | | |
+|`cusparseXcsrsort_bufferSizeExt`| | | |`hipsparseXcsrsort_bufferSizeExt`|1.9.2| | | |`rocsparse_csrsort_buffer_size`|1.9.0| | | |
 |`cusparseXgebsr2csr`| | | | | | | | | | | | | |
 |`cusparseXgebsr2gebsrNnz`| | | |`hipsparseXgebsr2gebsrNnz`|4.1.0| | | |`rocsparse_gebsr2gebsr_nnz`|4.1.0| | | |
 |`cusparseZbsr2csr`| | | |`hipsparseZbsr2csr`|3.5.0| | | |`rocsparse_zbsr2csr`|3.10.0| | | |
diff --git a/docs/tables/CUSPARSE_API_supported_by_ROC.md b/docs/tables/CUSPARSE_API_supported_by_ROC.md
index 2dabb8b1..aa15c8da 100644
--- a/docs/tables/CUSPARSE_API_supported_by_ROC.md
+++ b/docs/tables/CUSPARSE_API_supported_by_ROC.md
@@ -183,8 +183,8 @@
 |`cusparseSpVecDescr_t`|10.2| | |`rocsparse_spvec_descr`|4.1.0| | | |
 |`cusparseSparseToDenseAlg_t`|11.1| | |`rocsparse_sparse_to_dense_alg`|4.1.0| | | |
 |`cusparseStatus_t`| | | |`rocsparse_status`|1.9.0| | | |
-|`pruneInfo`|9.0| | | | | | | |
-|`pruneInfo_t`|9.0| | | | | | | |
+|`pruneInfo`|9.0| | |`_rocsparse_mat_info`|1.9.0| | | |
+|`pruneInfo_t`|9.0| | |`rocsparse_mat_info`|1.9.0| | | |
 
 ## **5. CUSPARSE Management Function Reference**
 
@@ -645,7 +645,7 @@
 |`cusparseCnnz`| | | | | | | | |
 |`cusparseCnnz_compress`|8.0| | | | | | | |
 |`cusparseCreateCsru2csrInfo`| | | | | | | | |
-|`cusparseCreateIdentityPermutation`| | | | | | | | |
+|`cusparseCreateIdentityPermutation`| | | |`rocsparse_create_identity_permutation`|1.9.0| | | |
 |`cusparseCsr2cscEx`|8.0|10.2|11.0| | | | | |
 |`cusparseCsr2cscEx2`|10.1| | | | | | | |
 |`cusparseCsr2cscEx2_bufferSize`|10.1| | | | | | | |
@@ -680,7 +680,7 @@
 |`cusparseDnnz`| | | | | | | | |
 |`cusparseDnnz_compress`|8.0| | | | | | | |
 |`cusparseDpruneCsr2csr`|9.0| | | | | | | |
-|`cusparseDpruneCsr2csrByPercentage`|9.0| | | | | | | |
+|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`rocsparse_dprune_csr2csr_by_percentage`|3.9.0| | | |
 |`cusparseDpruneCsr2csrByPercentage_bufferSizeExt`|9.0| | | | | | | |
 |`cusparseDpruneCsr2csrNnz`|9.0| | | | | | | |
 |`cusparseDpruneCsr2csrNnzByPercentage`|9.0| | | | | | | |
@@ -744,17 +744,17 @@
 |`cusparseSpruneDense2csrNnz`|9.0| | | | | | | |
 |`cusparseSpruneDense2csrNnzByPercentage`|9.0| | | | | | | |
 |`cusparseSpruneDense2csr_bufferSizeExt`|9.0| | | | | | | |
-|`cusparseXcoo2csr`| | | | | | | | |
+|`cusparseXcoo2csr`| | | |`rocsparse_coo2csr`|1.9.0| | | |
 |`cusparseXcoosortByColumn`| | | |`rocsparse_coosort_by_column`|1.9.0| | | |
 |`cusparseXcoosortByRow`| | | |`rocsparse_coosort_by_row`|1.9.0| | | |
-|`cusparseXcoosort_bufferSizeExt`| | | | | | | | |
-|`cusparseXcscsort`| | | | | | | | |
-|`cusparseXcscsort_bufferSizeExt`| | | | | | | | |
+|`cusparseXcoosort_bufferSizeExt`| | | |`rocsparse_coosort_buffer_size`|1.9.0| | | |
+|`cusparseXcscsort`| | | |`rocsparse_cscsort`|2.10.0| | | |
+|`cusparseXcscsort_bufferSizeExt`| | | |`rocsparse_cscsort_buffer_size`|2.10.0| | | |
 |`cusparseXcsr2bsrNnz`| | | | | | | | |
 |`cusparseXcsr2coo`| | | | | | | | |
 |`cusparseXcsr2gebsrNnz`| | | | | | | | |
-|`cusparseXcsrsort`| | | | | | | | |
-|`cusparseXcsrsort_bufferSizeExt`| | | | | | | | |
+|`cusparseXcsrsort`| | | |`rocsparse_csrsort`|1.9.0| | | |
+|`cusparseXcsrsort_bufferSizeExt`| | | |`rocsparse_csrsort_buffer_size`|1.9.0| | | |
 |`cusparseXgebsr2csr`| | | | | | | | |
 |`cusparseXgebsr2gebsrNnz`| | | |`rocsparse_gebsr2gebsr_nnz`|4.1.0| | | |
 |`cusparseZbsr2csr`| | | |`rocsparse_zbsr2csr`|3.10.0| | | |
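
With the rocSPARSE columns filled in above, the sort helpers now document direct ROC equivalents. A hedged sketch of the rocSPARSE sequence behind the `cusparseCreateIdentityPermutation` and `cusparseXcsrsort` rows, assuming `perm` and `temp_buffer` are device allocations and `temp_buffer` was sized beforehand via `rocsparse_csrsort_buffer_size`:

    #include <rocsparse/rocsparse.h>

    // Build an identity permutation, then sort CSR column indices with it.
    rocsparse_status sort_csr(rocsparse_handle h, rocsparse_int m, rocsparse_int n,
                              rocsparse_int nnz, const rocsparse_mat_descr descr,
                              const rocsparse_int *csr_row_ptr,
                              rocsparse_int *csr_col_ind, rocsparse_int *perm,
                              void *temp_buffer) {
      rocsparse_status st = rocsparse_create_identity_permutation(h, nnz, perm);
      if (st != rocsparse_status_success) return st;
      return rocsparse_csrsort(h, m, n, nnz, descr, csr_row_ptr, csr_col_ind,
                               perm, temp_buffer);
    }
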
diff --git a/src/ArgParse.cpp b/src/ArgParse.cpp
index 904b5c2e..acead48e 100644
--- a/src/ArgParse.cpp
+++ b/src/ArgParse.cpp
@@ -172,7 +172,7 @@ cl::opt<std::string> DocFormat("doc-format",
   cl::cat(ToolTemplateCategory));
 
 cl::opt<std::string> DocRoc("doc-roc",
-  cl::desc("ROC cocumentation generation: 'skip' (default), 'separate', or 'joint'; the '--md' or '--csv' option must be specified"),
+  cl::desc("ROC documentation generation: 'skip' (default), 'separate', or 'joint'; the '--md' or '--csv' option must be specified"),
   cl::value_desc("value"),
   cl::cat(ToolTemplateCategory));
 
diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp
index fcc8fb66..c1cffdef 100644
--- a/src/CUDA2HIP_BLAS_API_functions.cpp
+++ b/src/CUDA2HIP_BLAS_API_functions.cpp
@@ -35,8 +35,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_BLAS_FUNCTION_MAP {
   {"cublasSetKernelStream",          {"hipblasSetKernelStream",          "",                                         CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
   {"cublasGetAtomicsMode",           {"hipblasGetAtomicsMode",           "rocblas_get_atomics_mode",                 CONV_LIB_FUNC, API_BLAS, 4}},
   {"cublasSetAtomicsMode",           {"hipblasSetAtomicsMode",           "rocblas_set_atomics_mode",                 CONV_LIB_FUNC, API_BLAS, 4}},
-  {"cublasGetMathMode",              {"hipblasGetMathMode",              "",                                         CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
-  {"cublasSetMathMode",              {"hipblasSetMathMode",              "",                                         CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
+  {"cublasGetMathMode",              {"hipblasGetMathMode",              "rocblas_get_math_mode",                    CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}},
+  {"cublasSetMathMode",              {"hipblasSetMathMode",              "rocblas_set_math_mode",                    CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}},
   {"cublasMigrateComputeType",       {"hipblasMigrateComputeType",       "",                                         CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
   {"cublasGetSmCountTarget",         {"hipblasGetSmCountTarget",         "",                                         CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
   {"cublasSetSmCountTarget",         {"hipblasSetSmCountTarget",         "",                                         CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
@@ -1075,6 +1075,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_BLAS_FUNCTION_MAP {
 
 const std::map<llvm::StringRef, cudaAPIversions> CUDA_BLAS_FUNCTION_VER_MAP {
   {"cublasGetMathMode",                          {CUDA_90,  CUDA_0, CUDA_0}},
+  {"cublasSetMathMode",                          {CUDA_90,  CUDA_0, CUDA_0}},
   {"cublasMigrateComputeType",                   {CUDA_110, CUDA_0, CUDA_0}},
   {"cublasLogCallback",                          {CUDA_92,  CUDA_0, CUDA_0}},
   {"cublasLoggerConfigure",                      {CUDA_92,  CUDA_0, CUDA_0}},
@@ -1962,6 +1963,8 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_BLAS_FUNCTION_VER_MAP {
   {"hipblasDgelsBatched",                        {HIP_5040, HIP_0,    HIP_0   }},
   {"hipblasCgelsBatched",                        {HIP_5040, HIP_0,    HIP_0   }},
   {"hipblasZgelsBatched",                        {HIP_5040, HIP_0,    HIP_0   }},
+  {"rocblas_get_math_mode",                      {HIP_5070, HIP_0,    HIP_0   }},
+  {"rocblas_set_math_mode",                      {HIP_5070, HIP_0,    HIP_0   }},
 };
 
 const std::map<unsigned int, llvm::StringRef> CUDA_BLAS_API_SECTION_MAP {
diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp
index fdaccdb0..d7b31796 100644
--- a/src/CUDA2HIP_BLAS_API_types.cpp
+++ b/src/CUDA2HIP_BLAS_API_types.cpp
@@ -73,12 +73,12 @@ const std::map<llvm::StringRef, hipCounter> CUDA_BLAS_TYPE_NAME_MAP {
   {"CUBLAS_ATOMICS_ALLOWED",         {"HIPBLAS_ATOMICS_ALLOWED",         "rocblas_atomics_allowed",               CONV_NUMERIC_LITERAL, API_BLAS, 2}},
 
   // Blas Math mode/tensor operation
-  {"cublasMath_t",                                     {"hipblasMath_t",                                     "",  CONV_TYPE, API_BLAS, 2, UNSUPPORTED}},
-  {"CUBLAS_DEFAULT_MATH",                              {"HIPBLAS_DEFAULT_MATH",                              "",  CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 0
-  {"CUBLAS_TENSOR_OP_MATH",                            {"HIPBLAS_TENSOR_OP_MATH",                            "",  CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED | CUDA_DEPRECATED}}, // 1
-  {"CUBLAS_PEDANTIC_MATH",                             {"HIPBLAS_PEDANTIC_MATH",                             "",  CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 2
-  {"CUBLAS_TF32_TENSOR_OP_MATH",                       {"HIPBLAS_TF32_TENSOR_OP_MATH",                       "",  CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 3
-  {"CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", {"HIPBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", "",  CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 16
+  {"cublasMath_t",                                     {"hipblasMath_t",                                     "rocblas_math_mode",         CONV_TYPE, API_BLAS, 2, HIP_UNSUPPORTED}},
+  {"CUBLAS_DEFAULT_MATH",                              {"HIPBLAS_DEFAULT_MATH",                              "rocblas_default_math",      CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 0
+  {"CUBLAS_TENSOR_OP_MATH",                            {"HIPBLAS_TENSOR_OP_MATH",                            "",                          CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED | CUDA_DEPRECATED}}, // 1
+  {"CUBLAS_PEDANTIC_MATH",                             {"HIPBLAS_PEDANTIC_MATH",                             "",                          CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 2
+  {"CUBLAS_TF32_TENSOR_OP_MATH",                       {"HIPBLAS_TF32_TENSOR_OP_MATH",                       "rocblas_xf32_xdl_math_op",  CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 3
+  {"CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", {"HIPBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", "",                          CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 16
 
   // Blas different GEMM algorithms
   {"cublasGemmAlgo_t",               {"hipblasGemmAlgo_t",               "rocblas_gemm_algo",                     CONV_TYPE, API_BLAS, 2}},
@@ -170,10 +170,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_BLAS_TYPE_NAME_MAP {
 
   // NOTE: renamed UNSUPPORTED hipblasComputeType_t to the HIP supported hipblasDatatype_t (workaround)
   // TODO: change the type to the correct one after fixing https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529
-  {"cublasComputeType_t",            {"hipblasDatatype_t",               "",                                      CONV_TYPE, API_BLAS, 2}},
+  {"cublasComputeType_t",            {"hipblasDatatype_t",               "rocblas_computetype",                   CONV_TYPE, API_BLAS, 2}},
   {"CUBLAS_COMPUTE_16F",             {"HIPBLAS_COMPUTE_16F",             "",                                      CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 64
   {"CUBLAS_COMPUTE_16F_PEDANTIC",    {"HIPBLAS_COMPUTE_16F_PEDANTIC",    "",                                      CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 65
-  {"CUBLAS_COMPUTE_32F",             {"HIPBLAS_COMPUTE_32F",             "",                                      CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 68
+  {"CUBLAS_COMPUTE_32F",             {"HIPBLAS_COMPUTE_32F",             "rocblas_compute_type_f32",              CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 68
   {"CUBLAS_COMPUTE_32F_PEDANTIC",    {"HIPBLAS_COMPUTE_32F_PEDANTIC",    "",                                      CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 69
   {"CUBLAS_COMPUTE_32F_FAST_16F",    {"HIPBLAS_COMPUTE_32F_FAST_16F",    "",                                      CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 74
   {"CUBLAS_COMPUTE_32F_FAST_16BF",   {"HIPBLAS_COMPUTE_32F_FAST_16BF",   "",                                      CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 75
@@ -387,4 +387,9 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_BLAS_TYPE_NAME_VER_MAP {
   {"rocblas_atomics_allowed",                          {HIP_3080, HIP_0,    HIP_0   }},
   {"rocblas_gemm_algo",                                {HIP_1082, HIP_0,    HIP_0   }},
   {"rocblas_gemm_algo_standard",                       {HIP_1082, HIP_0,    HIP_0   }},
+  {"rocblas_math_mode",                                {HIP_5070, HIP_0,    HIP_0   }},
+  {"rocblas_default_math",                             {HIP_5070, HIP_0,    HIP_0   }},
+  {"rocblas_xf32_xdl_math_op",                         {HIP_5070, HIP_0,    HIP_0   }},
+  {"rocblas_computetype",                              {HIP_5070, HIP_0,    HIP_0   }},
+  {"rocblas_compute_type_f32",                         {HIP_5070, HIP_0,    HIP_0   }},
 };
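
The rocBLAS names added above (`rocblas_get_math_mode`/`rocblas_set_math_mode` and the `rocblas_math_mode` enumerators, ROCm 5.7) give `cublasSetMathMode` callers a ROC target even while the hipBLAS side stays unsupported. A minimal sketch of the rocBLAS usage, assuming a valid handle:

    #include <rocblas/rocblas.h>

    // Select the XF32 math mode (the CUBLAS_TF32_TENSOR_OP_MATH analogue
    // per the type map above), then read the mode back.
    rocblas_status use_xf32(rocblas_handle handle) {
      rocblas_status st = rocblas_set_math_mode(handle, rocblas_xf32_xdl_math_op);
      if (st != rocblas_status_success) return st;

      rocblas_math_mode mode;
      return rocblas_get_math_mode(handle, &mode);
    }
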
diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp
index 9d9511e4..1b3c83ae 100644
--- a/src/CUDA2HIP_Device_functions.cpp
+++ b/src/CUDA2HIP_Device_functions.cpp
@@ -710,6 +710,9 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
   {"__hgeu2_mask",                      {"__hgeu2_mask",                       "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__hltu2_mask",                      {"__hltu2_mask",                       "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__hgtu2_mask",                      {"__hgtu2_mask",                       "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+  {"make_half2",                        {"make_half2",                         "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+  {"__half2char_rz",                    {"__half2char_rz",                     "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+  {"__half2uchar_rz",                   {"__half2uchar_rz",                    "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   // bfp16 functions
   {"__double2bfloat16",                 {"__double2bfloat16",                  "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__float2bfloat16",                  {"__float2bfloat16",                   "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
@@ -775,12 +778,16 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
   {"__high2bfloat16",                   {"__high2bfloat16",                    "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__low2bfloat16",                    {"__low2bfloat16",                     "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__halves2bfloat162",                {"__halves2bfloat162",                 "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
-  {"__low2bfloat162",                   {"__halves2bfloat162",                 "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+  {"__low2bfloat162",                   {"__low2bfloat162",                    "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__high2bfloat162",                  {"__high2bfloat162",                   "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__bfloat16_as_short",               {"__bfloat16_as_short",                "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__bfloat16_as_ushort",              {"__bfloat16_as_ushort",               "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__short_as_bfloat16",               {"__short_as_bfloat16",                "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   {"__ushort_as_bfloat16",              {"__ushort_as_bfloat16",               "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+  {"__float22bfloat162_rn",             {"__float22bfloat162_rn",              "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+  {"__bfloat162char_rz",                {"__bfloat162char_rz",                 "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+  {"__bfloat162uchar_rz",               {"__bfloat162uchar_rz",                "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+  {"make_bfloat162",                    {"make_bfloat162",                     "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
   // atomic functions
   {"atomicAdd",                         {"atomicAdd",                          "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
   {"atomicAdd_system",                  {"atomicAdd_system",                   "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
@@ -945,6 +952,13 @@ const std::map<llvm::StringRef, cudaAPIversions> CUDA_DEVICE_FUNCTION_VER_MAP {
   {"__hgeu2_mask",                      {CUDA_120, CUDA_0,   CUDA_0  }},
   {"__hltu2_mask",                      {CUDA_120, CUDA_0,   CUDA_0  }},
   {"__hgtu2_mask",                      {CUDA_120, CUDA_0,   CUDA_0  }},
+  {"__float22bfloat162_rn",             {CUDA_110, CUDA_0,   CUDA_0  }},
+  {"__bfloat162char_rz",                {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"__bfloat162uchar_rz",               {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"make_bfloat162",                    {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"make_half2",                        {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"__half2char_rz",                    {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"__half2uchar_rz",                   {CUDA_122, CUDA_0,   CUDA_0  }},
 };
 
 const std::map<llvm::StringRef, hipAPIversions> HIP_DEVICE_FUNCTION_VER_MAP {
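
The half/bfloat16 intrinsics registered above are CUDA-only for now, so hipify-clang can flag them as unsupported rather than passing them through silently. A small device-code sketch exercising two of the new CUDA 12.2 entries, assuming cuda_fp16.h:

    #include <cuda_fp16.h>

    // Packs pairs of halves with make_half2 and quantizes them with the
    // round-toward-zero half-to-char conversion; both are UNSUPPORTED in
    // the map above, so hipify-clang reports rather than converts them.
    __global__ void pack_and_quantize(const __half *in, __half2 *pairs,
                                      signed char *q, int n) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (2 * i + 1 < n) {
        pairs[i]     = make_half2(in[2 * i], in[2 * i + 1]);
        q[2 * i]     = __half2char_rz(in[2 * i]);
        q[2 * i + 1] = __half2char_rz(in[2 * i + 1]);
      }
    }
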
diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp
index 0e7ed78c..1711d102 100644
--- a/src/CUDA2HIP_Driver_API_functions.cpp
+++ b/src/CUDA2HIP_Driver_API_functions.cpp
@@ -353,13 +353,13 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP {
   {"cuMemsetD8Async",                                      {"hipMemsetD8Async",                                        "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}},
   // no analogue
   // NOTE: Not equal to cudaMallocMipmappedArray due to different signatures
-  {"cuMipmappedArrayCreate",                               {"hipMipmappedArrayCreate",                                 "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}},
+  {"cuMipmappedArrayCreate",                               {"hipMipmappedArrayCreate",                                 "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}},
   // no analogue
   // NOTE: Not equal to cudaFreeMipmappedArray due to different signatures
-  {"cuMipmappedArrayDestroy",                              {"hipMipmappedArrayDestroy",                                "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}},
+  {"cuMipmappedArrayDestroy",                              {"hipMipmappedArrayDestroy",                                "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}},
   // no analogue
   // NOTE: Not equal to cudaGetMipmappedArrayLevel due to different signatures
-  {"cuMipmappedArrayGetLevel",                             {"hipMipmappedArrayGetLevel",                               "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}},
+  {"cuMipmappedArrayGetLevel",                             {"hipMipmappedArrayGetLevel",                               "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}},
   // cudaArrayGetSparseProperties
   {"cuArrayGetSparseProperties",                           {"hipArrayGetSparseProperties",                             "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}},
   // cudaArrayGetPlane
@@ -431,8 +431,12 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP {
   // 17. Unified Addressing
   // cudaMemAdvise
   {"cuMemAdvise",                                          {"hipMemAdvise",                                            "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}},
-  // TODO: double check cudaMemPrefetchAsync
+  // cudaMemAdvise_v2
+  {"cuMemAdvise_v2",                                       {"hipMemAdvise_v2",                                         "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED, HIP_UNSUPPORTED}},
+  // cudaMemPrefetchAsync
   {"cuMemPrefetchAsync",                                   {"hipMemPrefetchAsync",                                     "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}},
+  // cudaMemPrefetchAsync_v2
+  {"cuMemPrefetchAsync_v2",                                {"hipMemPrefetchAsync_v2",                                  "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED, HIP_UNSUPPORTED}},
   // cudaMemRangeGetAttribute
   {"cuMemRangeGetAttribute",                               {"hipMemRangeGetAttribute",                                 "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}},
   // cudaMemRangeGetAttributes
@@ -782,6 +786,12 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP {
   {"cuGraphInstantiateWithParams",                         {"hipGraphInstantiateWithParams",                           "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}},
   // cudaGraphExecGetFlags
   {"cuGraphExecGetFlags",                                  {"hipGraphExecGetFlags",                                    "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}},
+  // cudaGraphAddNode
+  {"cuGraphAddNode",                                       {"hipGraphAddNode",                                         "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}},
+  // cudaGraphNodeSetParams
+  {"cuGraphNodeSetParams",                                 {"hipGraphNodeSetParams",                                   "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}},
+  // cudaGraphExecNodeSetParams
+  {"cuGraphExecNodeSetParams",                             {"hipGraphExecNodeSetParams",                               "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}},
 
   // 25. Occupancy
   // cudaOccupancyAvailableDynamicSMemPerBlock
@@ -1384,6 +1394,11 @@ const std::map<llvm::StringRef, cudaAPIversions> CUDA_DRIVER_FUNCTION_VER_MAP {
   {"cuCoredumpGetAttributeGlobal",                         {CUDA_121, CUDA_0,   CUDA_0  }},
   {"cuCoredumpSetAttribute",                               {CUDA_121, CUDA_0,   CUDA_0  }},
   {"cuCoredumpSetAttributeGlobal",                         {CUDA_121, CUDA_0,   CUDA_0  }},
+  {"cuMemPrefetchAsync_v2",                                {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cuMemAdvise_v2",                                       {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cuGraphAddNode",                                       {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cuGraphNodeSetParams",                                 {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cuGraphExecNodeSetParams",                             {CUDA_122, CUDA_0,   CUDA_0  }},
 };
 
 const std::map<llvm::StringRef, hipAPIversions> HIP_DRIVER_FUNCTION_VER_MAP {
@@ -1443,9 +1458,9 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_DRIVER_FUNCTION_VER_MAP {
   {"hipMemsetD32Async",                                    {HIP_2030, HIP_0,    HIP_0   }},
   {"hipMemsetD8",                                          {HIP_1060, HIP_0,    HIP_0   }},
   {"hipMemsetD8Async",                                     {HIP_3000, HIP_0,    HIP_0   }},
-  {"hipMipmappedArrayCreate",                              {HIP_3050, HIP_0,    HIP_0   }},
-  {"hipMipmappedArrayDestroy",                             {HIP_3050, HIP_0,    HIP_0   }},
-  {"hipMipmappedArrayGetLevel",                            {HIP_3050, HIP_0,    HIP_0   }},
+  {"hipMipmappedArrayCreate",                              {HIP_3050, HIP_5070, HIP_0   }},
+  {"hipMipmappedArrayDestroy",                             {HIP_3050, HIP_5070, HIP_0   }},
+  {"hipMipmappedArrayGetLevel",                            {HIP_3050, HIP_5070, HIP_0   }},
   {"hipFuncGetAttribute",                                  {HIP_2080, HIP_0,    HIP_0   }},
   {"hipModuleLaunchKernel",                                {HIP_1060, HIP_0,    HIP_0   }},
   {"hipModuleOccupancyMaxActiveBlocksPerMultiprocessor",   {HIP_3050, HIP_0,    HIP_0   }},
diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp
index 479db2c0..15e8748f 100644
--- a/src/CUDA2HIP_Driver_API_types.cpp
+++ b/src/CUDA2HIP_Driver_API_types.cpp
@@ -74,12 +74,14 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_HOST_NODE_PARAMS",                                            {"hipHostNodeParams",                                        "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_HOST_NODE_PARAMS_v1",                                         {"hipHostNodeParams",                                        "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_HOST_NODE_PARAMS_v2_st",                                      {"hipHostNodeParams_v2",                                     "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaHostNodeParamsV2
   {"CUDA_HOST_NODE_PARAMS_v2",                                         {"hipHostNodeParams_v2",                                     "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // cudaKernelNodeParams
   {"CUDA_KERNEL_NODE_PARAMS_st",                                       {"hipKernelNodeParams",                                      "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_KERNEL_NODE_PARAMS",                                          {"hipKernelNodeParams",                                      "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_KERNEL_NODE_PARAMS_v1",                                       {"hipKernelNodeParams",                                      "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
+  // cudaKernelNodeParamsV2
   {"CUDA_KERNEL_NODE_PARAMS_v2_st",                                    {"hipKernelNodeParams_v2",                                   "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_KERNEL_NODE_PARAMS_v2",                                       {"hipKernelNodeParams_v2",                                   "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_KERNEL_NODE_PARAMS_v3_st",                                    {"hipKernelNodeParams_v3",                                   "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -109,13 +111,15 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_MEMCPY3D_PEER_v1",                                            {"hip_Memcpy3D_Peer",                                        "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   {"CUDA_MEMCPY_NODE_PARAMS_st",                                       {"hiMemcpyNodeParams",                                        "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaMemcpyNodeParams
   {"CUDA_MEMCPY_NODE_PARAMS",                                          {"hiMemcpyNodeParams",                                        "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  // cudaMemsetParams
   {"CUDA_MEMSET_NODE_PARAMS_st",                                       {"hipMemsetParams",                                          "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
+  // cudaMemsetParams
   {"CUDA_MEMSET_NODE_PARAMS",                                          {"hipMemsetParams",                                          "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_MEMSET_NODE_PARAMS_v1",                                       {"hipMemsetParams",                                          "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_MEMSET_NODE_PARAMS_v2_st",                                    {"hipMemsetParams_v2",                                       "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaMemsetParamsV2
   {"CUDA_MEMSET_NODE_PARAMS_v2",                                       {"hipMemsetParams_v2",                                       "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   {"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st",                             {"HIP_POINTER_ATTRIBUTE_P2P_TOKENS",                         "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -277,6 +281,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st",                               {"hipExternalSemaphoreSignalNodeParams",                     "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS",                                  {"hipExternalSemaphoreSignalNodeParams",                     "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1",                               {"hipExternalSemaphoreSignalNodeParams",                     "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaExternalSemaphoreSignalNodeParamsV2
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st",                            {"hipExternalSemaphoreSignalNodeParams_v2",                  "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2",                               {"hipExternalSemaphoreSignalNodeParams_v2",                  "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
@@ -284,6 +289,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_st",                                 {"hipExternalSemaphoreWaitNodeParams",                       "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS",                                    {"hipExternalSemaphoreWaitNodeParams",                       "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1",                                 {"hipExternalSemaphoreWaitNodeParams",                       "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaExternalSemaphoreWaitNodeParamsV2
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st",                              {"hipExternalSemaphoreWaitNodeParams_v2",                    "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2",                                 {"hipExternalSemaphoreWaitNodeParams_v2",                    "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
@@ -315,35 +321,32 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   // cudaMemAllocNodeParams
   {"CUDA_MEM_ALLOC_NODE_PARAMS_st",                                    {"hipMemAllocNodeParams",                                    "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, CUDA_REMOVED}},
   {"CUDA_MEM_ALLOC_NODE_PARAMS_v1_st",                                 {"hipMemAllocNodeParams",                                    "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
+  // cudaMemAllocNodeParamsV2
   {"CUDA_MEM_ALLOC_NODE_PARAMS_v2_st",                                 {"hipMemAllocNodeParams_v2",                                 "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_MEM_ALLOC_NODE_PARAMS",                                       {"hipMemAllocNodeParams",                                    "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_MEM_ALLOC_NODE_PARAMS_v1",                                    {"hipMemAllocNodeParams",                                    "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_MEM_ALLOC_NODE_PARAMS_v2",                                    {"hipMemAllocNodeParams_v2",                                 "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaMemFreeNodeParams
   {"CUDA_MEM_FREE_NODE_PARAMS_st",                                     {"hipMemFreeNodeParams",                                     "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_MEM_FREE_NODE_PARAMS",                                        {"hipMemFreeNodeParams",                                     "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaChildGraphNodeParams
   {"CUDA_CHILD_GRAPH_NODE_PARAMS_st",                                  {"hipChildGraphNodeParams",                                  "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_CHILD_GRAPH_NODE_PARAMS",                                     {"hipChildGraphNodeParams",                                  "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaEventRecordNodeParams
   {"CUDA_EVENT_RECORD_NODE_PARAMS_st",                                 {"hipEventRecordNodeParams",                                 "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EVENT_RECORD_NODE_PARAMS",                                    {"hipEventRecordNodeParams",                                 "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaEventWaitNodeParams
   {"CUDA_EVENT_WAIT_NODE_PARAMS_st",                                   {"hipEventWaitNodeParams",                                   "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EVENT_WAIT_NODE_PARAMS",                                      {"hipEventWaitNodeParams",                                   "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaGraphNodeParams
   {"CUgraphNodeParams_st",                                             {"hipGraphNodeParams",                                       "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUgraphNodeParams",                                                {"hipGraphNodeParams",                                       "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
-  {"CUdeviceNumaConfig_enum",                                          {"hipDeviceNumaConfig",                                      "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
-  {"CUdeviceNumaConfig",                                               {"hipDeviceNumaConfig",                                      "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
-
   // cudaArrayMemoryRequirements
   {"CUDA_ARRAY_MEMORY_REQUIREMENTS_st",                                {"hipArrayMemoryRequirements",                               "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_ARRAY_MEMORY_REQUIREMENTS_v1",                                {"hipArrayMemoryRequirements",                               "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -859,27 +862,27 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH",                               {"hipDeviceAttributeClusterLaunch",                          "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 120
   // cudaDevAttrDeferredMappingCudaArraySupported
   {"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED",        {"hipDeviceAttributeDeferredMappingCudaArraySupported",      "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 121
-  //
+  // cudaDevAttrReserved122
   {"CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2",             {"hipDeviceAttributeCanUse64BitStreamMemOpsV2",              "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}}, // 122
-  //
+  // cudaDevAttrReserved123
   {"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2",             {"hipDeviceAttributeCanUseStreamWaitValueNorV2",             "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}}, // 123
-  //
+  // cudaDevAttrReserved124
   {"CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED",                            {"hipDeviceAttributeDmaBufSupported",                        "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 124
   // cudaDevAttrIpcEventSupport
   {"CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED",                          {"hipDeviceAttributeIpcEventSupported",                      "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 125
   // cudaDevAttrMemSyncDomainCount
   {"CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT",                        {"hipDeviceAttributeMemSyncDomainCount",                     "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 126
-  //
+  // cudaDevAttrReserved127
   {"CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED",                  {"hipDeviceAttributeTensorMapAccessSupported",               "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 127
-  //
+  // cudaDevAttrReserved129
   {"CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS",                    {"hipDeviceAttributeUnifiedFunctionPointers",                "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 129
-  //
+  // cudaDevAttrNumaConfig
   {"CU_DEVICE_ATTRIBUTE_NUMA_CONFIG",                                  {"hipDeviceAttributeNumaConfig",                             "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 130
-  //
+  // cudaDevAttrNumaId
   {"CU_DEVICE_ATTRIBUTE_NUMA_ID",                                      {"hipDeviceAttributeNumaId",                                 "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 131
   //
   {"CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED",                          {"hipDeviceAttributeMulticastSupported",                     "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 132
-  //
+  // cudaDevAttrHostNumaId
   {"CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID",                                 {"hipDeviceAttributeHostNumaId",                             "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 134
   // cudaDevAttrMax
   {"CU_DEVICE_ATTRIBUTE_MAX",                                          {"hipDeviceAttributeMax",                                    "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -1457,14 +1460,14 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   {"CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY",                               {"hipMemRangeAttributeAccessedBy",                           "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 3
   // cudaMemRangeAttributeLastPrefetchLocation
   {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION",                    {"hipMemRangeAttributeLastPrefetchLocation",                 "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 4
-  //
+  // cudaMemRangeAttributePreferredLocationType
   {"CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE",                   {"hipMemRangeAttributePreferredLocationType",                "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 5
-  //
+  // cudaMemRangeAttributePreferredLocationId
   {"CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID",                     {"hipMemRangeAttributePreferredLocationId",                  "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 6
-  //
-  {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE",               {"hipMemRangeAttributeLastPreferredLocationType",            "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7
-  //
-  {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID",                 {"hipMemRangeAttributeLastPreferredLocationId",              "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8
+  // cudaMemRangeAttributeLastPrefetchLocationType
+  {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE",               {"hipMemRangeAttributeLastPrefetchLocationType",             "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7
+  // cudaMemRangeAttributeLastPrefetchLocationId
+  {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID",                 {"hipMemRangeAttributeLastPrefetchLocationId",               "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8
 
   // no analogue
   {"CUoccupancy_flags",                                                {"hipOccupancyFlags",                                        "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -1998,11 +2001,11 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   {"CU_MEM_LOCATION_TYPE_INVALID",                                     {"hipMemLocationTypeInvalid",                                "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 0x0
   // cudaMemLocationTypeDevice
   {"CU_MEM_LOCATION_TYPE_DEVICE",                                      {"hipMemLocationTypeDevice",                                 "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 0x1
-  //
+  // cudaMemLocationTypeHost
   {"CU_MEM_LOCATION_TYPE_HOST",                                        {"hipMemLocationTypeHost",                                   "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x2
-  //
+  // cudaMemLocationTypeHostNuma
   {"CU_MEM_LOCATION_TYPE_HOST_NUMA",                                   {"hipMemLocationTypeHostNuma",                               "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x3
-  //
+  // cudaMemLocationTypeHostNumaCurrent
   {"CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT",                           {"hipMemLocationTypeHostNumaCurrent",                        "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x4
   // no analogue
   {"CU_MEM_LOCATION_TYPE_MAX",                                         {"hipMemLocationTypeMax",                                    "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x7FFFFFFF
@@ -2534,6 +2537,15 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP {
   //
   {"CU_COREDUMP_MAX",                                                  {"HIP_COREDUMP_MAX",                                         "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
+  // cudaDeviceNumaConfig
+  {"CUdeviceNumaConfig",                                               {"hipDeviceNumaConfig",                                      "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  {"CUdeviceNumaConfig_enum",                                          {"hipDeviceNumaConfig",                                      "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // CUdeviceNumaConfig enum values
+  // cudaDeviceNumaConfigNone
+  {"CU_DEVICE_NUMA_CONFIG_NONE",                                       {"hipDeviceNumaConfigNone",                                  "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaDeviceNumaConfigNumaNode
+  {"CU_DEVICE_NUMA_CONFIG_NUMA_NODE",                                  {"hipDeviceNumaConfigNumaNode",                              "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
   // 4. Typedefs
 
   // no analogue
@@ -3512,6 +3524,8 @@ const std::map<llvm::StringRef, cudaAPIversions> CUDA_DRIVER_TYPE_NAME_VER_MAP {
   {"CUgraphNodeParams",                                                {CUDA_122, CUDA_0,   CUDA_0  }},
   {"CUdeviceNumaConfig_enum",                                          {CUDA_122, CUDA_0,   CUDA_0  }},
   {"CUdeviceNumaConfig",                                               {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"CU_DEVICE_NUMA_CONFIG_NONE",                                       {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"CU_DEVICE_NUMA_CONFIG_NUMA_NODE",                                  {CUDA_122, CUDA_0,   CUDA_0  }},
 };
 
 const std::map<llvm::StringRef, hipAPIversions> HIP_DRIVER_TYPE_NAME_VER_MAP {
diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp
index 3616b64f..7ab124ac 100644
--- a/src/CUDA2HIP_Runtime_API_functions.cpp
+++ b/src/CUDA2HIP_Runtime_API_functions.cpp
@@ -308,6 +308,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP {
   {"cudaMallocPitch",                                         {"hipMallocPitch",                                         "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}},
   // cuMemAdvise
   {"cudaMemAdvise",                                           {"hipMemAdvise",                                           "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}},
+  // cuMemAdvise_v2
+  {"cudaMemAdvise_v2",                                        {"hipMemAdvise_v2",                                        "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_UNSUPPORTED}},
   // no analogue
   // NOTE: Not equal to cuMemcpy due to different signatures
   {"cudaMemcpy",                                              {"hipMemcpy",                                              "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}},
@@ -358,8 +360,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP {
   {"cudaMemcpyToSymbolAsync",                                 {"hipMemcpyToSymbolAsync",                                 "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}},
   // cuMemGetInfo
   {"cudaMemGetInfo",                                          {"hipMemGetInfo",                                          "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}},
-  // TODO: double check cuMemPrefetchAsync
+  // cuMemPrefetchAsync
   {"cudaMemPrefetchAsync",                                    {"hipMemPrefetchAsync",                                    "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}},
+  // cuMemPrefetchAsync_v2
+  {"cudaMemPrefetchAsync_v2",                                 {"hipMemPrefetchAsync_v2",                                 "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_UNSUPPORTED}},
   // cuMemRangeGetAttribute
   {"cudaMemRangeGetAttribute",                                {"hipMemRangeGetAttribute",                                "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}},
   // cuMemRangeGetAttributes
@@ -836,6 +840,12 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP {
   {"cudaGraphInstantiateWithParams",                          {"hipGraphInstantiateWithParams",                          "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}},
   // cuGraphExecGetFlags
   {"cudaGraphExecGetFlags",                                   {"hipGraphExecGetFlags",                                   "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}},
+  // cuGraphAddNode
+  {"cudaGraphAddNode",                                        {"hipGraphAddNode",                                        "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}},
+  // cuGraphNodeSetParams
+  {"cudaGraphNodeSetParams",                                  {"hipGraphNodeSetParams",                                  "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}},
+  // cuGraphExecNodeSetParams
+  {"cudaGraphExecNodeSetParams",                              {"hipGraphExecNodeSetParams",                              "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}},
 
   // 29. Driver Entry Point Access
   // cuGetProcAddress
@@ -875,7 +885,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP {
   // no analogue
   {"cudaBindTextureToArray",                                  {"hipBindTextureToArray",                                  "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}},
   // no analogue
-  {"cudaBindTextureToMipmappedArray",                         {"hipBindTextureToMipmappedArray",                         "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, CUDA_REMOVED}},
+  {"cudaBindTextureToMipmappedArray",                         {"hipBindTextureToMipmappedArray",                         "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}},
   // no analogue
   {"cudaGetTextureAlignmentOffset",                           {"hipGetTextureAlignmentOffset",                           "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}},
   // no analogue
@@ -1110,6 +1120,11 @@ const std::map<llvm::StringRef, cudaAPIversions> CUDA_RUNTIME_FUNCTION_VER_MAP {
   {"cudaGraphInstantiateWithParams",                          {CUDA_120, CUDA_0,   CUDA_0  }},
   {"cudaGraphExecGetFlags",                                   {CUDA_120, CUDA_0,   CUDA_0  }},
   {"cudaGetKernel",                                           {CUDA_121, CUDA_0,   CUDA_0  }},
+  {"cudaMemPrefetchAsync_v2",                                 {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemAdvise_v2",                                        {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaGraphAddNode",                                        {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaGraphNodeSetParams",                                  {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaGraphExecNodeSetParams",                              {CUDA_122, CUDA_0,   CUDA_0  }},
 };
 
 const std::map<llvm::StringRef, hipAPIversions> HIP_RUNTIME_FUNCTION_VER_MAP {
@@ -1230,7 +1245,7 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_RUNTIME_FUNCTION_VER_MAP {
   {"hipBindTexture",                                          {HIP_1060, HIP_3080, HIP_0   }},
   {"hipBindTexture2D",                                        {HIP_1070, HIP_3080, HIP_0   }},
   {"hipBindTextureToArray",                                   {HIP_1060, HIP_3080, HIP_0   }},
-  {"hipBindTextureToMipmappedArray",                          {HIP_1070, HIP_0,    HIP_0   }},
+  {"hipBindTextureToMipmappedArray",                          {HIP_1070, HIP_5070, HIP_0   }},
   {"hipCreateChannelDesc",                                    {HIP_1060, HIP_0,    HIP_0   }},
   {"hipGetChannelDesc",                                       {HIP_1070, HIP_0,    HIP_0   }},
   {"hipGetTextureAlignmentOffset",                            {HIP_1090, HIP_3080, HIP_0   }},
diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp
index d66849e6..9428a1eb 100644
--- a/src/CUDA2HIP_Runtime_API_types.cpp
+++ b/src/CUDA2HIP_Runtime_API_types.cpp
@@ -70,6 +70,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
 
   // CUDA_HOST_NODE_PARAMS
   {"cudaHostNodeParams",                                               {"hipHostNodeParams",                                        "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
+  // CUDA_HOST_NODE_PARAMS_v2
+  {"cudaHostNodeParamsV2",                                             {"hipHostNodeParams_v2",                                     "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // CUipcEventHandle
   {"cudaIpcEventHandle_t",                                             {"hipIpcEventHandle_t",                                      "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
@@ -83,6 +85,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
 
   // CUDA_KERNEL_NODE_PARAMS
   {"cudaKernelNodeParams",                                             {"hipKernelNodeParams",                                      "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
+  // CUDA_KERNEL_NODE_PARAMS_v2_st
+  {"cudaKernelNodeParamsV2",                                           {"hipKernelNodeParams_v2",                                   "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // no analogue
   // CUDA_LAUNCH_PARAMS struct differs
@@ -97,6 +101,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
 
   // CUDA_MEMSET_NODE_PARAMS
   {"cudaMemsetParams",                                                 {"hipMemsetParams",                                          "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
+  // CUDA_MEMSET_NODE_PARAMS_v2
+  {"cudaMemsetParamsV2",                                               {"hipMemsetParams_v2",                                       "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // no analogue
   {"cudaPitchedPtr",                                                   {"hipPitchedPtr",                                            "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
@@ -210,12 +216,33 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
 
   // CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st
   {"cudaExternalSemaphoreSignalNodeParams",                            {"hipExternalSemaphoreSignalNodeParams",                     "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st
+  {"cudaExternalSemaphoreSignalNodeParamsV2",                          {"hipExternalSemaphoreSignalNodeParams_v2",                  "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // CUDA_EXT_SEM_WAIT_NODE_PARAMS_st
   {"cudaExternalSemaphoreWaitNodeParams",                              {"hipExternalSemaphoreWaitNodeParams",                       "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st
+  {"cudaExternalSemaphoreWaitNodeParamsV2",                            {"hipExternalSemaphoreWaitNodeParams_v2",                    "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // CUDA_MEM_ALLOC_NODE_PARAMS_st
   {"cudaMemAllocNodeParams",                                           {"hipMemAllocNodeParams",                                    "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
+  // CUDA_MEM_ALLOC_NODE_PARAMS_v2_st
+  {"cudaMemAllocNodeParamsV2",                                         {"hipMemAllocNodeParams_v2",                                 "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUDA_MEM_FREE_NODE_PARAMS_st
+  {"cudaMemFreeNodeParams",                                            {"hipMemFreeNodeParams",                                     "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUDA_CHILD_GRAPH_NODE_PARAMS_st
+  {"cudaChildGraphNodeParams",                                         {"hipChildGraphNodeParams",                                  "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUDA_EVENT_RECORD_NODE_PARAMS_st
+  {"cudaEventRecordNodeParams",                                        {"hipEventRecordNodeParams",                                 "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUDA_EVENT_WAIT_NODE_PARAMS_st
+  {"cudaEventWaitNodeParams",                                          {"hipEventWaitNodeParams",                                   "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUgraphNodeParams_st
+  {"cudaGraphNodeParams",                                              {"hipGraphNodeParams",                                       "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // CUDA_ARRAY_MEMORY_REQUIREMENTS_st
   {"cudaArrayMemoryRequirements",                                      {"hipArrayMemoryRequirements",                               "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -230,6 +257,9 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
   // CUkernel
   {"cudaKernel_t",                                                     {"hipKernel",                                                "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
+  // CUDA_MEMCPY_NODE_PARAMS
+  {"cudaMemcpyNodeParams",                                             {"hiMemcpyNodeParams",                                       "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
   // 2. Unions
 
   // CUstreamAttrValue
@@ -586,11 +616,11 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
   {"cudaDevAttrClusterLaunch",                                         {"hipDeviceAttributeClusterLaunch",                          "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 120
   // CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED
   {"cudaDevAttrDeferredMappingCudaArraySupported",                     {"hipDeviceAttributeDeferredMappingCudaArraySupported",      "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 121
-  //
+  // CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2
   {"cudaDevAttrReserved122",                                           {"hipDevAttrReserved122",                                    "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 122
-  //
+  // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2
   {"cudaDevAttrReserved123",                                           {"hipDevAttrReserved123",                                    "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 123
-  //
+  // CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED
   {"cudaDevAttrReserved124",                                           {"hipDevAttrReserved124",                                    "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 124
   // CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
   {"cudaDevAttrIpcEventSupport",                                       {"hipDevAttrIpcEventSupport",                                "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 125
@@ -598,12 +628,18 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
   {"cudaDevAttrMemSyncDomainCount",                                    {"hipDevAttrMemSyncDomainCount",                             "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 126
   // CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED
   {"cudaDevAttrReserved127",                                           {"hipDeviceAttributeTensorMapAccessSupported",               "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 127
-  //
+  // CUDA only
   {"cudaDevAttrReserved128",                                           {"hipDevAttrReserved128",                                    "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 128
   // CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS
   {"cudaDevAttrReserved129",                                           {"hipDeviceAttributeUnifiedFunctionPointers",                "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 129
+  // CU_DEVICE_ATTRIBUTE_NUMA_CONFIG
+  {"cudaDevAttrNumaConfig",                                            {"hipDeviceAttributeNumaConfig",                             "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 130
+  // CU_DEVICE_ATTRIBUTE_NUMA_ID
+  {"cudaDevAttrNumaId",                                                {"hipDeviceAttributeNumaId",                                 "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 131
   // CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED
   {"cudaDevAttrReserved132",                                           {"hipDeviceAttributeMulticastSupported",                     "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 132
+  // CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID
+  {"cudaDevAttrHostNumaId",                                            {"hipDeviceAttributeHostNumaId",                             "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 134
   // CU_DEVICE_ATTRIBUTE_MAX
   {"cudaDevAttrMax",                                                   {"hipDeviceAttributeMax",                                    "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
@@ -1292,6 +1328,14 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
   {"cudaMemRangeAttributeAccessedBy",                                  {"hipMemRangeAttributeAccessedBy",                           "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 3
   // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION
   {"cudaMemRangeAttributeLastPrefetchLocation",                        {"hipMemRangeAttributeLastPrefetchLocation",                 "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 4
+  // CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE
+  {"cudaMemRangeAttributePreferredLocationType",                       {"hipMemRangeAttributePreferredLocationType",                "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 5
+  // CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID
+  {"cudaMemRangeAttributePreferredLocationId",                         {"hipMemRangeAttributePreferredLocationId",                  "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 6
+  // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE
+  {"cudaMemRangeAttributeLastPrefetchLocationType",                    {"hipMemRangeAttributeLastPrefetchLocationType",             "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7
+  // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID
+  {"cudaMemRangeAttributeLastPrefetchLocationId",                      {"hipMemRangeAttributeLastPrefetchLocationId",               "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8
 
   // no analogue
   {"cudaOutputMode",                                                   {"hipOutputMode",                                            "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}},
@@ -1611,6 +1655,12 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
   {"cudaMemLocationTypeInvalid",                                       {"hipMemLocationTypeInvalid",                                "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 0
   // CU_MEM_LOCATION_TYPE_DEVICE
   {"cudaMemLocationTypeDevice",                                        {"hipMemLocationTypeDevice",                                 "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 1
+  // CU_MEM_LOCATION_TYPE_HOST
+  {"cudaMemLocationTypeHost",                                          {"hipMemLocationTypeHost",                                   "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 2
+  // CU_MEM_LOCATION_TYPE_HOST_NUMA
+  {"cudaMemLocationTypeHostNuma",                                      {"hipMemLocationTypeHostNuma",                               "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 3
+  // CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT
+  {"cudaMemLocationTypeHostNumaCurrent",                               {"hipMemLocationTypeHostNumaCurrent",                        "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 4
 
   // CUmemAllocationType
   {"cudaMemAllocationType",                                            {"hipMemAllocationType",                                     "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
@@ -1819,6 +1869,14 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
   // CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE
   {"cudaLaunchMemSyncDomainRemote",                                    {"hipLaunchMemSyncDomainRemote",                             "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
+  // CUdeviceNumaConfig
+  {"cudaDeviceNumaConfig",                                             {"hipDeviceNumaConfig",                                      "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaDeviceNumaConfig enum values
+  // CU_DEVICE_NUMA_CONFIG_NONE
+  {"cudaDeviceNumaConfigNone",                                         {"hipDeviceNumaConfigNone",                                  "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // CU_DEVICE_NUMA_CONFIG_NUMA_NODE
+  {"cudaDeviceNumaConfigNumaNode",                                     {"hipDeviceNumaConfigNumaNode",                              "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
   // 4. Typedefs
 
   // CUhostFn
@@ -2508,6 +2566,31 @@ const std::map<llvm::StringRef, cudaAPIversions> CUDA_RUNTIME_TYPE_NAME_VER_MAP
   {"cudaDevAttrReserved132",                                           {CUDA_121, CUDA_0,   CUDA_0  }},
   {"CUkern_st",                                                        {CUDA_121, CUDA_0,   CUDA_0  }},
   {"cudaKernel_t",                                                     {CUDA_121, CUDA_0,   CUDA_0  }},
+  {"cudaMemcpyNodeParams",                                             {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemsetParamsV2",                                               {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaHostNodeParamsV2",                                             {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemRangeAttributePreferredLocationType",                       {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemRangeAttributePreferredLocationId",                         {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemRangeAttributeLastPrefetchLocationType",                    {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemRangeAttributeLastPrefetchLocationId",                      {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaDevAttrNumaConfig",                                            {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaDevAttrNumaId",                                                {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaDevAttrHostNumaId",                                            {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemLocationTypeHost",                                          {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemLocationTypeHostNuma",                                      {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemLocationTypeHostNumaCurrent",                               {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemAllocNodeParamsV2",                                         {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaMemFreeNodeParams",                                            {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaKernelNodeParamsV2",                                           {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaExternalSemaphoreSignalNodeParamsV2",                          {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaExternalSemaphoreWaitNodeParamsV2",                            {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaChildGraphNodeParams",                                         {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaEventRecordNodeParams",                                        {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaEventWaitNodeParams",                                          {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaGraphNodeParams",                                              {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaDeviceNumaConfig",                                             {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaDeviceNumaConfigNone",                                         {CUDA_122, CUDA_0,   CUDA_0  }},
+  {"cudaDeviceNumaConfigNumaNode",                                     {CUDA_122, CUDA_0,   CUDA_0  }},
 };
 
 const std::map<llvm::StringRef, hipAPIversions> HIP_RUNTIME_TYPE_NAME_VER_MAP {
diff --git a/src/CUDA2HIP_SPARSE_API_functions.cpp b/src/CUDA2HIP_SPARSE_API_functions.cpp
index b2a24b97..70562a19 100644
--- a/src/CUDA2HIP_SPARSE_API_functions.cpp
+++ b/src/CUDA2HIP_SPARSE_API_functions.cpp
@@ -551,7 +551,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP {
   {"cusparseCcsr2gebsr",                                {"hipsparseCcsr2gebsr",                                "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
   {"cusparseZcsr2gebsr",                                {"hipsparseZcsr2gebsr",                                "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
 
-  {"cusparseXcoo2csr",                                  {"hipsparseXcoo2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
+  {"cusparseXcoo2csr",                                  {"hipsparseXcoo2csr",                                  "rocsparse_coo2csr",                                                CONV_LIB_FUNC, API_SPARSE, 14}},
 
   {"cusparseScsc2dense",                                {"hipsparseScsc2dense",                                "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
   {"cusparseDcsc2dense",                                {"hipsparseDcsc2dense",                                "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
@@ -615,10 +615,14 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP {
   {"cusparseChyb2csc",                                  {"hipsparseChyb2csc",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
   {"cusparseZhyb2csc",                                  {"hipsparseZhyb2csc",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
 
-  {"cusparseShyb2csr",                                  {"hipsparseShyb2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}},
-  {"cusparseDhyb2csr",                                  {"hipsparseDhyb2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}},
-  {"cusparseChyb2csr",                                  {"hipsparseChyb2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}},
-  {"cusparseZhyb2csr",                                  {"hipsparseZhyb2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}},
+  // NOTE: rocsparse_shyb2csr takes one additional argument, void* temp_buffer; see the hipsparseShyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_shyb2csr
+  {"cusparseShyb2csr",                                  {"hipsparseShyb2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
+  // NOTE: rocsparse_dhyb2csr takes one additional argument, void* temp_buffer; see the hipsparseDhyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_dhyb2csr
+  {"cusparseDhyb2csr",                                  {"hipsparseDhyb2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
+  // NOTE: rocsparse_chyb2csr takes one additional argument, void* temp_buffer; see the hipsparseChyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_chyb2csr
+  {"cusparseChyb2csr",                                  {"hipsparseChyb2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
+  // NOTE: rocsparse_zhyb2csr takes one additional argument, void* temp_buffer; see the hipsparseZhyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_zhyb2csr
+  {"cusparseZhyb2csr",                                  {"hipsparseZhyb2csr",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
 
   {"cusparseShyb2dense",                                {"hipsparseShyb2dense",                                "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
   {"cusparseDhyb2dense",                                {"hipsparseDhyb2dense",                                "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}},
@@ -630,17 +634,17 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP {
   {"cusparseCnnz",                                      {"hipsparseCnnz",                                      "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
   {"cusparseZnnz",                                      {"hipsparseZnnz",                                      "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
 
-  {"cusparseCreateIdentityPermutation",                 {"hipsparseCreateIdentityPermutation",                 "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
+  {"cusparseCreateIdentityPermutation",                 {"hipsparseCreateIdentityPermutation",                 "rocsparse_create_identity_permutation",                            CONV_LIB_FUNC, API_SPARSE, 14}},
 
-  {"cusparseXcoosort_bufferSizeExt",                    {"hipsparseXcoosort_bufferSizeExt",                    "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
+  {"cusparseXcoosort_bufferSizeExt",                    {"hipsparseXcoosort_bufferSizeExt",                    "rocsparse_coosort_buffer_size",                                    CONV_LIB_FUNC, API_SPARSE, 14}},
   {"cusparseXcoosortByRow",                             {"hipsparseXcoosortByRow",                             "rocsparse_coosort_by_row",                                         CONV_LIB_FUNC, API_SPARSE, 14}},
   {"cusparseXcoosortByColumn",                          {"hipsparseXcoosortByColumn",                          "rocsparse_coosort_by_column",                                      CONV_LIB_FUNC, API_SPARSE, 14}},
 
-  {"cusparseXcsrsort_bufferSizeExt",                    {"hipsparseXcsrsort_bufferSizeExt",                    "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
-  {"cusparseXcsrsort",                                  {"hipsparseXcsrsort",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
+  {"cusparseXcsrsort_bufferSizeExt",                    {"hipsparseXcsrsort_bufferSizeExt",                    "rocsparse_csrsort_buffer_size",                                    CONV_LIB_FUNC, API_SPARSE, 14}},
+  {"cusparseXcsrsort",                                  {"hipsparseXcsrsort",                                  "rocsparse_csrsort",                                                CONV_LIB_FUNC, API_SPARSE, 14}},
 
-  {"cusparseXcscsort_bufferSizeExt",                    {"hipsparseXcscsort_bufferSizeExt",                    "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
-  {"cusparseXcscsort",                                  {"hipsparseXcscsort",                                  "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
+  {"cusparseXcscsort_bufferSizeExt",                    {"hipsparseXcscsort_bufferSizeExt",                    "rocsparse_cscsort_buffer_size",                                    CONV_LIB_FUNC, API_SPARSE, 14}},
+  {"cusparseXcscsort",                                  {"hipsparseXcscsort",                                  "rocsparse_cscsort",                                                CONV_LIB_FUNC, API_SPARSE, 14}},
 
   {"cusparseCreateCsru2csrInfo",                        {"hipsparseCreateCsru2csrInfo",                        "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
   {"cusparseDestroyCsru2csrInfo",                       {"hipsparseDestroyCsru2csrInfo",                       "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
@@ -698,7 +702,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP {
 
   {"cusparseHpruneCsr2csrByPercentage",                 {"hipsparseHpruneCsr2csrByPercentage",                 "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED}},
   {"cusparseSpruneCsr2csrByPercentage",                 {"hipsparseSpruneCsr2csrByPercentage",                 "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
-  {"cusparseDpruneCsr2csrByPercentage",                 {"hipsparseDpruneCsr2csrByPercentage",                 "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
+  {"cusparseDpruneCsr2csrByPercentage",                 {"hipsparseDpruneCsr2csrByPercentage",                 "rocsparse_dprune_csr2csr_by_percentage",                           CONV_LIB_FUNC, API_SPARSE, 14}},
 
   {"cusparseHpruneCsr2csrByPercentage_bufferSizeExt",   {"hipsparseHpruneCsr2csrByPercentage_bufferSizeExt",   "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED}},
   {"cusparseSpruneCsr2csrByPercentage_bufferSizeExt",   {"hipsparseSpruneCsr2csrByPercentage_bufferSizeExt",   "",                                                                 CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}},
@@ -1895,6 +1899,14 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_SPARSE_FUNCTION_VER_MAP {
   {"rocsparse_sbsr2csr",                                 {HIP_3100, HIP_0,    HIP_0   }},
   {"rocsparse_coosort_by_column",                        {HIP_1090, HIP_0,    HIP_0   }},
   {"rocsparse_coosort_by_row",                           {HIP_1090, HIP_0,    HIP_0   }},
+  {"rocsparse_coosort_buffer_size",                      {HIP_1090, HIP_0,    HIP_0   }},
+  {"rocsparse_cscsort",                                  {HIP_2100, HIP_0,    HIP_0   }},
+  {"rocsparse_cscsort_buffer_size",                      {HIP_2100, HIP_0,    HIP_0   }},
+  {"rocsparse_csrsort",                                  {HIP_1090, HIP_0,    HIP_0   }},
+  {"rocsparse_csrsort_buffer_size",                      {HIP_1090, HIP_0,    HIP_0   }},
+  {"rocsparse_create_identity_permutation",              {HIP_1090, HIP_0,    HIP_0   }},
+  {"rocsparse_coo2csr",                                  {HIP_1090, HIP_0,    HIP_0   }},
+  {"rocsparse_dprune_csr2csr_by_percentage",             {HIP_3090, HIP_0,    HIP_0   }},
 };
 
 const std::map<unsigned int, llvm::StringRef> CUDA_SPARSE_API_SECTION_MAP {
diff --git a/src/CUDA2HIP_SPARSE_API_types.cpp b/src/CUDA2HIP_SPARSE_API_types.cpp
index 3ae22cf2..233912cd 100644
--- a/src/CUDA2HIP_SPARSE_API_types.cpp
+++ b/src/CUDA2HIP_SPARSE_API_types.cpp
@@ -68,8 +68,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_TYPE_NAME_MAP {
   {"cusparseColorInfo",                         {"hipsparseColorInfo",                         "_rocsparse_color_info",                              CONV_TYPE, API_SPARSE, 4, HIP_UNSUPPORTED}},
   {"cusparseColorInfo_t",                       {"hipsparseColorInfo_t",                       "rocsparse_color_info",                               CONV_TYPE, API_SPARSE, 4}},
 
-  {"pruneInfo",                                 {"pruneInfo",                                  "",                                                   CONV_TYPE, API_SPARSE, 4, UNSUPPORTED}},
-  {"pruneInfo_t",                               {"pruneInfo_t",                                "",                                                   CONV_TYPE, API_SPARSE, 4, ROC_UNSUPPORTED}},
+  {"pruneInfo",                                 {"pruneInfo",                                  "_rocsparse_mat_info",                                CONV_TYPE, API_SPARSE, 4}},
+  {"pruneInfo_t",                               {"pruneInfo_t",                                "rocsparse_mat_info",                                 CONV_TYPE, API_SPARSE, 4}},
 
   {"cusparseSpMatDescr",                        {"hipsparseSpMatDescr",                        "_rocsparse_spmat_descr",                             CONV_TYPE, API_SPARSE, 4, HIP_UNSUPPORTED}},
   {"cusparseSpMatDescr_t",                      {"hipsparseSpMatDescr_t",                      "rocsparse_spmat_descr",                              CONV_TYPE, API_SPARSE, 4}},
@@ -401,6 +401,7 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_SPARSE_TYPE_NAME_VER_MAP {
   {"csrilu02Info_t",                             {HIP_1092, HIP_0,    HIP_0   }},
   {"bsrilu02Info_t",                             {HIP_3090, HIP_0,    HIP_0   }},
   {"csrgemm2Info_t",                             {HIP_2080, HIP_0,    HIP_0   }},
+  {"pruneInfo",                                  {HIP_3090, HIP_0,    HIP_0   }},
   {"pruneInfo_t",                                {HIP_3090, HIP_0,    HIP_0   }},
   {"hipsparseAction_t",                          {HIP_1092, HIP_0,    HIP_0   }},
   {"HIPSPARSE_ACTION_SYMBOLIC",                  {HIP_1092, HIP_0,    HIP_0   }},
@@ -631,4 +632,6 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_SPARSE_TYPE_NAME_VER_MAP {
   {"rocsparse_dense_to_sparse_alg_default",      {HIP_4010, HIP_0,    HIP_0   }},
   {"rocsparse_spgemm_alg",                       {HIP_4010, HIP_0,    HIP_0   }},
   {"rocsparse_spgemm_alg_default",               {HIP_4010, HIP_0,    HIP_0   }},
+  {"_rocsparse_mat_info",                        {HIP_1090, HIP_0,    HIP_0   }},
+  {"rocsparse_mat_info",                         {HIP_1090, HIP_0,    HIP_0   }},
 };
diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp
index 7e252a9b..2c635d58 100644
--- a/src/HipifyAction.cpp
+++ b/src/HipifyAction.cpp
@@ -1199,12 +1199,11 @@ class PPCallbackProxy : public clang::PPCallbacks {
 
 public:
   explicit PPCallbackProxy(HipifyAction &action): hipifyAction(action) {}
-  // [ToDo] Remove SWDEV_375013 related guards from CMakeLists.txt and HipifyAction.cpp along with the LLVM 16.0.0 official release
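+  // NOTE: the type of the 'file' parameter below differs across clang releases:
+  // 'const clang::FileEntry*' before LLVM 15, 'Optional<clang::FileEntryRef>' in
+  // LLVM 15, and 'clang::OptionalFileEntryRef' from LLVM 16 onwards.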
   void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token,
                           StringRef file_name, bool is_angled, clang::CharSourceRange filename_range,
 #if LLVM_VERSION_MAJOR < 15
                           const clang::FileEntry *file,
-#elif (LLVM_VERSION_MAJOR == 15) || (LLVM_VERSION_MAJOR == 16 && SWDEV_375013)
+#elif LLVM_VERSION_MAJOR == 15
                           Optional<clang::FileEntryRef> file,
 #else
                           clang::OptionalFileEntryRef file,
diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
index fa917ec3..982f1e74 100644
--- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
+++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
@@ -208,7 +208,9 @@ int main() {
   float fresult = 0;
 
   float** fAarray = 0;
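+  // const-qualified aliases (here and below) for APIs whose array parameters are declared 'const T* const []':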
+  const float** const fAarray_const = const_cast<const float**>(fAarray);
   float** fBarray = 0;
+  const float** const fBarray_const = const_cast<const float**>(fBarray);
   float** fCarray = 0;
   float** fTauarray = 0;
 
@@ -228,12 +230,16 @@ int main() {
   double dresult = 0;
 
   double** dAarray = 0;
+  const double** const dAarray_const = const_cast<const double**>(dAarray);
   double** dBarray = 0;
+  const double** const dBarray_const = const_cast<const double**>(dBarray);
   double** dCarray = 0;
   double** dTauarray = 0;
 
   void** voidAarray = nullptr;
+  const void** const voidAarray_const = const_cast<const void**>(voidAarray);
   void** voidBarray = nullptr;
+  const void** const voidBarray_const = const_cast<const void**>(voidBarray);
   void** voidCarray = nullptr;
 
   // NOTE: float CUBLASWINAPI cublasSnrm2(int n, const float* x, int incx) is not supported by HIP
@@ -254,20 +260,28 @@ int main() {
   cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb;
 
   // CHECK: hipComplex** complexAarray = 0;
+  // CHECK: const hipComplex** const complexAarray_const = const_cast<const hipComplex**>(complexAarray);
   // CHECK-NEXT: hipComplex** complexBarray = 0;
+  // CHECK: const hipComplex** const complexBarray_const = const_cast<const hipComplex**>(complexBarray);
   // CHECK-NEXT: hipComplex** complexCarray = 0;
   // CHECK-NEXT: hipComplex** complexTauarray = 0;
   cuComplex** complexAarray = 0;
+  const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray);
   cuComplex** complexBarray = 0;
+  const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray);
   cuComplex** complexCarray = 0;
   cuComplex** complexTauarray = 0;
 
   // CHECK: hipDoubleComplex** dcomplexAarray = 0;
+  // CHECK: const hipDoubleComplex** const dcomplexAarray_const = const_cast<const hipDoubleComplex**>(dcomplexAarray);
   // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0;
+  // CHECK: const hipDoubleComplex** const dcomplexBarray_const = const_cast<const hipDoubleComplex**>(dcomplexBarray);
   // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0;
   // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0;
   cuDoubleComplex** dcomplexAarray = 0;
+  const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray);
   cuDoubleComplex** dcomplexBarray = 0;
+  const cuDoubleComplex** const dcomplexBarray_const = const_cast<const cuDoubleComplex**>(dcomplexBarray);
   cuDoubleComplex** dcomplexCarray = 0;
   cuDoubleComplex** dcomplexTauarray = 0;
 
@@ -1041,13 +1055,13 @@ int main() {
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const AP[], int lda, const float* const BP[], int ldb, const float* beta, float* const CP[], int ldc, int batchCount);
-  // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
-  blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
+  // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
+  blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const AP[], int lda, const double* const BP[], int ldb, const double* beta, double* const CP[], int ldc, int batchCount);
-  // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
-  blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
+  // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
+  blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
 
   // TODO: __half -> hipblasHalf
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* const Aarray[], int lda, const __half* const Barray[], int ldb, const __half* beta, __half* const Carray[], int ldc, int batchCount);
@@ -1055,13 +1069,13 @@ int main() {
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const AP[], int lda, const hipblasComplex* const BP[], int ldb, const hipblasComplex* beta, hipblasComplex* const CP[], int ldc, int batchCount);
-  // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount);
-  blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount);
+  // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount);
+  blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], int lda, const hipblasDoubleComplex* const BP[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const CP[], int ldc, int batchCount);
-  // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
-  blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+  // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+  blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
 
   // NOTE: void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, const float* A, int lda, float beta, float* C, int ldc); is not supported by HIP
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc);
@@ -1267,63 +1281,63 @@ int main() {
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount);
-  blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount);
+  // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount);
+  blasStatus = cublasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount);
-  blasStatus = cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount);
+  // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount);
+  blasStatus = cublasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount);
-  blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount);
+  // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount);
+  blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount);
-  blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount);
+  // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount);
+  blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount);
-  blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount);
+  // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount);
+  blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount);
-  blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount);
+  // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount);
+  blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount);
-  blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount);
+  // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount);
+  blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount);
-  blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount);
+  // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount);
+  blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount);
-  // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
-  blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
+  // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount);
+  blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount);
-  // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
-  blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
+  // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount);
+  blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount);
-  // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
-  blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
+  // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount);
+  blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount);
-  // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
-  blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
+  // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount);
+  blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount);
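A note on the *_const helper variables these hunks introduce: cuBLAS declares its batched pointer arrays with deep const ("const double* const A[]"), while several of the hipBLAS counterparts quoted above drop one of the qualifiers, and the tests presumably want arguments whose static type already carries the const so that hipify is exercised on const-qualified argument types. The underlying C++ rule is that const can only be added implicitly at every pointer level at once; a minimal standalone sketch (names are illustrative, not from the tests):

    // double** converts implicitly to "const double* const*" (const added at
    // every level) but NOT to "const double**" (shallow const only), so the
    // tests build an explicit const view once via const_cast.
    void deepConst(const double* const A[]);   // cuBLAS-style parameter
    void shallowConst(const double** A);       // rejects a plain double**

    void demo() {
      double** dAarray = nullptr;
      const double** const dAarray_const = const_cast<const double**>(dAarray);
      deepConst(dAarray);           // OK: implicit qualification conversion
      deepConst(dAarray_const);     // OK: const added at the remaining level
      shallowConst(dAarray_const);  // OK through the explicit const view
      // shallowConst(dAarray);     // ill-formed without a cast
    }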
@@ -1528,8 +1542,8 @@ int main() {
 #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
-  // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
-  blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+  // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+  blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu
index dde02d90..21a984d3 100644
--- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu
+++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu
@@ -219,7 +219,9 @@ int main() {
   float fresult = 0;
 
   float** fAarray = 0;
+  const float** const fAarray_const = const_cast<const float**>(fAarray);
   float** fBarray = 0;
+  const float** const fBarray_const = const_cast<const float**>(fBarray);
   float** fCarray = 0;
   float** fTauarray = 0;
 
@@ -239,12 +241,16 @@ int main() {
   double dresult = 0;
 
   double** dAarray = 0;
+  const double** const dAarray_const = const_cast<const double**>(dAarray);
   double** dBarray = 0;
+  const double** const dBarray_const = const_cast<const double**>(dBarray);
   double** dCarray = 0;
   double** dTauarray = 0;
 
   void** voidAarray = nullptr;
+  const void** const voidAarray_const = const_cast<const void**>(voidAarray);
   void** voidBarray = nullptr;
+  const void** const voidBarray_const = const_cast<const void**>(voidBarray);
   void** voidCarray = nullptr;
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result);
@@ -267,20 +273,28 @@ int main() {
   cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb;
 
   // CHECK: hipComplex** complexAarray = 0;
+  // CHECK: const hipComplex** const complexAarray_const = const_cast<const hipComplex**>(complexAarray);
   // CHECK-NEXT: hipComplex** complexBarray = 0;
+  // CHECK: const hipComplex** const complexBarray_const = const_cast<const hipComplex**>(complexBarray);
   // CHECK-NEXT: hipComplex** complexCarray = 0;
   // CHECK-NEXT: hipComplex** complexTauarray = 0;
   cuComplex** complexAarray = 0;
+  const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray);
   cuComplex** complexBarray = 0;
+  const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray);
   cuComplex** complexCarray = 0;
   cuComplex** complexTauarray = 0;
 
   // CHECK: hipDoubleComplex** dcomplexAarray = 0;
+  // CHECK: const hipDoubleComplex** const dcomplexAarray_const = const_cast<const hipDoubleComplex**>(dcomplexAarray);
   // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0;
+  // CHECK: const hipDoubleComplex** const dcomplexBarray_const = const_cast<const hipDoubleComplex**>(dcomplexBarray);
   // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0;
   // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0;
   cuDoubleComplex** dcomplexAarray = 0;
+  const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray);
   cuDoubleComplex** dcomplexBarray = 0;
+  const cuDoubleComplex** const dcomplexBarray_const = const_cast<const cuDoubleComplex**>(dcomplexBarray);
   cuDoubleComplex** dcomplexCarray = 0;
   cuDoubleComplex** dcomplexTauarray = 0;
 
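For readers unfamiliar with the annotations being edited here: these synthetic tests are verified with LLVM FileCheck, where a plain CHECK directive matches anywhere at or after the previous match, while CHECK-NEXT must match the immediately following line. The newly inserted *_const expectations use plain CHECK, presumably so they only pin the order of the new lines without disturbing the pre-existing CHECK/CHECK-NEXT chain. In miniature:

    // CHECK:      hipComplex** complexAarray = 0;
    // CHECK:      const hipComplex** const complexAarray_const = const_cast<const hipComplex**>(complexAarray);
    // CHECK-NEXT: hipComplex** complexBarray = 0;
    // The CHECK-NEXT still succeeds: its "previous match" is now the
    // complexAarray_const line, and complexBarray follows it directly
    // in the hipified output.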
@@ -1188,13 +1202,13 @@ int main() {
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const AP[], int lda, const float* const BP[], int ldb, const float* beta, float* const CP[], int ldc, int batchCount);
-  // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
-  blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
+  // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
+  blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const AP[], int lda, const double* const BP[], int ldb, const double* beta, double* const CP[], int ldc, int batchCount);
-  // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
-  blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
+  // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
+  blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
 
   // TODO: __half -> hipblasHalf
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* const Aarray[], int lda, const __half* const Barray[], int ldb, const __half* beta, __half* const Carray[], int ldc, int batchCount);
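The gemmBatched updates above all follow the same pattern, and the semantics are identical across cuBLAS and hipBLAS: one independent GEMM per batch entry, with the pointer arrays indexing the per-problem matrices. A reference sketch of what the tested calls compute, with the transpose flags omitted for brevity ("gemm" here is a hypothetical single-matrix GEMM, not a library function):

    // Hypothetical single-matrix GEMM used by the reference loop below.
    template <typename T>
    void gemm(int m, int n, int k, const T* alpha, const T* A, int lda,
              const T* B, int ldb, const T* beta, T* C, int ldc);

    // Per batch entry i: C[i] = alpha * op(A[i]) * op(B[i]) + beta * C[i]
    template <typename T>
    void gemmBatchedRef(int m, int n, int k, const T* alpha,
                        const T* const Aarray[], int lda,
                        const T* const Barray[], int ldb, const T* beta,
                        T* const Carray[], int ldc, int batchCount) {
      for (int i = 0; i < batchCount; ++i)
        gemm(m, n, k, alpha, Aarray[i], lda, Barray[i], ldb, beta,
             Carray[i], ldc);
    }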
@@ -1202,13 +1216,13 @@ int main() {
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const AP[], int lda, const hipblasComplex* const BP[], int ldb, const hipblasComplex* beta, hipblasComplex* const CP[], int ldc, int batchCount);
-  // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount);
-  blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount);
+  // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount);
+  blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], int lda, const hipblasDoubleComplex* const BP[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const CP[], int ldc, int batchCount);
-  // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
-  blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+  // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+  blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* beta, float* CP, int ldc);
@@ -1436,63 +1450,63 @@ int main() {
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount);
-  blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount);
+  // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount);
+  blasStatus = cublasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount);
-  blasStatus = cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount);
+  // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount);
+  blasStatus = cublasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount);
-  blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount);
+  // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount);
+  blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount);
-  blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount);
+  // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount);
+  blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount);
-  blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount);
+  // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount);
+  blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount);
-  blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount);
+  // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount);
+  blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount);
-  blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount);
+  // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount);
+  blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount);
-  // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount);
-  blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount);
+  // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount);
+  blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount);
-  // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
-  blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
+  // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount);
+  blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount);
-  // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
-  blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
+  // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount);
+  blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount);
-  // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
-  blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
+  // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount);
+  blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount);
-  // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
-  blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
+  // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount);
+  blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount);
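One detail worth noting in the hipBLAS prototypes quoted throughout this hunk: scalar parameters are spelled "const int n", "const int lda", and so on. Top-level const on a by-value parameter is dropped from the function's type, so the call sites, and hipify's rewriting of them, are unaffected; the qualifier only prevents the definition from modifying its local copy. For instance:

    void f(int n);          // declaration
    void f(const int n) {   // same function: top-level const does not
      // n = 0;             // change the signature, but assigning to n
    }                       // inside the body would now be ill-formed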
@@ -1697,8 +1711,8 @@ int main() {
 #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
-  // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
-  blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+  // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+  blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
 
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
   // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
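GemmBatchedEx is the one place where the const views matter even before hipification: the hipBLAS prototype above declares the pointer arrays as "const void* A[]" (shallow const), to which a plain void** does not convert implicitly, whereas cuBLAS's "const void* const Aarray[]" (deep const) accepts either form. Passing voidAarray_const therefore keeps the same source line well-formed on both sides. A minimal sketch:

    void cudaStyle(const void* const Aarray[]);  // deep const: accepts void**
    void hipStyle(const void* A[]);              // shallow: needs const void**

    void demo() {
      void** voidAarray = nullptr;
      const void** const voidAarray_const = const_cast<const void**>(voidAarray);
      cudaStyle(voidAarray);        // OK
      cudaStyle(voidAarray_const);  // OK
      hipStyle(voidAarray_const);   // OK
      // hipStyle(voidAarray);      // ill-formed without a cast
    }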
diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu
index 7a73c9b2..7589371d 100644
--- a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu
+++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu
@@ -235,7 +235,9 @@ int main() {
   float fresult = 0;
 
   float** fAarray = 0;
+  const float** const fAarray_const = const_cast<const float**>(fAarray);
   float** fBarray = 0;
+  const float** const fBarray_const = const_cast<const float**>(fBarray);
   float** fCarray = 0;
   float** fTauarray = 0;
 
@@ -255,12 +257,16 @@ int main() {
   double dresult = 0;
 
   double** dAarray = 0;
+  const double** const dAarray_const = const_cast<const double**>(dAarray);
   double** dBarray = 0;
+  const double** const dBarray_const = const_cast<const double**>(dBarray);
   double** dCarray = 0;
   double** dTauarray = 0;
 
   void** voidAarray = nullptr;
+  const void** const voidAarray_const = const_cast<const void**>(voidAarray);
   void** voidBarray = nullptr;
+  const void** const voidBarray_const = const_cast<const void**>(voidBarray);
   void** voidCarray = nullptr;
 
   // TODO: #1281
@@ -283,20 +289,28 @@ int main() {
   cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb;
 
   // CHECK: rocblas_float_complex** complexAarray = 0;
+  // CHECK: const rocblas_float_complex** const complexAarray_const = const_cast<const rocblas_float_complex**>(complexAarray);
   // CHECK-NEXT: rocblas_float_complex** complexBarray = 0;
+  // CHECK: const rocblas_float_complex** const complexBarray_const = const_cast<const rocblas_float_complex**>(complexBarray);
   // CHECK-NEXT: rocblas_float_complex** complexCarray = 0;
   // CHECK-NEXT: rocblas_float_complex** complexTauarray = 0;
   cuComplex** complexAarray = 0;
+  const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray);
   cuComplex** complexBarray = 0;
+  const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray);
   cuComplex** complexCarray = 0;
   cuComplex** complexTauarray = 0;
 
   // CHECK: rocblas_double_complex** dcomplexAarray = 0;
+  // CHECK: const rocblas_double_complex** const dcomplexAarray_const = const_cast<const rocblas_double_complex**>(dcomplexAarray);
   // CHECK-NEXT: rocblas_double_complex** dcomplexBarray = 0;
+  // CHECK: const rocblas_double_complex** const dcomplexBarray_const = const_cast<const rocblas_double_complex**>(dcomplexBarray);
   // CHECK-NEXT: rocblas_double_complex** dcomplexCarray = 0;
   // CHECK-NEXT: rocblas_double_complex** dcomplexTauarray = 0;
   cuDoubleComplex** dcomplexAarray = 0;
+  const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray);
   cuDoubleComplex** dcomplexBarray = 0;
+  const cuDoubleComplex** const dcomplexBarray_const = const_cast<const cuDoubleComplex**>(dcomplexBarray);
   cuDoubleComplex** dcomplexCarray = 0;
   cuDoubleComplex** dcomplexTauarray = 0;
 
@@ -1194,14 +1208,14 @@ int main() {
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* const A[], rocblas_int lda, const float* const B[], rocblas_int ldb, const float* beta, float* const C[], rocblas_int ldc, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
-  blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
+  // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
+  blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* const A[], rocblas_int lda, const double* const B[], rocblas_int ldb, const double* beta, double* const C[], rocblas_int ldc, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
-  blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
+  // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
+  blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
 
   // TODO: #1281
   // TODO: __half -> rocblas_half
@@ -1211,14 +1225,14 @@ int main() {
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const B[], rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], rocblas_int ldc, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount);
-  blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount);
+  // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount);
+  blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const B[], rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], rocblas_int ldc, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
-  blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+  // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+  blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
 
   // TODO: #1281
   // NOTE: void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, const float* A, int lda, float beta, float* C, int ldc); is not supported by HIP
@@ -1465,26 +1479,26 @@ int main() {
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* const A[], rocblas_int lda, float* const B[], rocblas_int ldb, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
-  blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
+  // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount);
+  blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, double* const B[], rocblas_int ldb, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
-  blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
+  // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount);
+  blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, rocblas_float_complex* const B[], rocblas_int ldb, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
-  blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
+  // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount);
+  blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, rocblas_double_complex* const B[], rocblas_int ldb, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
-  blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
+  // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount);
+  blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc);
@@ -1651,14 +1665,31 @@ int main() {
 #if CUDA_VERSION >= 9000
   // CHECK: rocblas_gemm_algo BLAS_GEMM_DEFAULT = rocblas_gemm_algo_standard;
   cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT;
+
+  // CHECK: rocblas_math_mode blasMath;
+  // CHECK-NEXT: rocblas_math_mode BLAS_DEFAULT_MATH = rocblas_default_math;
+  // CHECK-NEXT: rocblas_math_mode BLAS_TF32_TENSOR_OP_MATH = rocblas_xf32_xdl_math_op;
+  cublasMath_t blasMath;
+  cublasMath_t BLAS_DEFAULT_MATH = CUBLAS_DEFAULT_MATH;
+  cublasMath_t BLAS_TF32_TENSOR_OP_MATH = CUBLAS_TF32_TENSOR_OP_MATH;
+
+  // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, cublasMath_t* mode);
+  // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_math_mode(rocblas_handle handle, rocblas_math_mode* math_mode);
+  // CHECK: blasStatus = rocblas_get_math_mode(blasHandle, &blasMath);
+  blasStatus = cublasGetMathMode(blasHandle, &blasMath);
+
+  // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, cublasMath_t mode);
+  // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_math_mode(rocblas_handle handle, rocblas_math_mode math_mode);
+  // CHECK: blasStatus = rocblas_set_math_mode(blasHandle, blasMath);
+  blasStatus = cublasSetMathMode(blasHandle, blasMath);
 #endif
 
 #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags);
-  // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
-  blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+  // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+  blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
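The math-mode additions earlier in this hunk pair cublasGetMathMode/cublasSetMathMode with rocblas_get_math_mode/rocblas_set_math_mode and map CUBLAS_TF32_TENSOR_OP_MATH onto rocblas_xf32_xdl_math_op. A hedged usage sketch against the prototypes quoted in the ROC: comments (the include path assumes a recent ROCm layout; error checking elided):

    #include <rocblas/rocblas.h>

    int main() {
      rocblas_handle handle;
      rocblas_create_handle(&handle);

      rocblas_math_mode mode;
      rocblas_get_math_mode(handle, &mode);   // query the current math mode
      rocblas_set_math_mode(handle, rocblas_xf32_xdl_math_op);  // opt in to XF32

      rocblas_destroy_handle(&handle);
      return 0;
    }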
@@ -1686,6 +1717,11 @@ int main() {
   // CHECK-NEXT: rocblas_datatype C_16BF = rocblas_datatype_bf16_c;
   cublasDataType_t R_16BF = CUDA_R_16BF;
   cublasDataType_t C_16BF = CUDA_C_16BF;
+
+  // CHECK: rocblas_computetype blasComputeType;
+  // CHECK-NEXT: rocblas_computetype BLAS_COMPUTE_32F = rocblas_compute_type_f32;
+  cublasComputeType_t blasComputeType;
+  cublasComputeType_t BLAS_COMPUTE_32F = CUBLAS_COMPUTE_32F;
 #endif
 
 #if CUDA_VERSION >= 11040 && CUBLAS_VERSION >= 11600
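The hunk above also adds the first rocblas_computetype coverage: CUBLAS_COMPUTE_32F now maps to a dedicated rocblas_compute_type_f32 rather than being expressed through rocblas_datatype, which is how the compute-type argument of rocblas_gemm_batched_ex is still typed in the prototype quoted earlier. The rewrite the new CHECK lines pin down:

    // CUDA source:
    cublasComputeType_t blasComputeType = CUBLAS_COMPUTE_32F;
    // hipified for rocBLAS:
    rocblas_computetype blasComputeType = rocblas_compute_type_f32;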
diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu
index eab649d8..c03a1f0f 100644
--- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu
+++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu
@@ -243,7 +243,9 @@ int main() {
   float fresult = 0;
 
   float** fAarray = 0;
+  const float** const fAarray_const = const_cast<const float**>(fAarray);
   float** fBarray = 0;
+  const float** const fBarray_const = const_cast<const float**>(fBarray);
   float** fCarray = 0;
   float** fTauarray = 0;
 
@@ -263,12 +265,16 @@ int main() {
   double dresult = 0;
 
   double** dAarray = 0;
+  const double** const dAarray_const = const_cast<const double**>(dAarray);
   double** dBarray = 0;
+  const double** const dBarray_const = const_cast<const double**>(dBarray);
   double** dCarray = 0;
   double** dTauarray = 0;
 
   void** voidAarray = nullptr;
+  const void** const voidAarray_const = const_cast<const void**>(voidAarray);
   void** voidBarray = nullptr;
+  const void** const voidBarray_const = const_cast<const void**>(voidBarray);
   void** voidCarray = nullptr;
 
   // TODO: #1281
@@ -293,20 +299,28 @@ int main() {
   cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb;
 
   // CHECK: rocblas_float_complex** complexAarray = 0;
+  // CHECK: const rocblas_float_complex** const complexAarray_const = const_cast<const rocblas_float_complex**>(complexAarray);
   // CHECK-NEXT: rocblas_float_complex** complexBarray = 0;
+  // CHECK: const rocblas_float_complex** const complexBarray_const = const_cast<const rocblas_float_complex**>(complexBarray);
   // CHECK-NEXT: rocblas_float_complex** complexCarray = 0;
   // CHECK-NEXT: rocblas_float_complex** complexTauarray = 0;
   cuComplex** complexAarray = 0;
+  const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray);
   cuComplex** complexBarray = 0;
+  const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray);
   cuComplex** complexCarray = 0;
   cuComplex** complexTauarray = 0;
 
   // CHECK: rocblas_double_complex** dcomplexAarray = 0;
+  // CHECK: const rocblas_double_complex** const dcomplexAarray_const = const_cast<const rocblas_double_complex**>(dcomplexAarray);
   // CHECK-NEXT: rocblas_double_complex** dcomplexBarray = 0;
+  // CHECK: const rocblas_double_complex** const dcomplexBarray_const = const_cast<const rocblas_double_complex**>(dcomplexBarray);
   // CHECK-NEXT: rocblas_double_complex** dcomplexCarray = 0;
   // CHECK-NEXT: rocblas_double_complex** dcomplexTauarray = 0;
   cuDoubleComplex** dcomplexAarray = 0;
+  const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray);
   cuDoubleComplex** dcomplexBarray = 0;
+  const cuDoubleComplex** const dcomplexBarray_const = const_cast<const cuDoubleComplex**>(dcomplexBarray);
   cuDoubleComplex** dcomplexCarray = 0;
   cuDoubleComplex** dcomplexTauarray = 0;
 
@@ -1338,14 +1352,14 @@ int main() {
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* const A[], rocblas_int lda, const float* const B[], rocblas_int ldb, const float* beta, float* const C[], rocblas_int ldc, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
-  blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount);
+  // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
+  blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* const A[], rocblas_int lda, const double* const B[], rocblas_int ldb, const double* beta, double* const C[], rocblas_int ldc, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
-  blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount);
+  // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
+  blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount);
 
   // TODO: #1281
   // TODO: __half -> rocblas_half
@@ -1355,14 +1369,14 @@ int main() {
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const B[], rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], rocblas_int ldc, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount);
-  blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount);
+  // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount);
+  blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const B[], rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], rocblas_int ldc, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
-  blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+  // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+  blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc);
@@ -1635,26 +1649,26 @@ int main() {
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* const A[], rocblas_int lda, float* const B[], rocblas_int ldb, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
-  blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
+  // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount);
+  blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, double* const B[], rocblas_int ldb, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
-  blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
+  // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount);
+  blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, rocblas_float_complex* const B[], rocblas_int ldb, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
-  blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
+  // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount);
+  blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, rocblas_double_complex* const B[], rocblas_int ldb, rocblas_int batch_count);
-  // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
-  blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
+  // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount);
+  blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc);
@@ -1827,8 +1841,8 @@ int main() {
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
   // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags);
-  // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
-  blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+  // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+  blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
 
   // TODO: #1281
   // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
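
The `*trsmBatched` hunks above const-qualify only the A batch. A hedged sketch of why, shaped after the `rocblas_strsm_batched` signature quoted in the diff (the include path and enum choices are assumptions of this sketch, and error handling is elided):

```cpp
#include <rocblas/rocblas.h>  // assumed include path; older releases use <rocblas.h>

// trsm solves op(A) * X = alpha * B in place: the A batch is read-only
// ("const float* const A[]"), while the B batch is overwritten with X,
// which is why only fAarray/dAarray/... gain _const views in the test.
rocblas_status solveBatched(rocblas_handle h, int m, int n, const float* alpha,
                            const float* const A[], int lda,
                            float* const B[], int ldb, int batchCount) {
  return rocblas_strsm_batched(h, rocblas_side_left, rocblas_fill_lower,
                               rocblas_operation_none, rocblas_diagonal_non_unit,
                               m, n, alpha, A, lda, B, ldb, batchCount);
}
```
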
diff --git a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu
index b59e9adc..382bd209 100644
--- a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu
+++ b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu
@@ -125,6 +125,8 @@ int main() {
   int nnzb = 0;
   int innz = 0;
   int blockDim = 0;
+  int cscRowIndA = 0;
+  int cscColPtrA = 0;
   int csrRowPtrA = 0;
   int csrColIndA = 0;
   int ncolors = 0;
@@ -157,6 +159,7 @@ int main() {
   void *indices = nullptr;
   void *values = nullptr;
   void *cooRowInd = nullptr;
+  int icooRowInd = 0;
   void *cscRowInd = nullptr;
   void *csrColInd = nullptr;
   void *cooColInd = nullptr;
@@ -185,6 +188,7 @@ int main() {
   float ffractionToColor = 0.f;
   double bsrValA = 0.f;
   double csrValA = 0.f;
+  float fcsrValA = 0.f;
   double csrValC = 0.f;
   float csrSortedValA = 0.f;
   double dbsrSortedValA = 0.f;
@@ -192,6 +196,9 @@ int main() {
   float fbsrSortedValA = 0.f;
   float fbsrSortedValC = 0.f;
   float fcsrSortedValC = 0.f;
+  double percentage = 0.f;
+
+  pruneInfo_t prune_info;
 
   // CHECK: hipDoubleComplex dcomplex, dComplexbsrSortedValA, dComplexbsrSortedValC;
   cuDoubleComplex dcomplex, dComplexbsrSortedValA, dComplexbsrSortedValC;
@@ -412,6 +419,41 @@ int main() {
   // CHECK: status_t = hipsparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer);
   status_t = cusparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer);
 
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cooRowsA, const int* cooColsA, size_t* pBufferSizeInBytes);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcoosort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* cooRows, const int* cooCols, size_t* pBufferSizeInBytes);
+  // CHECK: status_t = hipsparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize);
+  status_t = cusparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* cscColPtrA, int* cscRowIndA, int* P, void* pBuffer);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcscsort(hipsparseHandle_t handle, int m, int n, int nnz, const hipsparseMatDescr_t descrA, const int* cscColPtr, int* cscRowInd, int* P, void* pBuffer);
+  // CHECK: status_t = hipsparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer);
+  status_t = cusparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cscColPtrA, const int* cscRowIndA, size_t* pBufferSizeInBytes);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcscsort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* cscColPtr, const int* cscRowInd, size_t* pBufferSizeInBytes);
+  // CHECK: status_t = hipsparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize);
+  status_t = cusparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* csrRowPtrA, int* csrColIndA, int* P, void* pBuffer);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcsrsort(hipsparseHandle_t handle, int m, int n, int nnz, const hipsparseMatDescr_t descrA, const int* csrRowPtr, int* csrColInd, int* P, void* pBuffer);
+  // CHECK: status_t = hipsparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer);
+  status_t = cusparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtrA, const int* csrColIndA, size_t* pBufferSizeInBytes);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcsrsort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtr, const int* csrColInd, size_t* pBufferSizeInBytes);
+  // CHECK: status_t = hipsparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize);
+  status_t = cusparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int* p);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCreateIdentityPermutation(hipsparseHandle_t handle, int n, int* p);
+  // CHECK: status_t = hipsparseCreateIdentityPermutation(handle_t, n, P);
+  status_t = cusparseCreateIdentityPermutation(handle_t, n, P);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrSortedRowPtr, cusparseIndexBase_t idxBase);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcoo2csr(hipsparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrRowPtr, hipsparseIndexBase_t idxBase);
+  // CHECK: status_t = hipsparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t);
+  status_t = cusparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t);
+
 #if CUDA_VERSION >= 8000
   // CHECK: hipDataType dataType_t;
   // CHECK-NEXT: hipDataType dataType;
@@ -419,6 +461,13 @@ int main() {
   cudaDataType dataType;
 #endif
 
+#if CUDA_VERSION >= 9000
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage(cusparseHandle_t handle, int m, int n, int nnzA, const cusparseMatDescr_t descrA, const double* csrSortedValA, const int* csrSortedRowPtrA, const int* csrSortedColIndA, float percentage, const cusparseMatDescr_t descrC, double* csrSortedValC, const int* csrSortedRowPtrC, int* csrSortedColIndC, pruneInfo_t info, void* pBuffer);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDpruneCsr2csrByPercentage(hipsparseHandle_t handle, int m, int n, int nnzA, const hipsparseMatDescr_t descrA, const double* csrValA, const int* csrRowPtrA, const int* csrColIndA, double percentage, const hipsparseMatDescr_t descrC, double* csrValC, const int* csrRowPtrC, int* csrColIndC, pruneInfo_t info, void* buffer);
+  // CHECK: status_t = hipsparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer);
+  status_t = cusparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer);
+#endif
+
 #if CUDA_VERSION >= 8000 && CUDA_VERSION < 12000
   // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src);
   // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCopyMatDescr(hipsparseMatDescr_t dest, const hipsparseMatDescr_t src);
@@ -426,7 +475,7 @@ int main() {
   status_t = cusparseCopyMatDescr(matDescr_t, matDescr_t_2);
 #endif
 
-#if CUDA_VERSION >= 10010
+#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000
   // CHECK: hipsparseSpMatDescr_t spMatDescr_t, matC;
   cusparseSpMatDescr_t spMatDescr_t, matC;
 
@@ -471,11 +520,6 @@ int main() {
   // CHECK: hipsparseSpMMAlg_t spMMAlg_t;
   cusparseSpMMAlg_t spMMAlg_t;
 
-  // CHECK: hipsparseCsr2CscAlg_t Csr2CscAlg_t;
-  // CHECK-NEXT: hipsparseCsr2CscAlg_t CSR2CSC_ALG1 = HIPSPARSE_CSR2CSC_ALG1;
-  cusparseCsr2CscAlg_t Csr2CscAlg_t;
-  cusparseCsr2CscAlg_t CSR2CSC_ALG1 = CUSPARSE_CSR2CSC_ALG1;
-
   // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateCoo(cusparseSpMatDescr_t* spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void* cooRowInd, void* cooColInd, void* cooValues, cusparseIndexType_t cooIdxType, cusparseIndexBase_t idxBase, cudaDataType valueType);
   // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCreateCoo(hipsparseSpMatDescr_t* spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void* cooRowInd, void* cooColInd, void* cooValues, hipsparseIndexType_t cooIdxType, hipsparseIndexBase_t idxBase, hipDataType valueType);
   // CHECK: status_t = hipsparseCreateCoo(&spMatDescr_t, rows, cols, nnz, cooRowInd, cooColInd, cooValues, indexType_t, indexBase_t, dataType);
@@ -527,28 +571,34 @@ int main() {
   status_t = cusparseDnMatSetStridedBatch(dnMatDescr_t, batchCount, batchStride);
 #endif
 
-#if CUDA_VERSION >= 10010 && CUDA_VERSION < 12000
+#if CUDA_VERSION >= 10010
+  // CHECK: hipsparseCsr2CscAlg_t Csr2CscAlg_t;
+  // CHECK-NEXT: hipsparseCsr2CscAlg_t CSR2CSC_ALG1 = HIPSPARSE_CSR2CSC_ALG1;
+  cusparseCsr2CscAlg_t Csr2CscAlg_t;
+  cusparseCsr2CscAlg_t CSR2CSC_ALG1 = CUSPARSE_CSR2CSC_ALG1;
+#endif
+
+#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000)
   // CHECK: hipsparseSpMMAlg_t COOMM_ALG1 = HIPSPARSE_COOMM_ALG1;
   // CHECK-NEXT: hipsparseSpMMAlg_t COOMM_ALG2 = HIPSPARSE_COOMM_ALG2;
   // CHECK-NEXT: hipsparseSpMMAlg_t COOMM_ALG3 = HIPSPARSE_COOMM_ALG3;
   cusparseSpMMAlg_t COOMM_ALG1 = CUSPARSE_COOMM_ALG1;
   cusparseSpMMAlg_t COOMM_ALG2 = CUSPARSE_COOMM_ALG2;
   cusparseSpMMAlg_t COOMM_ALG3 = CUSPARSE_COOMM_ALG3;
+#endif
 
+#if CUDA_VERSION >= 10010 && CUDA_VERSION < 12000
   // CHECK: hipsparseCsr2CscAlg_t CSR2CSC_ALG2 = HIPSPARSE_CSR2CSC_ALG2;
   cusparseCsr2CscAlg_t CSR2CSC_ALG2 = CUSPARSE_CSR2CSC_ALG2;
 #endif
 
-#if CUDA_VERSION >= 10020
+#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000
   // CHECK: hipsparseSpVecDescr_t spVecDescr_t;
   cusparseSpVecDescr_t spVecDescr_t;
 
   // CHECK: hipsparseDnVecDescr_t dnVecDescr_t, vecX, vecY;
   cusparseDnVecDescr_t dnVecDescr_t, vecX, vecY;
 
-  // CHECK: hipsparseStatus_t STATUS_NOT_SUPPORTED = HIPSPARSE_STATUS_NOT_SUPPORTED;
-  cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED;
-
   // CHECK: hipsparseSpMVAlg_t spMVAlg_t;
   cusparseSpMVAlg_t spMVAlg_t;
 
@@ -648,7 +698,12 @@ int main() {
   status_t = cusparseSpMV(handle_t, opA, alpha, spMatDescr_t, vecX, beta, vecY, dataType, spMVAlg_t, tempBuffer);
 #endif
 
-#if CUDA_VERSION >= 10020 && CUDA_VERSION < 12000
+#if CUDA_VERSION >= 10020
+  // CHECK: hipsparseStatus_t STATUS_NOT_SUPPORTED = HIPSPARSE_STATUS_NOT_SUPPORTED;
+  cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED;
+#endif
+
+#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000)
   // CHECK: hipsparseFormat_t FORMAT_COO_AOS = HIPSPARSE_FORMAT_COO_AOS;
   cusparseFormat_t FORMAT_COO_AOS = CUSPARSE_FORMAT_COO_AOS;
 
@@ -772,6 +827,26 @@ int main() {
   // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDestroyHybMat(hipsparseHybMat_t hybA);
   // CHECK: status_t = hipsparseDestroyHybMat(hybMat_t);
   status_t = cusparseDestroyHybMat(hybMat_t);
+
+  // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseZhyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, cuDoubleComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseZhyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, hipDoubleComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA);
+  // CHECK: status_t = hipsparseZhyb2csr(handle_t, matDescr_t, hybMat_t, &dComplexbsrSortedValA, &csrRowPtrA, &csrColIndA);
+  status_t = cusparseZhyb2csr(handle_t, matDescr_t, hybMat_t, &dComplexbsrSortedValA, &csrRowPtrA, &csrColIndA);
+
+  // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseChyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, cuComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseChyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, hipComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA);
+  // CHECK: status_t = hipsparseChyb2csr(handle_t, matDescr_t, hybMat_t, &complex, &csrRowPtrA, &csrColIndA);
+  status_t = cusparseChyb2csr(handle_t, matDescr_t, hybMat_t, &complex, &csrRowPtrA, &csrColIndA);
+
+  // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseDhyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, double* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDhyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, double* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA);
+  // CHECK: status_t = hipsparseDhyb2csr(handle_t, matDescr_t, hybMat_t, &csrValA, &csrRowPtrA, &csrColIndA);
+  status_t = cusparseDhyb2csr(handle_t, matDescr_t, hybMat_t, &csrValA, &csrRowPtrA, &csrColIndA);
+
+  // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseShyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, float* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA);
+  // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseShyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, float* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA);
+  // CHECK: status_t = hipsparseShyb2csr(handle_t, matDescr_t, hybMat_t, &fcsrValA, &csrRowPtrA, &csrColIndA);
+  status_t = cusparseShyb2csr(handle_t, matDescr_t, hybMat_t, &fcsrValA, &csrRowPtrA, &csrColIndA);
 #endif
 
 #if CUDA_VERSION >= 11010 && CUSPARSE_VERSION >= 11300
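
The new `cusparse2hipsparse` hunks add the sort/permutation helpers, `coo2csr`, the prune-by-percentage call, and the deprecated `*hyb2csr` family. For orientation, a hedged sketch of the csrsort workflow composed from the HIP signatures quoted above (include paths are assumptions; error checking is elided):

```cpp
#include <hip/hip_runtime.h>
#include <hipsparse/hipsparse.h>  // assumed include path; older releases use <hipsparse.h>

// Query the temp-buffer size, build an identity permutation, then sort the
// CSR column indices in place; P records where each entry moved.
void sortCsrColumns(hipsparseHandle_t handle, hipsparseMatDescr_t descrA,
                    int m, int n, int nnz, int* csrRowPtr, int* csrColInd) {
  size_t bufferSize = 0;
  hipsparseXcsrsort_bufferSizeExt(handle, m, n, nnz, csrRowPtr, csrColInd,
                                  &bufferSize);
  void* pBuffer = nullptr;
  int* P = nullptr;
  hipMalloc(&pBuffer, bufferSize);
  hipMalloc(reinterpret_cast<void**>(&P), sizeof(int) * nnz);
  hipsparseCreateIdentityPermutation(handle, nnz, P);  // P = 0, 1, ..., nnz-1
  hipsparseXcsrsort(handle, m, n, nnz, descrA, csrRowPtr, csrColInd, P, pBuffer);
  hipFree(P);
  hipFree(pBuffer);
}
```
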
diff --git a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu
index 14a3dc10..9c484183 100644
--- a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu
+++ b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu
@@ -125,6 +125,8 @@ int main() {
   int nnzb = 0;
   int innz = 0;
   int blockDim = 0;
+  int cscRowIndA = 0;
+  int cscColPtrA = 0;
   int csrRowPtrA = 0;
   int csrColIndA = 0;
   int ncolors = 0;
@@ -157,6 +159,7 @@ int main() {
   void *indices = nullptr;
   void *values = nullptr;
   void *cooRowInd = nullptr;
+  int icooRowInd = 0;
   void *cscRowInd = nullptr;
   void *csrColInd = nullptr;
   void *cooColInd = nullptr;
@@ -192,6 +195,10 @@ int main() {
   float fbsrSortedValA = 0.f;
   float fbsrSortedValC = 0.f;
   float fcsrSortedValC = 0.f;
+  double percentage = 0.f;
+
+  // CHECK: rocsparse_mat_info prune_info;
+  pruneInfo_t prune_info;
 
   // TODO: should be rocsparse_double_complex
   // TODO: add to TypeOverloads cuDoubleComplex -> rocsparse_double_complex under a new option --sparse
@@ -416,6 +423,41 @@ int main() {
   // CHECK: status_t = rocsparse_coosort_by_row(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer);
   status_t = cusparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer);
 
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cooRowsA, const int* cooColsA, size_t* pBufferSizeInBytes);
+  // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_coosort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* coo_row_ind, const rocsparse_int* coo_col_ind, size_t* buffer_size);
+  // CHECK: status_t = rocsparse_coosort_buffer_size(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize);
+  status_t = cusparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* cscColPtrA, int* cscRowIndA, int* P, void* pBuffer);
+  // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cscsort(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_mat_descr descr, const rocsparse_int* csc_col_ptr, rocsparse_int* csc_row_ind, rocsparse_int* perm, void* temp_buffer);
+  // CHECK: status_t = rocsparse_cscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer);
+  status_t = cusparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cscColPtrA, const int* cscRowIndA, size_t* pBufferSizeInBytes);
+  // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cscsort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* csc_col_ptr, const rocsparse_int* csc_row_ind, size_t* buffer_size);
+  // CHECK: status_t = rocsparse_cscsort_buffer_size(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize);
+  status_t = cusparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* csrRowPtrA, int* csrColIndA, int* P, void* pBuffer);
+  // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_csrsort(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_mat_descr descr, const rocsparse_int* csr_row_ptr, rocsparse_int* csr_col_ind, rocsparse_int* perm, void* temp_buffer);
+  // CHECK: status_t = rocsparse_csrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer);
+  status_t = cusparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtrA, const int* csrColIndA, size_t* pBufferSizeInBytes);
+  // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_csrsort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, size_t* buffer_size);
+  // CHECK: status_t = rocsparse_csrsort_buffer_size(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize);
+  status_t = cusparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int* p);
+  // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_create_identity_permutation(rocsparse_handle handle, rocsparse_int n, rocsparse_int* p);
+  // CHECK: status_t = rocsparse_create_identity_permutation(handle_t, n, P);
+  status_t = cusparseCreateIdentityPermutation(handle_t, n, P);
+
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrSortedRowPtr, cusparseIndexBase_t idxBase);
+  // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_coo2csr(rocsparse_handle handle, const rocsparse_int* coo_row_ind, rocsparse_int nnz, rocsparse_int m, rocsparse_int* csr_row_ptr, rocsparse_index_base idx_base);
+  // CHECK: status_t = rocsparse_coo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t);
+  status_t = cusparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t);
+
 #if CUDA_VERSION >= 8000
   // CHECK: hipDataType dataType_t;
   // TODO: [#899] There should be rocsparse_datatype
@@ -424,6 +466,13 @@ int main() {
   cudaDataType dataType;
 #endif
 
+#if CUDA_VERSION >= 9000
+  // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage(cusparseHandle_t handle, int m, int n, int nnzA, const cusparseMatDescr_t descrA, const double* csrSortedValA, const int* csrSortedRowPtrA, const int* csrSortedColIndA, float percentage, const cusparseMatDescr_t descrC, double* csrSortedValC, const int* csrSortedRowPtrC, int* csrSortedColIndC, pruneInfo_t info, void* pBuffer);
+  // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_dprune_csr2csr_by_percentage(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz_A, const rocsparse_mat_descr csr_descr_A, const double* csr_val_A, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, double percentage, const rocsparse_mat_descr csr_descr_C, double* csr_val_C, const rocsparse_int* csr_row_ptr_C, rocsparse_int* csr_col_ind_C, rocsparse_mat_info info, void* temp_buffer);
+  // CHECK: status_t = rocsparse_dprune_csr2csr_by_percentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer);
+  status_t = cusparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer);
+#endif
+
 #if CUDA_VERSION >= 8000 && CUDA_VERSION < 12000
   // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src);
   // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_copy_mat_descr(rocsparse_mat_descr dest, const rocsparse_mat_descr src);
@@ -431,7 +480,7 @@ int main() {
   status_t = cusparseCopyMatDescr(matDescr_t, matDescr_t_2);
 #endif
 
-#if CUDA_VERSION >= 10010
+#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000
   // CHECK: _rocsparse_spmat_descr *spMatDescr = nullptr;
   // CHECK-NEXT: rocsparse_spmat_descr spMatDescr_t, matC;
   cusparseSpMatDescr *spMatDescr = nullptr;
@@ -531,7 +580,7 @@ int main() {
   status_t = cusparseDnMatSetStridedBatch(dnMatDescr_t, batchCount, batchStride);
 #endif
 
-#if CUDA_VERSION >= 10020
+#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000
   // CHECK: _rocsparse_spvec_descr *spVecDescr = nullptr;
   // CHECK-NEXT: rocsparse_spvec_descr spVecDescr_t;
   cusparseSpVecDescr *spVecDescr = nullptr;
@@ -542,9 +591,6 @@ int main() {
   cusparseDnVecDescr *dnVecDescr = nullptr;
   cusparseDnVecDescr_t dnVecDescr_t, vecX, vecY;
 
-  // CHECK: rocsparse_status STATUS_NOT_SUPPORTED = rocsparse_status_not_implemented;
-  cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED;
-
   // CHECK: rocsparse_spmv_alg spMVAlg_t;
   cusparseSpMVAlg_t spMVAlg_t;
 
@@ -644,7 +690,12 @@ int main() {
   status_t = cusparseSpMV(handle_t, opA, alpha, spMatDescr_t, vecX, beta, vecY, dataType, spMVAlg_t, tempBuffer);
 #endif
 
-#if CUDA_VERSION >= 10020 && CUDA_VERSION < 12000
+#if CUDA_VERSION >= 10020
+  // CHECK: rocsparse_status STATUS_NOT_SUPPORTED = rocsparse_status_not_implemented;
+  cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED;
+#endif
+
+#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000)
   // CHECK: rocsparse_format FORMAT_COO_AOS = rocsparse_format_coo_aos;
   cusparseFormat_t FORMAT_COO_AOS = CUSPARSE_FORMAT_COO_AOS;
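
The `cusparse2rocsparse` hunks mirror the hipSPARSE additions, mapping onto rocSPARSE's snake_case API (note that `pruneInfo_t` becomes `rocsparse_mat_info`). A hedged sketch of the `coo2csr` conversion, following the ROC signature quoted in the hunk above (include path assumed; error handling elided):

```cpp
#include <rocsparse/rocsparse.h>  // assumed include path; older releases use <rocsparse.h>

// Compresses sorted COO row indices into the m+1 CSR row offsets.
rocsparse_status cooRowsToCsrRowPtr(rocsparse_handle handle,
                                    const rocsparse_int* coo_row_ind,
                                    rocsparse_int nnz, rocsparse_int m,
                                    rocsparse_int* csr_row_ptr) {
  return rocsparse_coo2csr(handle, coo_row_ind, nnz, m, csr_row_ptr,
                           rocsparse_index_base_zero);
}
```
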