Skip to content

Commit

Permalink
use rocBLAS from alpaka
Browse files Browse the repository at this point in the history
  • Loading branch information
mehmetyusufoglu committed Nov 28, 2024
1 parent 7e1b6ce commit a68d489
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 0 deletions.
2 changes: 2 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@ add_subdirectory("randomStrategies/")
add_subdirectory("randomCells2D/")
add_subdirectory("reduce/")
add_subdirectory("tagSpecialization/")
add_subdirectory("useHipBLASInAlpaka/")
add_subdirectory("vectorAdd/")

2 changes: 2 additions & 0 deletions example/reduce/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ if(NOT TARGET alpaka::alpaka)
endif()
endif()

find_library(hipblas_LIBRARY hipblas HINTS /opt/rocm-6.2.3/lib)
find_path(hipblas_INCLUDE_DIR hipblas.h HINTS /opt/rocm-6.2.3/include)
#-------------------------------------------------------------------------------
# Add executable.

Expand Down
60 changes: 60 additions & 0 deletions example/useHipBLASInAlpaka/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#
# Copyright 2023 Benjamin Worpitz, Jan Stephan
# SPDX-License-Identifier: ISC
#

################################################################################
# Required CMake version.

cmake_minimum_required(VERSION 3.25)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

################################################################################
# Project.

set(_TARGET_NAME useHipBLASInAlpaka)

project(${_TARGET_NAME} LANGUAGES CXX)

if(NOT alpaka_ACC_GPU_HIP_ONLY_MODE)
# Print a warning and skip target creation
message(WARNING "Skipping build of 'useHipBLASInAlpaka' because alpaka_ACC_GPU_HIP_ONLY_MODE is not enabled.")
return()
endif()


#-------------------------------------------------------------------------------
# Find alpaka.

if(NOT TARGET alpaka::alpaka)
option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)

if(alpaka_USE_SOURCE_TREE)
# Don't build the examples recursively
set(alpaka_BUILD_EXAMPLES OFF)
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
else()
find_package(alpaka REQUIRED)
endif()
endif()

#-------------------------------------------------------------------------------
# Add executable.
#-------------------------------------------------------------------------------
# Locate rocBLAS.
find_package(rocblas REQUIRED CONFIG )

if(NOT rocblas_FOUND)
message(FATAL_ERROR "rocBLAS library not found. Ensure it is installed and accessible.")
endif()

alpaka_add_executable(
${_TARGET_NAME}
src/useHipBLASInAlpaka.cpp)
target_link_libraries(
${_TARGET_NAME}
PUBLIC alpaka::alpaka rocblas)

set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)
add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})
154 changes: 154 additions & 0 deletions example/useHipBLASInAlpaka/src/useHipBLASInAlpaka.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/* Copyright 2023 Mehmet Yusufoglu, Rene Widera,
* SPDX-License-Identifier: ISC
*/
/*
* This example uses rocBLAS library functions in alpaka. A rocBLAS function of mutrix multiplication is called by
* using alpaka buffers and queue. Since the code needs only AccHipCuda backend. Make sure the correct alpaka cmake
* backend flag is set for alpaka.
*/

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExecuteForEachAccTag.hpp>

#include <rocblas/rocblas.h>

#include <cmath>
#include <iostream>
#include <vector>

// Index type
using Idx = std::size_t;

// Set data type
using DataType = float;

// Initialize the matrix in column-major order (1D buffer)
void initializeMatrix(DataType* buffer, Idx rows, Idx cols)
{
for(Idx j = 0; j < rows; ++j)
{
for(Idx i = 0; i < cols; ++i)
{
// Generate some values and set buffer
buffer[i + j * cols] = static_cast<DataType>((i + j * cols) % 10);
}
}
}

template<alpaka::concepts::Tag TAccTag>
auto example(TAccTag const&) -> int
{
using Dim1D = alpaka::DimInt<1>;

// Define matrix dimensions, A is MxK and B is KxN
Idx const M = 4; // Rows in A and C
Idx const N = 2; // Columns in B and C
Idx const K = 3; // Columns in A and rows in B

// Define the accelerator and queue
using Acc = alpaka::TagToAcc<TAccTag, Dim1D, Idx>;
using Queue = alpaka::Queue<Acc, alpaka::Blocking>;

auto const platformHost = alpaka::PlatformCpu{};
auto const devHost = alpaka::getDevByIdx(platformHost, 0);
auto const platformAcc = alpaka::Platform<Acc>{};
auto const devAcc = alpaka::getDevByIdx(platformAcc, 0);

Queue queue(devAcc);

// Allocate 1D host memory
auto bufHostA = alpaka::allocBuf<DataType, Idx>(devHost, M * K);
auto bufHostB = alpaka::allocBuf<DataType, Idx>(devHost, K * N);
auto bufHostC = alpaka::allocBuf<DataType, Idx>(devHost, M * N);

DataType* hostA = alpaka::getPtrNative(bufHostA);
DataType* hostB = alpaka::getPtrNative(bufHostB);
DataType* hostC = alpaka::getPtrNative(bufHostC);

// Initialize host matrices with some values
initializeMatrix(hostA, M, K);
initializeMatrix(hostB, K, N);
std::fill(hostC, hostC + (M * N), 0); // Initialize C with 0s

// Allocate 1D device memory
auto bufDevA = alpaka::allocBuf<DataType, Idx>(devAcc, M * K);
auto bufDevB = alpaka::allocBuf<DataType, Idx>(devAcc, K * N);
auto bufDevC = alpaka::allocBuf<DataType, Idx>(devAcc, M * N);

// Copy data to device
alpaka::memcpy(queue, bufDevA, bufHostA, M * K);
alpaka::memcpy(queue, bufDevB, bufHostB, K * N);
alpaka::memcpy(queue, bufDevC, bufHostC, M * N);
alpaka::wait(queue);

// Obtain the native HIP stream from the Alpaka queue
auto rocStream = alpaka::getNativeHandle(queue);
// rocBLAS setup
rocblas_handle rocblasHandle;
rocblas_status status = rocblas_create_handle(&rocblasHandle);

if(status != rocblas_status_success)
{
std::cerr << "rocblas_create_handle failed with status: " << status << std::endl;
return EXIT_FAILURE;
}
// Associate the HIP stream with the rocBLAS handle
status = rocblas_set_stream(rocblasHandle, rocStream);
if(status != rocblas_status_success)
{
std::cerr << "rocblas_set_stream failed with status: " << status << std::endl;
rocblas_destroy_handle(rocblasHandle);
return EXIT_FAILURE;
}
// Perform matrix multiplication: C = alpha * A * B + beta * C
float alpha = 1.0f, beta = 0.0f;

rocblas_sgemm(
rocblasHandle,
rocblas_operation_none,
rocblas_operation_none, // No transpose for A and B
M,
N,
K, // Dimensions: C = A * B
&alpha,
alpaka::getPtrNative(bufDevA),
M, // Leading dimension of A
alpaka::getPtrNative(bufDevB),
K, // Leading dimension of B
&beta,
alpaka::getPtrNative(bufDevC),
M // Leading dimension of C
);
if(status != rocblas_status_success)
{
std::cerr << "rocblas_sgemm failed: " << status << std::endl;
rocblas_destroy_handle(rocblasHandle);
return EXIT_FAILURE;
}
alpaka::wait(queue);

// Copy result back to host
alpaka::memcpy(queue, bufHostC, bufDevC, M * N);
alpaka::wait(queue);

// Verify the result
std::cout << "Matrix C (Host):" << std::endl;
for(Idx j = 0; j < M; ++j)
{
for(Idx i = 0; i < N; ++i)
{
std::cout << hostC[i + j * N] << " ";
}
std::cout << std::endl;
}

// Cleanup rocBLAS
rocblas_destroy_handle(rocblasHandle);
return EXIT_SUCCESS;
}

auto main() -> int
{
std::cout << "Executing example with available Alpaka accelerators:" << std::endl;
alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); });
}

0 comments on commit a68d489

Please sign in to comment.