-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7e1b6ce
commit a68d489
Showing
4 changed files
with
218 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# | ||
# Copyright 2023 Benjamin Worpitz, Jan Stephan | ||
# SPDX-License-Identifier: ISC | ||
# | ||
|
||
################################################################################ | ||
# Required CMake version. | ||
|
||
cmake_minimum_required(VERSION 3.25) | ||
|
||
set_property(GLOBAL PROPERTY USE_FOLDERS ON) | ||
|
||
################################################################################ | ||
# Project. | ||
|
||
set(_TARGET_NAME useHipBLASInAlpaka) | ||
|
||
project(${_TARGET_NAME} LANGUAGES CXX) | ||
|
||
if(NOT alpaka_ACC_GPU_HIP_ONLY_MODE) | ||
# Print a warning and skip target creation | ||
message(WARNING "Skipping build of 'useHipBLASInAlpaka' because alpaka_ACC_GPU_HIP_ONLY_MODE is not enabled.") | ||
return() | ||
endif() | ||
|
||
|
||
#------------------------------------------------------------------------------- | ||
# Find alpaka. | ||
|
||
if(NOT TARGET alpaka::alpaka) | ||
option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF) | ||
|
||
if(alpaka_USE_SOURCE_TREE) | ||
# Don't build the examples recursively | ||
set(alpaka_BUILD_EXAMPLES OFF) | ||
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka") | ||
else() | ||
find_package(alpaka REQUIRED) | ||
endif() | ||
endif() | ||
|
||
#------------------------------------------------------------------------------- | ||
# Add executable. | ||
#------------------------------------------------------------------------------- | ||
# Locate rocBLAS. | ||
find_package(rocblas REQUIRED CONFIG ) | ||
|
||
if(NOT rocblas_FOUND) | ||
message(FATAL_ERROR "rocBLAS library not found. Ensure it is installed and accessible.") | ||
endif() | ||
|
||
alpaka_add_executable( | ||
${_TARGET_NAME} | ||
src/useHipBLASInAlpaka.cpp) | ||
target_link_libraries( | ||
${_TARGET_NAME} | ||
PUBLIC alpaka::alpaka rocblas) | ||
|
||
set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example) | ||
add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
/* Copyright 2023 Mehmet Yusufoglu, Rene Widera, | ||
* SPDX-License-Identifier: ISC | ||
*/ | ||
/* | ||
* This example uses rocBLAS library functions in alpaka. A rocBLAS function of mutrix multiplication is called by | ||
* using alpaka buffers and queue. Since the code needs only AccHipCuda backend. Make sure the correct alpaka cmake | ||
* backend flag is set for alpaka. | ||
*/ | ||
|
||
#include <alpaka/alpaka.hpp> | ||
#include <alpaka/example/ExecuteForEachAccTag.hpp> | ||
|
||
#include <rocblas/rocblas.h> | ||
|
||
#include <cmath> | ||
#include <iostream> | ||
#include <vector> | ||
|
||
// Index type | ||
using Idx = std::size_t; | ||
|
||
// Set data type | ||
using DataType = float; | ||
|
||
// Initialize the matrix in column-major order (1D buffer) | ||
void initializeMatrix(DataType* buffer, Idx rows, Idx cols) | ||
{ | ||
for(Idx j = 0; j < rows; ++j) | ||
{ | ||
for(Idx i = 0; i < cols; ++i) | ||
{ | ||
// Generate some values and set buffer | ||
buffer[i + j * cols] = static_cast<DataType>((i + j * cols) % 10); | ||
} | ||
} | ||
} | ||
|
||
template<alpaka::concepts::Tag TAccTag> | ||
auto example(TAccTag const&) -> int | ||
{ | ||
using Dim1D = alpaka::DimInt<1>; | ||
|
||
// Define matrix dimensions, A is MxK and B is KxN | ||
Idx const M = 4; // Rows in A and C | ||
Idx const N = 2; // Columns in B and C | ||
Idx const K = 3; // Columns in A and rows in B | ||
|
||
// Define the accelerator and queue | ||
using Acc = alpaka::TagToAcc<TAccTag, Dim1D, Idx>; | ||
using Queue = alpaka::Queue<Acc, alpaka::Blocking>; | ||
|
||
auto const platformHost = alpaka::PlatformCpu{}; | ||
auto const devHost = alpaka::getDevByIdx(platformHost, 0); | ||
auto const platformAcc = alpaka::Platform<Acc>{}; | ||
auto const devAcc = alpaka::getDevByIdx(platformAcc, 0); | ||
|
||
Queue queue(devAcc); | ||
|
||
// Allocate 1D host memory | ||
auto bufHostA = alpaka::allocBuf<DataType, Idx>(devHost, M * K); | ||
auto bufHostB = alpaka::allocBuf<DataType, Idx>(devHost, K * N); | ||
auto bufHostC = alpaka::allocBuf<DataType, Idx>(devHost, M * N); | ||
|
||
DataType* hostA = alpaka::getPtrNative(bufHostA); | ||
DataType* hostB = alpaka::getPtrNative(bufHostB); | ||
DataType* hostC = alpaka::getPtrNative(bufHostC); | ||
|
||
// Initialize host matrices with some values | ||
initializeMatrix(hostA, M, K); | ||
initializeMatrix(hostB, K, N); | ||
std::fill(hostC, hostC + (M * N), 0); // Initialize C with 0s | ||
|
||
// Allocate 1D device memory | ||
auto bufDevA = alpaka::allocBuf<DataType, Idx>(devAcc, M * K); | ||
auto bufDevB = alpaka::allocBuf<DataType, Idx>(devAcc, K * N); | ||
auto bufDevC = alpaka::allocBuf<DataType, Idx>(devAcc, M * N); | ||
|
||
// Copy data to device | ||
alpaka::memcpy(queue, bufDevA, bufHostA, M * K); | ||
alpaka::memcpy(queue, bufDevB, bufHostB, K * N); | ||
alpaka::memcpy(queue, bufDevC, bufHostC, M * N); | ||
alpaka::wait(queue); | ||
|
||
// Obtain the native HIP stream from the Alpaka queue | ||
auto rocStream = alpaka::getNativeHandle(queue); | ||
// rocBLAS setup | ||
rocblas_handle rocblasHandle; | ||
rocblas_status status = rocblas_create_handle(&rocblasHandle); | ||
|
||
if(status != rocblas_status_success) | ||
{ | ||
std::cerr << "rocblas_create_handle failed with status: " << status << std::endl; | ||
return EXIT_FAILURE; | ||
} | ||
// Associate the HIP stream with the rocBLAS handle | ||
status = rocblas_set_stream(rocblasHandle, rocStream); | ||
if(status != rocblas_status_success) | ||
{ | ||
std::cerr << "rocblas_set_stream failed with status: " << status << std::endl; | ||
rocblas_destroy_handle(rocblasHandle); | ||
return EXIT_FAILURE; | ||
} | ||
// Perform matrix multiplication: C = alpha * A * B + beta * C | ||
float alpha = 1.0f, beta = 0.0f; | ||
|
||
rocblas_sgemm( | ||
rocblasHandle, | ||
rocblas_operation_none, | ||
rocblas_operation_none, // No transpose for A and B | ||
M, | ||
N, | ||
K, // Dimensions: C = A * B | ||
&alpha, | ||
alpaka::getPtrNative(bufDevA), | ||
M, // Leading dimension of A | ||
alpaka::getPtrNative(bufDevB), | ||
K, // Leading dimension of B | ||
&beta, | ||
alpaka::getPtrNative(bufDevC), | ||
M // Leading dimension of C | ||
); | ||
if(status != rocblas_status_success) | ||
{ | ||
std::cerr << "rocblas_sgemm failed: " << status << std::endl; | ||
rocblas_destroy_handle(rocblasHandle); | ||
return EXIT_FAILURE; | ||
} | ||
alpaka::wait(queue); | ||
|
||
// Copy result back to host | ||
alpaka::memcpy(queue, bufHostC, bufDevC, M * N); | ||
alpaka::wait(queue); | ||
|
||
// Verify the result | ||
std::cout << "Matrix C (Host):" << std::endl; | ||
for(Idx j = 0; j < M; ++j) | ||
{ | ||
for(Idx i = 0; i < N; ++i) | ||
{ | ||
std::cout << hostC[i + j * N] << " "; | ||
} | ||
std::cout << std::endl; | ||
} | ||
|
||
// Cleanup rocBLAS | ||
rocblas_destroy_handle(rocblasHandle); | ||
return EXIT_SUCCESS; | ||
} | ||
|
||
auto main() -> int | ||
{ | ||
std::cout << "Executing example with available Alpaka accelerators:" << std::endl; | ||
alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); }); | ||
} |