Skip to content

Commit

Permalink
Rename oneMKL to oneMath and improve the lesson/exercise (#387)
Browse files Browse the repository at this point in the history
Change the directory name, file names, and all references to oneMKL
to the new name oneMath. Replace the global queue::wait calls with
proper synchronisation, which is automatic in the case of buffers and
is achieved by explicitly passing sycl::events into the APIs with USM.
  • Loading branch information
rafbiels authored Jan 15, 2025
1 parent 51b5012 commit bc79770
Show file tree
Hide file tree
Showing 14 changed files with 1,982 additions and 286 deletions.
2 changes: 1 addition & 1 deletion Code_Exercises/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,5 @@ add_subdirectory(Local_Memory_Tiling)
add_subdirectory(Work_Group_Sizes)
add_subdirectory(Matrix_Transpose)
add_subdirectory(Functors)
add_subdirectory(OneMKL_gemm)
add_subdirectory(oneMath_gemm)
add_subdirectory(More_SYCL_Features)
22 changes: 0 additions & 22 deletions Code_Exercises/OneMKL_gemm/CMakeLists.txt

This file was deleted.

27 changes: 0 additions & 27 deletions Code_Exercises/OneMKL_gemm/README.md

This file was deleted.

22 changes: 22 additions & 0 deletions Code_Exercises/oneMath_gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#[[
SYCL Academy (c)
SYCL Academy is licensed under a Creative Commons Attribution-ShareAlike 4.0
International License.
You should have received a copy of the license along with this work. If not,
see <http://creativecommons.org/licenses/by-sa/4.0/>.
]]

# Exercise templates: one target per oneMath GEMM API flavour (USM and buffer),
# registered through the project's add_sycl_executable helper and linked
# against the oneMath library.
foreach(api_flavour IN ITEMS usm buffer)
  add_sycl_executable(oneMath_gemm source_onemath_${api_flavour}_gemm)
  target_link_libraries(oneMath_gemm_source_onemath_${api_flavour}_gemm
                        PUBLIC -lonemath)
endforeach()

# The matching solution targets are only configured when solutions are enabled.
if(SYCL_ACADEMY_ENABLE_SOLUTIONS)
  foreach(api_flavour IN ITEMS usm buffer)
    add_sycl_executable(oneMath_gemm solution_onemath_${api_flavour}_gemm)
    target_link_libraries(oneMath_gemm_solution_onemath_${api_flavour}_gemm
                          PUBLIC -lonemath)
  endforeach()
endif()
29 changes: 29 additions & 0 deletions Code_Exercises/oneMath_gemm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# SYCL Academy

## Exercise 11: Using the oneMath library for matrix multiplication
---

In this exercise you will learn how to use the API of the oneMath library and
perform a matrix multiplication using the GEMM routines.

The source code provides a template to perform GEMM using oneMath's USM/buffer
API. Please refer to the API here:
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm

---
## Exercise `oneMath_usm_gemm/source.cpp`

The `source_*.cpp` files already include all the code to create input matrices
and compute a reference result serially on the host. The exercise is to fill in
the sections marked with "TODO" comments to perform GEMM on a device using
oneMath.

## Build and execution hints

To run the example: `./oneMath_gemm_solution_onemath_usm_gemm` (or) `./oneMath_gemm_source_onemath_usm_gemm`

To verify with cuBLAS debug info, `export CUBLAS_LOGINFO_DBG=1` and `export CUBLAS_LOGDEST_DBG=stdout`

For DevCloud via JupyterLab follow these [instructions](../devcloudJupyter.md).

For DPC++: [instructions](../dpcpp.md).

For AdaptiveCpp: [instructions](../adaptivecpp.md).
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@
You should have received a copy of the license along with this
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
SYCL Quick Reference
Quick Reference
~~~~~~~~~~~~~~~~~~~~
// oneMKL APIs:
https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm
oneMath execution model:
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture
// DGEMM:
https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html
oneMath GEMM API:
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm
*/

#include <iostream>
#include <limits>
#include <oneapi/mkl/blas.hpp>
#include <oneapi/math.hpp>
#include <random>

#include <sycl/sycl.hpp>
Expand Down Expand Up @@ -62,12 +62,12 @@ int VerifyResult(sycl::host_accessor<T, 1>& c_A, T* c_B) {
//////////////////////////////////////////////////////////////////////////////////////////

void print_device_info(sycl::queue& Q) {
std::string sycl_dev_name, sycl_runtime, sycl_driver;
std::string sycl_dev_name, sycl_dev_version, sycl_driver;
sycl_dev_name = Q.get_device().get_info<sycl::info::device::name>();
sycl_driver = Q.get_device().get_info<sycl::info::device::driver_version>();
sycl_runtime = Q.get_device().get_info<sycl::info::device::version>();
std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v"
<< sycl_runtime.c_str()
sycl_dev_version = Q.get_device().get_info<sycl::info::device::version>();
std::cout << "Running on " << sycl_dev_name.c_str()
<< ", version: " << sycl_dev_version.c_str()
<< ", driver version: " << sycl_driver.c_str() << std::endl;
}

Expand Down Expand Up @@ -117,28 +117,30 @@ int main() {
}
}

// Create a SYCL in-order queue targetting GPU device
sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}};
// Create a SYCL queue
sycl::queue Q;
// Prints some basic info related to the hardware
print_device_info(Q);

// TODO: Allocate memory on device, (using sycl::malloc_device APIs)
// Creating 1D buffers for matrices which are bound to host memory array
// Create 1D buffers for matrices which are bound to host memory arrays
sycl::buffer<T, 1> a{A.data(), sycl::range<1>{M * N}};
sycl::buffer<T, 1> b{B.data(), sycl::range<1>{N * P}};
sycl::buffer<T, 1> c{C_host.data(), sycl::range<1>{M * P}};

// TODO: Use oneMKL GEMM USM API
oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans;
oneapi::mkl::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b,
ldB, a, ldA, beta, c, ldC);
Q.wait();
// Use oneMath GEMM buffer API
oneapi::math::transpose transA = oneapi::math::transpose::nontrans;
oneapi::math::transpose transB = oneapi::math::transpose::nontrans;
oneapi::math::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b,
ldB, a, ldA, beta, c, ldC);

// Host accessor ensures synchronisation: a read operation on the accessor
// will wait until all kernels writing to buffer "c" finished executing and
// then copy the data back to host
sycl::host_accessor C_device{c};

// Verify results from oneMKL APIs
// Verify results from oneMath
int result = 0;
std::cout << "Verify results between OneMKL & Serial: ";
std::cout << "Verify results between oneMath & serial: ";
result = VerifyResult(C_device, C_host.data());

return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@
You should have received a copy of the license along with this
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
SYCL Quick Reference
Quick Reference
~~~~~~~~~~~~~~~~~~~~
// oneMKL APIs:
https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm
oneMath execution model:
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture
// DGEMM:
https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html
oneMath GEMM API:
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm
*/

#include <iostream>
#include <limits>
#include <oneapi/mkl/blas.hpp>
#include <oneapi/math.hpp>
#include <random>

#include <sycl/sycl.hpp>
Expand Down Expand Up @@ -62,12 +62,12 @@ int VerifyResult(T* c_A, T* c_B) {
//////////////////////////////////////////////////////////////////////////////////////////

void print_device_info(sycl::queue& Q) {
std::string sycl_dev_name, sycl_runtime, sycl_driver;
std::string sycl_dev_name, sycl_dev_version, sycl_driver;
sycl_dev_name = Q.get_device().get_info<sycl::info::device::name>();
sycl_driver = Q.get_device().get_info<sycl::info::device::driver_version>();
sycl_runtime = Q.get_device().get_info<sycl::info::device::version>();
std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v"
<< sycl_runtime.c_str()
sycl_dev_version = Q.get_device().get_info<sycl::info::device::version>();
std::cout << "Running on " << sycl_dev_name.c_str()
<< ", version: " << sycl_dev_version.c_str()
<< ", driver version: " << sycl_driver.c_str() << std::endl;
}

Expand Down Expand Up @@ -117,35 +117,42 @@ int main() {
}
}

// Create a SYCL in-order queue targetting GPU device
sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}};
// Create a SYCL queue
sycl::queue Q;
// Prints some basic info related to the hardware
print_device_info(Q);

// TODO: Allocate memory on device, (using sycl::malloc_device APIs)
// Allocate memory on device, (using sycl::malloc_device APIs)
T* a = sycl::malloc_device<T>((M * N), Q);
T* b = sycl::malloc_device<T>((N * P), Q);
T* c = sycl::malloc_device<T>((M * P), Q);
Q.memcpy(a, A.data(), sizeof(T) * M * N);
Q.memcpy(b, B.data(), sizeof(T) * N * P);

// TODO: Use oneMKL GEMM USM API
oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans;
oneapi::mkl::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b,
ldB, a, ldA, beta, c,
ldC); // row-major

sycl::event eventCopyA = Q.memcpy(a, A.data(), sizeof(T) * M * N);
sycl::event eventCopyB = Q.memcpy(b, B.data(), sizeof(T) * N * P);

// Use oneMath GEMM USM API
oneapi::math::transpose transA = oneapi::math::transpose::nontrans;
oneapi::math::transpose transB = oneapi::math::transpose::nontrans;
// Pass the synchronisation events to ensure GEMM starts after inputs are
// fully copied to the device
sycl::event eventGEMM = oneapi::math::blas::column_major::gemm(
Q, transA, transB, n, m, k, alpha, b, ldB, a, ldA, beta, c, ldC,
{eventCopyA, eventCopyB}); // row-major

// Copy the results from device to host for verification
std::vector<T> C_device(M * P);
Q.memcpy(C_device.data(), c, sizeof(T) * M * P);
Q.wait();
// Pass the synchronisation event for the copy to wait until GEMM is finished
sycl::event eventCopyC =
Q.memcpy(C_device.data(), c, sizeof(T) * M * P, eventGEMM);

// Wait for the copy to finish
eventCopyC.wait();

// Verify results from oneMKL APIs
// Verify results from oneMath
int result = 0;
std::cout << "Verify results between OneMKL & Serial: ";
std::cout << "Verify results between oneMath & serial: ";
result = VerifyResult(C_device.data(), C_host.data());

// TODO: Free memory from device
// Free memory from device
sycl::free(a, Q);
sycl::free(b, Q);
sycl::free(c, Q);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@
You should have received a copy of the license along with this
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
SYCL Quick Reference
Quick Reference
~~~~~~~~~~~~~~~~~~~~
// oneMKL APIs:
https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm
oneMath execution model:
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture
// DGEMM:
https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html
oneMath GEMM API:
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm
*/

#include <iostream>
#include <limits>
#include <oneapi/mkl/blas.hpp>
#include <oneapi/math.hpp>
#include <random>

#include <sycl/sycl.hpp>
Expand Down Expand Up @@ -62,12 +62,12 @@ int VerifyResult(sycl::host_accessor<T, 1>& c_A, T* c_B) {
//////////////////////////////////////////////////////////////////////////////////////////

void print_device_info(sycl::queue& Q) {
std::string sycl_dev_name, sycl_runtime, sycl_driver;
std::string sycl_dev_name, sycl_dev_version, sycl_driver;
sycl_dev_name = Q.get_device().get_info<sycl::info::device::name>();
sycl_driver = Q.get_device().get_info<sycl::info::device::driver_version>();
sycl_runtime = Q.get_device().get_info<sycl::info::device::version>();
std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v"
<< sycl_runtime.c_str()
sycl_dev_version = Q.get_device().get_info<sycl::info::device::version>();
std::cout << "Running on " << sycl_dev_name.c_str()
<< ", version: " << sycl_dev_version.c_str()
<< ", driver version: " << sycl_driver.c_str() << std::endl;
}

Expand Down Expand Up @@ -117,22 +117,21 @@ int main() {
}
}

// Create a SYCL in-order queue targetting GPU device
sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}};
// Create a SYCL queue
sycl::queue Q;
// Prints some basic info related to the hardware
print_device_info(Q);

// TODO: Allocate memory on device
// Creating 1D buffers for matrices which are bound to host memory array
// TODO: Create 1D buffers for matrices which are bound to host memory arrays

// TODO: Use oneMKL GEMM USM API
// TODO: Use oneMath GEMM buffer API

// TODO: Copy the results from device to host for verification
// Hint: Use sycl::host_accessor

// Verify results from oneMKL APIs
// Verify results from oneMath
int result = 0;
std::cout << "Verify results between OneMKL & Serial: ";
std::cout << "Verify results between oneMath & serial: ";
// TODO: Uncomment the following line to verify the results
// result = VerifyResult(C_device, C_host.data());

Expand Down
Loading

0 comments on commit bc79770

Please sign in to comment.