Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ready] kokkos impl gemm #197

Merged
merged 9 commits into from
May 5, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion tests/kokkos-based/gtest_fixtures.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ class _blas2_signed_fixture : public ::testing::Test
// extents are arbitrarily chosen but not trivially small
const std::size_t myExtent0 = 77;
const std::size_t myExtent1 = 41;
const std::size_t myExtent2 = 53;

public:
using value_type = T;
Expand All @@ -180,6 +181,15 @@ class _blas2_signed_fixture : public ::testing::Test
B_e0e1_view("B_e0e1_view", myExtent0, myExtent1),
B_e0e1(B_e0e1_view.data(), myExtent0, myExtent1),
//
B_e1e2_view("B_e1e2_view", myExtent1, myExtent2),
B_e1e2(B_e1e2_view.data(), myExtent1, myExtent2),
//
C_e0e2_view("C_e0e2_view", myExtent0, myExtent2),
C_e0e2(C_e0e2_view.data(), myExtent0, myExtent2),
//
E_e0e2_view("E_e0e2_view", myExtent0, myExtent2),
E_e0e2(E_e0e2_view.data(), myExtent0, myExtent2),
//
A_sym_e0_view("A_sym_e0_view", myExtent0, myExtent0),
A_sym_e0(A_sym_e0_view.data(), myExtent0, myExtent0),
//
Expand Down Expand Up @@ -235,6 +245,17 @@ class _blas2_signed_fixture : public ::testing::Test
A_e0e1(i,j) = {randObj_r(), randObj_i()};
B_e0e1(i,j) = {randObj_r(), randObj_i()};
}

for (std::size_t j=0; j < myExtent2; ++j) {
C_e0e2(i,j) = {randObj_r(), randObj_i()};
E_e0e2(i,j) = {randObj_r(), randObj_i()};
}
}

for (std::size_t i=0; i < myExtent1; ++i) {
for (std::size_t j=0; j < myExtent2; ++j) {
B_e1e2(i,j) = {randObj_r(), randObj_i()};
}
}

// fill vectors with extent = extent0
Expand Down Expand Up @@ -278,6 +299,17 @@ class _blas2_signed_fixture : public ::testing::Test
A_e0e1_view(i,j) = randObj();
B_e0e1_view(i,j) = randObj();
}

for (std::size_t j=0; j < myExtent2; ++j) {
C_e0e2(i,j) = randObj();
E_e0e2(i,j) = randObj();
}
}

for (std::size_t i=0; i < myExtent1; ++i) {
for (std::size_t j=0; j < myExtent2; ++j) {
B_e1e2(i,j) = randObj();
}
}

// fill vectors with extent = extent0
Expand All @@ -297,6 +329,9 @@ class _blas2_signed_fixture : public ::testing::Test

Kokkos::View<value_type**, Kokkos::HostSpace> A_e0e1_view;
Kokkos::View<value_type**, Kokkos::HostSpace> B_e0e1_view;
Kokkos::View<value_type**, Kokkos::HostSpace> B_e1e2_view;
Kokkos::View<value_type**, Kokkos::HostSpace> C_e0e2_view;
Kokkos::View<value_type**, Kokkos::HostSpace> E_e0e2_view;
Kokkos::View<value_type**, Kokkos::HostSpace> A_sym_e0_view;
Kokkos::View<value_type**, Kokkos::HostSpace> A_hem_e0_view;
Kokkos::View<value_type*, Kokkos::HostSpace> x_e0_view;
Expand All @@ -308,9 +343,11 @@ class _blas2_signed_fixture : public ::testing::Test
using mdspan_r2_t = mdspan<value_type, extents<dynamic_extent, dynamic_extent>>;
mdspan_r2_t A_e0e1; //e0 x e1
mdspan_r2_t B_e0e1; //e0 x e1
mdspan_r2_t B_e1e2; //e1 x e2
mdspan_r2_t C_e0e2; //e0 x e2
mdspan_r2_t E_e0e2; //e0 x e2
mdspan_r2_t A_sym_e0; //e0 x e0, symmetric
mdspan_r2_t A_hem_e0; //e0 x e0, hermitian

mdspan_r1_t x_e0; // x vector with extent == e0
mdspan_r1_t x_e1; // x vector with extent == e1
mdspan_r1_t y_e0; // y vector with extent == e0
Expand Down
145 changes: 145 additions & 0 deletions tests/kokkos-based/overwriting_matrix_matrix_product.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@

#include "gtest_fixtures.hpp"
#include "helpers.hpp"

namespace
{

// Reference (gold) implementation of the overwriting matrix product C = A * B.
//
// Every entry of C is first reset to a value-initialized C_t::value_type and
// then accumulates A(i,k) * B(k,j) for k in [0, B.extent(0)), so whatever C
// held on entry is discarded.
//
// A, B, C only need to provide extent(r) and operator()(i,j); C must also
// expose a nested value_type.
template<class A_t, class B_t, class C_t>
void gemm_gold_solution(A_t A, B_t B, C_t C)
{
  for (std::size_t i=0; i<C.extent(0); ++i){
    for (std::size_t j=0; j<C.extent(1); ++j){
      // overwrite: start from zero rather than from the incoming C(i,j)
      C(i,j) = typename C_t::value_type{};
      for (std::size_t k=0; k<B.extent(0); ++k){
        C(i,j) += A(i,k) * B(k,j);
      }
    }
  }
}

// Expects every entry of M to equal the matching entry of preKernel, where
// preKernel holds the entries of M in rowwise order (i outer, j inner).
// float entries are compared with EXPECT_FLOAT_EQ, double entries with
// EXPECT_DOUBLE_EQ and std::complex<double> entries with EXPECT_DOUBLE_EQ on
// the real and imaginary parts. For any other value_type nothing is compared.
template<class M_t, class snapshot_t>
void expect_unchanged_rowwise(M_t M, const snapshot_t & preKernel)
{
  using value_type = typename M_t::value_type;

  std::size_t count=0;
  for (std::size_t i=0; i<M.extent(0); ++i){
    for (std::size_t j=0; j<M.extent(1); ++j){
      [[maybe_unused]] const auto & expected = preKernel[count++];

      if constexpr(std::is_same_v<value_type, float>){
        EXPECT_FLOAT_EQ(M(i,j), expected);
      }
      else if constexpr(std::is_same_v<value_type, double>){
        EXPECT_DOUBLE_EQ(M(i,j), expected);
      }
      else if constexpr(std::is_same_v<value_type, std::complex<double>>){
        EXPECT_DOUBLE_EQ(M(i,j).real(), expected.real());
        EXPECT_DOUBLE_EQ(M(i,j).imag(), expected.imag());
      }
    }
  }
}

// Expects C to match C_gold entrywise within an absolute tolerance of 1e-3
// for float and 1e-9 for double and (per component) std::complex<double>.
// For any other value_type nothing is compared.
template<class C_t, class gold_t>
void expect_near_gold(C_t C, gold_t C_gold)
{
  using value_type = typename C_t::value_type;

  for (std::size_t i=0; i<C_gold.extent(0); ++i){
    for (std::size_t j=0; j<C_gold.extent(1); ++j){
      if constexpr(std::is_same_v<value_type, float>){
        EXPECT_NEAR(C(i,j), C_gold(i,j), 1e-3);
      }
      else if constexpr(std::is_same_v<value_type, double>){
        EXPECT_NEAR(C(i,j), C_gold(i,j), 1e-9);
      }
      else if constexpr(std::is_same_v<value_type, std::complex<double>>){
        EXPECT_NEAR(C(i,j).real(), C_gold(i,j).real(), 1e-9);
        EXPECT_NEAR(C(i,j).imag(), C_gold(i,j).imag(), 1e-9);
      }
    }
  }
}

// Runs the overwriting product matrix_product(kokkos_exec, A, B, C) and checks
// the outcome:
//  - A and B must be left unchanged by the kernel,
//  - C must match the gold solution computed by gemm_gold_solution.
//
// A is extent0 x extent1, B is extent1 x extent2 and C is extent0 x extent2.
// The previous content of C is not inspected since the kernel overwrites it.
template<class A_t, class B_t, class C_t>
void kokkos_blas_overwriting_gemm_impl(A_t A, B_t B, C_t C)
{
  namespace stdla = std::experimental::linalg;

  using value_type = typename A_t::value_type;
  const std::size_t extent0 = A.extent(0);
  const std::size_t extent2 = B.extent(1);

  // snapshot the input operands (rowwise) before running the kernel
  const auto A_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(A);
  const auto B_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(B);

  // compute gold gemm into separate storage so that C is not touched
  std::vector<value_type> gold(extent0*extent2);
  using mdspan_t = mdspan<value_type, extents<dynamic_extent, dynamic_extent>>;
  mdspan_t C_gold(gold.data(), extent0, extent2);
  gemm_gold_solution(A, B, C_gold);

  stdla::matrix_product(KokkosKernelsSTD::kokkos_exec<>(), A, B, C);

  // after kernel, A,B should be unchanged, C should be equal to C_gold.
  // A and B are visited rowwise since that is how the snapshots were stored
  expect_unchanged_rowwise(A, A_preKernel);
  expect_unchanged_rowwise(B, B_preKernel);
  expect_near_gold(C, C_gold);
}
}//end anonym namespace

// Overwriting matrix product on the float fixture:
// C_e0e2 (e0 x e2) = A_e0e1 (e0 x e1) * B_e1e2 (e1 x e2).
TEST_F(blas2_signed_float_fixture, kokkos_overwriting_matrix_matrix_product)
{
kokkos_blas_overwriting_gemm_impl(A_e0e1, B_e1e2, C_e0e2);
}

// Overwriting matrix product on the double fixture:
// C_e0e2 (e0 x e2) = A_e0e1 (e0 x e1) * B_e1e2 (e1 x e2).
// Named after the matrix-matrix product it exercises, consistently with the
// float variant of this test.
TEST_F(blas2_signed_double_fixture, kokkos_overwriting_matrix_matrix_product)
{
  kokkos_blas_overwriting_gemm_impl(A_e0e1, B_e1e2, C_e0e2);
}

// Overwriting matrix product on the complex<double> fixture:
// C_e0e2 (e0 x e2) = A_e0e1 (e0 x e1) * B_e1e2 (e1 x e2).
//
// The kernel is only invoked when the fixture's value_type and
// Kokkos::complex<double> have the same alignment; otherwise the call is
// skipped and the test passes without checking anything.
// NOTE(review): presumably the Kokkos backend views the same memory as
// Kokkos::complex<double>, hence the alignment guard -- confirm.
TEST_F(blas2_signed_complex_double_fixture, kokkos_overwriting_matrix_matrix_product)
{
  using kc_t = Kokkos::complex<double>;
  if constexpr (alignof(value_type) == alignof(kc_t)){
    kokkos_blas_overwriting_gemm_impl(A_e0e1, B_e1e2, C_e0e2);
  }
}
fnrizzi marked this conversation as resolved.
Show resolved Hide resolved
154 changes: 154 additions & 0 deletions tests/kokkos-based/updating_matrix_matrix_product.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@

#include "gtest_fixtures.hpp"
#include "helpers.hpp"

namespace
{

// Reference (gold) implementation of the updating matrix product:
// for every entry of C, C(row,col) = E(row,col) + sum over inner of
// A(row,inner) * B(inner,col), with inner running over [0, B.extent(0)).
// The loop bounds for row/col are taken from the extents of C.
template<class A_t, class B_t, class E_t, class C_t>
void gemm_gold_solution(A_t A, B_t B, E_t E, C_t C)
{
  for (std::size_t row=0; row<C.extent(0); ++row){
    for (std::size_t col=0; col<C.extent(1); ++col){
      // accumulate directly into the destination entry, seeded with E
      auto && dest = C(row,col);
      dest = E(row,col);
      for (std::size_t inner=0; inner<B.extent(0); ++inner){
        dest += A(row,inner) * B(inner,col);
      }
    }
  }
}

// Expects every entry of M to equal the matching entry of preKernel, where
// preKernel holds the entries of M in rowwise order (i outer, j inner).
// float entries are compared with EXPECT_FLOAT_EQ, double entries with
// EXPECT_DOUBLE_EQ and std::complex<double> entries with EXPECT_DOUBLE_EQ on
// the real and imaginary parts. For any other value_type nothing is compared.
template<class M_t, class snapshot_t>
void expect_unchanged_rowwise(M_t M, const snapshot_t & preKernel)
{
  using value_type = typename M_t::value_type;

  std::size_t count=0;
  for (std::size_t i=0; i<M.extent(0); ++i){
    for (std::size_t j=0; j<M.extent(1); ++j){
      [[maybe_unused]] const auto & expected = preKernel[count++];

      if constexpr(std::is_same_v<value_type, float>){
        EXPECT_FLOAT_EQ(M(i,j), expected);
      }
      else if constexpr(std::is_same_v<value_type, double>){
        EXPECT_DOUBLE_EQ(M(i,j), expected);
      }
      else if constexpr(std::is_same_v<value_type, std::complex<double>>){
        EXPECT_DOUBLE_EQ(M(i,j).real(), expected.real());
        EXPECT_DOUBLE_EQ(M(i,j).imag(), expected.imag());
      }
    }
  }
}

// Expects C to match C_gold entrywise within an absolute tolerance of 1e-3
// for float and 1e-9 for double and (per component) std::complex<double>.
// For any other value_type nothing is compared.
template<class C_t, class gold_t>
void expect_near_gold(C_t C, gold_t C_gold)
{
  using value_type = typename C_t::value_type;

  for (std::size_t i=0; i<C_gold.extent(0); ++i){
    for (std::size_t j=0; j<C_gold.extent(1); ++j){
      if constexpr(std::is_same_v<value_type, float>){
        EXPECT_NEAR(C(i,j), C_gold(i,j), 1e-3);
      }
      else if constexpr(std::is_same_v<value_type, double>){
        EXPECT_NEAR(C(i,j), C_gold(i,j), 1e-9);
      }
      else if constexpr(std::is_same_v<value_type, std::complex<double>>){
        EXPECT_NEAR(C(i,j).real(), C_gold(i,j).real(), 1e-9);
        EXPECT_NEAR(C(i,j).imag(), C_gold(i,j).imag(), 1e-9);
      }
    }
  }
}

// Runs the updating product matrix_product(kokkos_exec, A, B, E, C) and
// checks the outcome:
//  - A, B and E must be left unchanged by the kernel,
//  - C must match the gold solution E + A * B computed by gemm_gold_solution.
//
// A is extent0 x extent1, B is extent1 x extent2, E and C are extent0 x extent2.
// The previous content of C is not inspected since the kernel overwrites it.
template<class A_t, class B_t, class E_t, class C_t>
void kokkos_blas_updating_gemm_impl(A_t A, B_t B, E_t E, C_t C)
{
  namespace stdla = std::experimental::linalg;

  using value_type = typename A_t::value_type;
  const std::size_t extent0 = A.extent(0);
  const std::size_t extent2 = B.extent(1);

  // snapshot the input operands (rowwise) before running the kernel
  const auto A_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(A);
  const auto B_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(B);
  const auto E_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(E);

  // compute gold gemm into separate storage so that C is not touched
  std::vector<value_type> gold(extent0*extent2);
  using mdspan_t = mdspan<value_type, extents<dynamic_extent, dynamic_extent>>;
  mdspan_t C_gold(gold.data(), extent0, extent2);
  gemm_gold_solution(A, B, E, C_gold);

  stdla::matrix_product(KokkosKernelsSTD::kokkos_exec<>(), A, B, E, C);

  // after kernel, A,B,E should be unchanged, C should be equal to C_gold.
  // the inputs are visited rowwise since that is how the snapshots were stored
  expect_unchanged_rowwise(A, A_preKernel);
  expect_unchanged_rowwise(B, B_preKernel);
  expect_unchanged_rowwise(E, E_preKernel);
  expect_near_gold(C, C_gold);
}
}//end anonym namespace

// Updating matrix product on the float fixture:
// C_e0e2 (e0 x e2) = E_e0e2 (e0 x e2) + A_e0e1 (e0 x e1) * B_e1e2 (e1 x e2).
TEST_F(blas2_signed_float_fixture, kokkos_updating_matrix_matrix_product)
{
kokkos_blas_updating_gemm_impl(A_e0e1, B_e1e2, E_e0e2, C_e0e2);
}

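// TODO: the double and complex<double> variants below are still disabled.
// They are kept in sync with the 4-argument signature of
// kokkos_blas_updating_gemm_impl (A, B, E, C) so they compile once re-enabled.

// TEST_F(blas2_signed_double_fixture, kokkos_updating_matrix_matrix_product)
// {
// kokkos_blas_updating_gemm_impl(A_e0e1, B_e1e2, E_e0e2, C_e0e2);
// }

// TEST_F(blas2_signed_complex_double_fixture, kokkos_updating_matrix_matrix_product)
// {
// using kc_t = Kokkos::complex<double>;
// using stdc_t = value_type;
// if constexpr (alignof(value_type) == alignof(kc_t)){
// kokkos_blas_updating_gemm_impl(A_e0e1, B_e1e2, E_e0e2, C_e0e2);
// }
// }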
Loading