kokkos · mhoemmen · May 5, 2022 · May 2, 2022 · May 3, 2022 · May 3, 2022
diff --git a/tests/kokkos-based/gtest_fixtures.hpp b/tests/kokkos-based/gtest_fixtures.hpp
@@ -169,6 +169,7 @@ class _blas2_signed_fixture : public ::testing::Test
   // extents are arbitrarily chosen but not trivially small
   const std::size_t myExtent0 = 77;
   const std::size_t myExtent1 = 41;
+  const std::size_t myExtent2 = 53;
 
 public:
   using value_type = T;
@@ -180,6 +181,15 @@ class _blas2_signed_fixture : public ::testing::Test
       B_e0e1_view("B_e0e1_view", myExtent0, myExtent1),
       B_e0e1(B_e0e1_view.data(), myExtent0, myExtent1),
       //
+      B_e1e2_view("B_e1e2_view", myExtent1, myExtent2),
+      B_e1e2(B_e1e2_view.data(), myExtent1, myExtent2),
+      //
+      C_e0e2_view("C_e0e2_view", myExtent0, myExtent2),
+      C_e0e2(C_e0e2_view.data(), myExtent0, myExtent2),
+      //
+      E_e0e2_view("E_e0e2_view", myExtent0, myExtent2),
+      E_e0e2(E_e0e2_view.data(), myExtent0, myExtent2),
+      //
       A_sym_e0_view("A_sym_e0_view", myExtent0, myExtent0),
       A_sym_e0(A_sym_e0_view.data(), myExtent0, myExtent0),
       //
@@ -235,6 +245,17 @@ class _blas2_signed_fixture : public ::testing::Test
 	  A_e0e1(i,j) = {randObj_r(), randObj_i()};
 	  B_e0e1(i,j) = {randObj_r(), randObj_i()};
 	}
+
+	for (std::size_t j=0; j < myExtent2; ++j) {
+	  C_e0e2(i,j) = {randObj_r(), randObj_i()};
+	  E_e0e2(i,j) = {randObj_r(), randObj_i()};
+	}
+      }
+
+      for (std::size_t i=0; i < myExtent1; ++i) {
+	for (std::size_t j=0; j < myExtent2; ++j) {
+	  B_e1e2(i,j) = {randObj_r(), randObj_i()};
+	}
       }
 
       // fill vectors with extent = extent0
@@ -278,6 +299,17 @@ class _blas2_signed_fixture : public ::testing::Test
 	  A_e0e1_view(i,j) = randObj();
 	  B_e0e1_view(i,j) = randObj();
 	}
+
+	for (std::size_t j=0; j < myExtent2; ++j) {
+	  C_e0e2(i,j) = randObj();
+	  E_e0e2(i,j) = randObj();
+	}
+      }
+
+      for (std::size_t i=0; i < myExtent1; ++i) {
+	for (std::size_t j=0; j < myExtent2; ++j) {
+	  B_e1e2(i,j) = randObj();
+	}
       }
 
       // fill vectors with extent = extent0
@@ -297,6 +329,9 @@ class _blas2_signed_fixture : public ::testing::Test
 
   Kokkos::View<value_type**, Kokkos::HostSpace> A_e0e1_view;
   Kokkos::View<value_type**, Kokkos::HostSpace> B_e0e1_view;
+  Kokkos::View<value_type**, Kokkos::HostSpace> B_e1e2_view;
+  Kokkos::View<value_type**, Kokkos::HostSpace> C_e0e2_view;
+  Kokkos::View<value_type**, Kokkos::HostSpace> E_e0e2_view;
   Kokkos::View<value_type**, Kokkos::HostSpace> A_sym_e0_view;
   Kokkos::View<value_type**, Kokkos::HostSpace> A_hem_e0_view;
   Kokkos::View<value_type*,  Kokkos::HostSpace> x_e0_view;
@@ -308,9 +343,11 @@ class _blas2_signed_fixture : public ::testing::Test
   using mdspan_r2_t = mdspan<value_type, extents<dynamic_extent, dynamic_extent>>;
   mdspan_r2_t A_e0e1; //e0 x e1
   mdspan_r2_t B_e0e1; //e0 x e1
+  mdspan_r2_t B_e1e2; //e1 x e2
+  mdspan_r2_t C_e0e2; //e0 x e2
+  mdspan_r2_t E_e0e2; //e0 x e2
   mdspan_r2_t A_sym_e0; //e0 x e0, symmetric
   mdspan_r2_t A_hem_e0; //e0 x e0, hermitian
-
   mdspan_r1_t x_e0;  // x vector with extent == e0
   mdspan_r1_t x_e1;  // x vector with extent == e1
   mdspan_r1_t y_e0;  // y vector with extent == e0

diff --git a/tests/kokkos-based/overwriting_matrix_matrix_product.cpp b/tests/kokkos-based/overwriting_matrix_matrix_product.cpp
@@ -0,0 +1,145 @@
+
+#include "gtest_fixtures.hpp"
+#include "helpers.hpp"
+
+namespace
+{
+
+template<class A_t, class B_t, class C_t>
+void gemm_gold_solution(A_t A, B_t B, C_t C) // reference triple-loop product: overwrites C with A * B
+{
+  for (std::size_t i=0; i<C.extent(0); ++i){
+    for (std::size_t j=0; j<C.extent(1); ++j){
+      C(i,j) = typename C_t::value_type{}; // value-initialize the entry; C's previous contents are discarded
+      for (std::size_t k=0; k<B.extent(0); ++k){ // k runs over the rows of B (the contracted dimension)
+	C(i,j) += A(i,k) * B(k,j);
+      }
+    }
+  }
+}
+
+// Test driver for the Kokkos-backed overwriting product C = A * B: runs the kernel,
+// then checks that A and B are unmodified and that C matches a sequential reference.
+template<class A_t, class B_t, class C_t>
+void kokkos_blas_overwriting_gemm_impl(A_t A, B_t B, C_t C)
+{
+  namespace stdla = std::experimental::linalg;
+  using value_type = typename A_t::value_type;
+  const std::size_t extent0 = A.extent(0);
+  const std::size_t extent1 = A.extent(1);
+  const std::size_t extent2 = B.extent(1);
+
+  // copy the input operands A and B before running the kernel (C is output-only, no snapshot needed)
+  auto A_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(A);
+  auto B_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(B);
+
+  // compute gold gemm into separate storage so it cannot alias the kernel output
+  std::vector<value_type> gold(extent0*extent2);
+  using mdspan_t = mdspan<value_type, extents<dynamic_extent, dynamic_extent>>;
+  mdspan_t C_gold(gold.data(), extent0, extent2);
+  gemm_gold_solution(A, B, C_gold);
+
+  stdla::matrix_product(KokkosKernelsSTD::kokkos_exec<>(), A, B, C);
+
+  // after kernel, A,B should be unchanged, C should be equal to C_gold.
+  // note that for A and B we need to visit all elements rowwise
+  // since that is how we stored above the preKernel values
+  if constexpr(std::is_same_v<value_type, float>){
+    // check A
+    std::size_t count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent1; ++j){
+	EXPECT_FLOAT_EQ(A(i,j), A_preKernel[count++]);
+      }
+    }
+
+    // check B
+    count=0;
+    for (std::size_t i=0; i<extent1; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_FLOAT_EQ(B(i,j), B_preKernel[count++]);
+      }
+    }
+
+    // check C
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_NEAR(C(i,j), C_gold(i,j), 1e-3);
+      }
+    }
+  }
+  else if constexpr(std::is_same_v<value_type, double>){
+    // check A
+    std::size_t count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent1; ++j){
+	EXPECT_DOUBLE_EQ(A(i,j), A_preKernel[count++]);
+      }
+    }
+
+    // check B
+    count=0;
+    for (std::size_t i=0; i<extent1; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_DOUBLE_EQ(B(i,j), B_preKernel[count++]);
+      }
+    }
+
+    // check C
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_NEAR(C(i,j), C_gold(i,j), 1e-9);
+      }
+    }
+  }
+  else if constexpr(std::is_same_v<value_type, std::complex<double>>){
+    // check A
+    std::size_t count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent1; ++j){
+	EXPECT_DOUBLE_EQ(A(i,j).real(), A_preKernel[count].real());
+	EXPECT_DOUBLE_EQ(A(i,j).imag(), A_preKernel[count++].imag());
+      }
+    }
+
+    // check B
+    count=0;
+    for (std::size_t i=0; i<extent1; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_DOUBLE_EQ(B(i,j).real(), B_preKernel[count].real());
+	EXPECT_DOUBLE_EQ(B(i,j).imag(), B_preKernel[count++].imag());
+      }
+    }
+
+    // check C
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_NEAR(C(i,j).real(), C_gold(i,j).real(), 1e-9);
+	EXPECT_NEAR(C(i,j).imag(), C_gold(i,j).imag(), 1e-9);
+      }
+    }
+  }
+  else{ // type-dependent condition: fires only if this branch is instantiated, instead of passing with no checks
+    static_assert(sizeof(value_type) == 0, "kokkos_blas_overwriting_gemm_impl: no checks implemented for this value_type");
+  }
+}
+}//end anonym namespace
+
+TEST_F(blas2_signed_float_fixture, kokkos_overwriting_matrix_matrix_product)
+{
+  kokkos_blas_overwriting_gemm_impl(A_e0e1, B_e1e2, C_e0e2); // float case: expects C (e0 x e2) == A (e0 x e1) * B (e1 x e2)
+}
+
+TEST_F(blas2_signed_double_fixture, kokkos_overwriting_matrix_matrix_product)
+{
+  kokkos_blas_overwriting_gemm_impl(A_e0e1, B_e1e2, C_e0e2); // double case: expects C (e0 x e2) == A (e0 x e1) * B (e1 x e2)
+}
+
+TEST_F(blas2_signed_complex_double_fixture, kokkos_overwriting_matrix_matrix_product)
+{
+  using kc_t = Kokkos::complex<double>;
+  // the kernel is exercised only when value_type and Kokkos::complex<double> share the same alignment
+  if constexpr (alignof(value_type) == alignof(kc_t)){
+    kokkos_blas_overwriting_gemm_impl(A_e0e1, B_e1e2, C_e0e2);
+  }
+}
diff --git a/tests/kokkos-based/updating_matrix_matrix_product.cpp b/tests/kokkos-based/updating_matrix_matrix_product.cpp
@@ -0,0 +1,154 @@
+
+#include "gtest_fixtures.hpp"
+#include "helpers.hpp"
+
+namespace
+{
+
+template<class A_t, class B_t, class E_t, class C_t>
+void gemm_gold_solution(A_t A, B_t B, E_t E, C_t C) // reference triple-loop update: writes E + A * B into C
+{
+  for (std::size_t i=0; i<C.extent(0); ++i){
+    for (std::size_t j=0; j<C.extent(1); ++j){
+      C(i,j) = E(i,j); // seed the entry with E; C's previous contents are discarded
+      for (std::size_t k=0; k<B.extent(0); ++k){ // k runs over the rows of B (the contracted dimension)
+	C(i,j) += A(i,k) * B(k,j);
+      }
+    }
+  }
+}
+
+// Test driver for the Kokkos-backed updating product C = E + A * B: runs the kernel,
+// then checks that A, B and E are unmodified and that C matches a sequential reference.
+template<class A_t, class B_t, class E_t, class C_t>
+void kokkos_blas_updating_gemm_impl(A_t A, B_t B, E_t E, C_t C)
+{
+  namespace stdla = std::experimental::linalg;
+  using value_type = typename A_t::value_type;
+  const std::size_t extent0 = A.extent(0);
+  const std::size_t extent1 = A.extent(1);
+  const std::size_t extent2 = B.extent(1);
+
+  // copy the input operands A, B and E before running the kernel (C is output-only, no snapshot needed)
+  auto A_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(A);
+  auto B_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(B);
+  auto E_preKernel = kokkostesting::create_stdvector_and_copy_rowwise(E);
+
+  // compute gold gemm into separate storage so it cannot alias the kernel output
+  std::vector<value_type> gold(extent0*extent2);
+  using mdspan_t = mdspan<value_type, extents<dynamic_extent, dynamic_extent>>;
+  mdspan_t C_gold(gold.data(), extent0, extent2);
+  gemm_gold_solution(A, B, E, C_gold);
+
+  stdla::matrix_product(KokkosKernelsSTD::kokkos_exec<>(), A, B, E, C);
+
+  // after kernel, A,B,E should be unchanged, C should be equal to C_gold.
+  // note that for A, B and E we need to visit all elements rowwise
+  // since that is how we stored above the preKernel values
+  if constexpr(std::is_same_v<value_type, float>){
+    // check A
+    std::size_t count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent1; ++j){
+	EXPECT_FLOAT_EQ(A(i,j), A_preKernel[count++]);
+      }
+    }
+
+    // check B
+    count=0;
+    for (std::size_t i=0; i<extent1; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_FLOAT_EQ(B(i,j), B_preKernel[count++]);
+      }
+    }
+
+    // check C, E
+    count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_FLOAT_EQ(E(i,j), E_preKernel[count++]);
+	EXPECT_NEAR(C(i,j), C_gold(i,j), 1e-3);
+      }
+    }
+  }
+  else if constexpr(std::is_same_v<value_type, double>){
+    // check A
+    std::size_t count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent1; ++j){
+	EXPECT_DOUBLE_EQ(A(i,j), A_preKernel[count++]);
+      }
+    }
+
+    // check B
+    count=0;
+    for (std::size_t i=0; i<extent1; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_DOUBLE_EQ(B(i,j), B_preKernel[count++]);
+      }
+    }
+
+    // check C, E
+    count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_DOUBLE_EQ(E(i,j), E_preKernel[count++]);
+	EXPECT_NEAR(C(i,j), C_gold(i,j), 1e-9);
+      }
+    }
+  }
+  else if constexpr(std::is_same_v<value_type, std::complex<double>>){
+    // check A
+    std::size_t count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent1; ++j){
+	EXPECT_DOUBLE_EQ(A(i,j).real(), A_preKernel[count].real());
+	EXPECT_DOUBLE_EQ(A(i,j).imag(), A_preKernel[count++].imag());
+      }
+    }
+
+    // check B
+    count=0;
+    for (std::size_t i=0; i<extent1; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_DOUBLE_EQ(B(i,j).real(), B_preKernel[count].real());
+	EXPECT_DOUBLE_EQ(B(i,j).imag(), B_preKernel[count++].imag());
+      }
+    }
+
+    // check C, E
+    count=0;
+    for (std::size_t i=0; i<extent0; ++i){
+      for (std::size_t j=0; j<extent2; ++j){
+	EXPECT_DOUBLE_EQ(E(i,j).real(), E_preKernel[count].real());
+	EXPECT_DOUBLE_EQ(E(i,j).imag(), E_preKernel[count++].imag());
+
+	EXPECT_NEAR(C(i,j).real(), C_gold(i,j).real(), 1e-9);
+	EXPECT_NEAR(C(i,j).imag(), C_gold(i,j).imag(), 1e-9);
+      }
+    }
+  }
+  else{ // type-dependent condition: fires only if this branch is instantiated, instead of passing with no checks
+    static_assert(sizeof(value_type) == 0, "kokkos_blas_updating_gemm_impl: no checks implemented for this value_type");
+  }
+}
+}//end anonym namespace
+
+TEST_F(blas2_signed_float_fixture, kokkos_updating_matrix_matrix_product)
+{
+  kokkos_blas_updating_gemm_impl(A_e0e1, B_e1e2, E_e0e2, C_e0e2); // float case: expects C == E + A * B, with E and C both e0 x e2
+}
+
+// TEST_F(blas2_signed_double_fixture, kokkos_updating_matrix_matrix_product)
+// {
+//   kokkos_blas_updating_gemm_impl(A_e0e1, B_e1e2, E_e0e2, C_e0e2);
+// }
+
+// TEST_F(blas2_signed_complex_double_fixture, kokkos_updating_matrix_matrix_product)
+// {
+//   using kc_t = Kokkos::complex<double>;
+//   using stdc_t = value_type;
+//   if constexpr (alignof(value_type) == alignof(kc_t)){
+//     kokkos_blas_updating_gemm_impl(A_e0e1, B_e1e2, E_e0e2, C_e0e2);
+//   }
+// }