diff --git a/include/experimental/__p1673_bits/blas3_matrix_product.hpp b/include/experimental/__p1673_bits/blas3_matrix_product.hpp index 0cfb8465..660606d4 100644 --- a/include/experimental/__p1673_bits/blas3_matrix_product.hpp +++ b/include/experimental/__p1673_bits/blas3_matrix_product.hpp @@ -40,6 +40,8 @@ //@HEADER */ +#include + #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS3_MATRIX_PRODUCT_HPP_ #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS3_MATRIX_PRODUCT_HPP_ @@ -537,7 +539,7 @@ void matrix_product( matrix_product(A, B, E, C); } -// Overwriting symmetric matrix-matrix product +// Overwriting triangular matrix-matrix product template::size_type numRows_A, @@ -545,7 +547,7 @@ template::size_type numRows_B, extents<>::size_type numCols_B, @@ -556,55 +558,128 @@ template::size_type numCols_C, class Layout_C, class Accessor_C> -void symmetric_matrix_product( +void triangular_matrix_left_product( std::experimental::mdspan, Layout_A, Accessor_A> A, - Triangle t, - Side s, + Triangle /* t */, + DiagonalStorage /* d */, std::experimental::mdspan, Layout_B, Accessor_B> B, std::experimental::mdspan, Layout_C, Accessor_C> C) { using size_type = typename extents<>::size_type; + constexpr bool explicitDiagonal = + std::is_same_v; - if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) { - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = j; i < C.extent(0); ++i) { - C(i,j) = ElementType_C{}; - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += A(i,k) * B(k,j); - } + if constexpr (std::is_same_v) { + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + const ptrdiff_t k_upper = explicitDiagonal ? i : i - ptrdiff_t(1); + for (ptrdiff_t k = 0; k <= k_upper; ++k) { + C(i,j) += A(i,k) * B(k,j); + } + if constexpr (! 
explicitDiagonal) { + C(i,j) += /* 1 times */ B(i,j); } } } - else { // upper_triangle_t - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { - C(i,j) = ElementType_C{}; - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += A(i,k) * B(k,j); - } + } + else { // upper_triangle_t + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + const size_type k_lower = explicitDiagonal ? i : i + 1; + for (size_type k = k_lower; k < C.extent(0); ++k) { + C(i,j) += A(i,k) * B(k,j); + } + if constexpr (! explicitDiagonal) { + C(i,j) += /* 1 times */ B(i,j); } } } } - else { // right_side_t - if constexpr (std::is_same_v) { - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = j; i < C.extent(0); ++i) { - C(i,j) = ElementType_C{}; - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += B(i,k) * A(k,j); - } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void triangular_matrix_left_product( + ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + triangular_matrix_left_product (A, t, d, B, C); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_C, + extents<>::size_type numRows_C, 
+ extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void triangular_matrix_right_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle /* t */, + DiagonalStorage /* d */, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + using size_type = typename extents<>::size_type; + constexpr bool explicitDiagonal = + std::is_same_v; + + if constexpr (std::is_same_v) { + for (size_type j = 0; j < C.extent(1); ++j) { + const size_type k_lower = explicitDiagonal ? j : j + 1; + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = k_lower; k < C.extent(1); ++k) { + C(i,j) += B(i,k) * A(k,j); + } + if constexpr (! explicitDiagonal) { + C(i,j) += /* 1 times */ B(i,j); } } } - else { // upper_triangle_t - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { - C(i,j) = ElementType_C{}; - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += B(i,k) * A(k,j); - } + } + else { // upper_triangle_t + for (size_type j = 0; j < C.extent(1); ++j) { + const ptrdiff_t k_upper = explicitDiagonal ? j : j - ptrdiff_t(1); + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (ptrdiff_t k = 0; k <= k_upper; ++k) { + C(i,j) += B(i,k) * A(k,j); + } + if constexpr (! 
explicitDiagonal) { + C(i,j) += /* 1 times */ B(i,j); } } } @@ -618,7 +693,7 @@ template::size_type numRows_B, extents<>::size_type numCols_B, @@ -629,18 +704,166 @@ template::size_type numCols_C, class Layout_C, class Accessor_C> -void symmetric_matrix_product( +void triangular_matrix_right_product( ExecutionPolicy&& /* exec */, std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, - Side s, + DiagonalStorage d, std::experimental::mdspan, Layout_B, Accessor_B> B, std::experimental::mdspan, Layout_C, Accessor_C> C) { - symmetric_matrix_product(A, t, s, B, C); + triangular_matrix_right_product (A, t, d, B, C); } -// Updating symmetric matrix-matrix product +// Updating triangular matrix-matrix product + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void triangular_matrix_left_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle /* t */, + DiagonalStorage /* d */, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + using size_type = typename extents<>::size_type; + constexpr bool explicitDiagonal = + std::is_same_v; + + if constexpr (std::is_same_v) { + for (size_type j=0; j < C.extent(1); ++j) { + for (size_type k=0; k < C.extent(0); ++k) { + for (size_type i=0; i < k; ++i) { + C(i,j) += A(i,k) * C(k,j); + } + if constexpr (explicitDiagonal) { + C(k,j) = A(k,k) * C(k,j); + } + } + } + } + else { // lower_triangle_t + for (size_type j=0; j < C.extent(1); ++j) { + for (size_type k=C.extent(0); k > 0; --k) { + for (size_type i=k; i < C.extent(0); i++) { + C(i,j) += A(i,k-1) * C(k-1,j); + } + if constexpr (explicitDiagonal) { + C(k-1,j) = A(k-1,k-1) * C(k-1,j); + } + } + } + } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + 
class DiagonalStorage, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void triangular_matrix_left_product( + ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + triangular_matrix_left_product (A, t, d, C); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void triangular_matrix_right_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle /* t */, + DiagonalStorage /* d */, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + using size_type = typename extents<>::size_type; + constexpr bool explicitDiagonal = + std::is_same_v; + + if constexpr (std::is_same_v) { + for (size_type j=C.extent(1); j > 0; --j) { + if constexpr (explicitDiagonal) { + for(size_type i=0; i < C.extent(0); ++i) { + C(i,j-1) = C(i,j-1) * A(j-1,j-1); + } + } + for (size_type k=0; k < j-1; k++) { + for(size_type i=0; i < C.extent(0); ++i) { + C(i,j-1) += C(i,k) * A(k,j-1); + } + } + } + } + else { // lower_triangle_t + for (size_type j=0; j < C.extent(1); ++j) { + if constexpr (explicitDiagonal) { + for (size_type i=0; i < C.extent(0); ++i) { + C(i,j) = C(i,j) * A(j,j); + } + } + for (size_type k=j+1; k < C.extent(1); ++k) { + for (size_type i=0; i < C.extent(0); i++) { + C(i,j) += C(i,k) * A(k,j); + } + } + } + } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void triangular_matrix_right_product( + 
ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + triangular_matrix_right_product (A, t, d, C); +} + +// Overwriting symmetric matrix-matrix product template::size_type numRows_A, @@ -648,72 +871,117 @@ template::size_type numRows_B, extents<>::size_type numCols_B, class Layout_B, class Accessor_B, - class ElementType_E, - extents<>::size_type numRows_E, - extents<>::size_type numCols_E, - class Layout_E, - class Accessor_E, class ElementType_C, extents<>::size_type numRows_C, extents<>::size_type numCols_C, class Layout_C, class Accessor_C> -void symmetric_matrix_product( +void symmetric_matrix_left_product( std::experimental::mdspan, Layout_A, Accessor_A> A, - Triangle t, - Side s, + Triangle /* t */, std::experimental::mdspan, Layout_B, Accessor_B> B, - std::experimental::mdspan, Layout_E, Accessor_E> E, std::experimental::mdspan, Layout_C, Accessor_C> C) { using size_type = typename extents<>::size_type; - if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) { - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = j; i < C.extent(0); ++i) { - C(i,j) = E(i,j); - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += A(i,k) * B(k,j); - } + if constexpr (std::is_same_v) { + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = 0; k < A.extent(1); ++k) { + ElementType_A aik = i <= k ? 
A(k,i) : A(i,k); + C(i,j) += aik * B(k,j); } } } - else { // upper_triangle_t - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { - C(i,j) = E(i,j); - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += A(i,k) * B(k,j); - } + } + else { // upper_triangle_t + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = 0; k < A.extent(1); ++k) { + ElementType_A aik = i >= k ? A(k,i) : A(i,k); + C(i,j) += aik * B(k,j); } } } } - else { // right_side_t - if constexpr (std::is_same_v) { - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = j; i < C.extent(0); ++i) { - C(i,j) = E(i,j); - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += B(i,k) * A(k,j); - } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void symmetric_matrix_left_product( + ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + symmetric_matrix_left_product (A, t, B, C); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void symmetric_matrix_right_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle /* t */, + 
std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + using size_type = typename extents<>::size_type; + + if constexpr (std::is_same_v) { + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = 0; k < A.extent(1); ++k) { + ElementType_A akj = j <= k ? A(k,j) : A(j,k); + C(i,j) += B(i,k) * akj; } } } - else { // upper_triangle_t - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { - C(i,j) = E(i,j); - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += B(i,k) * A(k,j); - } + } + else { // upper_triangle_t + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = 0; k < A.extent(1); ++k) { + ElementType_A akj = j >= k ? A(k,j) : A(j,k); + C(i,j) += B(i,k) * akj; } } } @@ -727,7 +995,130 @@ template::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void symmetric_matrix_right_product( + ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + symmetric_matrix_right_product (A, t, B, C); +} + +// Updating symmetric matrix-matrix product + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_E, + extents<>::size_type numRows_E, + extents<>::size_type numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + extents<>::size_type numRows_C, + 
extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void symmetric_matrix_left_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle /* t */, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_E, Accessor_E> E, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + assert(false); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_E, + extents<>::size_type numRows_E, + extents<>::size_type numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void symmetric_matrix_left_product( + ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_E, Accessor_E> E, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + symmetric_matrix_left_product (A, t, B, E, C); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_E, + extents<>::size_type numRows_E, + extents<>::size_type numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void symmetric_matrix_right_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle /* t */, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_E, Accessor_E> E, + std::experimental::mdspan, 
Layout_C, Accessor_C> C) +{ + assert(false); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, class ElementType_B, extents<>::size_type numRows_B, extents<>::size_type numCols_B, @@ -743,16 +1134,15 @@ template::size_type numCols_C, class Layout_C, class Accessor_C> -void symmetric_matrix_product( +void symmetric_matrix_right_product( ExecutionPolicy&& /* exec */, std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, - Side s, std::experimental::mdspan, Layout_B, Accessor_B> B, std::experimental::mdspan, Layout_E, Accessor_E> E, std::experimental::mdspan, Layout_C, Accessor_C> C) { - symmetric_matrix_product(A, t, s, B, E, C); + symmetric_matrix_right_product (A, t, B, E, C); } // Overwriting Hermitian matrix-matrix product @@ -763,7 +1153,6 @@ template::size_type numRows_B, extents<>::size_type numCols_B, @@ -774,55 +1163,109 @@ template::size_type numCols_C, class Layout_C, class Accessor_C> -void hermitian_matrix_product( +void hermitian_matrix_left_product( std::experimental::mdspan, Layout_A, Accessor_A> A, - Triangle t, - Side s, + Triangle /* t */, std::experimental::mdspan, Layout_B, Accessor_B> B, std::experimental::mdspan, Layout_C, Accessor_C> C) { using size_type = typename extents<>::size_type; + using std::conj; - if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) { - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = j; i < C.extent(0); ++i) { - C(i,j) = ElementType_C{}; - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += A(i,k) * B(k,j); - } + if constexpr (std::is_same_v) { + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = 0; k < A.extent(1); ++k) { + ElementType_A aik = i <= k ? 
conj(A(k,i)) : A(i,k); + C(i,j) += aik * B(k,j); } } } - else { // upper_triangle_t - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { - C(i,j) = ElementType_C{}; - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += A(i,k) * B(k,j); - } + } + else { // upper_triangle_t + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = 0; k < A.extent(1); ++k) { + ElementType_A aik = i >= k ? conj(A(k,i)) : A(i,k); + C(i,j) += aik * B(k,j); } } } } - else { // right_side_t - if constexpr (std::is_same_v) { - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = j; i < C.extent(0); ++i) { - C(i,j) = ElementType_C{}; - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += B(i,k) * A(k,j); - } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void hermitian_matrix_left_product( + ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + hermitian_matrix_left_product (A, t, B, C); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void hermitian_matrix_right_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + 
Triangle /* t */, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + using size_type = typename extents<>::size_type; + using std::conj; + + if constexpr (std::is_same_v) { + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = 0; k < A.extent(1); ++k) { + ElementType_A akj = j <= k ? A(k,j) : conj(A(j,k)); + C(i,j) += B(i,k) * akj; } } } - else { // upper_triangle_t - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { - C(i,j) = ElementType_C{}; - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += B(i,k) * A(k,j); - } + } + else { // upper_triangle_t + for (size_type j = 0; j < C.extent(1); ++j) { + for (size_type i = 0; i < C.extent(0); ++i) { + C(i,j) = ElementType_C{}; + for (size_type k = 0; k < A.extent(1); ++k) { + ElementType_A akj = j >= k ? A(k,j) : conj(A(j,k)); + C(i,j) += B(i,k) * akj; } } } @@ -836,7 +1279,6 @@ template::size_type numRows_B, extents<>::size_type numCols_B, @@ -847,15 +1289,14 @@ template::size_type numCols_C, class Layout_C, class Accessor_C> -void hermitian_matrix_product( +void hermitian_matrix_right_product( ExecutionPolicy&& /* exec */, std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, - Side s, std::experimental::mdspan, Layout_B, Accessor_B> B, std::experimental::mdspan, Layout_C, Accessor_C> C) { - hermitian_matrix_product(A, t, s, B, C); + hermitian_matrix_right_product (A, t, B, C); } // Updating Hermitian matrix-matrix product @@ -866,7 +1307,6 @@ template::size_type numRows_B, extents<>::size_type numCols_B, @@ -882,60 +1322,78 @@ template::size_type numCols_C, class Layout_C, class Accessor_C> -void hermitian_matrix_product( +void hermitian_matrix_left_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle /* t */, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, 
Layout_E, Accessor_E> E, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + assert(false); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_E, + extents<>::size_type numRows_E, + extents<>::size_type numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void hermitian_matrix_left_product( + ExecutionPolicy&& /* exec */, std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, - Side s, std::experimental::mdspan, Layout_B, Accessor_B> B, std::experimental::mdspan, Layout_E, Accessor_E> E, std::experimental::mdspan, Layout_C, Accessor_C> C) { - using size_type = typename extents<>::size_type; + hermitian_matrix_left_product (A, t, B, E, C); +} - if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) { - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = j; i < C.extent(0); ++i) { - C(i,j) = E(i,j); - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += A(i,k) * B(k,j); - } - } - } - } - else { // upper_triangle_t - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { - C(i,j) = E(i,j); - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += A(i,k) * B(k,j); - } - } - } - } - } - else { // right_side_t - if constexpr (std::is_same_v) { - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = j; i < C.extent(0); ++i) { - C(i,j) = E(i,j); - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += B(i,k) * A(k,j); - } - } - } - } - else { // upper_triangle_t - for (size_type j = 0; j < C.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { - C(i,j) = E(i,j); - for (size_type k = 0; k < A.extent(1); ++k) { - C(i,j) += B(i,k) * 
A(k,j); - } - } - } - } - } +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_E, + extents<>::size_type numRows_E, + extents<>::size_type numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + extents<>::size_type numRows_C, + extents<>::size_type numCols_C, + class Layout_C, + class Accessor_C> +void hermitian_matrix_right_product( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle /* t */, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_E, Accessor_E> E, + std::experimental::mdspan, Layout_C, Accessor_C> C) +{ + assert(false); } template::size_type numRows_B, extents<>::size_type numCols_B, @@ -961,16 +1418,15 @@ template::size_type numCols_C, class Layout_C, class Accessor_C> -void hermitian_matrix_product( +void hermitian_matrix_right_product( ExecutionPolicy&& /* exec */, std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, - Side s, std::experimental::mdspan, Layout_B, Accessor_B> B, std::experimental::mdspan, Layout_E, Accessor_E> E, std::experimental::mdspan, Layout_C, Accessor_C> C) { - hermitian_matrix_product(A, t, s, B, E, C); + hermitian_matrix_right_product (A, t, B, E, C); } } // end namespace linalg diff --git a/include/experimental/__p1673_bits/blas3_triangular_matrix_matrix_solve.hpp b/include/experimental/__p1673_bits/blas3_triangular_matrix_matrix_solve.hpp index be7db2f5..004c7f3f 100644 --- a/include/experimental/__p1673_bits/blas3_triangular_matrix_matrix_solve.hpp +++ b/include/experimental/__p1673_bits/blas3_triangular_matrix_matrix_solve.hpp @@ -246,6 +246,128 @@ void trsm_lower_triangular_right_side( } // end anonymous namespace +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + 
class Triangle, + class DiagonalStorage, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_X, + extents<>::size_type numRows_X, + extents<>::size_type numCols_X, + class Layout_X, + class Accessor_X> +void triangular_matrix_matrix_left_solve( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_X, Accessor_X> X) +{ + if (std::is_same_v) { + trsm_lower_triangular_left_side (A, d, B, X); + } + else { + trsm_upper_triangular_left_side (A, d, B, X); + } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_X, + extents<>::size_type numRows_X, + extents<>::size_type numCols_X, + class Layout_X, + class Accessor_X> +void triangular_matrix_matrix_left_solve( + ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_X, Accessor_X> X) +{ + triangular_matrix_matrix_left_solve (A, t, d, B, X); +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_X, + extents<>::size_type numRows_X, + extents<>::size_type numCols_X, + class Layout_X, + class Accessor_X> +void triangular_matrix_matrix_right_solve( + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, 
Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_X, Accessor_X> X) +{ + if (std::is_same_v) { + trsm_lower_triangular_right_side (A, d, B, X); + } + else { + trsm_upper_triangular_right_side (A, d, B, X); + } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_B, + extents<>::size_type numRows_B, + extents<>::size_type numCols_B, + class Layout_B, + class Accessor_B, + class ElementType_X, + extents<>::size_type numRows_X, + extents<>::size_type numCols_X, + class Layout_X, + class Accessor_X> +void triangular_matrix_matrix_right_solve( + ExecutionPolicy&& /* exec */, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_B, Accessor_B> B, + std::experimental::mdspan, Layout_X, Accessor_X> X) +{ + triangular_matrix_matrix_right_solve (A, t, d, B, X); +} + template::size_type numRows_A, extents<>::size_type numCols_A, @@ -273,20 +395,10 @@ void triangular_matrix_matrix_solve( std::experimental::mdspan, Layout_X, Accessor_X> X) { if (std::is_same_v) { - if (std::is_same_v) { - trsm_lower_triangular_left_side (A, d, B, X); - } - else { - trsm_upper_triangular_left_side (A, d, B, X); - } + triangular_matrix_matrix_left_solve (A, d, B, X); } else { - if (std::is_same_v) { - trsm_lower_triangular_right_side (A, d, B, X); - } - else { - trsm_upper_triangular_right_side (A, d, B, X); - } + triangular_matrix_matrix_right_solve (A, d, B, X); } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 41968c9a..fb7893a4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,6 +43,7 @@ linalg_add_test(dot) linalg_add_test(gemm) linalg_add_test(gemv) linalg_add_test(givens) +linalg_add_test(hemm) linalg_add_test(idx_abs_max) # (AMK 6/7/21) Temporarily removing iterator test since it doesn't build # [ 73%] Building CXX object 
tests/CMakeFiles/iterator.dir/iterator.cpp.o @@ -55,4 +56,6 @@ linalg_add_test(norm2) linalg_add_test(scale) linalg_add_test(scaled) linalg_add_test(swap) +linalg_add_test(symm) linalg_add_test(transposed) +linalg_add_test(trmm) diff --git a/tests/gemm.cpp b/tests/gemm.cpp index 101c6d31..cb9e98c7 100644 --- a/tests/gemm.cpp +++ b/tests/gemm.cpp @@ -18,8 +18,12 @@ namespace { using std::experimental::dynamic_extent; using std::experimental::extents; using std::experimental::layout_left; + using std::experimental::linalg::explicit_diagonal; + using std::experimental::linalg::implicit_unit_diagonal; + using std::experimental::linalg::lower_triangle; using std::experimental::linalg::matrix_product; using std::experimental::linalg::transposed; + using std::experimental::linalg::upper_triangle; using std::cout; using std::endl; @@ -235,4 +239,5 @@ namespace { { test_matrix_product(); } -} + +} // end anonymous namespace \ No newline at end of file diff --git a/tests/hemm.cpp b/tests/hemm.cpp new file mode 100644 index 00000000..dc7d10cd --- /dev/null +++ b/tests/hemm.cpp @@ -0,0 +1,260 @@ +#include +#include + +// FIXME I can't actually test the executor overloads, since my GCC +// (9.1.0, via Homebrew) isn't set up correctly: +// +// .../gcc/9.1.0/include/c++/9.1.0/pstl/parallel_backend_tbb.h:19:10: fatal error: tbb/blocked_range.h: No such file or directory +// 19 | #include +// | ^~~~~~~~~~~~~~~~~~~~~ + +//#include +#include +#include +#include "gtest/gtest.h" +#include + +namespace { + using std::experimental::mdspan; + using std::experimental::dynamic_extent; + using std::experimental::extents; + using std::experimental::layout_left; + using std::experimental::linalg::explicit_diagonal; + using std::experimental::linalg::implicit_unit_diagonal; + using std::experimental::linalg::lower_triangle; + using std::experimental::linalg::matrix_product; + using std::experimental::linalg::transposed; + using std::experimental::linalg::upper_triangle; + using std::complex; + 
using std::cout; + using std::endl; + using namespace std::complex_literals; + + #define EXPECT_COMPLEX_NEAR(a, b, tol) \ + EXPECT_NEAR(a.real(), b.real(), tol); \ + EXPECT_NEAR(a.imag(), b.imag(), tol) + + TEST(BLAS3_hemm, left_lower_tri) + { + /* C = A * B, where A is hermitian mxm */ + using extents_t = extents; + using cmatrix_t = mdspan, extents_t, layout_left>; + using dmatrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector> A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector> C_mem(m*n, snan); + std::vector> gs_mem(m*n); + + cmatrix_t A(A_mem.data(), m, m); + dmatrix_t B(B_mem.data(), m, n); + cmatrix_t C(C_mem.data(), m, n); + cmatrix_t gs(gs_mem.data(), m, n); + + // Fill A + A(0,0) = -4.0; + A(1,0) = 4.0 + 4.4i; + A(1,1) = 3.5; + A(2,0) = 4.4 + 2.2i; + A(2,1) = -2.8 - 4.0i; + A(2,2) = -1.2; + + // Fill B + B(0,0) = 1.3; + B(0,1) = 2.5; + B(1,0) = -4.6; + B(1,1) = -3.7; + B(2,0) = 3.1; + B(2,1) = -1.5; + + // Fill GS + gs(0,0) = -9.96 + 13.42i; + gs(0,1) = -31.4 + 19.58i; + gs(1,0) = -19.58 + 18.12i; + gs(1,1) = 1.25 + 5.0i; + gs(2,0) = 14.88 + 21.26i; + gs(2,1) = 23.16 + 20.3i; + + hermitian_matrix_left_product(A, lower_triangle, B, C); + + // TODO: Choose a more reasonable value + constexpr double TOL = 1e-9; + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + EXPECT_COMPLEX_NEAR(gs(i,j), C(i,j), TOL) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + + TEST(BLAS3_hemm, left_upper_tri) + { + /* C = A * B, where A is hermitian mxm */ + using extents_t = extents; + using cmatrix_t = mdspan, extents_t, layout_left>; + using dmatrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector> A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector> C_mem(m*n, snan); + std::vector> gs_mem(m*n); + + cmatrix_t A(A_mem.data(), m, m); + dmatrix_t B(B_mem.data(), m, n); + cmatrix_t 
C(C_mem.data(), m, n); + cmatrix_t gs(gs_mem.data(), m, n); + + // Fill A + A(0,0) = -4.0; + A(0,1) = 4.0 - 4.4i; + A(1,1) = 3.5; + A(0,2) = 4.4 - 2.2i; + A(1,2) = -2.8 + 4.0i; + A(2,2) = -1.2; + + // Fill B + B(0,0) = 1.3; + B(0,1) = 2.5; + B(1,0) = -4.6; + B(1,1) = -3.7; + B(2,0) = 3.1; + B(2,1) = -1.5; + + // Fill GS + gs(0,0) = -9.96 + 13.42i; + gs(0,1) = -31.4 + 19.58i; + gs(1,0) = -19.58 + 18.12i; + gs(1,1) = 1.25 + 5.0i; + gs(2,0) = 14.88 + 21.26i; + gs(2,1) = 23.16 + 20.3i; + + hermitian_matrix_left_product(A, upper_triangle, B, C); + + // TODO: Choose a more reasonable value + constexpr double TOL = 1e-9; + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + EXPECT_COMPLEX_NEAR(gs(i,j), C(i,j), TOL) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + + TEST(BLAS3_hemm, right_lower_tri) + { + /* C = B * A, where A is hermitian mxm */ + using extents_t = extents; + using cmatrix_t = mdspan, extents_t, layout_left>; + using dmatrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector> A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector> C_mem(m*n, snan); + std::vector> gs_mem(m*n); + + cmatrix_t A(A_mem.data(), m, m); + dmatrix_t B(B_mem.data(), n, m); + cmatrix_t C(C_mem.data(), n, m); + cmatrix_t gs(gs_mem.data(), n, m); + + // Fill A + A(0,0) = -4.0; + A(1,0) = 4.0 + 4.4i; + A(1,1) = 3.5; + A(2,0) = 4.4 + 2.2i; + A(2,1) = -2.8 - 4.0i; + A(2,2) = -1.2; + + // Fill B + B(0,0) = 1.3; + B(1,0) = 2.5; + B(0,1) = -4.6; + B(1,1) = -3.7; + B(0,2) = 3.1; + B(1,2) = -1.5; + + // Fill GS + gs(0,0) = -9.96 - 13.42i; + gs(1,0) = -31.4 - 19.58i; + gs(0,1) = -19.58 - 18.12i; + gs(1,1) = 1.25 - 5.0i; + gs(0,2) = 14.88 - 21.26i; + gs(1,2) = 23.16 - 20.3i; + + hermitian_matrix_right_product(A, lower_triangle, B, C); + + // TODO: Choose a more reasonable value + constexpr double TOL = 1e-9; + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) 
{ + EXPECT_COMPLEX_NEAR(gs(i,j), C(i,j), TOL) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + + TEST(BLAS3_hemm, right_upper_tri) + { + /* C = B * A, where A is hermitian mxm */ + using extents_t = extents; + using cmatrix_t = mdspan, extents_t, layout_left>; + using dmatrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector> A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector> C_mem(m*n, snan); + std::vector> gs_mem(m*n); + + cmatrix_t A(A_mem.data(), m, m); + dmatrix_t B(B_mem.data(), n, m); + cmatrix_t C(C_mem.data(), n, m); + cmatrix_t gs(gs_mem.data(), n, m); + + // Fill A + A(0,0) = -4.0; + A(0,1) = 4.0 - 4.4i; + A(1,1) = 3.5; + A(0,2) = 4.4 - 2.2i; + A(1,2) = -2.8 + 4.0i; + A(2,2) = -1.2; + + // Fill B + B(0,0) = 1.3; + B(1,0) = 2.5; + B(0,1) = -4.6; + B(1,1) = -3.7; + B(0,2) = 3.1; + B(1,2) = -1.5; + + // Fill GS + gs(0,0) = -9.96 - 13.42i; + gs(1,0) = -31.4 - 19.58i; + gs(0,1) = -19.58 - 18.12i; + gs(1,1) = 1.25 - 5.0i; + gs(0,2) = 14.88 - 21.26i; + gs(1,2) = 23.16 - 20.3i; + + hermitian_matrix_right_product(A, upper_triangle, B, C); + + // TODO: Choose a more reasonable value + constexpr double TOL = 1e-9; + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + EXPECT_COMPLEX_NEAR(gs(i,j), C(i,j), TOL) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } +} // end anonymous namespace \ No newline at end of file diff --git a/tests/symm.cpp b/tests/symm.cpp new file mode 100644 index 00000000..f2fa7ecd --- /dev/null +++ b/tests/symm.cpp @@ -0,0 +1,260 @@ +#include +#include + +// FIXME I can't actually test the executor overloads, since my GCC +// (9.1.0, via Homebrew) isn't set up correctly: +// +// .../gcc/9.1.0/include/c++/9.1.0/pstl/parallel_backend_tbb.h:19:10: fatal error: tbb/blocked_range.h: No such file or directory +// 19 | #include +// | ^~~~~~~~~~~~~~~~~~~~~ + +//#include +#include +#include +#include 
"gtest/gtest.h" +#include + +namespace { + using std::experimental::mdspan; + using std::experimental::dynamic_extent; + using std::experimental::extents; + using std::experimental::layout_left; + using std::experimental::linalg::explicit_diagonal; + using std::experimental::linalg::implicit_unit_diagonal; + using std::experimental::linalg::lower_triangle; + using std::experimental::linalg::matrix_product; + using std::experimental::linalg::transposed; + using std::experimental::linalg::upper_triangle; + using std::complex; + using std::cout; + using std::endl; + using namespace std::complex_literals; + + #define EXPECT_COMPLEX_NEAR(a, b, tol) \ + EXPECT_NEAR(a.real(), b.real(), tol); \ + EXPECT_NEAR(a.imag(), b.imag(), tol) + + TEST(BLAS3_symm, left_lower_tri) + { + /* C = A * B, where A is symmetric mxm */ + using extents_t = extents; + using cmatrix_t = mdspan, extents_t, layout_left>; + using dmatrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector> A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector> C_mem(m*n, snan); + std::vector> gs_mem(m*n); + + cmatrix_t A(A_mem.data(), m, m); + dmatrix_t B(B_mem.data(), m, n); + cmatrix_t C(C_mem.data(), m, n); + cmatrix_t gs(gs_mem.data(), m, n); + + // Fill A + A(0,0) = -4.0 + 0.9i; + A(1,0) = 4.0 + 4.4i; + A(1,1) = 3.5 - 4.2i; + A(2,0) = 4.4 + 2.2i; + A(2,1) = -2.8 - 4.0i; + A(2,2) = -1.2 + 1.7i; + + // Fill B + B(0,0) = 1.3; + B(0,1) = 2.5; + B(1,0) = -4.6; + B(1,1) = -3.7; + B(2,0) = 3.1; + B(2,1) = -1.5; + + // Fill GS + gs(0,0) = -9.96 - 12.25i; + gs(0,1) = -31.4 - 17.33i; + gs(1,0) = -19.58 + 12.64i; + gs(1,1) = 1.25 + 32.54i; + gs(2,0) = 14.88 + 26.53i; + gs(2,1) = 23.16 + 17.75i; + + symmetric_matrix_left_product(A, lower_triangle, B, C); + + // TODO: Choose a more reasonable value + constexpr double TOL = 1e-9; + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + EXPECT_COMPLEX_NEAR(gs(i,j), C(i,j), TOL) + << 
"Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + + TEST(BLAS3_symm, left_upper_tri) + { + /* C = A * B, where A is symmetric mxm */ + using extents_t = extents; + using cmatrix_t = mdspan, extents_t, layout_left>; + using dmatrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector> A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector> C_mem(m*n, snan); + std::vector> gs_mem(m*n); + + cmatrix_t A(A_mem.data(), m, m); + dmatrix_t B(B_mem.data(), m, n); + cmatrix_t C(C_mem.data(), m, n); + cmatrix_t gs(gs_mem.data(), m, n); + + // Fill A + A(0,0) = -4.0 + 0.9i; + A(0,1) = 4.0 + 4.4i; + A(1,1) = 3.5 - 4.2i; + A(0,2) = 4.4 + 2.2i; + A(1,2) = -2.8 - 4.0i; + A(2,2) = -1.2 + 1.7i; + + // Fill B + B(0,0) = 1.3; + B(0,1) = 2.5; + B(1,0) = -4.6; + B(1,1) = -3.7; + B(2,0) = 3.1; + B(2,1) = -1.5; + + // Fill GS + gs(0,0) = -9.96 - 12.25i; + gs(0,1) = -31.4 - 17.33i; + gs(1,0) = -19.58 + 12.64i; + gs(1,1) = 1.25 + 32.54i; + gs(2,0) = 14.88 + 26.53i; + gs(2,1) = 23.16 + 17.75i; + + symmetric_matrix_left_product(A, upper_triangle, B, C); + + // TODO: Choose a more reasonable value + constexpr double TOL = 1e-9; + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + EXPECT_COMPLEX_NEAR(gs(i,j), C(i,j), TOL) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + + TEST(BLAS3_symm, right_lower_tri) + { + /* C = B * A, where A is symmetric mxm */ + using extents_t = extents; + using cmatrix_t = mdspan, extents_t, layout_left>; + using dmatrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector> A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector> C_mem(m*n, snan); + std::vector> gs_mem(m*n); + + cmatrix_t A(A_mem.data(), m, m); + dmatrix_t B(B_mem.data(), n, m); + cmatrix_t C(C_mem.data(), n, m); + cmatrix_t gs(gs_mem.data(), n, m); + + // Fill A + A(0,0) = -4.0 + 0.9i; + A(1,0) = 4.0 + 
4.4i; + A(1,1) = 3.5 - 4.2i; + A(2,0) = 4.4 + 2.2i; + A(2,1) = -2.8 - 4.0i; + A(2,2) = -1.2 + 1.7i; + + // Fill B + B(0,0) = 1.3; + B(1,0) = 2.5; + B(0,1) = -4.6; + B(1,1) = -3.7; + B(0,2) = 3.1; + B(1,2) = -1.5; + + // Fill GS + gs(0,0) = -9.96 - 12.25i; + gs(1,0) = -31.4 - 17.33i; + gs(0,1) = -19.58 + 12.64i; + gs(1,1) = 1.25 + 32.54i; + gs(0,2) = 14.88 + 26.53i; + gs(1,2) = 23.16 + 17.75i; + + symmetric_matrix_right_product(A, lower_triangle, B, C); + + // TODO: Choose a more reasonable value + constexpr double TOL = 1e-9; + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + EXPECT_COMPLEX_NEAR(gs(i,j), C(i,j), TOL) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + + TEST(BLAS3_symm, right_upper_tri) + { + /* C = B * A, where A is symmetric mxm */ + using extents_t = extents; + using cmatrix_t = mdspan, extents_t, layout_left>; + using dmatrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector> A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector> C_mem(m*n, snan); + std::vector> gs_mem(m*n); + + cmatrix_t A(A_mem.data(), m, m); + dmatrix_t B(B_mem.data(), n, m); + cmatrix_t C(C_mem.data(), n, m); + cmatrix_t gs(gs_mem.data(), n, m); + + // Fill A + A(0,0) = -4.0 + 0.9i; + A(0,1) = 4.0 + 4.4i; + A(1,1) = 3.5 - 4.2i; + A(0,2) = 4.4 + 2.2i; + A(1,2) = -2.8 - 4.0i; + A(2,2) = -1.2 + 1.7i; + + // Fill B + B(0,0) = 1.3; + B(1,0) = 2.5; + B(0,1) = -4.6; + B(1,1) = -3.7; + B(0,2) = 3.1; + B(1,2) = -1.5; + + // Fill GS + gs(0,0) = -9.96 - 12.25i; + gs(1,0) = -31.4 - 17.33i; + gs(0,1) = -19.58 + 12.64i; + gs(1,1) = 1.25 + 32.54i; + gs(0,2) = 14.88 + 26.53i; + gs(1,2) = 23.16 + 17.75i; + + symmetric_matrix_right_product(A, upper_triangle, B, C); + + // TODO: Choose a more reasonable value + constexpr double TOL = 1e-9; + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + EXPECT_COMPLEX_NEAR(gs(i,j), C(i,j), TOL) + << "Matrices 
differ at index (" + << i << "," << j << ")\n"; + } + } + } +} // end anonymous namespace \ No newline at end of file diff --git a/tests/trmm.cpp b/tests/trmm.cpp new file mode 100644 index 00000000..b4dae376 --- /dev/null +++ b/tests/trmm.cpp @@ -0,0 +1,550 @@ +#include +#include + +// FIXME I can't actually test the executor overloads, since my GCC +// (9.1.0, via Homebrew) isn't set up correctly: +// +// .../gcc/9.1.0/include/c++/9.1.0/pstl/parallel_backend_tbb.h:19:10: fatal error: tbb/blocked_range.h: No such file or directory +// 19 | #include +// | ^~~~~~~~~~~~~~~~~~~~~ + +//#include +#include +#include "gtest/gtest.h" +#include + +namespace { + using std::experimental::mdspan; + using std::experimental::dynamic_extent; + using std::experimental::extents; + using std::experimental::layout_left; + using std::experimental::linalg::explicit_diagonal; + using std::experimental::linalg::implicit_unit_diagonal; + using std::experimental::linalg::lower_triangle; + using std::experimental::linalg::matrix_product; + using std::experimental::linalg::transposed; + using std::experimental::linalg::upper_triangle; + using std::cout; + using std::endl; + + TEST(BLAS3_trmm, left_lower_tri_explicit_diag) + { + /* C = A * B, where A is triangular mxm */ + using extents_t = extents; + using matrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector C_mem(m*n, snan); + std::vector gs_mem(m*n); + + matrix_t A(A_mem.data(), m, m); + matrix_t B(B_mem.data(), m, n); + matrix_t C(C_mem.data(), m, n); + matrix_t gs(gs_mem.data(), m, n); + + // Fill A + A(0,0) = 3.5; + A(1,0) = -2.0; + A(1,1) = 1.2; + A(2,0) = -0.1; + A(2,1) = 4.5; + A(2,2) = -1.0; + + // Fill B + B(0,0) = -4.4; + B(0,1) = 1.8; + B(1,0) = -1.4; + B(1,1) = 3.4; + B(2,0) = 1.8; + B(2,1) = 1.6; + + // Fill GS + gs(0,0) = -15.4; + gs(0,1) = 6.3; + gs(1,0) = 7.12; + gs(1,1) = 0.48; + gs(2,0) = -7.66; + 
gs(2,1) = 13.52; + + // Check the non-overwriting version + triangular_matrix_left_product(A, lower_triangle, explicit_diagonal, B, C); + + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + EXPECT_DOUBLE_EQ(gs(i,j), C(i,j)) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + + // Check the overwriting version + triangular_matrix_left_product(A, lower_triangle, explicit_diagonal, B); + + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + EXPECT_DOUBLE_EQ(gs(i,j), B(i,j)) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + +TEST(BLAS3_trmm, left_lower_tri_implicit_diag) + { + /* C = A * B, where A is triangular mxm */ + using extents_t = extents; + using matrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector C_mem(m*n, snan); + std::vector gs_mem(m*n); + + matrix_t A(A_mem.data(), m, m); + matrix_t B(B_mem.data(), m, n); + matrix_t C(C_mem.data(), m, n); + matrix_t gs(gs_mem.data(), m, n); + + // Fill A + A(1,0) = -2.0; + A(2,0) = -0.1; + A(2,1) = 4.5; + + // Fill B + B(0,0) = -4.4; + B(0,1) = 1.8; + B(1,0) = -1.4; + B(1,1) = 3.4; + B(2,0) = 1.8; + B(2,1) = 1.6; + + triangular_matrix_left_product(A, lower_triangle, implicit_unit_diagonal, B, C); + + // Fill GS + gs(0,0) = -4.4; + gs(0,1) = 1.8; + gs(1,0) = 7.4; + gs(1,1) = -0.2; + gs(2,0) = -4.06; + gs(2,1) = 16.72; + + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + // FIXME: Choose a more reasonable value for the tolerance + constexpr double tol = 1e-9; + EXPECT_NEAR(gs(i,j), C(i,j), tol) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + + // Check the overwriting version + triangular_matrix_left_product(A, lower_triangle, implicit_unit_diagonal, B); + + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + // FIXME: Choose a 
more reasonable value for the tolerance + constexpr double tol = 1e-9; + EXPECT_NEAR(gs(i,j), B(i,j), tol) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + +TEST(BLAS3_trmm, left_upper_tri_explicit_diag) + { + /* C = A * B, where A is triangular mxm */ + using extents_t = extents; + using matrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector C_mem(m*n, snan); + std::vector gs_mem(m*n); + + matrix_t A(A_mem.data(), m, m); + matrix_t B(B_mem.data(), m, n); + matrix_t C(C_mem.data(), m, n); + matrix_t gs(gs_mem.data(), m, n); + + // Fill A + A(0,0) = 3.5; + A(0,1) = -2.0; + A(1,1) = 1.2; + A(0,2) = -0.1; + A(1,2) = 4.5; + A(2,2) = -1.0; + + // Fill B + B(0,0) = -4.4; + B(0,1) = 1.8; + B(1,0) = -1.4; + B(1,1) = 3.4; + B(2,0) = 1.8; + B(2,1) = 1.6; + + // Fill GS + gs(0,0) = -12.78; + gs(0,1) = -0.66; + gs(1,0) = 6.42; + gs(1,1) = 11.28; + gs(2,0) = -1.8; + gs(2,1) = -1.6; + + // Check the non-overwriting version + triangular_matrix_left_product(A, upper_triangle, explicit_diagonal, B, C); + + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + EXPECT_DOUBLE_EQ(gs(i,j), C(i,j)) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + + // Check the overwriting version + triangular_matrix_left_product(A, upper_triangle, explicit_diagonal, B); + + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + EXPECT_DOUBLE_EQ(gs(i,j), B(i,j)) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + +TEST(BLAS3_trmm, left_upper_tri_implicit_diag) + { + /* C = A * B, where A is triangular mxm */ + using extents_t = extents; + using matrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector C_mem(m*n, snan); + std::vector gs_mem(m*n); + + 
matrix_t A(A_mem.data(), m, m); + matrix_t B(B_mem.data(), m, n); + matrix_t C(C_mem.data(), m, n); + matrix_t gs(gs_mem.data(), m, n); + + // Fill A + A(0,1) = -2.0; + A(0,2) = -0.1; + A(1,2) = 4.5; + + // Fill B + B(0,0) = -4.4; + B(0,1) = 1.8; + B(1,0) = -1.4; + B(1,1) = 3.4; + B(2,0) = 1.8; + B(2,1) = 1.6; + + triangular_matrix_left_product(A, upper_triangle, implicit_unit_diagonal, B, C); + + // Fill GS + gs(0,0) = -1.78; + gs(0,1) = -5.16; + gs(1,0) = 6.7; + gs(1,1) = 10.6; + gs(2,0) = 1.8; + gs(2,1) = 1.6; + + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + // FIXME: Choose a more reasonable value for the tolerance + constexpr double tol = 1e-9; + EXPECT_NEAR(gs(i,j), C(i,j), tol) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + + // Check the overwriting version + triangular_matrix_left_product(A, upper_triangle, implicit_unit_diagonal, B); + + for (ptrdiff_t j = 0; j < n; ++j) { + for (ptrdiff_t i = 0; i < m; ++i) { + // FIXME: Choose a more reasonable value for the tolerance + constexpr double tol = 1e-9; + EXPECT_NEAR(gs(i,j), B(i,j), tol) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + +TEST(BLAS3_trmm, right_lower_tri_explicit_diag) + { + /* C = B * A, where A is triangular mxm */ + using extents_t = extents; + using matrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector C_mem(m*n, snan); + std::vector gs_mem(m*n); + + matrix_t A(A_mem.data(), m, m); + matrix_t B(B_mem.data(), n, m); + matrix_t C(C_mem.data(), n, m); + matrix_t gs(gs_mem.data(), n, m); + + // Fill A + A(0,0) = 3.5; + A(1,0) = -2.0; + A(1,1) = 1.2; + A(2,0) = -0.1; + A(2,1) = 4.5; + A(2,2) = -1.0; + + // Fill B + B(0,0) = -4.4; + B(1,0) = 1.8; + B(0,1) = -1.4; + B(1,1) = 3.4; + B(0,2) = 1.8; + B(1,2) = 1.6; + + // Fill GS + gs(0,0) = -12.78; + gs(1,0) = -0.66; + gs(0,1) = 6.42; + gs(1,1) 
= 11.28; + gs(0,2) = -1.8; + gs(1,2) = -1.6; + + // Check the non-overwriting version + triangular_matrix_right_product(A, lower_triangle, explicit_diagonal, B, C); + + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + EXPECT_DOUBLE_EQ(gs(i,j), C(i,j)) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + + // Check the overwriting version + triangular_matrix_right_product(A, lower_triangle, explicit_diagonal, B); + + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + EXPECT_DOUBLE_EQ(gs(i,j), B(i,j)) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + +TEST(BLAS3_trmm, right_lower_tri_implicit_diag) + { + /* C = B * A, where A is triangular mxm */ + using extents_t = extents; + using matrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector C_mem(m*n, snan); + std::vector gs_mem(m*n); + + matrix_t A(A_mem.data(), m, m); + matrix_t B(B_mem.data(), n, m); + matrix_t C(C_mem.data(), n, m); + matrix_t gs(gs_mem.data(), n, m); + + // Fill A + A(1,0) = -2.0; + A(2,0) = -0.1; + A(2,1) = 4.5; + + // Fill B + B(0,0) = -4.4; + B(1,0) = 1.8; + B(0,1) = -1.4; + B(1,1) = 3.4; + B(0,2) = 1.8; + B(1,2) = 1.6; + + triangular_matrix_right_product(A, lower_triangle, implicit_unit_diagonal, B, C); + + // Fill GS + gs(0,0) = -1.78; + gs(1,0) = -5.16; + gs(0,1) = 6.7; + gs(1,1) = 10.6; + gs(0,2) = 1.8; + gs(1,2) = 1.6; + + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + // FIXME: Choose a more reasonable value for the tolerance + constexpr double tol = 1e-9; + EXPECT_NEAR(gs(i,j), C(i,j), tol) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + + // Check the overwriting version + triangular_matrix_right_product(A, lower_triangle, implicit_unit_diagonal, B); + + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i <
n; ++i) { + // FIXME: Choose a more reasonable value for the tolerance + constexpr double tol = 1e-9; + EXPECT_NEAR(gs(i,j), B(i,j), tol) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + +TEST(BLAS3_trmm, right_upper_tri_explicit_diag) + { + /* C = B*A, where A is triangular mxm */ + using extents_t = extents; + using matrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector C_mem(m*n, snan); + std::vector gs_mem(m*n); + + matrix_t A(A_mem.data(), m, m); + matrix_t B(B_mem.data(), n, m); + matrix_t C(C_mem.data(), n, m); + matrix_t gs(gs_mem.data(), n, m); + + // Fill A + A(0,0) = 3.5; + A(0,1) = -2.0; + A(1,1) = 1.2; + A(0,2) = -0.1; + A(1,2) = 4.5; + A(2,2) = -1.0; + + // Fill B + B(0,0) = -4.4; + B(1,0) = 1.8; + B(0,1) = -1.4; + B(1,1) = 3.4; + B(0,2) = 1.8; + B(1,2) = 1.6; + + // Fill GS + gs(0,0) = -15.4; + gs(1,0) = 6.3; + gs(0,1) = 7.12; + gs(1,1) = 0.48; + gs(0,2) = -7.66; + gs(1,2) = 13.52; + + // Check the non-overwriting version + triangular_matrix_right_product(A, upper_triangle, explicit_diagonal, B, C); + + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + EXPECT_DOUBLE_EQ(gs(i,j), C(i,j)) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + + // Check the overwriting version + triangular_matrix_right_product(A, upper_triangle, explicit_diagonal, B); + + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + EXPECT_DOUBLE_EQ(gs(i,j), B(i,j)) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + +TEST(BLAS3_trmm, right_upper_tri_implicit_diag) + { + /* C = B * A, where A is triangular mxm */ + using extents_t = extents; + using matrix_t = mdspan; + constexpr double snan = std::numeric_limits::signaling_NaN(); + + int m = 3, n = 2; + std::vector A_mem(m*m, snan); + std::vector B_mem(m*n); + std::vector C_mem(m*n, snan); 
+ std::vector gs_mem(m*n); + + matrix_t A(A_mem.data(), m, m); + matrix_t B(B_mem.data(), n, m); + matrix_t C(C_mem.data(), n, m); + matrix_t gs(gs_mem.data(), n, m); + + // Fill A + A(0,1) = -2.0; + A(0,2) = -0.1; + A(1,2) = 4.5; + + // Fill B + B(0,0) = -4.4; + B(1,0) = 1.8; + B(0,1) = -1.4; + B(1,1) = 3.4; + B(0,2) = 1.8; + B(1,2) = 1.6; + + triangular_matrix_right_product(A, upper_triangle, implicit_unit_diagonal, B, C); + + // Fill GS + gs(0,0) = -4.4; + gs(1,0) = 1.8; + gs(0,1) = 7.4; + gs(1,1) = -0.2; + gs(0,2) = -4.06; + gs(1,2) = 16.72; + + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + // FIXME: Choose a more reasonable value for the tolerance + constexpr double tol = 1e-9; + EXPECT_NEAR(gs(i,j), C(i,j), tol) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + + // Check the overwriting version + triangular_matrix_right_product(A, upper_triangle, implicit_unit_diagonal, B); + + for (ptrdiff_t j = 0; j < m; ++j) { + for (ptrdiff_t i = 0; i < n; ++i) { + // FIXME: Choose a more reasonable value for the tolerance + constexpr double tol = 1e-9; + EXPECT_NEAR(gs(i,j), B(i,j), tol) + << "Matrices differ at index (" + << i << "," << j << ")\n"; + } + } + } + +} // end anonymous namespace \ No newline at end of file