diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index c2d0fa782..f1b8a242e 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -39,7 +39,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="/EHsc /bigobj" -DKokkos_ROOT="C:\kokkos-install" ${{ steps.vcpkg.outputs.vcpkg-cmake-config }} -DARBORX_ENABLE_MPI=OFF -DARBORX_ENABLE_TESTS=ON -DARBORX_ENABLE_EXAMPLES=ON -DARBORX_ENABLE_BENCHMARKS=ON ..
+        cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="/EHsc /bigobj" -DKokkos_ROOT="C:\kokkos-install" ${{ steps.vcpkg.outputs.vcpkg-cmake-config }} -DARBORX_ENABLE_MPI=OFF -DARBORX_ENABLE_TESTS=ON -DARBORX_ENABLE_EXAMPLES=ON -DARBORX_ENABLE_BENCHMARKS=ON -DARBORX_ENABLE_HEADER_SELF_CONTAINMENT_TESTS=OFF ..
     - name: Build ArborX
       shell: bash
       run: |
diff --git a/.gitignore b/.gitignore
index 48439bce0..488cc1fd5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 *.swp
 .#*
 /build*
+.vscode
\ No newline at end of file
diff --git a/src/interpolation/details/ArborX_InterpDetailsSymmetricPseudoInverseSVD.hpp b/src/interpolation/details/ArborX_InterpDetailsSymmetricPseudoInverseSVD.hpp
new file mode 100644
index 000000000..dbd40c3d8
--- /dev/null
+++ b/src/interpolation/details/ArborX_InterpDetailsSymmetricPseudoInverseSVD.hpp
@@ -0,0 +1,259 @@
+/****************************************************************************
+ * Copyright (c) 2023 by the ArborX authors                                 *
+ * All rights reserved.                                                     *
+ *                                                                          *
+ * This file is part of the ArborX library. ArborX is                       *
+ * distributed under a BSD 3-clause license. For the licensing terms see    *
+ * the LICENSE file in the top-level directory.                             *
+ *                                                                          *
+ * SPDX-License-Identifier: BSD-3-Clause                                    *
+ ****************************************************************************/
+
+#ifndef ARBORX_INTERP_DETAILS_SYMMETRIC_PSEUDO_INVERSE_SVD_HPP
+#define ARBORX_INTERP_DETAILS_SYMMETRIC_PSEUDO_INVERSE_SVD_HPP
+
+// NOTE(review): in this copy of the patch the include targets, the template
+// parameter lists and the template arguments (everything between angle
+// brackets) are missing -- restore them from the original commit before
+// applying.
+#include
+#include
+
+#include
+
+namespace ArborX::Interpolation::Details
+{
+
+// Checks that `mat` is a square matrix: the two static_asserts require a
+// Kokkos view of rank 2, and KOKKOS_ASSERT requires both extents to be equal.
+template
+KOKKOS_INLINE_FUNCTION void
+ensureIsSquareMatrix([[maybe_unused]] Matrix const &mat)
+{
+  static_assert(Kokkos::is_view_v, "Matrix must be a view");
+  static_assert(Matrix::rank == 2, "Matrix must be 2D");
+  KOKKOS_ASSERT(mat.extent(0) == mat.extent(1));
+}
+
+// Checks that `mat` is square (see ensureIsSquareMatrix) and asserts that it
+// is symmetric. Symmetry is tested with an exact `!=` comparison of mat(i, j)
+// and mat(j, i) for every i < j; no tolerance is applied.
+template
+KOKKOS_INLINE_FUNCTION void ensureIsSquareSymmetricMatrix(Matrix const &mat)
+{
+  ensureIsSquareMatrix(mat);
+
+  // The scan is wrapped in a lambda that is only invoked inside KOKKOS_ASSERT
+  // (presumably so that nothing is evaluated when the assertion is compiled
+  // out -- TODO confirm)
+  [[maybe_unused]] auto is_symmetric = [&]() {
+    int const size = mat.extent(0);
+    for (int i = 0; i < size; i++)
+      for (int j = i + 1; j < size; j++)
+        if (mat(i, j) != mat(j, i))
+          return false;
+    return true;
+  };
+
+  KOKKOS_ASSERT(is_symmetric());
+}
+
+// Gets the argmax from the upper triangle part of a matrix
+// Only the strictly upper triangle (j > i) is scanned and entries are compared
+// by absolute value. The result is an unnamed struct {max, row, col}; it stays
+// at {0, 0, 0} when no scanned entry has a non-zero absolute value (this
+// includes matrices of size 0 or 1). Because the comparison is a strict `<`,
+// the first maximal entry in row-major order is the one reported.
+template
+KOKKOS_FUNCTION auto argmaxUpperTriangle(Matrix const &mat)
+{
+  ensureIsSquareMatrix(mat);
+  using value_t = typename Matrix::non_const_value_type;
+
+  struct
+  {
+    value_t max = 0;
+    int row = 0;
+    int col = 0;
+  } result;
+
+  int const size = mat.extent(0);
+  for (int i = 0; i < size; i++)
+    for (int j = i + 1; j < size; j++)
+    {
+      value_t val = Kokkos::abs(mat(i, j));
+      if (result.max < val)
+      {
+        result.max = val;
+        result.row = i;
+        result.col = j;
+      }
+    }
+
+  return result;
+}
+
+// Pseudo-inverse of symmetric matrices using SVD
+// We must find U, E (diagonal and positive) and V such that A = U.E.V^T
+// We also suppose, as the input, that A is symmetric, so U = SV where S is
+// a sign matrix (only 1 or -1 on the diagonal, 0 elsewhere).
+// Thus A = U.ES.U^T and A^-1 = U.[ ES^-1 ].U^T
+//
+// Serial kernel working on a single matrix:
+// - A  (in/out): square symmetric matrix, overwritten with its pseudo-inverse
+// - ES (scratch): receives a copy of A and is then diagonalized in place by
+//   successive 2x2 rotations; the rotations only write the diagonal and the
+//   upper triangle of ES
+// - U  (scratch): starts as the identity and accumulates the rotations
+// The three views must be writable, square, of equal size and must share the
+// same value type (checked by the assertions below).
+template
+KOKKOS_FUNCTION void
+symmetricPseudoInverseSVDSerialKernel(AMatrix &A, ESMatrix &ES, UMatrix &U)
+{
+  ensureIsSquareSymmetricMatrix(A);
+  static_assert(!std::is_const_v,
+                "A must be writable");
+  ensureIsSquareMatrix(ES);
+  static_assert(!std::is_const_v,
+                "ES must be writable");
+  ensureIsSquareMatrix(U);
+  static_assert(!std::is_const_v,
+                "U must be writable");
+  static_assert(std::is_same_v &&
+                    std::is_same_v,
+                "All input matrices must have the same value type");
+  KOKKOS_ASSERT(A.extent(0) == ES.extent(0) && ES.extent(0) == U.extent(0));
+  using value_t = typename AMatrix::non_const_value_type;
+  int const size = A.extent(0);
+
+  // We first initialize U as the identity matrix and copy A to ES
+  for (int i = 0; i < size; i++)
+    for (int j = 0; j < size; j++)
+    {
+      U(i, j) = value_t(i == j);
+      ES(i, j) = A(i, j);
+    }
+
+  // NOTE(review): the loop below stops on an absolute threshold (largest
+  // off-diagonal magnitude <= epsilon) that is not scaled by the magnitude of
+  // A -- confirm this is intended for inputs whose entries are all very small.
+  static constexpr value_t epsilon = Kokkos::Experimental::epsilon_v;
+  while (true)
+  {
+    // We have a guarantee that p < q
+    auto const [max_val, p, q] = argmaxUpperTriangle(ES);
+    if (max_val <= epsilon)
+      break;
+
+    auto const a = ES(p, p);
+    auto const b = ES(p, q);
+    auto const c = ES(q, q);
+
+    // Our submatrix is now
+    // +----------+----------+   +---+---+
+    // | ES(p, p) | ES(p, q) |   | a | b |
+    // +----------+----------+ = +---+---+
+    // | ES(q, p) | ES(q, q) |   | b | c |
+    // +----------+----------+   +---+---+
+
+    // Let's compute x, y and theta such that
+    // +---+---+              +---+---+
+    // | a | b |              | x | 0 |
+    // +---+---+ = R(theta) * +---+---+ * R(theta)^T
+    // | b | c |              | 0 | y |
+    // +---+---+              +---+---+
+
+    value_t cos_theta;
+    value_t sin_theta;
+    value_t x;
+    value_t y;
+    if (a == c)
+    {
+      // Equal diagonal entries: cos_theta == sin_theta == sqrt(2) / 2, which
+      // also avoids the division by (a - c) done in the general branch
+      cos_theta = Kokkos::sqrt(value_t(2)) / 2;
+      sin_theta = cos_theta;
+      x = a + b;
+      y = a - b;
+    }
+    else
+    {
+      // General case: cos_theta and sin_theta are obtained from
+      // v = 1 / sqrt(u^2 + 1) through half-angle formulas, sin_theta taking
+      // the sign of u
+      auto const u = (2 * b) / (a - c);
+      auto const v = 1 / Kokkos::sqrt(u * u + 1);
+      cos_theta = Kokkos::sqrt((1 + v) / 2);
+      sin_theta = Kokkos::copysign(Kokkos::sqrt((1 - v) / 2), u);
+      x = (a + c + (a - c) / v) / 2;
+      y = a + c - x;
+    }
+
+    // Now let's compute the following new values for U and ES
+    // ES <- R'(theta)^T . ES . R'(theta)
+    // U  <- U . R'(theta)
+
+    // R'(theta)^T . ES . R'(theta)
+    // The three loops cover i < p, p < i < q and q < i; each one addresses the
+    // affected entries through their upper-triangle position
+    for (int i = 0; i < p; i++)
+    {
+      auto const es_ip = ES(i, p);
+      auto const es_iq = ES(i, q);
+      ES(i, p) = cos_theta * es_ip + sin_theta * es_iq;
+      ES(i, q) = -sin_theta * es_ip + cos_theta * es_iq;
+    }
+    ES(p, p) = x;
+    for (int i = p + 1; i < q; i++)
+    {
+      auto const es_pi = ES(p, i);
+      auto const es_iq = ES(i, q);
+      ES(p, i) = cos_theta * es_pi + sin_theta * es_iq;
+      ES(i, q) = -sin_theta * es_pi + cos_theta * es_iq;
+    }
+    ES(q, q) = y;
+    for (int i = q + 1; i < size; i++)
+    {
+      auto const es_pi = ES(p, i);
+      auto const es_qi = ES(q, i);
+      ES(p, i) = cos_theta * es_pi + sin_theta * es_qi;
+      ES(q, i) = -sin_theta * es_pi + cos_theta * es_qi;
+    }
+    ES(p, q) = 0;
+
+    // U . R'(theta)
+    for (int i = 0; i < size; i++)
+    {
+      auto const u_ip = U(i, p);
+      auto const u_iq = U(i, q);
+      U(i, p) = cos_theta * u_ip + sin_theta * u_iq;
+      U(i, q) = -sin_theta * u_ip + cos_theta * u_iq;
+    }
+  }
+
+  // We compute the max to get a range of the invertible eigenvalues
+  auto max_eigen = epsilon;
+  for (int i = 0; i < size; i++)
+    max_eigen = Kokkos::max(Kokkos::abs(ES(i, i)), max_eigen);
+
+  // We invert the diagonal of ES, except if "0" is found
+  // ("0" meaning a magnitude below max_eigen * epsilon; such entries are set
+  // to 0 instead of being inverted)
+  for (int i = 0; i < size; i++)
+    ES(i, i) = (Kokkos::abs(ES(i, i)) < max_eigen * epsilon) ? 0 : 1 / ES(i, i);
+
+  // Then we fill out A as the pseudo inverse
+  // A(i, j) = sum over k of ES(k, k) * U(i, k) * U(j, k); only the diagonal of
+  // ES is read here
+  for (int i = 0; i < size; i++)
+    for (int j = 0; j < size; j++)
+    {
+      value_t tmp = 0;
+      for (int k = 0; k < size; k++)
+        tmp += ES(k, k) * U(i, k) * U(j, k);
+      A(i, j) = tmp;
+    }
+}
+
+// Replaces, in place, every matrix matrices(i, :, :) by its pseudo-inverse.
+// - space: execution space used for the scratch allocations and the kernel
+// - matrices: writable rank-3 view, accessible from `space`, whose last two
+//   extents are equal (checked with ARBORX_ASSERT)
+// Two scratch views (ESs and Us) with the same type and extents as `matrices`
+// are allocated without initialization, then one serial kernel is run per
+// matrix inside a Kokkos::parallel_for over [0, matrices.extent(0)).
+template
+void symmetricPseudoInverseSVD(ExecutionSpace const &space,
+                               InOutMatrices &matrices)
+{
+  // InOutMatrices is a list of square symmetric matrices (3D view)
+  static_assert(Kokkos::is_view_v, "matrices must be a view");
+  static_assert(!std::is_const_v,
+                "matrices must be writable");
+  static_assert(InOutMatrices::rank == 3,
+                "matrices must be a list of square matrices");
+  static_assert(
+      KokkosExt::is_accessible_from::value,
+      "matrices must be accessible from the execution space");
+
+  ARBORX_ASSERT(matrices.extent(1) == matrices.extent(2)); // Must be square
+
+  InOutMatrices ESs(
+      Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
+                         "ArborX::SymmetricPseudoInverseSVD::ESs"),
+      matrices.extent(0), matrices.extent(1), matrices.extent(2));
+  InOutMatrices Us(Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
+                                      "ArborX::SymmetricPseudoInverseSVD::Us"),
+                   matrices.extent(0), matrices.extent(1), matrices.extent(2));
+
+  Kokkos::parallel_for(
+      "ArborX::SymmetricPseudoInverseSVD::computations",
+      Kokkos::RangePolicy(space, 0, matrices.extent(0)),
+      KOKKOS_LAMBDA(int const i) {
+        // Each iteration works on its own 2D slices of the three 3D views
+        auto A = Kokkos::subview(matrices, i, Kokkos::ALL, Kokkos::ALL);
+        auto ES = Kokkos::subview(ESs, i, Kokkos::ALL, Kokkos::ALL);
+        auto U = Kokkos::subview(Us, i, Kokkos::ALL, Kokkos::ALL);
+        symmetricPseudoInverseSVDSerialKernel(A, ES, U);
+      });
+}
+
+} // namespace ArborX::Interpolation::Details
+
+#endif
\ No newline at end of file
diff --git a/test/ArborX_EnableViewComparison.hpp b/test/ArborX_EnableViewComparison.hpp
index 1097d3e32..26b741298 100644
--- a/test/ArborX_EnableViewComparison.hpp
+++ b/test/ArborX_EnableViewComparison.hpp
@@ -17,8 +17,92 @@
 #include
 
 #include "BoostTest_CUDA_clang_workarounds.hpp"
+#include
 #include
 
+// Common value type of the two compared views; it is the type of the
+// tolerance accepted by arborxViewCheck.
+// NOTE(review): in this copy of the patch the template parameter lists and
+// template arguments (everything between angle brackets) are missing --
+// restore them from the original commit before applying.
+template
+using CommonValueType =
+    typename boost::common_type::type;
+
+// Element-wise comparison of two views, reported through BOOST_TEST.
+// - u, v: views to compare; elements are read on the host through access()
+// - u_name, v_name: names used in the failure messages
+// - tol: tolerance handed to boost::test_tools::tolerance when the compared
+//   values support tolerance-based comparison (defaults to 0)
+// The extents are compared first, one BOOST_TEST per dimension; if any of them
+// differ the element comparison is skipped.
+template
+void arborxViewCheck(U const &u, V const &v, std::string const &u_name,
+                     std::string const &v_name, CommonValueType tol = 0)
+{
+  static constexpr int rank = U::rank;
+
+  bool same_dim_size = true;
+  for (int i = 0; i < rank; i++)
+  {
+    int ui = u.extent_int(i);
+    int vi = v.extent_int(i);
+    BOOST_TEST(ui == vi, "check " << u_name << " == " << v_name
+                                  << " failed at dimension " << i << " size ["
+                                  << ui << " != " << vi << "]");
+    same_dim_size = (ui == vi) && same_dim_size;
+  }
+
+  if (!same_dim_size)
+    return;
+
+  // Multi-dimensional index. Eight entries are always passed to access();
+  // only the first `rank` entries are ever incremented, the others stay at 0.
+  int index[8]{0, 0, 0, 0, 0, 0, 0, 0};
+  // Formats the first `rank` entries of `index` as "(i, j, ...)"
+  auto make_index = [&]() {
+    std::stringstream sstr;
+    sstr << "(";
+    for (int i = 0; i < rank - 1; i++)
+      sstr << index[i] << ", ";
+    sstr << index[rank - 1] << ")";
+    return sstr.str();
+  };
+
+  // The walk ends once the first index reaches extent 0.
+  // NOTE(review): if extent 0 is non-zero but another extent is zero, the
+  // first access() below looks out of range -- confirm whether such views can
+  // reach this function.
+  while (index[0] != u.extent_int(0))
+  {
+    auto uval = u.access(index[0], index[1], index[2], index[3], index[4],
+                         index[5], index[6], index[7]);
+    auto vval = v.access(index[0], index[1], index[2], index[3], index[4],
+                         index[5], index[6], index[7]);
+    std::string index_str = make_index();
+
+    // Can "tol" be used as a tolerance value? If not, go back to regular
+    // comparison
+    if constexpr (boost::math::fpc::tolerance_based::value)
+      BOOST_TEST(uval == vval, u_name << " == " << v_name << " at " << index_str
+                                      << boost::test_tools::tolerance(tol));
+    else
+      BOOST_TEST(uval == vval, "check " << u_name << " == " << v_name
+                                        << " failed at " << index_str << " ["
+                                        << uval << " != " << vval << "]");
+
+    // Advance the last index, then propagate carries towards index[0]
+    index[rank - 1]++;
+    for (int i = rank - 1; i > 0; i--)
+      if (index[i] == u.extent_int(i))
+      {
+        index[i] = 0;
+        index[i - 1]++;
+      }
+  }
+}
+
+// Compares two views of the same rank element by element with tolerance TOL.
+// Both views are first copied to Kokkos::HostSpace mirrors; the names used in
+// the failure messages are the macro argument texts followed by the view
+// labels in parentheses. The body is an immediately invoked lambda, so the
+// macro expands to a single expression.
+#define ARBORX_MDVIEW_TEST_TOL(VIEWA, VIEWB, TOL)                              \
+  [](decltype(VIEWA) const &u, decltype(VIEWB) const &v) {                     \
+    auto view_a = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, u); \
+    auto view_b = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, v); \
+                                                                               \
+    static_assert(unsigned(std::decay_t::rank) ==                              \
+                      unsigned(std::decay_t::rank),                            \
+                  "'" #VIEWA "' and '" #VIEWB "' must have the same rank");    \
+                                                                               \
+    std::string view_a_name(#VIEWA);                                           \
+    view_a_name += " (" + view_a.label() + ")";                                \
+                                                                               \
+    std::string view_b_name(#VIEWB);                                           \
+    view_b_name += " (" + view_b.label() + ")";                                \
+                                                                               \
+    arborxViewCheck(view_a, view_b, view_a_name, view_b_name, TOL);            \
+  }(VIEWA, VIEWB)
+
+// Same comparison with a tolerance of 0
+#define ARBORX_MDVIEW_TEST(VIEWA, VIEWB) ARBORX_MDVIEW_TEST_TOL(VIEWA, VIEWB, 0)
+
 // Enable element-wise comparison for views that are accessible from the host
 namespace boost
 {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7afac2d56..d2a8a3cde 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -236,6 +236,12 @@ target_link_libraries(ArborX_Test_BoostAdapters.exe PRIVATE ArborX Boost::unit_t
 target_compile_definitions(ArborX_Test_BoostAdapters.exe PRIVATE BOOST_TEST_DYN_LINK)
 add_test(NAME ArborX_Test_BoostAdapters COMMAND ArborX_Test_BoostAdapters.exe)
 
+add_executable(ArborX_Test_InterpDetailsSVD.exe tstInterpDetailsSVD.cpp utf_main.cpp)
+target_link_libraries(ArborX_Test_InterpDetailsSVD.exe PRIVATE ArborX Boost::unit_test_framework)
+target_compile_definitions(ArborX_Test_InterpDetailsSVD.exe PRIVATE BOOST_TEST_DYN_LINK)
+target_include_directories(ArborX_Test_InterpDetailsSVD.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+add_test(NAME ArborX_Test_InterpDetailsSVD COMMAND ArborX_Test_InterpDetailsSVD.exe)
+
 if(ARBORX_ENABLE_HEADER_SELF_CONTAINMENT_TESTS)
   add_subdirectory(headers_self_contained)
 endif()
diff --git a/test/tstInterpDetailsSVD.cpp b/test/tstInterpDetailsSVD.cpp
new file mode 100644
index 000000000..e180bf790
--- /dev/null
+++ b/test/tstInterpDetailsSVD.cpp
@@ -0,0 +1,128 @@
+/****************************************************************************
+ * Copyright (c) 2023 by the ArborX authors                                 *
+ * All rights reserved.                                                     *
+ *                                                                          *
+ * This file is part of the ArborX library. ArborX is                       *
+ * distributed under a BSD 3-clause license. For the licensing terms see    *
+ * the LICENSE file in the top-level directory.                             *
+ *                                                                          *
+ * SPDX-License-Identifier: BSD-3-Clause                                    *
+ ****************************************************************************/
+
+// NOTE(review): in this copy of the patch the angle-bracket include targets,
+// the template parameter lists and the template arguments are missing --
+// restore them from the original commit before applying.
+#include "ArborX_EnableDeviceTypes.hpp"
+#include "ArborX_EnableViewComparison.hpp"
+#include
+
+#include "BoostTest_CUDA_clang_workarounds.hpp"
+#include
+
+// Runs one batch of pseudo-inverse checks.
+// - es: execution space used for the copy and the computation
+// - src_arr: M input matrices of size N x N
+// - ref_arr: the M expected pseudo-inverses
+// The inputs are staged in host views, copied into the device view `inv`,
+// inverted in place by symmetricPseudoInverseSVD and finally compared with
+// the reference through ARBORX_MDVIEW_TEST_TOL.
+template
+void makeCase(ES const &es, V const (&src_arr)[M][N][N],
+              V const (&ref_arr)[M][N][N])
+{
+  using device_view = Kokkos::View;
+  using host_view = typename device_view::HostMirror;
+
+  host_view src("Testing::src", M, N, N);
+  host_view ref("Testing::ref", M, N, N);
+  device_view inv("Testing::inv", M, N, N);
+
+  for (int i = 0; i < M; i++)
+    for (int j = 0; j < N; j++)
+      for (int k = 0; k < N; k++)
+      {
+        src(i, j, k) = src_arr[i][j][k];
+        ref(i, j, k) = ref_arr[i][j][k];
+      }
+
+  // `inv` starts as a copy of the inputs and is overwritten with the result
+  Kokkos::deep_copy(es, inv, src);
+  ArborX::Interpolation::Details::symmetricPseudoInverseSVD(es, inv);
+  ARBORX_MDVIEW_TEST_TOL(ref, inv, Kokkos::Experimental::epsilon_v);
+}
+
+// Pseudo-inverses were computed using numpy's "linalg.pinv" solver and
+// simplified to be ratios
+
+// Five 2x2 symmetric matrices; three of them have equal diagonal entries and
+// {{2, 2}, {2, 2}} is singular
+BOOST_AUTO_TEST_CASE_TEMPLATE(pseudo_inv_symm2, DeviceType, ARBORX_DEVICE_TYPES)
+{
+  using ExecutionSpace = typename DeviceType::execution_space;
+  using MemorySpace = typename DeviceType::memory_space;
+  ExecutionSpace space{};
+
+  double mat[5][2][2] = {{{1, 2}, {2, 3}},
+                         {{4, 0}, {0, 4}},
+                         {{0, 5}, {5, 0}},
+                         {{2, 2}, {2, 2}},
+                         {{1, -3}, {-3, 1}}};
+  double inv[5][2][2] = {{{-3, 2}, {2, -1}},
+                         {{1 / 4., 0}, {0, 1 / 4.}},
+                         {{0, 1 / 5.}, {1 / 5., 0}},
+                         {{1 / 8., 1 / 8.}, {1 / 8., 1 / 8.}},
+                         {{-1 / 8., -3 / 8.}, {-3 / 8., -1 / 8.}}};
+  makeCase(space, mat, inv);
+}
+
+// Three 3x3 symmetric matrices, the last one having all entries equal to 5
+BOOST_AUTO_TEST_CASE_TEMPLATE(pseudo_inv_symm3, DeviceType, ARBORX_DEVICE_TYPES)
+{
+  using ExecutionSpace = typename DeviceType::execution_space;
+  using MemorySpace = typename DeviceType::memory_space;
+  ExecutionSpace space{};
+
+  double mat[3][3][3] = {{{2, 2, 3}, {2, 0, 1}, {3, 1, -2}},
+                         {{0, 1, 2}, {1, 2, 3}, {2, 3, 4}},
+                         {{5, 5, 5}, {5, 5, 5}, {5, 5, 5}}};
+  double inv[3][3][3] = {{{-1 / 18., 7 / 18., 2 / 18.},
+                          {7 / 18., -13 / 18., 4 / 18.},
+                          {2 / 18., 4 / 18., -4 / 18.}},
+                         {{-5 / 6., -1 / 6., 3 / 6.},
+                          {-1 / 6., 0, 1 / 6.},
+                          {3 / 6., 1 / 6., -1 / 6.}},
+                         {{1 / 45., 1 / 45., 1 / 45.},
+                          {1 / 45., 1 / 45., 1 / 45.},
+                          {1 / 45., 1 / 45., 1 / 45.}}};
+  makeCase(space, mat, inv);
+}
+
+// One large 128x128 matrix whose entries are all equal
+BOOST_AUTO_TEST_CASE_TEMPLATE(pseudo_inv_symm128, DeviceType,
+                              ARBORX_DEVICE_TYPES)
+{
+  using ExecutionSpace = typename DeviceType::execution_space;
+  using MemorySpace = typename DeviceType::memory_space;
+  ExecutionSpace space{};
+
+  // 128x128 matrix full of -2
+  // eigenvalues are -2*128 and 127 0s
+  // pseudo-inverse is a 128x128 matrix of 1 / (128 * 128 * -2)
+  double mat[1][128][128] = {};
+  double inv[1][128][128] = {};
+  for (int i = 0; i < 128; i++)
+    for (int j = 0; j < 128; j++)
+    {
+      mat[0][i][j] = -2;
+      inv[0][i][j] = 1 / (128 * 128 * -2.);
+    }
+  makeCase(space, mat, inv);
+}
+
+// 1x1 matrices: a non-zero value is expected to be inverted, a zero value to
+// stay zero
+BOOST_AUTO_TEST_CASE_TEMPLATE(pseudo_inv_scalar_like, DeviceType,
+                              ARBORX_DEVICE_TYPES)
+{
+  using ExecutionSpace = typename DeviceType::execution_space;
+  using MemorySpace = typename DeviceType::memory_space;
+  ExecutionSpace space{};
+
+  double mat[2][1][1] = {{{2}}, {{0}}};
+  double inv[2][1][1] = {{{1 / 2.}}, {{0}}};
+  makeCase(space, mat, inv);
+}
+
+// A view with all extents equal to 0: the call must be accepted and the view
+// must still be empty afterwards
+BOOST_AUTO_TEST_CASE_TEMPLATE(pseudo_inv_empty, DeviceType, ARBORX_DEVICE_TYPES)
+{
+  using ExecutionSpace = typename DeviceType::execution_space;
+  using MemorySpace = typename DeviceType::memory_space;
+  ExecutionSpace space{};
+
+  Kokkos::View mat("mat", 0, 0, 0);
+  ArborX::Interpolation::Details::symmetricPseudoInverseSVD(space, mat);
+  BOOST_TEST(mat.size() == 0);
+}
\ No newline at end of file