Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RBILUK: Use new KK::sptrsv block support instead of KK::trsv #13540

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix

//! The inverse of the diagonal
Teuchos::RCP<block_crs_matrix_type> D_block_inverse_;

Kokkos::View<scalar_type*, typename values_device_view_type::device_type> tmp_;
};


Expand Down
90 changes: 58 additions & 32 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "Ifpack2_LocalFilter.hpp"
#include "Ifpack2_Utilities.hpp"
#include "Ifpack2_RILUK.hpp"
#include "KokkosSparse_trsv.hpp"
#include "KokkosSparse_sptrsv.hpp"

//#define IFPACK2_RBILUK_INITIAL
//#define IFPACK2_RBILUK_INITIAL_NOKK
Expand Down Expand Up @@ -194,6 +194,11 @@ void RBILUK<MatrixType>::allocate_L_and_U_blocks ()
U_block_->setAllToScalar (STM::zero ());
D_block_->setAllToScalar (STM::zero ());

// Allocate temp space for apply
if (this->isKokkosKernelsSpiluk_) {
const auto numRows = L_block_->getLocalNumRows();
tmp_ = decltype(tmp_)("RBILUK::tmp_", numRows * blockSize_);
}
}
this->isAllocated_ = true;
}
Expand Down Expand Up @@ -1070,7 +1075,7 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t
else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm without KokkosKernels. ");
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
}
}
else { // alpha != 1 or beta != 0
Expand All @@ -1088,42 +1093,63 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t
}
}
else {
// Kokkos kernels impl. For now, the only block trsv available is Sequential
// and must be done on host.
using row_map_type = typename local_matrix_host_type::row_map_type;
using index_type = typename local_matrix_host_type::index_type;
using values_type = typename local_matrix_host_type::values_type;

auto X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
auto Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);

auto L_row_ptrs_host = L_block_->getCrsGraph().getLocalRowPtrsHost();
auto L_entries_host = L_block_->getCrsGraph().getLocalIndicesHost();
auto U_row_ptrs_host = U_block_->getCrsGraph().getLocalRowPtrsHost();
auto U_entries_host = U_block_->getCrsGraph().getLocalIndicesHost();
auto L_values_host = L_block_->getValuesHost();
auto U_values_host = U_block_->getValuesHost();

row_map_type* L_row_ptrs_host_ri = reinterpret_cast<row_map_type*>(&L_row_ptrs_host);
index_type* L_entries_host_ri = reinterpret_cast<index_type*>(&L_entries_host);
row_map_type* U_row_ptrs_host_ri = reinterpret_cast<row_map_type*>(&U_row_ptrs_host);
index_type* U_entries_host_ri = reinterpret_cast<index_type*>(&U_entries_host);
values_type* L_values_host_ri = reinterpret_cast<values_type*>(&L_values_host);
values_type* U_values_host_ri = reinterpret_cast<values_type*>(&U_values_host);
// Kokkos kernels impl.
auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);

auto lclL = L_block_->getLocalMatrixDevice();
auto L_rowmap = lclL.graph.row_map;
auto L_entries = lclL.graph.entries;
auto L_values = lclL.values;

auto lclU = U_block_->getLocalMatrixDevice();
auto U_rowmap = lclU.graph.row_map;
auto U_entries = lclU.graph.entries;
auto U_values = lclU.values;

const auto numRows = L_block_->getLocalNumRows();
local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries_host.size(), *L_values_host_ri, *L_row_ptrs_host_ri, *L_entries_host_ri, blockSize_);
local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries_host.size(), *U_values_host_ri, *U_row_ptrs_host_ri, *U_entries_host_ri, blockSize_);
local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries.size(), L_values, L_rowmap, L_entries, blockSize_);
local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries.size(), U_values, U_rowmap, U_entries, blockSize_);

if (mode == Teuchos::NO_TRANS) {
KokkosSparse::trsv("L", "N", "N", L_block_local_host, X_view, Y_view);
KokkosSparse::trsv("U", "N", "N", U_block_local_host, Y_view, Y_view);
KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_RP;
{
KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
Kokkos::fence();

const LO numVecs = X.getNumVectors();
for (LO vec = 0; vec < numVecs; ++vec) {
auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
}
Kokkos::fence();

KernelHandle_->destroy_sptrsv_handle();
}

{
KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
Kokkos::fence();

const LO numVecs = X.getNumVectors();
for (LO vec = 0; vec < numVecs; ++vec) {
auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
}
Kokkos::fence();

KernelHandle_->destroy_sptrsv_handle();
}

KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
}
else {
KokkosSparse::trsv("U", "T", "N", U_block_local_host, X_view, Y_view);
KokkosSparse::trsv("L", "T", "N", L_block_local_host, Y_view, Y_view);
KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
}

//Y.getWrappedDualView().sync();
Expand Down
Loading