ginkgo ILU #1684

Open · wants to merge 7 commits into base: develop
6 changes: 3 additions & 3 deletions common/cuda_hip/factorization/ilu_kernels.cpp
@@ -17,8 +17,8 @@ namespace ilu_factorization {


template <typename ValueType, typename IndexType>
void compute_lu(std::shared_ptr<const DefaultExecutor> exec,
matrix::Csr<ValueType, IndexType>* m)
void sparselib_ilu(std::shared_ptr<const DefaultExecutor> exec,
matrix::Csr<ValueType, IndexType>* m)
{
const auto id = exec->get_device_id();
auto handle = exec->get_sparselib_handle();
@@ -55,7 +55,7 @@ void compute_lu(std::shared_ptr<const DefaultExecutor> exec,
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_ILU_COMPUTE_LU_KERNEL);
GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL);


} // namespace ilu_factorization
36 changes: 27 additions & 9 deletions common/cuda_hip/factorization/lu_kernels.cpp
@@ -85,7 +85,7 @@ __global__ __launch_bounds__(default_block_size) void initialize(
}


template <typename ValueType, typename IndexType>
template <bool has_all_fillin, typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void factorize(
const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ cols,
const IndexType* __restrict__ storage_offsets,
@@ -130,8 +130,16 @@ __global__ __launch_bounds__(default_block_size) void factorize(
upper_nz += config::warp_size) {
const auto upper_col = cols[upper_nz];
const auto upper_val = vals[upper_nz];
const auto output_pos = lookup.lookup_unsafe(upper_col) + row_begin;
vals[output_pos] -= scale * upper_val;
if (!has_all_fillin) {
const auto pos = lookup[upper_col];
if (pos != invalid_index<IndexType>()) {
vals[row_begin + pos] -= scale * upper_val;
}
} else {
const auto output_pos =
lookup.lookup_unsafe(upper_col) + row_begin;
vals[output_pos] -= scale * upper_val;
}
}
}
scheduler.mark_ready();
@@ -252,19 +260,29 @@ template <typename ValueType, typename IndexType>
void factorize(std::shared_ptr<const DefaultExecutor> exec,
const IndexType* lookup_offsets, const int64* lookup_descs,
const int32* lookup_storage, const IndexType* diag_idxs,
matrix::Csr<ValueType, IndexType>* factors,
matrix::Csr<ValueType, IndexType>* factors, bool has_all_fillin,
array<int>& tmp_storage)
{
const auto num_rows = factors->get_size()[0];
if (num_rows > 0) {
syncfree_storage storage(exec, tmp_storage, num_rows);
const auto num_blocks =
ceildiv(num_rows, default_block_size / config::warp_size);
kernel::factorize<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(
factors->get_const_row_ptrs(), factors->get_const_col_idxs(),
lookup_offsets, lookup_storage, lookup_descs, diag_idxs,
as_device_type(factors->get_values()), storage, num_rows);
if (!has_all_fillin) {
kernel::factorize<false>
<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
factors->get_const_row_ptrs(),
factors->get_const_col_idxs(), lookup_offsets,
lookup_storage, lookup_descs, diag_idxs,
as_device_type(factors->get_values()), storage, num_rows);
} else {
kernel::factorize<true>
<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
factors->get_const_row_ptrs(),
factors->get_const_col_idxs(), lookup_offsets,
lookup_storage, lookup_descs, diag_idxs,
as_device_type(factors->get_values()), storage, num_rows);
}
}
}

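The template parameter above lets the compiler drop the per-entry fill-in check from the inner loop whenever the pattern is known to be complete, leaving a single runtime branch on the host side. A minimal standalone sketch of that dispatch pattern (placeholder names such as `apply_row_update` and `lookup_safe` are illustrative, not the Ginkgo kernel API):

```cpp
#include <cstdint>

template <bool has_all_fillin, typename Lookup>
void apply_row_update(const Lookup& lookup, std::int64_t col, double* vals,
                      std::int64_t row_begin, double scale, double upper_val)
{
    if constexpr (!has_all_fillin) {
        // Safe path: the target entry may be missing from the sparsity pattern.
        const auto pos = lookup.lookup_safe(col);  // assumed to return -1 on a miss
        if (pos >= 0) {
            vals[row_begin + pos] -= scale * upper_val;
        }
    } else {
        // Unsafe path: the entry is guaranteed to exist, so no check is needed.
        vals[row_begin + lookup.lookup_unsafe(col)] -= scale * upper_val;
    }
}

// Host-side dispatch: the runtime flag is resolved once, outside the hot loop.
template <typename Lookup>
void apply_row_update_any(bool has_all_fillin, const Lookup& lookup,
                          std::int64_t col, double* vals,
                          std::int64_t row_begin, double scale, double upper_val)
{
    if (has_all_fillin) {
        apply_row_update<true>(lookup, col, vals, row_begin, scale, upper_val);
    } else {
        apply_row_update<false>(lookup, col, vals, row_begin, scale, upper_val);
    }
}
```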
2 changes: 1 addition & 1 deletion core/device_hooks/common_kernels.inc.cpp
@@ -873,7 +873,7 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL);
namespace ilu_factorization {


GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ILU_COMPUTE_LU_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL);


} // namespace ilu_factorization
38 changes: 28 additions & 10 deletions core/factorization/factorization.cpp
@@ -31,10 +31,25 @@ GKO_REGISTER_OPERATION(initialize_l, factorization::initialize_l);

template <typename ValueType, typename IndexType>
std::unique_ptr<Factorization<ValueType, IndexType>>
Factorization<ValueType, IndexType>::unpack() const
Factorization<ValueType, IndexType>::unpack(
Review thread on the new unpack() signature:

Member: I think we want to improve the strategy type with 2.0 (or earlier), and the strategies have no impact on triangular solvers, so I think we should avoid adding new strategy-related functionality.

Member (author): I do not think 2.0 will happen soon. If we do not want to introduce this here, I can only create the factors through the original code path instead of unpacking them. @nbeams it should be fine, right? The unpacking code is the same.

Collaborator: You mean reverting back to before 57c0051? It should be fine, as far as I know. I tested it before that commit, anyway.

Member (author): Yes.

std::shared_ptr<typename matrix_type::strategy_type> lower_factor_strategy,
std::shared_ptr<typename matrix_type::strategy_type> upper_factor_strategy)
const
{
const auto exec = this->get_executor();
const auto size = this->get_size();
auto create_matrix = [](auto exec, auto size, auto vals, auto col_idxs,
auto row_ptrs, auto strategy) {
if (strategy == nullptr) {
return matrix_type::create(exec, size, std::move(vals),
std::move(col_idxs),
std::move(row_ptrs));
} else {
return matrix_type::create(exec, size, std::move(vals),
std::move(col_idxs), std::move(row_ptrs),
strategy);
}
};
switch (this->get_storage_type()) {
case storage_type::empty:
GKO_NOT_SUPPORTED(nullptr);
@@ -53,12 +68,14 @@ Factorization<ValueType, IndexType>::unpack() const
const auto u_nnz =
static_cast<size_type>(get_element(u_row_ptrs, size[0]));
// create matrices
auto l_mtx = matrix_type::create(
exec, size, array<value_type>{exec, l_nnz},
array<index_type>{exec, l_nnz}, std::move(l_row_ptrs));
auto u_mtx = matrix_type::create(
exec, size, array<value_type>{exec, u_nnz},
array<index_type>{exec, u_nnz}, std::move(u_row_ptrs));
auto l_mtx =
create_matrix(exec, size, array<value_type>{exec, l_nnz},
array<index_type>{exec, l_nnz}, std::move(l_row_ptrs),
lower_factor_strategy);
auto u_mtx =
create_matrix(exec, size, array<value_type>{exec, u_nnz},
array<index_type>{exec, u_nnz}, std::move(u_row_ptrs),
upper_factor_strategy);
// fill matrices
exec->run(make_initialize_l_u(mtx.get(), l_mtx.get(), u_mtx.get()));
return create_from_composition(
@@ -72,9 +89,10 @@ Factorization<ValueType, IndexType>::unpack() const
const auto l_nnz =
static_cast<size_type>(get_element(l_row_ptrs, size[0]));
// create matrices
auto l_mtx = matrix_type::create(
exec, size, array<value_type>{exec, l_nnz},
array<index_type>{exec, l_nnz}, std::move(l_row_ptrs));
auto l_mtx =
create_matrix(exec, size, array<value_type>{exec, l_nnz},
array<index_type>{exec, l_nnz}, std::move(l_row_ptrs),
lower_factor_strategy);
// fill matrices
exec->run(make_initialize_l(mtx.get(), l_mtx.get(), false));
auto u_mtx = l_mtx->conj_transpose();
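For reference, a hedged sketch of how a caller could use the extended unpack() to forward CSR strategies to the unpacked factors; it assumes an existing executor `exec` and a `std::shared_ptr` to a `gko::matrix::Csr<double, int>` named `mtx`, and the merge_path strategy choice is purely illustrative:

```cpp
using Csr = gko::matrix::Csr<double, int>;
// Generate a combined LU factorization, then unpack it into separate L and U
// factors whose CSR strategy matches the caller's triangular-solver preference.
auto lu = gko::experimental::factorization::Lu<double, int>::build()
              .on(exec)
              ->generate(mtx);
auto factors = lu->unpack(std::make_shared<Csr::merge_path>(),
                          std::make_shared<Csr::merge_path>());
auto l_factor = factors->get_lower_factor();
auto u_factor = factors->get_upper_factor();
```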
38 changes: 36 additions & 2 deletions core/factorization/ilu.cpp
@@ -10,6 +10,8 @@
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/config/config.hpp>
#include <ginkgo/core/config/registry.hpp>
#include <ginkgo/core/factorization/lu.hpp>
#include <ginkgo/core/matrix/sparsity_csr.hpp>

#include "core/base/array_access.hpp"
#include "core/config/config_helper.hpp"
Expand All @@ -24,7 +26,7 @@ namespace ilu_factorization {
namespace {


GKO_REGISTER_OPERATION(compute_ilu, ilu_factorization::compute_lu);
GKO_REGISTER_OPERATION(compute_ilu, ilu_factorization::sparselib_ilu);
GKO_REGISTER_OPERATION(add_diagonal_elements,
factorization::add_diagonal_elements);
GKO_REGISTER_OPERATION(initialize_row_ptrs_l_u,
@@ -52,6 +54,17 @@ Ilu<ValueType, IndexType>::parse(const config::pnode& config,
if (auto& obj = config.get("skip_sorting")) {
params.with_skip_sorting(config::get_value<bool>(obj));
}
if (auto& obj = config.get("algorithm")) {
using gko::factorization::factorize_algorithm;
auto str = obj.get_string();
if (str == "sparselib") {
params.with_algorithm(factorize_algorithm::sparselib);
} else if (str == "syncfree") {
params.with_algorithm(factorize_algorithm::syncfree);
} else {
GKO_INVALID_CONFIG_VALUE("algorithm", str);
}
}
return params;
}

@@ -66,7 +79,7 @@ std::unique_ptr<Composition<ValueType>> Ilu<ValueType, IndexType>::generate_l_u(

// Converts the system matrix to CSR.
// Throws an exception if it is not convertible.
auto local_system_matrix = matrix_type::create(exec);
auto local_system_matrix = share(matrix_type::create(exec));
as<ConvertibleTo<matrix_type>>(system_matrix.get())
->convert_to(local_system_matrix);

@@ -79,6 +92,27 @@ std::unique_ptr<Composition<ValueType>> Ilu<ValueType, IndexType>::generate_l_u(
local_system_matrix.get(), false));

// Compute LU factorization
if (std::dynamic_pointer_cast<const OmpExecutor>(exec) ||
parameters_.algorithm == factorize_algorithm::syncfree) {
auto sparsity =
share(gko::matrix::SparsityCsr<ValueType, IndexType>::create_const(
exec, local_system_matrix->get_size(),
make_const_array_view(
exec, local_system_matrix->get_num_stored_elements(),
local_system_matrix->get_const_col_idxs()),
make_const_array_view(
exec, local_system_matrix->get_size()[0] + 1,
local_system_matrix->get_const_row_ptrs())));
auto unpack =
gko::experimental::factorization::Lu<ValueType, IndexType>::build()
.with_has_all_fillin(false)
.with_symbolic_factorization(sparsity)
.on(exec)
->generate(local_system_matrix)
->unpack(parameters_.l_strategy, parameters_.u_strategy);
return Composition<ValueType>::create(unpack->get_lower_factor(),
unpack->get_upper_factor());
}
exec->run(ilu_factorization::make_compute_ilu(local_system_matrix.get()));

// Separate L and U factors: nnz
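A hedged usage sketch for the new algorithm knob, assuming an existing executor `exec` and a `std::shared_ptr` to a `gko::matrix::Csr<double, int>` named `mtx`; per the branch above, OmpExecutor takes the syncfree-style path regardless of the parameter:

```cpp
// Selecting syncfree routes the ILU(0) numeric factorization through the Lu
// kernels with has_all_fillin = false instead of the vendor sparselib routine.
auto ilu_factory =
    gko::factorization::Ilu<double, int>::build()
        .with_algorithm(gko::factorization::factorize_algorithm::syncfree)
        .on(exec);
auto ilu = ilu_factory->generate(mtx);  // Composition holding the L and U factors
```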
8 changes: 4 additions & 4 deletions core/factorization/ilu_kernels.hpp
@@ -20,14 +20,14 @@ namespace gko {
namespace kernels {


#define GKO_DECLARE_ILU_COMPUTE_LU_KERNEL(ValueType, IndexType) \
void compute_lu(std::shared_ptr<const DefaultExecutor> exec, \
matrix::Csr<ValueType, IndexType>* system_matrix)
#define GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL(ValueType, IndexType) \
void sparselib_ilu(std::shared_ptr<const DefaultExecutor> exec, \
matrix::Csr<ValueType, IndexType>* system_matrix)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_ILU_COMPUTE_LU_KERNEL(ValueType, IndexType)
GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL(ValueType, IndexType)


GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(ilu_factorization,
15 changes: 7 additions & 8 deletions core/factorization/lu.cpp
@@ -69,6 +69,9 @@ Lu<ValueType, IndexType>::parse(const config::pnode& config,
if (auto& obj = config.get("skip_sorting")) {
params.with_skip_sorting(config::get_value<bool>(obj));
}
if (auto& obj = config.get("has_all_fillin")) {
params.with_has_all_fillin(config::get_value<bool>(obj));
}

return params;
}
@@ -151,19 +154,15 @@ std::unique_ptr<LinOp> Lu<ValueType, IndexType>::generate_impl(
factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows,
allowed_sparsity, storage_offsets.get_const_data(),
row_descs.get_data(), storage.get_data()));
// initialize factors
exec->run(make_fill_array(factors->get_values(),
factors->get_num_stored_elements(),
zero<ValueType>()));
exec->run(make_initialize(
mtx.get(), storage_offsets.get_const_data(), row_descs.get_const_data(),
storage.get_const_data(), diag_idxs.get_data(), factors.get()));
// run numerical factorization
array<int> tmp{exec};
exec->run(make_factorize(storage_offsets.get_const_data(),
row_descs.get_const_data(),
storage.get_const_data(),
diag_idxs.get_const_data(), factors.get(), tmp));
exec->run(make_factorize(
storage_offsets.get_const_data(), row_descs.get_const_data(),
storage.get_const_data(), diag_idxs.get_const_data(), factors.get(),
parameters_.has_all_fillin, tmp));
return factorization_type::create_from_combined_lu(std::move(factors));
}

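A hedged sketch of setting the new parameter directly on the Lu factory; it assumes an existing executor `exec`, a `std::shared_ptr` CSR matrix `mtx`, and a shared_ptr to a SparsityCsr `pattern` that may lack some of the fill-in a full symbolic factorization would produce:

```cpp
// With has_all_fillin = false, updates that fall outside the provided sparsity
// pattern are skipped via the safe lookup instead of being assumed to exist.
auto lu = gko::experimental::factorization::Lu<double, int>::build()
              .with_symbolic_factorization(pattern)
              .with_has_all_fillin(false)
              .on(exec)
              ->generate(mtx);
```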
2 changes: 1 addition & 1 deletion core/factorization/lu_kernels.hpp
@@ -33,7 +33,7 @@ namespace kernels {
const IndexType* lookup_offsets, const int64* lookup_descs, \
const int32* lookup_storage, const IndexType* diag_idxs, \
matrix::Csr<ValueType, IndexType>* factors, \
array<int>& tmp_storage)
bool has_all_fillin, array<int>& tmp_storage)


#define GKO_DECLARE_LU_SYMMETRIC_FACTORIZE_SIMPLE(IndexType) \
14 changes: 9 additions & 5 deletions core/matrix/csr_lookup.hpp
@@ -183,7 +183,8 @@ struct device_sparsity_lookup {
result = lookup_search_unsafe(col);
break;
}
GKO_ASSERT(local_cols[result] == col);
GKO_ASSERT(result >= 0 && result < row_nnz &&
local_cols[result] == col);
return result;
}

Expand Down Expand Up @@ -230,7 +231,8 @@ struct device_sparsity_lookup {
const auto out_idx =
block_bases[block] +
gko::detail::popcount(block_bitmaps[block] & prefix_mask);
GKO_ASSERT(local_cols[out_idx] == col);
GKO_ASSERT(out_idx >= 0 && out_idx < row_nnz &&
local_cols[out_idx] == col);
return out_idx;
}

Expand Down Expand Up @@ -262,15 +264,17 @@ struct device_sparsity_lookup {
(static_cast<unsigned_index_type>(col) * hash_param) % hashmap_size;
GKO_ASSERT(hashmap[hash] >= 0);
GKO_ASSERT(hashmap[hash] < row_nnz);
while (local_cols[hashmap[hash]] != col) {
auto out_idx = hashmap[hash];
// linear probing with invalid_index sentinel to avoid infinite loop
while (out_idx >= 0 && local_cols[out_idx] != col) {
hash++;
if (hash >= hashmap_size) {
hash = 0;
}
GKO_ASSERT(hashmap[hash] >= 0);
out_idx = hashmap[hash];
GKO_ASSERT(hashmap[hash] < row_nnz);
}
const auto out_idx = hashmap[hash];
// out_idx is either correct or invalid_index, the hashmap sentinel
Review thread on lines +267 to +277:

Collaborator: I am confused here. With this change, lookup_hash_unsafe is the same as lookup_hash (except for one extra assertion in the unsafe version -- the >= 0 check -- and I am not sure why). So lookup_hash_unsafe isn't really unsafe anymore? Why do we need any changes to the unsafe version at all, when the factorization switches to the safe lookup if requested (lookup[col] rather than lookup.lookup_unsafe(col))?

Member (author): lookup_hash_unsafe is mostly equal to lookup_hash, up to the one assert you mentioned. In my mind, calling lookup_unsafe means you assume it always returns a valid value without any checks, while calling lookup means you always check whether the result is valid. I keep both because of the extra assert: the unsafe version should report an invalid index in debug mode, since the result may not be checked in the calling code. I changed the unsafe lookup because the infinite loop makes debugging difficult and this change does not affect performance. BTW, I do not think there is a performance difference between the safe and unsafe versions, but I will keep this behavior for now; then we can see whether to use only the safe lookup.

return out_idx;
}

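To make the termination argument from the thread concrete, here is a small standalone sketch of linear probing with an invalid_index sentinel (illustrative only, not the Ginkgo implementation; it assumes the hashmap is oversized so empty sentinel slots always exist):

```cpp
#include <cstdint>
#include <vector>

constexpr std::int32_t invalid_index = -1;

// Returns the position of `col` within the row, or invalid_index on a miss.
std::int32_t lookup_hash(const std::vector<std::int32_t>& hashmap,
                         const std::vector<std::int32_t>& local_cols,
                         std::int32_t col, std::uint32_t hash_param)
{
    const auto hashmap_size = static_cast<std::uint32_t>(hashmap.size());
    auto hash = (static_cast<std::uint32_t>(col) * hash_param) % hashmap_size;
    auto out_idx = hashmap[hash];
    // Linear probing: stop on a match or on the invalid_index sentinel,
    // so a missing column can never cause an endless probe.
    while (out_idx >= 0 && local_cols[out_idx] != col) {
        hash = (hash + 1 == hashmap_size) ? std::uint32_t{0} : hash + 1;
        out_idx = hashmap[hash];
    }
    return out_idx;
}
```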
6 changes: 6 additions & 0 deletions core/test/config/factorization.cpp
@@ -111,6 +111,8 @@ struct Ilu : FactorizationConfigTest<gko::factorization::Ilu<float, int>,
typename gko::matrix::Csr<float, int>::sparselib>());
config_map["skip_sorting"] = pnode{true};
param.with_skip_sorting(true);
config_map["algorithm"] = pnode{"syncfree"};
param.with_algorithm(gko::factorization::factorize_algorithm::syncfree);
}

template <typename AnswerType>
Expand All @@ -122,6 +124,7 @@ struct Ilu : FactorizationConfigTest<gko::factorization::Ilu<float, int>,
check_strategy(res_param.l_strategy, ans_param.l_strategy);
check_strategy(res_param.u_strategy, ans_param.u_strategy);
ASSERT_EQ(res_param.skip_sorting, ans_param.skip_sorting);
ASSERT_EQ(res_param.algorithm, ans_param.algorithm);
}
};

Expand Down Expand Up @@ -178,6 +181,8 @@ struct Lu : FactorizationConfigTest<
gko::experimental::factorization::symbolic_type::near_symmetric);
config_map["skip_sorting"] = pnode{true};
param.with_skip_sorting(true);
config_map["has_all_fillin"] = pnode{false};
param.with_has_all_fillin(false);
}

template <typename AnswerType>
Expand All @@ -190,6 +195,7 @@ struct Lu : FactorizationConfigTest<
ans_param.symbolic_factorization);
ASSERT_EQ(res_param.symbolic_algorithm, ans_param.symbolic_algorithm);
ASSERT_EQ(res_param.skip_sorting, ans_param.skip_sorting);
ASSERT_EQ(res_param.has_all_fillin, ans_param.has_all_fillin);
}
};

6 changes: 3 additions & 3 deletions dpcpp/factorization/ilu_kernels.dp.cpp
@@ -17,11 +17,11 @@ namespace ilu_factorization {


template <typename ValueType, typename IndexType>
void compute_lu(std::shared_ptr<const DefaultExecutor> exec,
matrix::Csr<ValueType, IndexType>* m) GKO_NOT_IMPLEMENTED;
void sparselib_ilu(std::shared_ptr<const DefaultExecutor> exec,
matrix::Csr<ValueType, IndexType>* m) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_ILU_COMPUTE_LU_KERNEL);
GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL);


} // namespace ilu_factorization
2 changes: 1 addition & 1 deletion dpcpp/factorization/lu_kernels.dp.cpp
@@ -39,7 +39,7 @@ template <typename ValueType, typename IndexType>
void factorize(std::shared_ptr<const DefaultExecutor> exec,
const IndexType* lookup_offsets, const int64* lookup_descs,
const int32* lookup_storage, const IndexType* diag_idxs,
matrix::Csr<ValueType, IndexType>* factors,
matrix::Csr<ValueType, IndexType>* factors, bool has_all_fillin,
array<int>& tmp_storage) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_FACTORIZE);