diff --git a/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc b/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc index 540478ba427..f03fc4a59b3 100644 --- a/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc @@ -47,7 +47,7 @@ __global__ __launch_bounds__(default_block_size) void match_edge_kernel( } auto neighbor = strongest_neighbor_vals[tidx]; if (neighbor != -1 && strongest_neighbor_vals[neighbor] == tidx && - tidx < neighbor) { + tidx <= neighbor) { // Use the smaller index as agg point agg_vals[tidx] = tidx; agg_vals[neighbor] = tidx; diff --git a/core/multigrid/amgx_pgm.cpp b/core/multigrid/amgx_pgm.cpp index 83421932016..e38d10d27c3 100644 --- a/core/multigrid/amgx_pgm.cpp +++ b/core/multigrid/amgx_pgm.cpp @@ -93,8 +93,8 @@ void AmgxPgm::generate() // Initial agg = -1 exec->run(amgx_pgm::make_fill_array(agg_.get_data(), agg_.get_num_elems(), -one())); - IndexType num_unagg{0}; - IndexType num_unagg_prev{0}; + IndexType num_unagg = num_rows; + IndexType num_unagg_prev = num_rows; // TODO: if mtx is a hermitian matrix, weight_mtx = abs(mtx) // compute weight_mtx = (abs(mtx) + abs(mtx'))/2; auto abs_mtx = amgxpgm_op->compute_absolute(); @@ -128,9 +128,11 @@ void AmgxPgm::generate() // copy the agg to intermediate_agg intermediate_agg = agg_; } - // Assign all left points - exec->run(amgx_pgm::make_assign_to_exist_agg(weight_mtx.get(), diag.get(), - agg_, intermediate_agg)); + if (num_unagg != 0) { + // Assign all left points + exec->run(amgx_pgm::make_assign_to_exist_agg( + weight_mtx.get(), diag.get(), agg_, intermediate_agg)); + } IndexType num_agg = 0; // Renumber the index exec->run(amgx_pgm::make_renumber(agg_, &num_agg)); @@ -153,9 +155,9 @@ void AmgxPgm::generate() // TODO: use less memory footprint to improve it auto coarse_matrix = share(matrix_type::create(exec, gko::dim<2>{coarse_dim, coarse_dim})); - auto tmp = matrix_type::create(exec, gko::dim<2>{coarse_dim, fine_dim}); - restrict_op->apply(amgxpgm_op, tmp.get()); - tmp->apply(prolong_op.get(), coarse_matrix.get()); + auto tmp = matrix_type::create(exec, gko::dim<2>{fine_dim, coarse_dim}); + amgxpgm_op->apply(prolong_op.get(), tmp.get()); + restrict_op->apply(tmp.get(), coarse_matrix.get()); this->set_multigrid_level(prolong_op, coarse_matrix, restrict_op); } diff --git a/cuda/preconditioner/jacobi_kernels.cu b/cuda/preconditioner/jacobi_kernels.cu index 66b8b7a13eb..77303eb16d7 100644 --- a/cuda/preconditioner/jacobi_kernels.cu +++ b/cuda/preconditioner/jacobi_kernels.cu @@ -65,8 +65,6 @@ constexpr int default_num_warps = 32; // current GPUs have at most 84 SMs) constexpr int default_grid_size = 32 * 32 * 128; -constexpr int default_block_size = 512; - #include "common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc" diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 07672359615..493192c8630 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -127,8 +127,11 @@ class AmgxPgm : public ::testing::Test { int nrhs = 3; agg = gen_agg_array(m, n); - unfinished_agg = gen_array(m, -1, n - 1); - strongest_neighbor = gen_array(m, 0, n - 1); + // only use 0 ~ n-2 and ensure the end isolated and not yet finished + unfinished_agg = gen_array(m, -1, n - 2); + unfinished_agg.get_data()[n - 1] = -1; + strongest_neighbor = gen_array(m, 0, n - 2); + strongest_neighbor.get_data()[n - 1] = n - 1; coarse_vector = gen_mtx(n, nrhs); fine_vector = gen_mtx(m, nrhs); auto weight = gen_mtx(m, m); diff --git a/hip/preconditioner/jacobi_kernels.hip.cpp b/hip/preconditioner/jacobi_kernels.hip.cpp index f6f4445326e..40e3aff69c5 100644 --- a/hip/preconditioner/jacobi_kernels.hip.cpp +++ b/hip/preconditioner/jacobi_kernels.hip.cpp @@ -72,7 +72,6 @@ constexpr int default_num_warps = 32; // current GPUs have at most 84 SMs) constexpr int default_grid_size = 32 * 32 * 128; -constexpr int default_block_size = 512; #include "common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc" diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp index 879eae1876e..923e8e0765a 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -126,8 +126,11 @@ class AmgxPgm : public ::testing::Test { int nrhs = 3; agg = gen_agg_array(m, n); - unfinished_agg = gen_array(m, -1, n - 1); - strongest_neighbor = gen_array(m, 0, n - 1); + // only use 0 ~ n-2 and ensure the end isolated and not yet finished + unfinished_agg = gen_array(m, -1, n - 2); + unfinished_agg.get_data()[n - 1] = -1; + strongest_neighbor = gen_array(m, 0, n - 2); + strongest_neighbor.get_data()[n - 1] = n - 1; coarse_vector = gen_mtx(n, nrhs); fine_vector = gen_mtx(m, nrhs); auto weight = gen_mtx(m, m); diff --git a/include/ginkgo/core/multigrid/amgx_pgm.hpp b/include/ginkgo/core/multigrid/amgx_pgm.hpp index 5c5b36ebf7c..4afc049aa60 100644 --- a/include/ginkgo/core/multigrid/amgx_pgm.hpp +++ b/include/ginkgo/core/multigrid/amgx_pgm.hpp @@ -121,7 +121,7 @@ class AmgxPgm : public EnableLinOp>, * NVIDIA AMGX Reference Manual (October 2017, API Version 2, * https://github.com/NVIDIA/AMGX/blob/main/doc/AMGX_Reference.pdf). */ - unsigned GKO_FACTORY_PARAMETER(max_iterations, 15u); + unsigned GKO_FACTORY_PARAMETER_SCALAR(max_iterations, 15u); /** * The maximum ratio of unassigned number, which is valid in the @@ -129,7 +129,7 @@ class AmgxPgm : public EnableLinOp>, * Reference Manual (October 2017, API Version 2, * https://github.com/NVIDIA/AMGX/blob/main/doc/AMGX_Reference.pdf). */ - double GKO_FACTORY_PARAMETER(max_unassigned_ratio, 0.05); + double GKO_FACTORY_PARAMETER_SCALAR(max_unassigned_ratio, 0.05); /** * Use the deterministic assign_to_exist_agg method or not. @@ -138,7 +138,7 @@ class AmgxPgm : public EnableLinOp>, * from the same matrix. Otherwise, the aggregated group might be * different depending on the execution ordering. */ - bool GKO_FACTORY_PARAMETER(deterministic, false); + bool GKO_FACTORY_PARAMETER_SCALAR(deterministic, false); }; GKO_ENABLE_LIN_OP_FACTORY(AmgxPgm, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/multigrid/multigrid_level.hpp b/include/ginkgo/core/multigrid/multigrid_level.hpp index e86e51bc53b..b170b01f0a7 100644 --- a/include/ginkgo/core/multigrid/multigrid_level.hpp +++ b/include/ginkgo/core/multigrid/multigrid_level.hpp @@ -46,6 +46,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { +/** + * @brief The multigrid components namespace. + * + * @ingroup gko + */ namespace multigrid { diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index 07e84630d85..94c7f9b983d 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -78,7 +78,7 @@ void match_edge(std::shared_ptr exec, if (agg_vals[i] == -1) { auto neighbor = strongest_neighbor_vals[i]; if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i && - i < neighbor) { + i <= neighbor) { // Use the smaller index as agg point agg_vals[i] = i; agg_vals[neighbor] = i; diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index 59b40657703..68fdd9ca893 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -117,8 +117,11 @@ class AmgxPgm : public ::testing::Test { int nrhs = 3; agg = gen_agg_array(m, n); - unfinished_agg = gen_array(m, -1, n - 1); - strongest_neighbor = gen_array(m, 0, n - 1); + // only use 0 ~ n-2 and ensure the end isolated and not yet finished + unfinished_agg = gen_array(m, -1, n - 2); + unfinished_agg.get_data()[n - 1] = -1; + strongest_neighbor = gen_array(m, 0, n - 2); + strongest_neighbor.get_data()[n - 1] = n - 1; coarse_vector = gen_mtx(n, nrhs); fine_vector = gen_mtx(m, nrhs); auto weight = gen_mtx(m, m); diff --git a/reference/multigrid/amgx_pgm_kernels.cpp b/reference/multigrid/amgx_pgm_kernels.cpp index 5ad1ff8d108..bd696164c4f 100644 --- a/reference/multigrid/amgx_pgm_kernels.cpp +++ b/reference/multigrid/amgx_pgm_kernels.cpp @@ -74,7 +74,7 @@ void match_edge(std::shared_ptr exec, auto neighbor = strongest_neighbor_vals[i]; // i < neighbor always holds when neighbor is not -1 if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i && - i < neighbor) { + i <= neighbor) { // Use the smaller index as agg point agg_vals[i] = i; agg_vals[neighbor] = i; diff --git a/reference/test/multigrid/amgx_pgm_kernels.cpp b/reference/test/multigrid/amgx_pgm_kernels.cpp index c0270312d6c..89efb56c890 100644 --- a/reference/test/multigrid/amgx_pgm_kernels.cpp +++ b/reference/test/multigrid/amgx_pgm_kernels.cpp @@ -304,7 +304,8 @@ TYPED_TEST(AmgxPgm, MatchEdge) snb_val[1] = 0; snb_val[2] = 0; snb_val[3] = 1; - snb_val[4] = 2; + // isolated item + snb_val[4] = 4; gko::kernels::reference::amgx_pgm::match_edge(this->exec, snb, agg); @@ -312,7 +313,8 @@ TYPED_TEST(AmgxPgm, MatchEdge) ASSERT_EQ(agg_val[1], -1); ASSERT_EQ(agg_val[2], 0); ASSERT_EQ(agg_val[3], -1); - ASSERT_EQ(agg_val[4], -1); + // isolated item should be self aggregation + ASSERT_EQ(agg_val[4], 4); }