Add a Ell batched matrix format, kernels and tests #1416

Merged
merged 18 commits into from
Oct 17, 2023
78 changes: 78 additions & 0 deletions common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc
@@ -0,0 +1,78 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2023, the Ginkgo authors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

template <typename ValueType, typename IndexType>
void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
const batch::matrix::Ell<ValueType, IndexType>* mat,
const batch::MultiVector<ValueType>* b,
batch::MultiVector<ValueType>* x)
{
const auto num_blocks = mat->get_num_batch_items();
const auto b_ub = get_batch_struct(b);
const auto x_ub = get_batch_struct(x);
const auto mat_ub = get_batch_struct(mat);
if (b->get_common_size()[1] > 1) {
GKO_NOT_IMPLEMENTED;
}
simple_apply_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(mat_ub, b_ub, x_ub);
}


GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL);


template <typename ValueType, typename IndexType>
void advanced_apply(std::shared_ptr<const DefaultExecutor> exec,
const batch::MultiVector<ValueType>* alpha,
const batch::matrix::Ell<ValueType, IndexType>* mat,
const batch::MultiVector<ValueType>* b,
const batch::MultiVector<ValueType>* beta,
batch::MultiVector<ValueType>* x)
{
const auto num_blocks = mat->get_num_batch_items();
const auto b_ub = get_batch_struct(b);
const auto x_ub = get_batch_struct(x);
const auto mat_ub = get_batch_struct(mat);
const auto alpha_ub = get_batch_struct(alpha);
const auto beta_ub = get_batch_struct(beta);
if (b->get_common_size()[1] > 1) {
GKO_NOT_IMPLEMENTED;
}
advanced_apply_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(alpha_ub, mat_ub, b_ub,
beta_ub, x_ub);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL);
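
For orientation, here is a minimal host-side sketch of how the new batched Ell format is meant to be exercised by the launchers above. The factory and apply signatures (create() taking a batch_dim<2> and a number of stored elements per row, apply() taking batched multi-vectors) are assumptions modeled on the existing batch::matrix::Dense interface, not taken from this diff.

    // Hypothetical usage sketch; create()/apply() signatures are assumed, not
    // verified against this PR.
    #include <ginkgo/ginkgo.hpp>

    int main()
    {
        auto exec = gko::CudaExecutor::create(0, gko::ReferenceExecutor::create());

        const gko::size_type num_batch_items = 8;
        const int num_stored_elems_per_row = 5;

        // Every batch item is a 32x32 matrix sharing one ELL sparsity pattern.
        auto mat = gko::batch::matrix::Ell<double, gko::int32>::create(
            exec, gko::batch_dim<2>(num_batch_items, gko::dim<2>{32, 32}),
            num_stored_elems_per_row);

        // Single-column multi-vectors, since the kernels in this PR only handle
        // one right-hand side (more columns hit GKO_NOT_IMPLEMENTED above).
        auto b = gko::batch::MultiVector<double>::create(
            exec, gko::batch_dim<2>(num_batch_items, gko::dim<2>{32, 1}));
        auto x = gko::batch::MultiVector<double>::create(
            exec, gko::batch_dim<2>(num_batch_items, gko::dim<2>{32, 1}));

        mat->apply(b, x);  // ends up in simple_apply() above on CUDA/HIP executors
    }
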
156 changes: 156 additions & 0 deletions common/cuda_hip/matrix/batch_ell_kernels.hpp.inc
@@ -0,0 +1,156 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2023, the Ginkgo authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/


template <typename ValueType, typename IndexType>
__device__ __forceinline__ void simple_apply(
const gko::batch::matrix::ell::batch_item<const ValueType, IndexType>& mat,
const ValueType* const __restrict__ b, ValueType* const __restrict__ x)
{
const auto num_rows = mat.num_rows;
const auto num_stored_elements_per_row = mat.num_stored_elems_per_row;
const auto stride = mat.stride;
const auto val = mat.values;
const auto col = mat.col_idxs;
for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) {
auto temp = zero<ValueType>();
for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) {
const auto ind = tidx + idx * stride;
const auto col_idx = col[ind];
if (col_idx == invalid_index<IndexType>()) {
break;
} else {
temp += val[ind] * b[col_idx];
}
}
x[tidx] = temp;
}
}
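
To make the indexing in the loop above concrete: ind = tidx + idx * stride addresses a slot-major (column-major) ELL layout, where slot idx of every row is stored contiguously and short rows are padded with invalid_index<IndexType>(). A small worked example, assuming stride == num_rows, for one 3x3 batch item with at most two stored elements per row:

    //         [ 1  0  2 ]
    //     A = [ 0  3  0 ]
    //         [ 4  5  0 ]
    //
    // num_rows = 3, num_stored_elems_per_row = 2, stride = 3 (assumed == num_rows)
    //
    // values   = { 1, 3, 4,   2, 0, 5 }              // slot 0 of rows 0..2, then slot 1
    // col_idxs = { 0, 1, 0,   2, invalid_index, 1 }  // padding uses invalid_index<IndexType>()
    //
    // Row tidx = 1 reaches ind = 1 + 1 * 3 = 4 in its second slot, reads
    // invalid_index there, and the inner loop breaks early.
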

Member:
global function is only needed in the matrix, right? The other solvers will use the device function. I am thinking of moving the __global__ to the corresponding launcher.

Member Author:
I think it depends on what you count as a kernel and what as a launcher. To me, __global__ is a kernel, but it is true that it does not need to be in the device kernel file and will probably not be used in the solvers. We can reorganize that later.

template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size, sm_oversubscription) void simple_apply_kernel(
    const gko::batch::matrix::ell::uniform_batch<const ValueType, IndexType> mat,
    const gko::batch::multi_vector::uniform_batch<const ValueType> b,
    const gko::batch::multi_vector::uniform_batch<ValueType> x)
{
for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
batch_id += gridDim.x) {
const auto mat_b =
gko::batch::matrix::extract_batch_item(mat, batch_id);
const auto b_b = gko::batch::extract_batch_item(b, batch_id);
const auto x_b = gko::batch::extract_batch_item(x, batch_id);
simple_apply(mat_b, b_b.values, x_b.values);
}
}
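
Following up on the review discussion above: if the __global__ wrapper were moved to the launcher as suggested, only the __device__ simple_apply helper would remain in this file for reuse by the batched solvers. A hedged sketch of that organization (not what this PR implements); simple_apply_global is a hypothetical name:

    // In batch_ell_kernel_launcher.hpp.inc (sketch only):
    template <typename ValueType, typename IndexType>
    __global__ __launch_bounds__(default_block_size, sm_oversubscription) void simple_apply_global(
        const gko::batch::matrix::ell::uniform_batch<const ValueType, IndexType> mat,
        const gko::batch::multi_vector::uniform_batch<const ValueType> b,
        const gko::batch::multi_vector::uniform_batch<ValueType> x)
    {
        for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
             batch_id += gridDim.x) {
            // delegate to the shared __device__ helper defined in this file
            simple_apply(gko::batch::matrix::extract_batch_item(mat, batch_id),
                         gko::batch::extract_batch_item(b, batch_id).values,
                         gko::batch::extract_batch_item(x, batch_id).values);
        }
    }

    // The host-side launcher would then launch simple_apply_global<<<...>>>
    // directly, exactly as simple_apply_kernel is launched further up in this PR.
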


template <typename ValueType, typename IndexType>
__device__ __forceinline__ void advanced_apply(
const ValueType alpha,
const gko::batch::matrix::ell::batch_item<const ValueType, IndexType>& mat,
const ValueType* const __restrict__ b, const ValueType beta,
ValueType* const __restrict__ x)
{
const auto num_rows = mat.num_rows;
const auto num_stored_elements_per_row = mat.num_stored_elems_per_row;
const auto stride = mat.stride;
const auto val = mat.values;
const auto col = mat.col_idxs;
for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) {
auto temp = zero<ValueType>();
for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) {
const auto ind = tidx + idx * stride;
const auto col_idx = col[ind];
if (col_idx == invalid_index<IndexType>()) {
break;
} else {
temp += alpha * val[ind] * b[col_idx];
}
}
x[tidx] = temp + beta * x[tidx];
}
}

template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size, sm_oversubscription) void advanced_apply_kernel(
    const gko::batch::multi_vector::uniform_batch<const ValueType> alpha,
    const gko::batch::matrix::ell::uniform_batch<const ValueType, IndexType> mat,
    const gko::batch::multi_vector::uniform_batch<const ValueType> b,
    const gko::batch::multi_vector::uniform_batch<const ValueType> beta,
    const gko::batch::multi_vector::uniform_batch<ValueType> x)
{
for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
batch_id += gridDim.x) {
const auto mat_b =
gko::batch::matrix::extract_batch_item(mat, batch_id);
const auto b_b = gko::batch::extract_batch_item(b, batch_id);
const auto x_b = gko::batch::extract_batch_item(x, batch_id);
const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id);
const auto beta_b = gko::batch::extract_batch_item(beta, batch_id);
advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0],
x_b.values);
}
}
1 change: 1 addition & 0 deletions core/CMakeLists.txt
@@ -40,6 +40,7 @@ target_sources(ginkgo
log/record.cpp
log/stream.cpp
matrix/batch_dense.cpp
matrix/batch_ell.cpp
matrix/coo.cpp
matrix/csr.cpp
matrix/dense.cpp
21 changes: 0 additions & 21 deletions core/base/batch_multi_vector.cpp
@@ -291,27 +291,6 @@ void MultiVector<ValueType>::move_to(
}


template <typename ValueType>
void MultiVector<ValueType>::convert_to(matrix::Dense<ValueType>* result) const
{
auto exec = result->get_executor() == nullptr ? this->get_executor()
: result->get_executor();
auto tmp = gko::batch::matrix::Dense<ValueType>::create_const(
exec, this->get_size(),
make_const_array_view(this->get_executor(),
this->get_num_stored_elements(),
this->get_const_values()));
result->copy_from(tmp);
}


template <typename ValueType>
void MultiVector<ValueType>::move_to(matrix::Dense<ValueType>* result)
{
this->convert_to(result);
}


#define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type>
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR);
