diff --git a/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc
new file mode 100644
index 00000000000..f8da432aa4d
--- /dev/null
+++ b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc
@@ -0,0 +1,78 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+
+template <typename ValueType, typename IndexType>
+void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
+                  const batch::matrix::Ell<ValueType, IndexType>* mat,
+                  const batch::MultiVector<ValueType>* b,
+                  batch::MultiVector<ValueType>* x)
+{
+    const auto num_blocks = mat->get_num_batch_items();
+    const auto b_ub = get_batch_struct(b);
+    const auto x_ub = get_batch_struct(x);
+    const auto mat_ub = get_batch_struct(mat);
+    if (b->get_common_size()[1] > 1) {
+        GKO_NOT_IMPLEMENTED;
+    }
+    simple_apply_kernel<<<num_blocks, default_block_size, 0,
+                          exec->get_stream()>>>(mat_ub, b_ub, x_ub);
+}
+
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
+    GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL);
+
+
+template <typename ValueType, typename IndexType>
+void advanced_apply(std::shared_ptr<const DefaultExecutor> exec,
+                    const batch::MultiVector<ValueType>* alpha,
+                    const batch::matrix::Ell<ValueType, IndexType>* mat,
+                    const batch::MultiVector<ValueType>* b,
+                    const batch::MultiVector<ValueType>* beta,
+                    batch::MultiVector<ValueType>* x)
+{
+    const auto num_blocks = mat->get_num_batch_items();
+    const auto b_ub = get_batch_struct(b);
+    const auto x_ub = get_batch_struct(x);
+    const auto mat_ub = get_batch_struct(mat);
+    const auto alpha_ub = get_batch_struct(alpha);
+    const auto beta_ub = get_batch_struct(beta);
+    if (b->get_common_size()[1] > 1) {
+        GKO_NOT_IMPLEMENTED;
+    }
+    advanced_apply_kernel<<<num_blocks, default_block_size, 0,
+                            exec->get_stream()>>>(alpha_ub, mat_ub, b_ub,
+                                                  beta_ub, x_ub);
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
+    GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL);
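The launchers above map one thread block per batch item (`num_blocks = mat->get_num_batch_items()`), with `default_block_size` threads cooperating on the rows of that item. As a minimal standalone sketch of the same launch arithmetic — using only standard CUDA runtime constructs; the kernel and names below are illustrative, not part of this PR:

```cuda
#include <cuda_runtime.h>

// Illustrative only: one block per batch item, threads strided over rows.
__global__ void per_item_kernel(int num_rows)
{
    const int item = blockIdx.x;  // which matrix in the batch this block owns
    for (int row = threadIdx.x; row < num_rows; row += blockDim.x) {
        // ... process (item, row) ...
    }
}

void launch_per_item(int num_batch_items, int num_rows, cudaStream_t stream)
{
    constexpr int default_block_size = 256;
    per_item_kernel<<<num_batch_items, default_block_size, 0, stream>>>(
        num_rows);
}
```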
diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc
new file mode 100644
index 00000000000..de6ca879890
--- /dev/null
+++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc
@@ -0,0 +1,156 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+
+template <typename ValueType, typename IndexType>
+__device__ __forceinline__ void simple_apply(
+    const gko::batch::matrix::ell::batch_item<const ValueType, IndexType>&
+        mat,
+    const ValueType* const __restrict__ b, ValueType* const __restrict__ x)
+{
+    const auto num_rows = mat.num_rows;
+    const auto num_stored_elements_per_row = mat.num_stored_elems_per_row;
+    const auto stride = mat.stride;
+    const auto val = mat.values;
+    const auto col = mat.col_idxs;
+    for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) {
+        auto temp = zero<ValueType>();
+        for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) {
+            const auto ind = tidx + idx * stride;
+            const auto col_idx = col[ind];
+            if (col_idx == invalid_index<IndexType>()) {
+                break;
+            } else {
+                temp += val[ind] * b[col_idx];
+            }
+        }
+        x[tidx] = temp;
+    }
+}
+
+template <typename ValueType, typename IndexType>
+__global__ __launch_bounds__(default_block_size, sm_oversubscription)
+void simple_apply_kernel(
+    const gko::batch::matrix::ell::uniform_batch<const ValueType, IndexType>
+        mat,
+    const gko::batch::multi_vector::uniform_batch<const ValueType> b,
+    const gko::batch::multi_vector::uniform_batch<ValueType> x)
+{
+    for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
+         batch_id += gridDim.x) {
+        const auto mat_b =
+            gko::batch::matrix::extract_batch_item(mat, batch_id);
+        const auto b_b = gko::batch::extract_batch_item(b, batch_id);
+        const auto x_b = gko::batch::extract_batch_item(x, batch_id);
+        simple_apply(mat_b, b_b.values, x_b.values);
+    }
+}
+
+
+template <typename ValueType, typename IndexType>
+__device__ __forceinline__ void advanced_apply(
+    const ValueType alpha,
+    const gko::batch::matrix::ell::batch_item<const ValueType, IndexType>&
+        mat,
+    const ValueType* const __restrict__ b, const ValueType beta,
+    ValueType* const __restrict__ x)
+{
+    const auto num_rows = mat.num_rows;
+    const auto num_stored_elements_per_row = mat.num_stored_elems_per_row;
+    const auto stride = mat.stride;
+    const auto val = mat.values;
+    const auto col = mat.col_idxs;
+    for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) {
+        auto temp = zero<ValueType>();
+        for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) {
+            const auto ind = tidx + idx * stride;
+            const auto col_idx = col[ind];
+            if (col_idx == invalid_index<IndexType>()) {
+                break;
+            } else {
+                temp += alpha * val[ind] * b[col_idx];
+            }
+        }
+        x[tidx] = temp + beta * x[tidx];
+    }
+}
+
+template <typename ValueType, typename IndexType>
+__global__ __launch_bounds__(default_block_size, sm_oversubscription)
+void advanced_apply_kernel(
+    const gko::batch::multi_vector::uniform_batch<const ValueType> alpha,
+    const gko::batch::matrix::ell::uniform_batch<const ValueType, IndexType>
+        mat,
+    const gko::batch::multi_vector::uniform_batch<const ValueType> b,
+    const gko::batch::multi_vector::uniform_batch<const ValueType> beta,
+    const gko::batch::multi_vector::uniform_batch<ValueType> x)
+{
+    for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
+         batch_id += gridDim.x) {
+        const auto mat_b =
+            gko::batch::matrix::extract_batch_item(mat, batch_id);
+        const auto b_b = gko::batch::extract_batch_item(b, batch_id);
+        const auto x_b = gko::batch::extract_batch_item(x, batch_id);
+        const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id);
+        const auto beta_b = gko::batch::extract_batch_item(beta, batch_id);
+        advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0],
+                       x_b.values);
+    }
+}
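The device loops above rely on ELL's column-major padded layout: entry slot `idx` of row `tidx` lives at `tidx + idx * stride`, and padded slots carry the `invalid_index` sentinel in `col_idxs`. A minimal host-side reference of the same access pattern — plain C++, illustrative types only, assuming a sentinel of `-1`:

```cpp
#include <vector>

// Illustrative host reference of the device loop: x = A * b for one ELL item.
// Padded entries are assumed to store col = -1 (the invalid_index sentinel).
void ell_spmv_reference(int num_rows, int num_stored_elems_per_row, int stride,
                        const std::vector<double>& values,
                        const std::vector<int>& col_idxs,
                        const std::vector<double>& b, std::vector<double>& x)
{
    for (int row = 0; row < num_rows; ++row) {
        double temp = 0.0;
        for (int idx = 0; idx < num_stored_elems_per_row; ++idx) {
            const auto ind = row + idx * stride;  // column-major ELL indexing
            const auto col = col_idxs[ind];
            if (col < 0) {
                break;  // the rest of this row is padding
            }
            temp += values[ind] * b[col];
        }
        x[row] = temp;
    }
}
```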
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 46ea67abc65..ae8035bcbf9 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -40,6 +40,7 @@ target_sources(ginkgo
     log/record.cpp
     log/stream.cpp
     matrix/batch_dense.cpp
+    matrix/batch_ell.cpp
     matrix/coo.cpp
     matrix/csr.cpp
     matrix/dense.cpp
diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp
index 6a14919bf2f..6dcf8dd90b5 100644
--- a/core/base/batch_multi_vector.cpp
+++ b/core/base/batch_multi_vector.cpp
@@ -291,27 +291,6 @@ void MultiVector<ValueType>::move_to(
 }
 
 
-template <typename ValueType>
-void MultiVector<ValueType>::convert_to(matrix::Dense<ValueType>* result) const
-{
-    auto exec = result->get_executor() == nullptr ? this->get_executor()
-                                                  : result->get_executor();
-    auto tmp = gko::batch::matrix::Dense<ValueType>::create_const(
-        exec, this->get_size(),
-        make_const_array_view(this->get_executor(),
-                              this->get_num_stored_elements(),
-                              this->get_const_values()));
-    result->copy_from(tmp);
-}
-
-
-template <typename ValueType>
-void MultiVector<ValueType>::move_to(matrix::Dense<ValueType>* result)
-{
-    this->convert_to(result);
-}
-
-
 #define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type>
 GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR);
diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp
index 834e89c8358..b4e380a4162 100644
--- a/core/base/batch_utilities.hpp
+++ b/core/base/batch_utilities.hpp
@@ -47,21 +47,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/batch_multi_vector.hpp>
 #include <ginkgo/core/base/utils.hpp>
 #include <ginkgo/core/matrix/batch_dense.hpp>
+#include <ginkgo/core/matrix/batch_ell.hpp>
 
 
 namespace gko {
 namespace batch {
 
 
-template <typename OutputType>
+/**
+ * Duplicate a given input batch object.
+ */
+template <typename OutputType, typename... TArgs>
 std::unique_ptr<OutputType> duplicate(std::shared_ptr<const Executor> exec,
                                       size_type num_duplications,
-                                      const OutputType* input)
+                                      const OutputType* input,
+                                      TArgs&&... create_args)
 {
     auto num_batch_items = input->get_num_batch_items();
-    auto tmp = OutputType::create(
-        exec, batch_dim<2>(num_batch_items * num_duplications,
-                           input->get_common_size()));
+    auto tmp =
+        OutputType::create(exec,
+                           batch_dim<2>(num_batch_items * num_duplications,
+                                        input->get_common_size()),
+                           std::forward<TArgs>(create_args)...);
 
     for (size_type i = 0; i < num_duplications; ++i) {
         for (size_type b = 0; b < num_batch_items; ++b) {
@@ -74,14 +81,18 @@ std::unique_ptr<OutputType> duplicate(std::shared_ptr<const Executor> exec,
 }
 
 
-template <typename OutputType>
+/**
+ * Duplicate a monolithic matrix and create a batch object.
+ */
+template <typename OutputType, typename... TArgs>
 std::unique_ptr<OutputType> create_from_item(
     std::shared_ptr<const Executor> exec, const size_type num_duplications,
-    const typename OutputType::unbatch_type* input)
+    const typename OutputType::unbatch_type* input, TArgs&&... create_args)
 {
     auto num_batch_items = num_duplications;
     auto tmp = OutputType::create(
-        exec, batch_dim<2>(num_batch_items, input->get_size()));
+        exec, batch_dim<2>(num_batch_items, input->get_size()),
+        std::forward<TArgs>(create_args)...);
 
     for (size_type b = 0; b < num_batch_items; ++b) {
         tmp->create_view_for_item(b)->copy_from(input);
@@ -91,14 +102,23 @@ std::unique_ptr<OutputType> create_from_item(
 }
 
 
-template <typename OutputType>
+/**
+ * Create a batch object from a vector of monolithic objects that share the
+ * same sparsity pattern.
+ *
+ * @note The sparsity of the elements in the input vector of matrices needs to
+ * be the same. TODO: Check for same sparsity among the different input items
+ */
+template <typename OutputType, typename... TArgs>
 std::unique_ptr<OutputType> create_from_item(
     std::shared_ptr<const Executor> exec,
-    const std::vector<typename OutputType::unbatch_type*>& input)
+    const std::vector<typename OutputType::unbatch_type*>& input,
+    TArgs&&... create_args)
 {
     auto num_batch_items = input.size();
     auto tmp = OutputType::create(
-        exec, batch_dim<2>(num_batch_items, input[0]->get_size()));
+        exec, batch_dim<2>(num_batch_items, input[0]->get_size()),
+        std::forward<TArgs>(create_args)...);
 
     for (size_type b = 0; b < num_batch_items; ++b) {
         tmp->create_view_for_item(b)->copy_from(input[b]);
@@ -108,6 +128,9 @@ std::unique_ptr<OutputType> create_from_item(
 }
 
 
+/**
+ * Unbatch a batched object into a vector of items of its unbatch_type.
+ */
 template <typename InputType>
 auto unbatch(const InputType* batch_object)
 {
@@ -121,14 +144,61 @@ auto unbatch(const InputType* batch_object)
 }
 
 
-template <typename ValueType, typename IndexType>
+namespace detail {
+
+
+template <typename ValueType, typename IndexType>
+void assert_same_sparsity_in_batched_data(
+    const std::vector<gko::matrix_data<ValueType, IndexType>>& data)
+{
+    if (data.empty()) {
+        return;
+    }
+    auto num_nnz = data.at(0).nonzeros.size();
+    auto base_data = data.at(0);
+    base_data.ensure_row_major_order();
+    for (int b = 1; b < data.size(); ++b) {
+        if (data[b].nonzeros.size() != num_nnz) {
+            GKO_NOT_IMPLEMENTED;
+        }
+        auto temp_data = data.at(b);
+        temp_data.ensure_row_major_order();
+        for (int nnz = 0; nnz < num_nnz; ++nnz) {
+            if (temp_data.nonzeros.at(nnz).row !=
+                    base_data.nonzeros.at(nnz).row ||
+                temp_data.nonzeros.at(nnz).column !=
+                    base_data.nonzeros.at(nnz).column) {
+                GKO_NOT_IMPLEMENTED;
+            }
+        }
+    }
+}
+
+
+}  // namespace detail
+
+
+/**
+ * Create a batch object from a vector of gko::matrix_data objects. Each item
+ * of the vector needs to store the same sparsity pattern.
+ */
+template <typename ValueType, typename IndexType, typename OutputType,
+          typename... TArgs>
 std::unique_ptr<OutputType> read(
     std::shared_ptr<const Executor> exec,
-    const std::vector<gko::matrix_data<ValueType, IndexType>>& data)
+    const std::vector<gko::matrix_data<ValueType, IndexType>>& data,
+    TArgs&&... create_args)
 {
     auto num_batch_items = data.size();
+    // Throw if all the items in the batch don't have the same sparsity.
+    if (!std::is_same<OutputType,
+                      gko::batch::matrix::Dense<ValueType>>::value &&
+        !std::is_same<OutputType, gko::batch::MultiVector<ValueType>>::value) {
+        detail::assert_same_sparsity_in_batched_data(data);
+    }
     auto tmp =
-        OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size));
+        OutputType::create(exec,
+                           batch_dim<2>(num_batch_items, data.at(0).size),
+                           std::forward<TArgs>(create_args)...);
 
     for (size_type b = 0; b < num_batch_items; ++b) {
         tmp->create_view_for_item(b)->read(data[b]);
@@ -138,6 +208,9 @@ std::unique_ptr<OutputType> read(
 }
 
 
+/**
+ * Write a vector of matrix data objects from an input batch object.
+ */
 template <typename ValueType, typename IndexType, typename OutputType>
 std::vector<gko::matrix_data<ValueType, IndexType>> write(
     const OutputType* mvec)
@@ -154,6 +227,220 @@ std::vector<gko::matrix_data<ValueType, IndexType>> write(
 }
 
 
+/**
+ * Creates and initializes a batch of the specified Matrix type from a series
+ * of single column-vectors.
+ *
+ * @tparam Matrix  matrix type to initialize (It has to implement the
+ *                 read function)
+ * @tparam TArgs  argument types for Matrix::create method
+ *                (not including the implied Executor as the first argument)
+ *
+ * @param vals  values used to initialize the batch vector
+ * @param exec  Executor associated to the vector
+ * @param create_args  additional arguments passed to Matrix::create, not
+ *                     including the Executor, which is passed as the first
+ *                     argument
+ *
+ * @ingroup mat_formats
+ */
+template <typename Matrix, typename... TArgs>
+std::unique_ptr<Matrix> initialize(
+    std::initializer_list<std::initializer_list<typename Matrix::value_type>>
+        vals,
+    std::shared_ptr<const Executor> exec, TArgs&&... create_args)
+{
+    using value_type = typename Matrix::value_type;
+    using index_type = typename Matrix::index_type;
+    using mat_data = gko::matrix_data<value_type, index_type>;
+    size_type num_batch_items = vals.size();
+    GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty");
+    auto vals_begin = begin(vals);
+    size_type common_num_rows = vals_begin ? vals_begin->size() : 0;
+    auto common_size = dim<2>(common_num_rows, 1);
+    for (auto& val : vals) {
+        GKO_ASSERT_EQ(common_num_rows, val.size());
+    }
+    auto b_size = batch_dim<2>(num_batch_items, common_size);
+    size_type batch = 0;
+    std::vector<mat_data> input_mat_data(num_batch_items, common_size);
+    for (const auto& b : vals) {
+        input_mat_data[batch].nonzeros.reserve(b.size());
+        size_type idx = 0;
+        for (const auto& elem : b) {
+            if (elem != zero<value_type>()) {
+                input_mat_data[batch].nonzeros.emplace_back(idx, 0, elem);
+            }
+            ++idx;
+        }
+        ++batch;
+    }
+    return read<value_type, index_type, Matrix>(
+        exec, input_mat_data, std::forward<TArgs>(create_args)...);
+}
+
+
+/**
+ * Creates and initializes a batch of matrices.
+ *
+ * @tparam Matrix  matrix type to initialize (It has to implement the
+ *                 read function)
+ * @tparam TArgs  argument types for Matrix::create method
+ *                (not including the implied Executor as the first argument)
+ *
+ * @param vals  values used to initialize the matrix
+ * @param exec  Executor associated with the matrix
+ * @param create_args  additional arguments passed to Matrix::create, not
+ *                     including the Executor, which is passed as the first
+ *                     argument
+ *
+ * @ingroup mat_formats
+ */
+template <typename Matrix, typename... TArgs>
+std::unique_ptr<Matrix> initialize(
+    std::initializer_list<std::initializer_list<
+        std::initializer_list<typename Matrix::value_type>>>
+        vals,
+    std::shared_ptr<const Executor> exec, TArgs&&... create_args)
+{
+    using value_type = typename Matrix::value_type;
+    using index_type = typename Matrix::index_type;
+    using mat_data = gko::matrix_data<value_type, index_type>;
+    size_type num_batch_items = vals.size();
+    GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty");
+    auto vals_begin = begin(vals);
+    size_type common_num_rows = vals_begin ? vals_begin->size() : 0;
+    size_type common_num_cols =
+        vals_begin->begin() ? vals_begin->begin()->size() : 0;
+    auto common_size = dim<2>(common_num_rows, common_num_cols);
+    for (const auto& b : vals) {
+        auto num_rows = b.size();
+        auto num_cols = begin(b)->size();
+        auto b_size = dim<2>(num_rows, num_cols);
+        GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size);
+    }
+
+    auto b_size = batch_dim<2>(num_batch_items, common_size);
+    size_type batch = 0;
+    std::vector<mat_data> input_mat_data(num_batch_items, common_size);
+    for (const auto& b : vals) {
+        size_type ridx = 0;
+        for (const auto& row : b) {
+            size_type cidx = 0;
+            for (const auto& elem : row) {
+                if (elem != zero<value_type>()) {
+                    input_mat_data[batch].nonzeros.emplace_back(ridx, cidx,
+                                                                elem);
+                }
+                ++cidx;
+            }
+            ++ridx;
+        }
+        ++batch;
+    }
+    return read<value_type, index_type, Matrix>(
+        exec, input_mat_data, std::forward<TArgs>(create_args)...);
+}
+
+
+/**
+ * Creates and initializes a batch of specified Matrix type with a single
+ * column-vector by making copies of the single input column vector.
+ *
+ * @tparam Matrix  matrix type to initialize (It has to implement the
+ *                 read function)
+ * @tparam TArgs  argument types for Matrix::create method
+ *                (not including the implied Executor as the first argument)
+ *
+ * @param num_batch_items  The number of times the input vector is to be
+ *                         duplicated
+ * @param vals  values used to initialize each vector in the temp. batch
+ * @param exec  Executor associated with the matrix
+ * @param create_args  additional arguments passed to Matrix::create, not
+ *                     including the Executor, which is passed as the first
+ *                     argument
+ *
+ * @ingroup mat_formats
+ */
+template <typename Matrix, typename... TArgs>
+std::unique_ptr<Matrix> initialize(
+    const size_type num_batch_items,
+    std::initializer_list<typename Matrix::value_type> vals,
+    std::shared_ptr<const Executor> exec, TArgs&&... create_args)
+{
+    using value_type = typename Matrix::value_type;
+    using index_type = typename Matrix::index_type;
+    using mat_data = gko::matrix_data<value_type, index_type>;
+    GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0,
+                         "Input data is empty");
+    auto num_rows = begin(vals) ? vals.size() : 0;
+    auto common_size = dim<2>(num_rows, 1);
+    auto b_size = batch_dim<2>(num_batch_items, common_size);
+    mat_data single_mat_data(common_size);
+    single_mat_data.nonzeros.reserve(num_rows);
+    size_type idx = 0;
+    for (const auto& elem : vals) {
+        if (elem != zero<value_type>()) {
+            single_mat_data.nonzeros.emplace_back(idx, 0, elem);
+        }
+        ++idx;
+    }
+    std::vector<mat_data> input_mat_data(num_batch_items, single_mat_data);
+    return read<value_type, index_type, Matrix>(
+        exec, input_mat_data, std::forward<TArgs>(create_args)...);
+}
+
+
+/**
+ * Creates and initializes a matrix from copies of a given matrix.
+ *
+ * @tparam Matrix  matrix type to initialize (It has to implement the
+ *                 read function)
+ * @tparam TArgs  argument types for Matrix::create method
+ *                (not including the implied Executor as the first argument)
+ *
+ * @param num_batch_items  The number of times the input matrix is duplicated
+ * @param vals  values used to initialize each matrix in the temp. batch
+ * @param exec  Executor associated to the matrix
+ * @param create_args  additional arguments passed to Matrix::create, not
+ *                     including the Executor, which is passed as the first
+ *                     argument
+ *
+ * @ingroup mat_formats
+ */
+template <typename Matrix, typename... TArgs>
+std::unique_ptr<Matrix> initialize(
+    const size_type num_batch_items,
+    std::initializer_list<std::initializer_list<typename Matrix::value_type>>
+        vals,
+    std::shared_ptr<const Executor> exec, TArgs&&... create_args)
+{
+    using value_type = typename Matrix::value_type;
+    using index_type = typename Matrix::index_type;
+    using mat_data = gko::matrix_data<value_type, index_type>;
+    GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0,
+                         "Input data is empty");
+    auto common_size = dim<2>(begin(vals) ? vals.size() : 0,
+                              begin(vals) ? begin(vals)->size() : 0);
+    batch_dim<2> b_size(num_batch_items, common_size);
+    mat_data single_mat_data(common_size);
+    size_type ridx = 0;
+    for (const auto& row : vals) {
+        size_type cidx = 0;
+        for (const auto& elem : row) {
+            if (elem != zero<value_type>()) {
+                single_mat_data.nonzeros.emplace_back(ridx, cidx, elem);
+            }
+            ++cidx;
+        }
+        ++ridx;
+    }
+    std::vector<mat_data> input_mat_data(num_batch_items, single_mat_data);
+    return read<value_type, index_type, Matrix>(
+        exec, input_mat_data, std::forward<TArgs>(create_args)...);
+}
+
+
 }  // namespace batch
 }  // namespace gko
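As a usage sketch of the utilities above: `initialize` builds a batch from nested initializer lists and forwards trailing `create_args` to `Matrix::create`, and `duplicate` repeats a whole batch. The values below mirror the unit tests later in this PR; the helpers live in the internal header `core/base/batch_utilities.hpp`, so this is illustrative rather than public-API documentation:

```cpp
#include <ginkgo/ginkgo.hpp>

#include "core/base/batch_utilities.hpp"  // internal helpers added above

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    // Two 2x3 items; the trailing `3` is forwarded to Ell::create as the
    // number of stored elements per row (a create_arg).
    auto mat =
        gko::batch::initialize<gko::batch::matrix::Ell<double, gko::int32>>(
            {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
             {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
            exec, 3);
    // Duplicate the whole batch twice -> 4 items; same create_arg forwarded.
    auto dup = gko::batch::duplicate(exec, 2, mat.get(), 3);
}
```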
diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp
index 87cab3dcf0b..462675c15db 100644
--- a/core/device_hooks/common_kernels.inc.cpp
+++ b/core/device_hooks/common_kernels.inc.cpp
@@ -58,6 +58,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "core/factorization/par_ilu_kernels.hpp"
 #include "core/factorization/par_ilut_kernels.hpp"
 #include "core/matrix/batch_dense_kernels.hpp"
+#include "core/matrix/batch_ell_kernels.hpp"
 #include "core/matrix/coo_kernels.hpp"
 #include "core/matrix/csr_kernels.hpp"
 #include "core/matrix/dense_kernels.hpp"
@@ -137,6 +138,11 @@
     _macro(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE);           \
     GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(_macro)
 
+#define GKO_STUB_VALUE_AND_INT32_TYPE(_macro)                                 \
+    template <typename ValueType, typename IndexType>                         \
+    _macro(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE);           \
+    GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro)
+
 
 #define GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE(_macro)                           \
     template <typename InputValueType, typename OutputValueType,              \
@@ -310,6 +316,16 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL);
 }  // namespace batch_dense
 
 
+namespace batch_ell {
+
+
+GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL);
+GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL);
+
+
+}  // namespace batch_ell
+
+
 namespace dense {
diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp
index 758635cea7f..58c7fa25cea 100644
--- a/core/matrix/batch_dense.cpp
+++ b/core/matrix/batch_dense.cpp
@@ -96,14 +96,6 @@ Dense<ValueType>::create_const_view_for_item(size_type item_id) const
 }
 
 
-template <typename ValueType>
-std::unique_ptr<Dense<ValueType>> Dense<ValueType>::create_with_config_of(
-    ptr_param<const Dense<ValueType>> other)
-{
-    return Dense<ValueType>::create(other->get_executor(), other->get_size());
-}
-
-
 template <typename ValueType>
 std::unique_ptr<const Dense<ValueType>> Dense<ValueType>::create_const(
     std::shared_ptr<const Executor> exec, const batch_dim<2>& sizes,
@@ -124,11 +116,72 @@ Dense<ValueType>::Dense(std::shared_ptr<const Executor> exec,
 {}
 
 
+template <typename ValueType>
+Dense<ValueType>* Dense<ValueType>::apply(
+    ptr_param<const MultiVector<ValueType>> b,
+    ptr_param<MultiVector<ValueType>> x)
+{
+    this->validate_application_parameters(b.get(), x.get());
+    auto exec = this->get_executor();
+    this->apply_impl(make_temporary_clone(exec, b).get(),
+                     make_temporary_clone(exec, x).get());
+    return this;
+}
+
+
+template <typename ValueType>
+const Dense<ValueType>* Dense<ValueType>::apply(
+    ptr_param<const MultiVector<ValueType>> b,
+    ptr_param<MultiVector<ValueType>> x) const
+{
+    this->validate_application_parameters(b.get(), x.get());
+    auto exec = this->get_executor();
+    this->apply_impl(make_temporary_clone(exec, b).get(),
+                     make_temporary_clone(exec, x).get());
+    return this;
+}
+
+
+template <typename ValueType>
+Dense<ValueType>* Dense<ValueType>::apply(
+    ptr_param<const MultiVector<ValueType>> alpha,
+    ptr_param<const MultiVector<ValueType>> b,
+    ptr_param<const MultiVector<ValueType>> beta,
+    ptr_param<MultiVector<ValueType>> x)
+{
+    this->validate_application_parameters(alpha.get(), b.get(), beta.get(),
+                                          x.get());
+    auto exec = this->get_executor();
+    this->apply_impl(make_temporary_clone(exec, alpha).get(),
+                     make_temporary_clone(exec, b).get(),
+                     make_temporary_clone(exec, beta).get(),
+                     make_temporary_clone(exec, x).get());
+    return this;
+}
+
+
+template <typename ValueType>
+const Dense<ValueType>* Dense<ValueType>::apply(
+    ptr_param<const MultiVector<ValueType>> alpha,
+    ptr_param<const MultiVector<ValueType>> b,
+    ptr_param<const MultiVector<ValueType>> beta,
+    ptr_param<MultiVector<ValueType>> x) const
+{
+    this->validate_application_parameters(alpha.get(), b.get(), beta.get(),
+                                          x.get());
+    auto exec = this->get_executor();
+    this->apply_impl(make_temporary_clone(exec, alpha).get(),
+                     make_temporary_clone(exec, b).get(),
+                     make_temporary_clone(exec, beta).get(),
+                     make_temporary_clone(exec, x).get());
+    return this;
+}
+
+
 template <typename ValueType>
 void Dense<ValueType>::apply_impl(const MultiVector<ValueType>* b,
                                   MultiVector<ValueType>* x) const
 {
-    this->validate_application_parameters(b, x);
     this->get_executor()->run(dense::make_simple_apply(this, b, x));
 }
 
@@ -139,7 +192,6 @@ void Dense<ValueType>::apply_impl(const MultiVector<ValueType>* alpha,
                                   const MultiVector<ValueType>* b,
                                   const MultiVector<ValueType>* beta,
                                   MultiVector<ValueType>* x) const
 {
-    this->validate_application_parameters(alpha, b, beta, x);
     this->get_executor()->run(
         dense::make_advanced_apply(alpha, this, b, beta, x));
 }
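The `apply` overloads above follow the usual LinOp convention: the simple form computes x = A b per batch item, the advanced form x = alpha A b + beta x, with validation done once in the public method before dispatching to `apply_impl` on temporary clones. A hedged usage sketch — sizes are illustrative and the values are left unset for brevity:

```cpp
#include <ginkgo/ginkgo.hpp>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    const gko::size_type num_items = 2;
    // One 3x3 dense matrix, one 3x1 vector pair, and 1x1 scalars per item.
    auto A = gko::batch::matrix::Dense<double>::create(
        exec, gko::batch_dim<2>(num_items, gko::dim<2>{3, 3}));
    auto b = gko::batch::MultiVector<double>::create(
        exec, gko::batch_dim<2>(num_items, gko::dim<2>{3, 1}));
    auto x = gko::batch::MultiVector<double>::create(
        exec, gko::batch_dim<2>(num_items, gko::dim<2>{3, 1}));
    auto alpha = gko::batch::MultiVector<double>::create(
        exec, gko::batch_dim<2>(num_items, gko::dim<2>{1, 1}));
    auto beta = gko::batch::MultiVector<double>::create(
        exec, gko::batch_dim<2>(num_items, gko::dim<2>{1, 1}));

    A->apply(b, x);               // x = A * b, item by item
    A->apply(alpha, b, beta, x);  // x = alpha * A * b + beta * x
}
```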
diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp
new file mode 100644
index 00000000000..b2987e741d9
--- /dev/null
+++ b/core/matrix/batch_ell.cpp
@@ -0,0 +1,229 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include <ginkgo/core/matrix/batch_ell.hpp>
+
+
+#include <algorithm>
+#include <type_traits>
+
+
+#include <ginkgo/core/base/array.hpp>
+#include <ginkgo/core/base/batch_dim.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+#include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/math.hpp>
+#include <ginkgo/core/base/utils.hpp>
+#include <ginkgo/core/matrix/ell.hpp>
+
+
+#include "core/matrix/batch_ell_kernels.hpp"
+
+
+namespace gko {
+namespace batch {
+namespace matrix {
+namespace ell {
+namespace {
+
+
+GKO_REGISTER_OPERATION(simple_apply, batch_ell::simple_apply);
+GKO_REGISTER_OPERATION(advanced_apply, batch_ell::advanced_apply);
+
+
+}  // namespace
+}  // namespace ell
+
+
+template <typename ValueType, typename IndexType>
+std::unique_ptr<gko::matrix::Ell<ValueType, IndexType>>
+Ell<ValueType, IndexType>::create_view_for_item(size_type item_id)
+{
+    auto exec = this->get_executor();
+    auto num_rows = this->get_common_size()[0];
+    auto stride = this->get_common_size()[0];
+    auto mat = unbatch_type::create(
+        exec, this->get_common_size(),
+        make_array_view(exec, this->get_num_elements_per_item(),
+                        this->get_values_for_item(item_id)),
+        make_array_view(exec, this->get_num_elements_per_item(),
+                        this->get_col_idxs()),
+        this->get_num_stored_elements_per_row(), stride);
+    return mat;
+}
+
+
+template <typename ValueType, typename IndexType>
+std::unique_ptr<const gko::matrix::Ell<ValueType, IndexType>>
+Ell<ValueType, IndexType>::create_const_view_for_item(size_type item_id) const
+{
+    auto exec = this->get_executor();
+    auto num_rows = this->get_common_size()[0];
+    auto stride = this->get_common_size()[0];
+    auto mat = unbatch_type::create_const(
+        exec, this->get_common_size(),
+        make_const_array_view(exec, this->get_num_elements_per_item(),
+                              this->get_const_values_for_item(item_id)),
+        make_const_array_view(exec, this->get_num_elements_per_item(),
+                              this->get_const_col_idxs()),
+        this->get_num_stored_elements_per_row(), stride);
+    return mat;
+}
+
+
+template <typename ValueType, typename IndexType>
+std::unique_ptr<const Ell<ValueType, IndexType>>
+Ell<ValueType, IndexType>::create_const(
+    std::shared_ptr<const Executor> exec, const batch_dim<2>& sizes,
+    const IndexType num_elems_per_row,
+    gko::detail::const_array_view<ValueType>&& values,
+    gko::detail::const_array_view<IndexType>&& col_idxs)
+{
+    // cast const-ness away, but return a const object afterwards,
+    // so we can ensure that no modifications take place.
+    return std::unique_ptr<const Ell<ValueType, IndexType>>(
+        new Ell<ValueType, IndexType>{
+            exec, sizes, num_elems_per_row,
+            gko::detail::array_const_cast(std::move(values)),
+            gko::detail::array_const_cast(std::move(col_idxs))});
+}
+
+
+template <typename ValueType, typename IndexType>
+Ell<ValueType, IndexType>::Ell(std::shared_ptr<const Executor> exec,
+                               const batch_dim<2>& size,
+                               IndexType num_elems_per_row)
+    : EnableBatchLinOp<Ell<ValueType, IndexType>>(exec, size),
+      num_elems_per_row_(num_elems_per_row == 0 ? size.get_common_size()[1]
+                                                : num_elems_per_row),
+      values_(exec, compute_num_elems(size, num_elems_per_row_)),
+      col_idxs_(exec, this->get_common_size()[0] * num_elems_per_row_)
+{}
+
+
+template <typename ValueType, typename IndexType>
+Ell<ValueType, IndexType>* Ell<ValueType, IndexType>::apply(
+    ptr_param<const MultiVector<ValueType>> b,
+    ptr_param<MultiVector<ValueType>> x)
+{
+    this->validate_application_parameters(b.get(), x.get());
+    auto exec = this->get_executor();
+    this->apply_impl(make_temporary_clone(exec, b).get(),
+                     make_temporary_clone(exec, x).get());
+    return this;
+}
+
+
+template <typename ValueType, typename IndexType>
+const Ell<ValueType, IndexType>* Ell<ValueType, IndexType>::apply(
+    ptr_param<const MultiVector<ValueType>> b,
+    ptr_param<MultiVector<ValueType>> x) const
+{
+    this->validate_application_parameters(b.get(), x.get());
+    auto exec = this->get_executor();
+    this->apply_impl(make_temporary_clone(exec, b).get(),
+                     make_temporary_clone(exec, x).get());
+    return this;
+}
+
+
+template <typename ValueType, typename IndexType>
+Ell<ValueType, IndexType>* Ell<ValueType, IndexType>::apply(
+    ptr_param<const MultiVector<ValueType>> alpha,
+    ptr_param<const MultiVector<ValueType>> b,
+    ptr_param<const MultiVector<ValueType>> beta,
+    ptr_param<MultiVector<ValueType>> x)
+{
+    this->validate_application_parameters(alpha.get(), b.get(), beta.get(),
+                                          x.get());
+    auto exec = this->get_executor();
+    this->apply_impl(make_temporary_clone(exec, alpha).get(),
+                     make_temporary_clone(exec, b).get(),
+                     make_temporary_clone(exec, beta).get(),
+                     make_temporary_clone(exec, x).get());
+    return this;
+}
+
+
+template <typename ValueType, typename IndexType>
+const Ell<ValueType, IndexType>* Ell<ValueType, IndexType>::apply(
+    ptr_param<const MultiVector<ValueType>> alpha,
+    ptr_param<const MultiVector<ValueType>> b,
+    ptr_param<const MultiVector<ValueType>> beta,
+    ptr_param<MultiVector<ValueType>> x) const
+{
+    this->validate_application_parameters(alpha.get(), b.get(), beta.get(),
+                                          x.get());
+    auto exec = this->get_executor();
+    this->apply_impl(make_temporary_clone(exec, alpha).get(),
+                     make_temporary_clone(exec, b).get(),
+                     make_temporary_clone(exec, beta).get(),
+                     make_temporary_clone(exec, x).get());
+    return this;
+}
+
+
+template <typename ValueType, typename IndexType>
+void Ell<ValueType, IndexType>::apply_impl(const MultiVector<ValueType>* b,
+                                           MultiVector<ValueType>* x) const
+{
+    this->get_executor()->run(ell::make_simple_apply(this, b, x));
+}
+
+
+template <typename ValueType, typename IndexType>
+void Ell<ValueType, IndexType>::apply_impl(const MultiVector<ValueType>* alpha,
+                                           const MultiVector<ValueType>* b,
+                                           const MultiVector<ValueType>* beta,
+                                           MultiVector<ValueType>* x) const
+{
+    this->get_executor()->run(
+        ell::make_advanced_apply(alpha, this, b, beta, x));
+}
+
+
+template <typename ValueType, typename IndexType>
+void Ell<ValueType, IndexType>::convert_to(
+    Ell<next_precision<ValueType>, IndexType>* result) const
+{
+    result->values_ = this->values_;
+    result->col_idxs_ = this->col_idxs_;
+    result->num_elems_per_row_ = this->num_elems_per_row_;
+    result->set_size(this->get_size());
+}
+
+
+template <typename ValueType, typename IndexType>
+void Ell<ValueType, IndexType>::move_to(
+    Ell<next_precision<ValueType>, IndexType>* result)
+{
+    this->convert_to(result);
+}
+
+
+#define GKO_DECLARE_BATCH_ELL_MATRIX(ValueType) class Ell<ValueType, int32>
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_ELL_MATRIX);
+
+
+}  // namespace matrix
+}  // namespace batch
+}  // namespace gko
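A usage sketch of the new class, mirroring the `CanBeConstructedFromExistingData` unit test further down in this PR (executor and values illustrative):

```cpp
#include <ginkgo/ginkgo.hpp>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    // A batch of two 2x3 ELL matrices with 2 stored elements per row.
    // Values are laid out slot by slot (column-major); padded slots carry
    // value 0 and the column index -1 (the invalid_index sentinel).
    double values[] = {-1.0, 2.5, 0.0, 3.5,   // item 0
                       1.0,  2.0, 0.0, 3.0};  // item 1
    gko::int32 col_idxs[] = {0, 1, -1, 2};    // shared across batch items
    auto mat = gko::batch::matrix::Ell<double, gko::int32>::create(
        exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2,
        gko::array<double>::view(exec, 8, values),
        gko::array<gko::int32>::view(exec, 4, col_idxs));
}
```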
diff --git a/core/matrix/batch_ell_kernels.hpp b/core/matrix/batch_ell_kernels.hpp
new file mode 100644
index 00000000000..d3acc582f9b
--- /dev/null
+++ b/core/matrix/batch_ell_kernels.hpp
@@ -0,0 +1,84 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#ifndef GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_
+#define GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_
+
+
+#include <ginkgo/core/matrix/batch_ell.hpp>
+
+
+#include <ginkgo/core/base/batch_multi_vector.hpp>
+#include <ginkgo/core/base/math.hpp>
+#include <ginkgo/core/base/types.hpp>
+
+
+#include "core/base/kernel_declaration.hpp"
+
+
+namespace gko {
+namespace kernels {
+
+
+#define GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL(_vtype, _itype)    \
+    void simple_apply(std::shared_ptr<const DefaultExecutor> exec,   \
+                      const batch::matrix::Ell<_vtype, _itype>* a,   \
+                      const batch::MultiVector<_vtype>* b,           \
+                      batch::MultiVector<_vtype>* c)
+
+#define GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(_vtype, _itype)  \
+    void advanced_apply(std::shared_ptr<const DefaultExecutor> exec, \
+                        const batch::MultiVector<_vtype>* alpha,     \
+                        const batch::matrix::Ell<_vtype, _itype>* a, \
+                        const batch::MultiVector<_vtype>* b,         \
+                        const batch::MultiVector<_vtype>* beta,      \
+                        batch::MultiVector<_vtype>* c)
+
+#define GKO_DECLARE_ALL_AS_TEMPLATES                                 \
+    template <typename ValueType, typename IndexType>                \
+    GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL(ValueType, IndexType); \
+    template <typename ValueType, typename IndexType>                \
+    GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(ValueType, IndexType)
+
+
+GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_ell,
+                                        GKO_DECLARE_ALL_AS_TEMPLATES);
+
+
+#undef GKO_DECLARE_ALL_AS_TEMPLATES
+
+
+}  // namespace kernels
+}  // namespace gko
+
+
+#endif  // GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_
diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp
index 0bbfde40cc9..f208f5ff078 100644
--- a/core/matrix/batch_struct.hpp
+++ b/core/matrix/batch_struct.hpp
@@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/batch_multi_vector.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/matrix/batch_dense.hpp>
+#include <ginkgo/core/matrix/batch_ell.hpp>
 
 
 namespace gko {
@@ -82,6 +83,53 @@ struct uniform_batch {
 }  // namespace dense
 
 
+namespace ell {
+
+
+/**
+ * Encapsulates one matrix from a batch of ell matrices.
+ */
+template <typename ValueType, typename IndexType>
+struct batch_item {
+    using value_type = ValueType;
+    using index_type = IndexType;
+
+    ValueType* values;
+    const index_type* col_idxs;
+    index_type stride;
+    index_type num_rows;
+    index_type num_cols;
+    index_type num_stored_elems_per_row;
+};
+
+
+/**
+ * A 'simple' structure to store a global uniform batch of ell matrices.
+ */
+template <typename ValueType, typename IndexType>
+struct uniform_batch {
+    using value_type = ValueType;
+    using index_type = IndexType;
+    using entry_type = batch_item<value_type, index_type>;
+
+    ValueType* values;
+    const index_type* col_idxs;
+    size_type num_batch_items;
+    index_type stride;
+    index_type num_rows;
+    index_type num_cols;
+    index_type num_stored_elems_per_row;
+
+    size_type get_entry_storage() const
+    {
+        return num_rows * num_stored_elems_per_row * sizeof(value_type);
+    }
+};
+
+
+}  // namespace ell
+
+
 template <typename ValueType>
 GKO_ATTRIBUTES GKO_INLINE dense::batch_item<const ValueType> to_const(
     const dense::batch_item<ValueType>& b)
@@ -116,6 +164,54 @@ GKO_ATTRIBUTES GKO_INLINE dense::batch_item<ValueType> extract_batch_item(
 }
 
 
+template <typename ValueType, typename IndexType>
+GKO_ATTRIBUTES GKO_INLINE ell::batch_item<const ValueType, IndexType> to_const(
+    const ell::batch_item<ValueType, IndexType>& b)
+{
+    return {b.values,   b.col_idxs, b.stride,
+            b.num_rows, b.num_cols, b.num_stored_elems_per_row};
+}
+
+
+template <typename ValueType, typename IndexType>
+GKO_ATTRIBUTES GKO_INLINE ell::uniform_batch<const ValueType, IndexType>
+to_const(const ell::uniform_batch<ValueType, IndexType>& ub)
+{
+    return {ub.values,   ub.col_idxs, ub.num_batch_items, ub.stride,
+            ub.num_rows, ub.num_cols, ub.num_stored_elems_per_row};
+}
+
+
+template <typename ValueType, typename IndexType>
+GKO_ATTRIBUTES GKO_INLINE ell::batch_item<ValueType, IndexType>
+extract_batch_item(const ell::uniform_batch<ValueType, IndexType>& batch,
+                   const size_type batch_idx)
+{
+    return {batch.values +
+                batch_idx * batch.num_stored_elems_per_row * batch.num_rows,
+            batch.col_idxs,
+            batch.stride,
+            batch.num_rows,
+            batch.num_cols,
+            batch.num_stored_elems_per_row};
+}
+
+template <typename ValueType, typename IndexType>
+GKO_ATTRIBUTES GKO_INLINE ell::batch_item<ValueType, IndexType>
+extract_batch_item(ValueType* const batch_values,
+                   IndexType* const batch_col_idxs, const int stride,
+                   const int num_rows, const int num_cols,
+                   int num_elems_per_row, const size_type batch_idx)
+{
+    return {batch_values + batch_idx * num_elems_per_row * num_rows,
+            batch_col_idxs,
+            stride,
+            num_rows,
+            num_cols,
+            num_elems_per_row};
+}
+
+
 }  // namespace matrix
 }  // namespace batch
 }  // namespace gko
diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt
index cca4b8da1c0..ec7ef93e517 100644
--- a/core/test/matrix/CMakeLists.txt
+++ b/core/test/matrix/CMakeLists.txt
@@ -1,4 +1,5 @@
 ginkgo_create_test(batch_dense)
+ginkgo_create_test(batch_ell)
 ginkgo_create_test(coo)
 ginkgo_create_test(coo_builder)
 ginkgo_create_test(csr)
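The `uniform_batch` struct above stores one shared `col_idxs` array and a single contiguous `values` array for all items; `extract_batch_item` only offsets into it. A minimal sketch of that layout arithmetic in plain C++ (names illustrative):

```cpp
#include <cstddef>

// Illustrative: locate item b's values inside the packed uniform batch.
// All items share one col_idxs array; values are stored back to back.
const double* item_values(const double* batch_values, std::size_t num_rows,
                          std::size_t num_stored_elems_per_row, std::size_t b)
{
    // Each item occupies num_rows * num_stored_elems_per_row entries.
    return batch_values + b * num_rows * num_stored_elems_per_row;
}
```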
diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp
new file mode 100644
index 00000000000..2c8166aa023
--- /dev/null
+++ b/core/test/matrix/batch_ell.cpp
@@ -0,0 +1,525 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include <ginkgo/core/matrix/batch_ell.hpp>
+
+
+#include <gtest/gtest.h>
+
+
+#include <ginkgo/core/base/array.hpp>
+#include <ginkgo/core/base/batch_dim.hpp>
+#include <ginkgo/core/matrix/ell.hpp>
+
+
+#include "core/base/batch_utilities.hpp"
+#include "core/test/utils.hpp"
+#include "core/test/utils/batch_helpers.hpp"
+
+
+template <typename T>
+class Ell : public ::testing::Test {
+protected:
+    using value_type = T;
+    using index_type = gko::int32;
+    using BatchEllMtx = gko::batch::matrix::Ell<value_type, index_type>;
+    using EllMtx = gko::matrix::Ell<value_type, index_type>;
+    using size_type = gko::size_type;
+    Ell()
+        : exec(gko::ReferenceExecutor::create()),
+          mtx(gko::batch::initialize<BatchEllMtx>(
+              {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
+               {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
+              exec, 3)),
+          sp_mtx(gko::batch::initialize<BatchEllMtx>(
+              {{{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}},
+               {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}},
+              exec, 2)),
+          ell_mtx(gko::initialize<EllMtx>({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}},
+                                          exec, gko::dim<2>(2, 3), 3)),
+          sp_ell_mtx(gko::initialize<EllMtx>(
+              {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 2))
+    {}
+
+    static void assert_equal_to_original_sparse_mtx(const BatchEllMtx* m)
+    {
+        ASSERT_EQ(m->get_num_batch_items(), 2);
+        ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3));
+        ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 2));
+        ASSERT_EQ(m->get_num_stored_elements_per_row(), 2);
+        EXPECT_EQ(m->get_const_values()[0], value_type{-1.0});
+        EXPECT_EQ(m->get_const_values()[1], value_type{2.5});
+        EXPECT_EQ(m->get_const_values()[2], value_type{0.0});
+        EXPECT_EQ(m->get_const_values()[3], value_type{3.5});
+        EXPECT_EQ(m->get_const_values()[4], value_type{1.0});
+        EXPECT_EQ(m->get_const_values()[5], value_type{2.0});
+        EXPECT_EQ(m->get_const_values()[6], value_type{0.0});
+        EXPECT_EQ(m->get_const_values()[7], value_type{3.0});
+        EXPECT_EQ(m->get_const_col_idxs()[0], index_type{0});
+        EXPECT_EQ(m->get_const_col_idxs()[1], index_type{1});
+        EXPECT_EQ(m->get_const_col_idxs()[2], index_type{-1});
+        ASSERT_EQ(m->get_const_col_idxs()[3], index_type{2});
+    }
+
+    static void assert_equal_to_original_mtx(const BatchEllMtx* m)
+    {
+        ASSERT_EQ(m->get_num_batch_items(), 2);
+        ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3));
+        ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3));
+        ASSERT_EQ(m->get_num_stored_elements_per_row(), 3);
+        EXPECT_EQ(m->get_const_values()[0], value_type{-1.0});
+        EXPECT_EQ(m->get_const_values()[1], value_type{-1.5});
+        EXPECT_EQ(m->get_const_values()[2], value_type{2.0});
+        EXPECT_EQ(m->get_const_values()[3], value_type{2.5});
+        EXPECT_EQ(m->get_const_values()[4], value_type{3.0});
+        EXPECT_EQ(m->get_const_values()[5], value_type{3.5});
+        EXPECT_EQ(m->get_const_values()[6], value_type{1.0});
+        EXPECT_EQ(m->get_const_values()[7], value_type{1.0});
+        EXPECT_EQ(m->get_const_values()[8], value_type{2.5});
+        EXPECT_EQ(m->get_const_values()[9], value_type{2.0});
+        EXPECT_EQ(m->get_const_values()[10], value_type{3.0});
+        ASSERT_EQ(m->get_const_values()[11], value_type{3.0});
+    }
+
+    static void assert_empty(BatchEllMtx* m)
+    {
+        ASSERT_EQ(m->get_num_batch_items(), 0);
+        ASSERT_EQ(m->get_num_stored_elements(), 0);
+        ASSERT_EQ(m->get_num_stored_elements_per_row(), 0);
+    }
+
+    std::shared_ptr<const gko::ReferenceExecutor> exec;
+    std::unique_ptr<BatchEllMtx> mtx;
+    std::unique_ptr<BatchEllMtx> sp_mtx;
+    std::unique_ptr<EllMtx> ell_mtx;
+    std::unique_ptr<EllMtx> sp_ell_mtx;
+};
+
+TYPED_TEST_SUITE(Ell, gko::test::ValueTypes);
+
+
+TYPED_TEST(Ell, KnowsItsSizeAndValues)
+{
+    this->assert_equal_to_original_mtx(this->mtx.get());
+}
+
+
+TYPED_TEST(Ell, SparseMtxKnowsItsSizeAndValues)
+{
+    this->assert_equal_to_original_sparse_mtx(this->sp_mtx.get());
+}
+
+
+TYPED_TEST(Ell, CanBeEmpty)
+{
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+
+    auto empty = BatchEllMtx::create(this->exec);
+
+    this->assert_empty(empty.get());
+    ASSERT_EQ(empty->get_const_values(), nullptr);
+}
+
+
+TYPED_TEST(Ell, CanGetValuesForEntry)
+{
+    using value_type = typename TestFixture::value_type;
+
+    ASSERT_EQ(this->mtx->get_values_for_item(1)[0], value_type{1.0});
+}
+
+
+TYPED_TEST(Ell, CanCreateEllItemView)
+{
+    GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->ell_mtx,
+                        0.0);
+}
+
+
+TYPED_TEST(Ell, CanCreateSpEllItemView)
+{
+    GKO_ASSERT_MTX_NEAR(this->sp_mtx->create_view_for_item(1),
+                        this->sp_ell_mtx, 0.0);
+}
+
+
+TYPED_TEST(Ell, CanBeCopied)
+{
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+
+    auto mtx_copy = BatchEllMtx::create(this->exec);
+
+    mtx_copy->copy_from(this->mtx.get());
+
+    this->assert_equal_to_original_mtx(this->mtx.get());
+    this->mtx->get_values()[0] = 7;
+    this->assert_equal_to_original_mtx(mtx_copy.get());
+}
+
+
+TYPED_TEST(Ell, CanBeMoved)
+{
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+
+    auto mtx_copy = BatchEllMtx::create(this->exec);
+
+    this->mtx->move_to(mtx_copy);
+
+    this->assert_equal_to_original_mtx(mtx_copy.get());
+}
+
+
+TYPED_TEST(Ell, CanBeCloned)
+{
+    auto mtx_clone = this->mtx->clone();
+
+    this->assert_equal_to_original_mtx(
+        dynamic_cast<decltype(this->mtx.get())>(mtx_clone.get()));
+}
+
+
+TYPED_TEST(Ell, CanBeCleared)
+{
+    this->mtx->clear();
+
+    this->assert_empty(this->mtx.get());
+}
+
+
+TYPED_TEST(Ell, CanBeConstructedWithSize)
+{
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+
+    auto m = BatchEllMtx::create(this->exec,
+                                 gko::batch_dim<2>(2, gko::dim<2>{5, 3}), 2);
+
+    ASSERT_EQ(m->get_num_batch_items(), 2);
+    ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3));
+    ASSERT_EQ(m->get_num_stored_elements_per_row(), 2);
+    ASSERT_EQ(m->get_num_stored_elements(), 20);
+}
+
+
+TYPED_TEST(Ell, CanBeConstructedFromExistingData)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    // clang-format off
+    value_type values[] = {
+        -1.0, 2.5,
+         0.0, 3.5,
+         1.0, 2.0,
+         0.0, 3.0};
+    index_type col_idxs[] = {
+         0, 1,
+        -1, 2};
+    // clang-format on
+
+    auto m = BatchEllMtx::create(
+        this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2,
+        gko::array<value_type>::view(this->exec, 8, values),
+        gko::array<index_type>::view(this->exec, 4, col_idxs));
+
+    this->assert_equal_to_original_sparse_mtx(m.get());
+}
+
+
+TYPED_TEST(Ell, CanBeConstructedFromExistingConstData)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    // clang-format off
+    value_type values[] = {
+        -1.0, 2.5,
+         0.0, 3.5,
+         1.0, 2.0,
+         0.0, 3.0};
+    index_type col_idxs[] = {
+         0, 1,
+        -1, 2};
+    // clang-format on
+
+    auto m = BatchEllMtx::create_const(
+        this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2,
+        gko::array<value_type>::const_view(this->exec, 8, values),
+        gko::array<index_type>::const_view(this->exec, 4, col_idxs));
+
+    this->assert_equal_to_original_sparse_mtx(m.get());
+}
+
+
+TYPED_TEST(Ell, CanBeConstructedFromEllMatrices)
+{
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    using EllMtx = typename TestFixture::EllMtx;
+    auto mat1 = gko::initialize<EllMtx>({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}},
+                                        this->exec);
+    auto mat2 = gko::initialize<EllMtx>({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}},
+                                        this->exec);
+
+    auto m = gko::batch::create_from_item<BatchEllMtx>(
+        this->exec, std::vector<EllMtx*>{mat1.get(), mat2.get()},
+        mat1->get_num_stored_elements_per_row());
+
+    this->assert_equal_to_original_sparse_mtx(m.get());
+}
+
+
+TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication)
+{
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    using EllMtx = typename TestFixture::EllMtx;
+    auto mat1 = gko::initialize<EllMtx>({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}},
+                                        this->exec);
+    auto bat_m = gko::batch::create_from_item<BatchEllMtx>(
+        this->exec,
+        std::vector<EllMtx*>{mat1.get(), mat1.get(), mat1.get()},
+        mat1->get_num_stored_elements_per_row());
+
+    auto m = gko::batch::create_from_item<BatchEllMtx>(
+        this->exec, 3, mat1.get(), mat1->get_num_stored_elements_per_row());
+
+    GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14);
+}
+
+
+TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices)
+{
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    using EllMtx = typename TestFixture::EllMtx;
+    auto mat1 = gko::initialize<EllMtx>({{-1.0, 0.0, 0.0}, {0.0, 2.5, 0.0}},
+                                        this->exec);
+    auto mat2 = gko::initialize<EllMtx>({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}},
+                                        this->exec);
+
+    auto m = gko::batch::create_from_item<BatchEllMtx>(
+        this->exec, std::vector<EllMtx*>{mat1.get(), mat2.get()},
+        mat1->get_num_stored_elements_per_row());
+    auto m_ref = gko::batch::create_from_item<BatchEllMtx>(
+        this->exec,
+        std::vector<EllMtx*>{mat1.get(), mat2.get(), mat1.get(), mat2.get(),
+                             mat1.get(), mat2.get()},
+        mat1->get_num_stored_elements_per_row());
+
+    auto m2 = gko::batch::duplicate(
+        this->exec, 3, m.get(), mat1->get_num_stored_elements_per_row());
+
+    GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14);
+}
+
+
+TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices)
+{
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    using EllMtx = typename TestFixture::EllMtx;
+    auto mat1 = gko::initialize<EllMtx>({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}},
+                                        this->exec);
+    auto mat2 = gko::initialize<EllMtx>({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}},
+                                        this->exec);
+
+    auto ell_mats = gko::batch::unbatch(this->sp_mtx.get());
+
+    GKO_ASSERT_MTX_NEAR(ell_mats[0].get(), mat1.get(), 0.);
+    GKO_ASSERT_MTX_NEAR(ell_mats[1].get(), mat2.get(), 0.);
+}
+
+
+TYPED_TEST(Ell, CanBeListConstructed)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    using EllMtx = typename TestFixture::EllMtx;
+
+    auto m = gko::batch::initialize<BatchEllMtx>({{0.0, -1.0}, {0.0, -5.0}},
+                                                 this->exec);
+
+    ASSERT_EQ(m->get_num_batch_items(), 2);
+    ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1));
+    ASSERT_EQ(m->get_num_stored_elements(), 4);
+    ASSERT_EQ(m->get_num_stored_elements_per_row(), 1);
+    EXPECT_EQ(m->get_values()[0], value_type{0.0});
+    EXPECT_EQ(m->get_values()[1], value_type{-1.0});
+    EXPECT_EQ(m->get_values()[2], value_type{0.0});
+    EXPECT_EQ(m->get_values()[3], value_type{-5.0});
+    EXPECT_EQ(m->get_col_idxs()[0], index_type{-1});
+    EXPECT_EQ(m->get_col_idxs()[1], index_type{0});
+}
+
+
+TYPED_TEST(Ell, CanBeListConstructedByCopies)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+
+    auto m = gko::batch::initialize<BatchEllMtx>(
+        2, I<value_type>({0.0, -1.0}), this->exec, 1);
+
+    ASSERT_EQ(m->get_num_batch_items(), 2);
+    ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1));
+    ASSERT_EQ(m->get_num_stored_elements(), 4);
+    ASSERT_EQ(m->get_num_stored_elements_per_row(), 1);
+    EXPECT_EQ(m->get_values()[0], value_type{0.0});
+    EXPECT_EQ(m->get_values()[1], value_type{-1.0});
+    EXPECT_EQ(m->get_values()[2], value_type{0.0});
+    EXPECT_EQ(m->get_values()[3], value_type{-1.0});
+    EXPECT_EQ(m->get_col_idxs()[0], index_type{-1});
+    EXPECT_EQ(m->get_col_idxs()[1], index_type{0});
+}
+
+
+TYPED_TEST(Ell, CanBeDoubleListConstructed)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    using T = value_type;
+
+    auto m = gko::batch::initialize<BatchEllMtx>(
+        // clang-format off
+        {{I<T>{1.0, 0.0, 0.0},
+          I<T>{2.0, 0.0, 3.0},
+          I<T>{3.0, 6.0, 0.0}},
+         {I<T>{1.0, 0.0, 0.0},
+          I<T>{3.0, 0.0, -2.0},
+          I<T>{5.0, 8.0, 0.0}}},
+        // clang-format on
+        this->exec, 2);
+
+    ASSERT_EQ(m->get_num_batch_items(), 2);
+    ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3));
+    ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3));
+    ASSERT_EQ(m->get_num_stored_elements_per_row(), 2);
+    EXPECT_EQ(m->get_values()[0], value_type{1.0});
+    EXPECT_EQ(m->get_values()[1], value_type{2.0});
+    EXPECT_EQ(m->get_values()[2], value_type{3.0});
+    EXPECT_EQ(m->get_values()[3], value_type{0.0});
+    EXPECT_EQ(m->get_values()[4], value_type{3.0});
+    EXPECT_EQ(m->get_values()[5], value_type{6.0});
+    EXPECT_EQ(m->get_values()[6], value_type{1.0});
+    EXPECT_EQ(m->get_values()[7], value_type{3.0});
+    EXPECT_EQ(m->get_values()[8], value_type{5.0});
+    EXPECT_EQ(m->get_values()[9], value_type{0.0});
+    EXPECT_EQ(m->get_values()[10], value_type{-2.0});
+    EXPECT_EQ(m->get_values()[11], value_type{8.0});
+    EXPECT_EQ(m->get_col_idxs()[0], index_type{0});
+    EXPECT_EQ(m->get_col_idxs()[1], index_type{0});
+    EXPECT_EQ(m->get_col_idxs()[2], index_type{0});
+    EXPECT_EQ(m->get_col_idxs()[3], index_type{-1});
+    EXPECT_EQ(m->get_col_idxs()[4], index_type{2});
+    EXPECT_EQ(m->get_col_idxs()[5], index_type{1});
+}
+
+
+TYPED_TEST(Ell, CanBeReadFromMatrixData)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    auto vec_data = std::vector<gko::matrix_data<value_type, index_type>>{};
+    vec_data.emplace_back(gko::matrix_data<value_type, index_type>(
+        {2, 3}, {{0, 0, -1.0}, {1, 1, 2.5}, {1, 2, 3.5}}));
+    vec_data.emplace_back(gko::matrix_data<value_type, index_type>(
+        {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}}));
+
+    auto m = gko::batch::read<value_type, index_type, BatchEllMtx>(
+        this->exec, vec_data, 2);
+
+    this->assert_equal_to_original_sparse_mtx(m.get());
+}
+
+
+TYPED_TEST(Ell, ThrowsForDataWithDifferentNnz)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    auto vec_data = std::vector<gko::matrix_data<value_type, index_type>>{};
+    vec_data.emplace_back(gko::matrix_data<value_type, index_type>(
+        {2, 3}, {
+                    {0, 0, -1.0},
+                    {1, 1, 2.5},
+                    {1, 2, 0.5},
+                    {2, 2, -3.0},
+                }));
+    vec_data.emplace_back(gko::matrix_data<value_type, index_type>(
+        {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}}));
+
+    EXPECT_THROW(
+        gko::batch::detail::assert_same_sparsity_in_batched_data(vec_data),
+        gko::NotImplemented);
+}
+
+
+TYPED_TEST(Ell, ThrowsForDataWithDifferentSparsity)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    auto vec_data = std::vector<gko::matrix_data<value_type, index_type>>{};
+    vec_data.emplace_back(gko::matrix_data<value_type, index_type>(
+        {2, 3}, {
+                    {0, 0, -1.0},
+                    {1, 1, 2.5},
+                    {2, 2, -3.0},
+                }));
+    vec_data.emplace_back(gko::matrix_data<value_type, index_type>(
+        {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}}));
+
+    EXPECT_THROW(
+        gko::batch::detail::assert_same_sparsity_in_batched_data(vec_data),
+        gko::NotImplemented);
+}
+
+
+TYPED_TEST(Ell, GeneratesCorrectMatrixData)
+{
+    using value_type = typename TestFixture::value_type;
+    using index_type = typename TestFixture::index_type;
+    using BatchEllMtx = typename TestFixture::BatchEllMtx;
+    using tpl = typename gko::matrix_data<value_type, index_type>::nonzero_type;
+
+    auto data = gko::batch::write<value_type, index_type>(this->sp_mtx.get());
+
+    ASSERT_EQ(data[0].size, gko::dim<2>(2, 3));
+    ASSERT_EQ(data[0].nonzeros.size(), 3);
+    EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0}));
+    EXPECT_EQ(data[0].nonzeros[1], tpl(1, 1, value_type{2.5}));
+    EXPECT_EQ(data[0].nonzeros[2], tpl(1, 2, value_type{3.5}));
+    ASSERT_EQ(data[1].size, gko::dim<2>(2, 3));
+    ASSERT_EQ(data[1].nonzeros.size(), 3);
+    EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0}));
+    EXPECT_EQ(data[1].nonzeros[1], tpl(1, 1, value_type{2.0}));
+    EXPECT_EQ(data[1].nonzeros[2], tpl(1, 2, value_type{3.0}));
+}
diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp
index 4cf9d4973e2..5b1fa60ed36 100644
--- a/core/test/utils/batch_helpers.hpp
+++ b/core/test/utils/batch_helpers.hpp
@@ -82,13 +82,23 @@ std::unique_ptr<MatrixType> generate_random_batch_matrix(
     auto result = MatrixType::create(
         exec, batch_dim<2>(num_batch_items, dim<2>(num_rows, num_cols)),
         std::forward<MatrixArgs>(args)...);
+    auto sp_mat = generate_random_device_matrix_data<value_type, index_type>(
+        num_rows, num_cols, nonzero_dist, value_dist, engine,
+        exec->get_master());
+    auto row_idxs = gko::array<index_type>::const_view(
+                        exec->get_master(), sp_mat.get_num_elems(),
+                        sp_mat.get_const_row_idxs())
+                        .copy_to_array();
+    auto col_idxs = gko::array<index_type>::const_view(
+                        exec->get_master(), sp_mat.get_num_elems(),
+                        sp_mat.get_const_col_idxs())
+                        .copy_to_array();
 
-    // TODO: Need to preserve sparsity pattern across batch items for batched
-    // sparse matrix formats
     for (size_type b = 0; b < num_batch_items; b++) {
         auto rand_mat =
-            generate_random_matrix<typename MatrixType::unbatch_type>(
-                num_rows, num_cols, nonzero_dist, value_dist, engine, exec);
+            fill_random_matrix<typename MatrixType::unbatch_type>(
+                num_rows, num_cols, row_idxs, col_idxs, value_dist, engine,
+                exec);
 
         result->create_view_for_item(b)->copy_from(rand_mat.get());
     }
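The helper change above draws one random sparsity pattern per batch and then fills every item with fresh random values over that fixed pattern, which is what batched sparse formats require. A hedged standalone sketch of the same idea, using only the standard library (types and names illustrative):

```cpp
#include <random>
#include <vector>

// Illustrative only: every batch item reuses one sparsity pattern (the
// shared index arrays); only the values are re-sampled per item.
std::vector<std::vector<double>> fill_batch_values(std::size_t num_stored_elems,
                                                   std::size_t num_batch_items,
                                                   std::mt19937& engine)
{
    std::normal_distribution<double> value_dist(0.0, 1.0);
    std::vector<std::vector<double>> values(num_batch_items);
    for (auto& item : values) {
        item.resize(num_stored_elems);
        for (auto& v : item) {
            v = value_dist(engine);  // indices stay fixed, values vary
        }
    }
    return values;
}
```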
#include +#include #include #include #include @@ -54,6 +55,49 @@ namespace gko { namespace test { +/** + * Fills matrix data for a random matrix given a sparsity pattern + * + * @tparam ValueType the type for matrix values + * @tparam IndexType the type for row and column indices + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param num_rows number of rows + * @param num_cols number of columns + * @param row_idxs the row indices of the matrix + * @param col_idxs the column indices of the matrix + * @param value_dist distribution of matrix values + * @param engine a random engine + * + * @return the generated matrix_data with entries according to the given + * dimensions and nonzero count and value distributions. + */ +template +matrix_data fill_random_matrix_data( + size_type num_rows, size_type num_cols, + const gko::array& row_indices, + const gko::array& col_indices, ValueDistribution&& value_dist, + Engine&& engine) +{ + matrix_data data{gko::dim<2>{num_rows, num_cols}, {}}; + auto host_exec = row_indices.get_executor()->get_master(); + auto host_row_indices = make_temporary_clone(host_exec, &row_indices); + auto host_col_indices = make_temporary_clone(host_exec, &col_indices); + + for (int nnz = 0; nnz < row_indices.get_num_elems(); ++nnz) { + data.nonzeros.emplace_back( + host_row_indices->get_const_data()[nnz], + host_col_indices->get_const_data()[nnz], + detail::get_rand_value(value_dist, engine)); + } + + data.ensure_row_major_order(); + return data; +} + + /** * Generates matrix data for a random matrix. * @@ -156,6 +200,49 @@ generate_random_device_matrix_data(gko::size_type num_rows, } +/** + * Fills a random matrix with given sparsity pattern. + * + * @tparam MatrixType type of matrix to generate (must implement + * the interface `ReadableFromMatrixData<>` and provide + * matching `value_type` and `index_type` type aliases) + * @tparam IndexType the type for row and column indices + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param num_rows number of rows + * @param num_cols number of columns + * @param row_idxs the row indices of the matrix + * @param col_idxs the column indices of the matrix + * @param value_dist distribution of matrix values + * @param exec executor where the matrix should be allocated + * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType + */ +template , + typename IndexType = typename MatrixType::index_type, + typename ValueDistribution, typename Engine, typename... MatrixArgs> +std::unique_ptr fill_random_matrix( + size_type num_rows, size_type num_cols, + const gko::array& row_idxs, + const gko::array& col_idxs, ValueDistribution&& value_dist, + Engine&& engine, std::shared_ptr exec, MatrixArgs&&... args) +{ + using value_type = typename MatrixType::value_type; + using index_type = IndexType; + + GKO_ASSERT(row_idxs.get_num_elems() == col_idxs.get_num_elems()); + GKO_ASSERT(row_idxs.get_num_elems() <= (num_rows * num_cols)); + auto result = MatrixType::create(exec, std::forward(args)...); + result->read(fill_random_matrix_data( + num_rows, num_cols, row_idxs, col_idxs, + std::forward(value_dist), + std::forward(engine))); + return result; +} + + /** * Generates a random matrix. 
* @@ -163,6 +250,10 @@ generate_random_device_matrix_data(gko::size_type num_rows, * the interface `ReadableFromMatrixData<>` and provide * matching `value_type` and `index_type` type aliases) * + * @param num_rows number of rows + * @param num_cols number of columns + * @param nonzero_dist distribution of nonzeros per row + * @param value_dist distribution of matrix values * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor * diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index dfa1b2177ee..f5b7932ed39 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -39,6 +39,7 @@ target_sources(ginkgo_cuda factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu matrix/batch_dense_kernels.cu + matrix/batch_ell_kernels.cu matrix/coo_kernels.cu ${CSR_INSTANTIATE} matrix/dense_kernels.cu diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index dd82e15b8cc..c693a3ae861 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -36,7 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu new file mode 100644 index 00000000000..5cadd7755a2 --- /dev/null +++ b/cuda/matrix/batch_ell_kernels.cu @@ -0,0 +1,85 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
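
To make the contract of the `fill_random_matrix_data` helper introduced in the hunk above concrete, here is a hypothetical call with an explicit (diagonal) pattern; the pattern, seed, and distribution are invented for illustration:

    #include <random>

    auto ref = gko::ReferenceExecutor::create();
    // a fixed 3x3 diagonal sparsity pattern, passed in explicitly
    gko::array<gko::int32> row_indices(ref, {0, 1, 2});
    gko::array<gko::int32> col_indices(ref, {0, 1, 2});
    std::default_random_engine engine(7);
    std::uniform_real_distribution<double> value_dist(-1.0, 1.0);
    // only the values are random; the nonzero locations come from the arrays
    auto data = gko::test::fill_random_matrix_data<double, gko::int32>(
        3, 3, row_indices, col_indices, value_dist, engine);
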
+*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/thrust.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" + +// clang-format on + + +} // namespace batch_ell +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 73712a7b81b..4a2a1835961 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/base/batch_struct.hpp" @@ -87,6 +88,40 @@ get_batch_struct(batch::matrix::Dense* const op) } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {as_cuda_type(op->get_const_values()), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(batch::matrix::Ell* const op) +{ + return {as_cuda_type(op->get_values()), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace cuda } // namespace kernels } // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 9990496c98f..9c2e799ede9 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -37,6 +37,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_spgeam_kernel.dp.cpp factorization/par_ilut_sweep_kernel.dp.cpp matrix/batch_dense_kernels.dp.cpp + matrix/batch_ell_kernels.dp.cpp matrix/coo_kernels.dp.cpp matrix/csr_kernels.dp.cpp matrix/fbcsr_kernels.dp.cpp diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp new file mode 100644 index 00000000000..5a69bbd3d5d --- /dev/null +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -0,0 +1,174 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/batch_struct.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/intrinsics.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +#include "dpcpp/matrix/batch_ell_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const size_type num_rows = mat->get_common_size()[0]; + const size_type num_cols = mat->get_common_size()[1]; + + const auto num_batch_items = mat->get_num_batch_items(); + auto device = exec->get_queue()->get_device(); + // TODO: use runtime selection of group size based on num_rows. 
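    // A sketch of the runtime selection the TODO above refers to
    // (hypothetical, not part of this change): clamp the device maximum to
    // the row count rounded up to the next power of two, e.g. with C++20's
    // std::bit_ceil:
    //   group_size = std::min<std::size_t>(
    //       device.get_info<sycl::info::device::max_work_group_size>(),
    //       std::bit_ceil(static_cast<std::size_t>(num_rows)));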
+ auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + const auto x_ub = get_batch_struct(x); + const auto b_ub = get_batch_struct(b); + const auto mat_ub = get_batch_struct(mat); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + // Launch a kernel that has nbatches blocks, each block has max group size + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto mat_ub = get_batch_struct(mat); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + const auto num_batch_items = mat_ub.num_batch_items; + auto device = exec->get_queue()->get_device(); + // TODO: use runtime selection of group size based on num_rows. + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + + // Launch a kernel that has nbatches blocks, each block has max group size + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc new file mode 100644 index 00000000000..64d71710dbb --- /dev/null +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -0,0 +1,81 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +template +__dpct_inline__ void simple_apply_kernel( + const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx == invalid_index()) { + break; + } else { + temp += mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + } + x.values[tidx * x.stride] = temp; + } +} + + +template +__dpct_inline__ void advanced_apply_kernel( + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx == invalid_index()) { + break; + } else { + temp += mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + } + x.values[tidx * x.stride] = + alpha.values[0] * temp + beta.values[0] * x.values[tidx * x.stride]; + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index b0393daf55d..fe04407d82d 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/base/batch_struct.hpp" @@ -86,6 +87,40 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. 
+ */ +template +inline batch::matrix::ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {op->get_const_values(), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) +{ + return {op->get_values(), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 21b573b6cd0..ccc88769a4e 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -36,6 +36,7 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp matrix/batch_dense_kernels.hip.cpp + matrix/batch_ell_kernels.hip.cpp matrix/coo_kernels.hip.cpp ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index eb3da83760a..3361feeb8b8 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include "core/base/batch_struct.hpp" diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp new file mode 100644 index 00000000000..96e7cdb298e --- /dev/null +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
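
All backends consume these uniform batch structs the same way: entry k of row `row` lives at offset `row + k * stride`, and the generators above set `stride` equal to the row count, so stepping through a row's stored elements strides down columns of the value array. A self-contained sketch of that addressing (a standalone helper written for illustration; only `gko::invalid_index` is an actual Ginkgo utility):

    #include <ginkgo/core/base/types.hpp>

    // dot product of one ELL row with a dense vector b, skipping padding;
    // values/col_idxs are laid out column-major with stride == num_rows
    template <typename ValueType, typename IndexType>
    ValueType ell_row_dot(const ValueType* values, const IndexType* col_idxs,
                          int stride, int num_stored_elems_per_row, int row,
                          const ValueType* b)
    {
        ValueType sum{};
        for (int k = 0; k < num_stored_elems_per_row; ++k) {
            const auto col = col_idxs[row + k * stride];
            if (col == gko::invalid_index<IndexType>()) {
                break;  // the rest of this row is padding
            }
            sum += values[row + k * stride] * b[col];
        }
        return sum;
    }
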
+*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include +#include + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "hip/base/batch_struct.hip.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/thrust.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" + +// clang-format on + + +} // namespace batch_ell +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 4670cf0988b..e35f13f1249 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/base/batch_struct.hpp" @@ -87,6 +88,40 @@ get_batch_struct(batch::matrix::Dense* const op) } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {as_hip_type(op->get_const_values()), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(batch::matrix::Ell* const op) +{ + return {as_hip_type(op->get_values()), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace hip } // namespace kernels } // namespace gko diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 61dffba3193..405603269ff 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -52,14 +52,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { namespace batch { -namespace matrix { - - -template -class Dense; - - -} /** @@ -90,21 +82,17 @@ class MultiVector : public EnablePolymorphicObject>, public EnablePolymorphicAssignment>, public EnableCreateMethod>, - public ConvertibleTo>>, - public ConvertibleTo> { + public ConvertibleTo>> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; - friend class matrix::Dense; public: using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; using ConvertibleTo>>::move_to; - using ConvertibleTo>::convert_to; - using ConvertibleTo>::move_to; using value_type = ValueType; using index_type = int32; @@ -126,10 +114,6 @@ class MultiVector void move_to(MultiVector>* result) override; - void convert_to(matrix::Dense* result) const override; - - void move_to(matrix::Dense* result) override; - /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch * MultiVector object. Does not perform any deep copies, but only returns a @@ -462,224 +446,6 @@ class MultiVector }; -/** - * Creates and initializes a batch of single column-vectors. - * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param vals values used to initialize the batch vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - std::initializer_list> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = vals.size(); - GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); - auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin ? vals_begin->size() : 0; - auto common_size = dim<2>(common_num_rows, 1); - for (auto& val : vals) { - GKO_ASSERT_EQ(common_num_rows, val.size()); - } - auto b_size = batch_dim<2>(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - size_type batch = 0; - for (const auto& b : vals) { - size_type idx = 0; - for (const auto& elem : b) { - tmp->at(batch, idx) = elem; - ++idx; - } - ++batch; - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a batch of multi-vectors. - * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. 
- * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - std::initializer_list>> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = vals.size(); - GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); - auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin ? vals_begin->size() : 0; - size_type common_num_cols = - vals_begin->begin() ? vals_begin->begin()->size() : 0; - auto common_size = dim<2>(common_num_rows, common_num_cols); - for (const auto& b : vals) { - auto num_rows = b.size(); - auto num_cols = begin(b)->size(); - auto b_size = dim<2>(num_rows, num_cols); - GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); - } - - auto b_size = batch_dim<2>(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - size_type batch = 0; - for (const auto& b : vals) { - size_type ridx = 0; - for (const auto& row : b) { - size_type cidx = 0; - for (const auto& elem : row) { - tmp->at(batch, ridx, cidx) = elem; - ++cidx; - } - ++ridx; - } - ++batch; - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a batch single column-vector by making copies of the - * single input column vector. - * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param num_vectors The number of times the input vector is to be duplicated - * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - const size_type num_vectors, - std::initializer_list vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = num_vectors; - GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, - "Input data is empty"); - auto b_size = - batch_dim<2>(num_batch_items, dim<2>(begin(vals) ? vals.size() : 0, 1)); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - for (size_type batch = 0; batch < num_vectors; batch++) { - size_type idx = 0; - for (const auto& elem : vals) { - tmp->at(batch, idx) = elem; - ++idx; - } - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a matrix from copies of a given matrix. 
- * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param num_batch_items The number of times the input matrix is duplicated - * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup LinOp - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - const size_type num_batch_items, - std::initializer_list> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, - "Input data is empty"); - auto common_size = dim<2>(begin(vals) ? vals.size() : 0, - begin(vals) ? begin(vals)->size() : 0); - batch_dim<2> b_size(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - for (size_type batch = 0; batch < num_batch_items; batch++) { - size_type ridx = 0; - for (const auto& row : vals) { - size_type cidx = 0; - for (const auto& elem : row) { - tmp->at(batch, ridx, cidx) = elem; - ++cidx; - } - ++ridx; - } - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - } // namespace batch } // namespace gko diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 68b5da6e3eb..f5a75c7448e 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -531,6 +531,22 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, template _macro(double, int64) #endif +#if GINKGO_DPCPP_SINGLE_MODE +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro) \ + template _macro(float, int32); \ + template <> \ + _macro(double, int32) GKO_NOT_IMPLEMENTED; \ + template _macro(std::complex, int32); \ + template <> \ + _macro(std::complex, int32) GKO_NOT_IMPLEMENTED +#else +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro) \ + template _macro(float, int32); \ + template _macro(double, int32); \ + template _macro(std::complex, int32); \ + template _macro(std::complex, int32) +#endif + /** * Instantiates a template for each value and index type compiled by Ginkgo. diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 7f3ce5890e4..47230c24e32 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -93,15 +93,6 @@ class Dense final : public EnableBatchLinOp>, using absolute_type = remove_complex; using complex_type = to_complex; - /** - * Creates a Dense matrix with the configuration of another Dense - * matrix. - * - * @param other The other matrix whose configuration needs to copied. 
- */ - static std::unique_ptr create_with_config_of( - ptr_param other); - void convert_to(Dense>* result) const override; void move_to(Dense>* result) override; @@ -275,11 +266,8 @@ class Dense final : public EnableBatchLinOp>, * @param b the multi-vector to be applied to * @param x the output multi-vector */ - void apply(const MultiVector* b, - MultiVector* x) const - { - this->apply_impl(b, x); - } + Dense* apply(ptr_param> b, + ptr_param> x); /** * Apply the matrix to a multi-vector with a linear combination of the given @@ -291,13 +279,26 @@ class Dense final : public EnableBatchLinOp>, * @param beta the scalar to scale the x vector with * @param x the output multi-vector */ - void apply(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const - { - this->apply_impl(alpha, b, beta, x); - } + Dense* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x); + + /** + * @copydoc apply(const MultiVector*, MultiVector*) + */ + const Dense* apply(ptr_param> b, + ptr_param> x) const; + + /** + * @copydoc apply(const MultiVector*, const + * MultiVector*, const MultiVector*, + * MultiVector*) + */ + const Dense* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const; private: inline size_type compute_num_elems(const batch_dim<2>& size) @@ -306,7 +307,6 @@ class Dense final : public EnableBatchLinOp>, size.get_common_size()[1]; } -protected: /** * Creates an uninitialized Dense matrix of the specified size. * @@ -362,7 +362,6 @@ class Dense final : public EnableBatchLinOp>, idx % this->get_common_size()[1]); } -private: array values_; }; diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp new file mode 100644 index 00000000000..fa00a0631fd --- /dev/null +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -0,0 +1,385 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
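
The reworked batch Dense `apply` overloads above take `ptr_param` arguments and return `this`, which permits chaining applications in the style of LinOp::apply. A hedged usage sketch, with all operands assumed to be compatible `batch::MultiVector<double>` objects that exist already:

    // x = A * b, then y = alpha * A * x + beta * y, via the returned pointer
    A->apply(b, x)->apply(alpha, x, beta, y);
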
+*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { + + +/** + * Ell is a sparse matrix format that stores the same number of nonzeros in each + * row, enabling coalesced accesses. It is suitable for sparsity patterns that + * have a similar number of nonzeros in every row. The values are stored in a + * column-major fashion similar to the monolithic gko::matrix::Ell class. + * + * Similar to the monolithic gko::matrix::Ell class, invalid_index is + * used as the column index for padded zero entries. + * + * @note It is also assumed that the sparsity pattern of all the items in the + * batch is the same and therefore only a single copy of the sparsity pattern is + * stored. + * + * @note Currently only IndexType of int32 is supported. + * + * @tparam ValueType value precision of matrix elements + * @tparam IndexType index precision of matrix elements + * + * @ingroup batch_ell + * @ingroup mat_formats + * @ingroup BatchLinOp + */ +template +class Ell final + : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo, IndexType>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Ell, IndexType>; + friend class Ell, IndexType>; + static_assert(std::is_same::value, + "IndexType must be a 32 bit integer"); + +public: + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; + + using value_type = ValueType; + using index_type = IndexType; + using unbatch_type = gko::matrix::Ell; + using absolute_type = remove_complex; + using complex_type = to_complex; + + void convert_to( + Ell, IndexType>* result) const override; + + void move_to(Ell, IndexType>* result) override; + + /** + * Creates a mutable view (of matrix::Ell type) of one item of the + * batch::matrix::Ell object. Does not perform any deep + * copies, but only returns a view of the data. + * + * @param item_id The index of the batch item + * + * @return a batch::matrix::Ell object with the data from the batch item + * at the given index. + */ + std::unique_ptr create_view_for_item(size_type item_id); + + /** + * @copydoc create_view_for_item(size_type) + */ + std::unique_ptr create_const_view_for_item( + size_type item_id) const; + + /** + * Returns a pointer to the array of values of the matrix + * + * @return the pointer to the array of values + */ + value_type* get_values() noexcept { return values_.get_data(); } + + /** + * @copydoc get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * Returns a pointer to the array of column indices of the matrix + * + * @return the pointer to the array of column indices + */ + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } + + /** + * @copydoc get_col_idxs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. 
+ */ + const index_type* get_const_col_idxs() const noexcept + { + return col_idxs_.get_const_data(); + } + + /** + * Returns the number of explicitly stored elements per row. + * + * @return the number of elements stored in each row of the ELL matrix; + * the same for each batch item. + */ + index_type get_num_stored_elements_per_row() const noexcept + { + return num_elems_per_row_; + } + + /** + * Returns the number of elements explicitly stored in the batch matrix, + * cumulative across all the batch items. + * + * @return the number of elements explicitly stored in the matrix, + * cumulative across all the batch items + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + /** + * Returns the number of stored elements in each batch item. + * + * @return the number of stored elements per batch item. + */ + size_type get_num_elements_per_item() const noexcept + { + return this->get_num_stored_elements() / this->get_num_batch_items(); + } + + /** + * Returns a pointer to the array of col_idxs of the matrix. This is shared + * across all batch items. + * + * @param batch_id the id of the batch item. + * + * @return the pointer to the array of col_idxs + */ + index_type* get_col_idxs_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return col_idxs_.get_data(); + } + + /** + * @copydoc get_col_idxs_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return col_idxs_.get_const_data(); + } + + /** + * Returns a pointer to the array of values of the matrix for a + * specific batch item. + * + * @param batch_id the id of the batch item. + * + * @return the pointer to the array of values + */ + value_type* get_values_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * @copydoc get_values_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * Creates a constant (immutable) batch Ell matrix from constant + * arrays. The column indices array needs to be the same for all batch items. + * + * @param exec the executor to create the matrix on + * @param sizes the dimensions of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + * @param values the value array of the matrix + * @param col_idxs the col_idxs array of a single batch item of the matrix. + * + * @return A smart pointer to the constant matrix wrapping the input + * arrays (if they reside on the same executor as the matrix) or a copy of + * the arrays on the correct executor.
+ */ + static std::unique_ptr create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + const index_type num_elems_per_row, + gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs); + + /** + * Apply the matrix to a multi-vector. Represents the matrix vector + * multiplication, x = A * b, where x and b are both multi-vectors. + * + * @param b the multi-vector to be applied to + * @param x the output multi-vector + */ + Ell* apply(ptr_param> b, + ptr_param> x); + + /** + * Apply the matrix to a multi-vector with a linear combination of the given + * input vector. Represents the matrix vector multiplication, x = alpha * A + * * b + beta * x, where x and b are both multi-vectors. + * + * @param alpha the scalar to scale the matrix-vector product with + * @param b the multi-vector to be applied to + * @param beta the scalar to scale the x vector with + * @param x the output multi-vector + */ + Ell* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x); + + /** + * @copydoc apply(const MultiVector*, MultiVector*) + */ + const Ell* apply(ptr_param> b, + ptr_param> x) const; + + /** + * @copydoc apply(const MultiVector*, const + * MultiVector*, const MultiVector*, + * MultiVector*) + */ + const Ell* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const; + +private: + size_type compute_num_elems(const batch_dim<2>& size, + IndexType num_elems_per_row) + { + return size.get_num_batch_items() * size.get_common_size()[0] * + num_elems_per_row; + } + + /** + * Creates an uninitialized Ell matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + */ + Ell(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}, + const IndexType num_elems_per_row = 0); + + /** + * Creates an Ell matrix from already allocated (and initialized) + * arrays. The column indices array needs to be the same for all batch items. + * + * @tparam ValuesArray type of array of values + * @tparam IndicesArray type of array of column indices + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + * @param values array of matrix values + * @param col_idxs the col_idxs array of a single batch item of the matrix. + * + * @note If `values` is not an rvalue, not an array of ValueType, or is on + * the wrong executor, an internal copy will be created, and the + * original array data will not be used in the matrix.
+ */ + template + Ell(std::shared_ptr exec, const batch_dim<2>& size, + const IndexType num_elems_per_row, ValuesArray&& values, + IndicesArray&& col_idxs) + : EnableBatchLinOp(exec, size), + num_elems_per_row_{num_elems_per_row}, + values_{exec, std::forward(values)}, + col_idxs_{exec, std::forward(col_idxs)} + { + // Ensure that the value and col_idxs arrays have the correct size + auto num_elems = this->get_common_size()[0] * num_elems_per_row * + this->get_num_batch_items(); + GKO_ASSERT_EQ(num_elems, values_.get_num_elems()); + GKO_ASSERT_EQ(this->get_num_elements_per_item(), + col_idxs_.get_num_elems()); + } + + void apply_impl(const MultiVector* b, + MultiVector* x) const; + + void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; + + index_type num_elems_per_row_; + array values_; + array col_idxs_; +}; + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 8bb29242e88..ad90e264189 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -109,6 +109,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index d87399492f5..aa8e30cd590 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -24,6 +24,7 @@ target_sources(ginkgo_omp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp matrix/batch_dense_kernels.cpp + matrix/batch_ell_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index 2d0b7ed4d40..b91a4133dba 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -36,8 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/omp/matrix/batch_ell_kernels.cpp b/omp/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..17710a97366 --- /dev/null +++ b/omp/matrix/batch_ell_kernels.cpp @@ -0,0 +1,117 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
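
Taken together, the interface above supports building a batch ELL matrix directly from raw arrays and applying it, with `apply` returning `this` for chaining just as in the batch Dense counterpart. An end-to-end sketch with invented values, assuming `create` forwards to the array constructor shown above and that `batch::MultiVector` provides the usual `create`/`fill` helpers:

    #include <ginkgo/ginkgo.hpp>

    int main()
    {
        auto exec = gko::ReferenceExecutor::create();
        // 2 batch items, each 2x2, one stored element per row (two diagonals)
        const auto size = gko::batch_dim<2>(2, gko::dim<2>(2, 2));
        auto mat = gko::batch::matrix::Ell<double, gko::int32>::create(
            exec, size, /* num_elems_per_row */ 1,
            gko::array<double>(exec, {1.0, 2.0, 3.0, 4.0}),  // item 0, item 1
            gko::array<gko::int32>(exec, {0, 1}));  // shared column indices
        auto b = gko::batch::MultiVector<double>::create(
            exec, gko::batch_dim<2>(2, gko::dim<2>(2, 1)));
        b->fill(1.0);
        auto x = gko::batch::MultiVector<double>::create(
            exec, gko::batch_dim<2>(2, gko::dim<2>(2, 1)));
        mat->apply(b, x);  // x_i = A_i * b_i for every batch item i
    }

With these numbers, item 0 of x becomes [1, 2]^T and item 1 becomes [3, 4]^T, since the two items are the diagonal matrices diag(1, 2) and diag(3, 4).
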
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +#include "reference/matrix/batch_ell_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 37498588ca7..21dfc0dfb5a 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -26,6 +26,7 @@ target_sources(ginkgo_reference factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp matrix/batch_dense_kernels.cpp + matrix/batch_ell_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index 
3d7ef03a3bd..87d73bb8e34 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -36,9 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/reference/matrix/batch_ell_kernels.cpp b/reference/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..1d3a0e1ef94 --- /dev/null +++ b/reference/matrix/batch_ell_kernels.cpp @@ -0,0 +1,115 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The Ell matrix format namespace. 
diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc
new file mode 100644
index 00000000000..979df1a19bd
--- /dev/null
+++ b/reference/matrix/batch_ell_kernels.hpp.inc
@@ -0,0 +1,80 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+template <typename ValueType, typename IndexType>
+inline void simple_apply_kernel(
+    const gko::batch::matrix::ell::batch_item<const ValueType, const IndexType>& a,
+    const gko::batch::multi_vector::batch_item<const ValueType>& b,
+    const gko::batch::multi_vector::batch_item<ValueType>& c)
+{
+    for (int row = 0; row < c.num_rows; ++row) {
+        for (int j = 0; j < c.num_rhs; ++j) {
+            c.values[row * c.stride + j] = zero<ValueType>();
+        }
+        for (auto k = 0; k < a.num_stored_elems_per_row; ++k) {
+            auto val = a.values[row + k * a.stride];
+            auto col = a.col_idxs[row + k * a.stride];
+            if (col != invalid_index<IndexType>()) {
+                for (int j = 0; j < c.num_rhs; ++j) {
+                    c.values[row * c.stride + j] +=
+                        val * b.values[col * b.stride + j];
+                }
+            }
+        }
+    }
+}
+
+
+template <typename ValueType, typename IndexType>
+inline void advanced_apply_kernel(
+    const ValueType alpha,
+    const gko::batch::matrix::ell::batch_item<const ValueType, const IndexType>& a,
+    const gko::batch::multi_vector::batch_item<const ValueType>& b,
+    const ValueType beta,
+    const gko::batch::multi_vector::batch_item<ValueType>& c)
+{
+    for (int row = 0; row < a.num_rows; ++row) {
+        for (int j = 0; j < c.num_rhs; ++j) {
+            c.values[row * c.stride + j] *= beta;
+        }
+        for (auto k = 0; k < a.num_stored_elems_per_row; ++k) {
+            auto val = a.values[row + k * a.stride];
+            auto col = a.col_idxs[row + k * a.stride];
+            if (col != invalid_index<IndexType>()) {
+                for (int j = 0; j < b.num_rhs; ++j) {
+                    c.values[row * c.stride + j] +=
+                        alpha * val * b.values[col * b.stride + j];
+                }
+            }
+        }
+    }
+}
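Both kernels read slot k of a row at `values[row + k * stride]`: the batched ELL layout stores the k-th stored element of every row contiguously, with the stride equal to the number of rows, and rows with fewer than `num_stored_elems_per_row` entries are padded with an invalid column index. A self-contained sketch of that indexing on hypothetical data (the -1 padding value matches Ginkgo's invalid_index convention):

    // Slot-major ELL storage of the 2x3 matrix [[1, 0, 2], [0, 3, 0]]
    // with num_stored_elems_per_row = 2 and stride = num_rows = 2.
    #include <cassert>

    int main()
    {
        const double values[] = {1.0, 3.0,   // slot k = 0 for rows 0, 1
                                 2.0, 0.0};  // slot k = 1 for rows 0, 1
        const int col_idxs[] = {0, 1,        // slot k = 0
                                2, -1};      // slot k = 1, row 1 is padding
        const int stride = 2;
        const double b[] = {1.0, 1.0, 1.0};
        double x[2] = {0.0, 0.0};
        for (int row = 0; row < 2; ++row) {
            for (int k = 0; k < 2; ++k) {
                const auto col = col_idxs[row + k * stride];
                if (col != -1) {  // skip padded slots
                    x[row] += values[row + k * stride] * b[col];
                }
            }
        }
        assert(x[0] == 3.0 && x[1] == 3.0);
        return 0;
    }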
diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp
index 483d7717718..bb7680d1493 100644
--- a/reference/matrix/batch_struct.hpp
+++ b/reference/matrix/batch_struct.hpp
@@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include
 #include
+#include <ginkgo/core/matrix/batch_ell.hpp>
 
 
 #include "core/base/batch_struct.hpp"
@@ -90,6 +91,40 @@ inline batch::matrix::dense::uniform_batch<ValueType> get_batch_struct(
 }
 
 
+/**
+ * Generates an immutable uniform batch struct from a batch of ell matrices.
+ */
+template <typename ValueType, typename IndexType>
+inline batch::matrix::ell::uniform_batch<const ValueType, const IndexType>
+get_batch_struct(const batch::matrix::Ell<ValueType, IndexType>* const op)
+{
+    return {op->get_const_values(),
+            op->get_const_col_idxs(),
+            op->get_num_batch_items(),
+            static_cast<IndexType>(op->get_common_size()[0]),
+            static_cast<IndexType>(op->get_common_size()[0]),
+            static_cast<IndexType>(op->get_common_size()[1]),
+            static_cast<IndexType>(op->get_num_stored_elements_per_row())};
+}
+
+
+/**
+ * Generates a uniform batch struct from a batch of ell matrices.
+ */
+template <typename ValueType, typename IndexType>
+inline batch::matrix::ell::uniform_batch<ValueType, IndexType> get_batch_struct(
+    batch::matrix::Ell<ValueType, IndexType>* const op)
+{
+    return {op->get_values(),
+            op->get_col_idxs(),
+            op->get_num_batch_items(),
+            static_cast<IndexType>(op->get_common_size()[0]),
+            static_cast<IndexType>(op->get_common_size()[0]),
+            static_cast<IndexType>(op->get_common_size()[1]),
+            static_cast<IndexType>(op->get_num_stored_elements_per_row())};
+}
+
+
 }  // namespace host
 }  // namespace kernels
 }  // namespace gko
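Both getters deliberately pass `get_common_size()[0]` twice: the slot stride of the column-major ELL layout is exactly the per-item row count. All batch items share a single col_idxs array, while each item owns `stride * num_stored_elems_per_row` values. The `extract_batch_item` helper used by the reference kernels lives in core/matrix/batch_struct.hpp and is not part of this diff; a hypothetical sketch of the offset arithmetic it has to perform (names and exact field set are assumptions for illustration):

    #include <cstddef>

    // Assumed per-item view; fields mirror uniform_batch.
    template <typename ValueType, typename IndexType>
    struct ell_item_sketch {
        ValueType* values;
        const IndexType* col_idxs;
        IndexType stride;
        IndexType num_rows;
        IndexType num_cols;
        IndexType num_stored_elems_per_row;
    };

    template <typename ValueType, typename IndexType>
    ell_item_sketch<ValueType, IndexType> extract_batch_item_sketch(
        ValueType* values, const IndexType* col_idxs, IndexType stride,
        IndexType num_rows, IndexType num_cols, IndexType num_elems_per_row,
        std::size_t batch_id)
    {
        // Only the values are per-item; the column indices are stored once
        // and shared by every batch item.
        return {values + batch_id * stride * num_elems_per_row,
                col_idxs,
                stride,
                num_rows,
                num_cols,
                num_elems_per_row};
    }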
diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt
index 18634de662d..05498cbadc4 100644
--- a/reference/test/matrix/CMakeLists.txt
+++ b/reference/test/matrix/CMakeLists.txt
@@ -1,4 +1,5 @@
 ginkgo_create_test(batch_dense_kernels)
+ginkgo_create_test(batch_ell_kernels)
 ginkgo_create_test(coo_kernels)
 ginkgo_create_test(csr_kernels)
 ginkgo_create_test(dense_kernels)
diff --git a/reference/test/matrix/batch_ell_kernels.cpp b/reference/test/matrix/batch_ell_kernels.cpp
new file mode 100644
index 00000000000..81f189c3e02
--- /dev/null
+++ b/reference/test/matrix/batch_ell_kernels.cpp
@@ -0,0 +1,217 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include <ginkgo/core/matrix/batch_ell.hpp>
+
+
+#include <complex>
+#include <memory>
+#include <random>
+
+
+#include <gtest/gtest.h>
+
+
+#include <ginkgo/core/base/batch_multi_vector.hpp>
+#include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/math.hpp>
+#include <ginkgo/core/base/types.hpp>
+#include <ginkgo/core/matrix/dense.hpp>
+#include <ginkgo/core/matrix/ell.hpp>
+
+
+#include "core/matrix/batch_ell_kernels.hpp"
+#include "core/test/utils.hpp"
+
+
+template <typename T>
+class Ell : public ::testing::Test {
+protected:
+    using value_type = T;
+    using size_type = gko::size_type;
+    using BMtx = gko::batch::matrix::Ell<value_type>;
+    using BMVec = gko::batch::MultiVector<value_type>;
+    using EllMtx = gko::matrix::Ell<value_type>;
+    using DenseMtx = gko::matrix::Dense<value_type>;
+    Ell()
+        : exec(gko::ReferenceExecutor::create()),
+          mtx_0(gko::batch::initialize<BMtx>(
+              {{I<T>({1.0, -1.0, 1.5}), I<T>({-2.0, 2.0, 3.0})},
+               {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}},
+              exec)),
+          mtx_00(gko::initialize<EllMtx>(
+              {I<T>({1.0, -1.0, 1.5}), I<T>({-2.0, 2.0, 3.0})}, exec)),
+          mtx_01(gko::initialize<EllMtx>(
+              {I<T>({1.0, -2.0, -0.5}), I<T>({1.0, -2.5, 4.0})}, exec)),
+          b_0(gko::batch::initialize<BMVec>(
+              {{I<T>({1.0, 0.0, 1.0}), I<T>({2.0, 0.0, 1.0}),
+                I<T>({1.0, 0.0, 2.0})},
+               {I<T>({-1.0, 1.0, 1.0}), I<T>({1.0, -1.0, 1.0}),
+                I<T>({1.0, 0.0, 2.0})}},
+              exec)),
+          b_00(gko::initialize<DenseMtx>(
+              {I<T>({1.0, 0.0, 1.0}), I<T>({2.0, 0.0, 1.0}),
+               I<T>({1.0, 0.0, 2.0})},
+              exec)),
+          b_01(gko::initialize<DenseMtx>(
+              {I<T>({-1.0, 1.0, 1.0}), I<T>({1.0, -1.0, 1.0}),
+               I<T>({1.0, 0.0, 2.0})},
+              exec)),
+          x_0(gko::batch::initialize<BMVec>(
+              {{I<T>({2.0, 0.0, 1.0}), I<T>({2.0, 0.0, 2.0})},
+               {I<T>({-2.0, 1.0, 1.0}), I<T>({1.0, -1.0, -1.0})}},
+              exec)),
+          x_00(gko::initialize<DenseMtx>(
+              {I<T>({2.0, 0.0, 1.0}), I<T>({2.0, 0.0, 2.0})}, exec)),
+          x_01(gko::initialize<DenseMtx>(
+              {I<T>({-2.0, 1.0, 1.0}), I<T>({1.0, -1.0, -1.0})}, exec))
+    {}
+
+    std::shared_ptr<const gko::ReferenceExecutor> exec;
+    std::unique_ptr<BMtx> mtx_0;
+    std::unique_ptr<EllMtx> mtx_00;
+    std::unique_ptr<EllMtx> mtx_01;
+    std::unique_ptr<BMVec> b_0;
+    std::unique_ptr<DenseMtx> b_00;
+    std::unique_ptr<DenseMtx> b_01;
+    std::unique_ptr<BMVec> x_0;
+    std::unique_ptr<DenseMtx> x_00;
+    std::unique_ptr<DenseMtx> x_01;
+
+    std::ranlux48 rand_engine;
+};
+
+
+TYPED_TEST_SUITE(Ell, gko::test::ValueTypes);
+
+
+TYPED_TEST(Ell, AppliesToBatchMultiVector)
+{
+    using T = typename TestFixture::value_type;
+
+    this->mtx_0->apply(this->b_0.get(), this->x_0.get());
+
+    this->mtx_00->apply(this->b_00.get(), this->x_00.get());
+    this->mtx_01->apply(this->b_01.get(), this->x_01.get());
+    auto res =
+        gko::batch::unbatch<gko::batch::MultiVector<T>>(this->x_0.get());
+    GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r<T>::value);
+    GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r<T>::value);
+}
+
+
+TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector)
+{
+    using BMtx = typename TestFixture::BMtx;
+    using BMVec = typename TestFixture::BMVec;
+    using DenseMtx = typename TestFixture::DenseMtx;
+    using T = typename TestFixture::value_type;
+    auto alpha = gko::batch::initialize<BMVec>({{1.5}, {-1.0}}, this->exec);
+    auto beta = gko::batch::initialize<BMVec>({{2.5}, {-4.0}}, this->exec);
+    auto alpha0 = gko::initialize<DenseMtx>({1.5}, this->exec);
+    auto alpha1 = gko::initialize<DenseMtx>({-1.0}, this->exec);
+    auto beta0 = gko::initialize<DenseMtx>({2.5}, this->exec);
+    auto beta1 = gko::initialize<DenseMtx>({-4.0}, this->exec);
+
+    this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(),
+                       this->x_0.get());
+
+    this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(),
+                        this->x_00.get());
+    this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(),
+                        this->x_01.get());
+    auto res =
+        gko::batch::unbatch<gko::batch::MultiVector<T>>(this->x_0.get());
+    GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r<T>::value);
+    GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r<T>::value);
+}
+
+
+TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultCols)
+{
+    using BMVec = typename TestFixture::BMVec;
+    auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}});
+
+    ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()),
+                 gko::DimensionMismatch);
+}
+
+
+TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultRows)
+{
+    using BMVec = typename TestFixture::BMVec;
+    auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}});
+
+    ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()),
+                 gko::DimensionMismatch);
+}
+
+
+TYPED_TEST(Ell, ApplyFailsOnWrongInnerDimension)
+{
+    using BMVec = typename TestFixture::BMVec;
+    auto res =
+        BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}});
+
+    ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()),
+                 gko::DimensionMismatch);
+}
+
+
+TYPED_TEST(Ell, AdvancedApplyFailsOnWrongInnerDimension)
+{
+    using BMVec = typename TestFixture::BMVec;
+    auto res =
+        BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}});
+    auto alpha =
+        BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}});
+    auto beta =
+        BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}});
+
+    ASSERT_THROW(
+        this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()),
+        gko::DimensionMismatch);
+}
+
+
+TYPED_TEST(Ell, AdvancedApplyFailsOnWrongAlphaDimension)
+{
+    using BMVec = typename TestFixture::BMVec;
+    auto res =
+        BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}});
+    auto alpha =
+        BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}});
+    auto beta =
+        BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}});
+
+    ASSERT_THROW(
+        this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()),
+        gko::DimensionMismatch);
+}
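The positive tests above rely on gko::batch::unbatch to turn the batched result back into one gko::matrix::Dense per item, so the established GKO_ASSERT_MTX_NEAR machinery can compare against the unbatched reference apply. unbatch comes from core/base/batch_utilities.hpp and is not shown in this diff; conceptually it behaves like the following sketch (create_const_view_for_item and the clone-based deep copy are assumptions for illustration):

    #include <memory>
    #include <vector>

    #include <ginkgo/ginkgo.hpp>

    // Conceptual stand-in for gko::batch::unbatch.
    template <typename OutputType, typename BatchType>
    std::vector<std::unique_ptr<OutputType>> unbatch_sketch(
        const BatchType* batch)
    {
        std::vector<std::unique_ptr<OutputType>> result;
        for (gko::size_type i = 0; i < batch->get_num_batch_items(); ++i) {
            // Each item view aliases the batched storage; cloning detaches
            // it so the returned matrices own their data.
            result.push_back(batch->create_const_view_for_item(i)->clone());
        }
        return result;
    }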
diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt
index 9f3b17cd858..a03a0a0bb4e 100644
--- a/test/matrix/CMakeLists.txt
+++ b/test/matrix/CMakeLists.txt
@@ -1,4 +1,5 @@
 ginkgo_create_common_test(batch_dense_kernels)
+ginkgo_create_common_test(batch_ell_kernels)
 ginkgo_create_common_device_test(csr_kernels)
 ginkgo_create_common_test(csr_kernels2)
 ginkgo_create_common_test(coo_kernels)
diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp
new file mode 100644
index 00000000000..572f47ba47d
--- /dev/null
+++ b/test/matrix/batch_ell_kernels.cpp
@@ -0,0 +1,143 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include "core/matrix/batch_ell_kernels.hpp"
+
+
+#include <memory>
+#include <random>
+
+
+#include <gtest/gtest.h>
+
+
+#include <ginkgo/core/base/batch_multi_vector.hpp>
+#include <ginkgo/core/base/math.hpp>
+#include <ginkgo/core/base/types.hpp>
+#include <ginkgo/core/matrix/batch_ell.hpp>
+
+
+#include "core/base/batch_utilities.hpp"
+#include "core/test/utils.hpp"
+#include "core/test/utils/assertions.hpp"
+#include "core/test/utils/batch_helpers.hpp"
+#include "test/utils/executor.hpp"
+
+
+class Ell : public CommonTestFixture {
+protected:
+    using BMtx = gko::batch::matrix::Ell<value_type>;
+    using BMVec = gko::batch::MultiVector<value_type>;
+
+    Ell() : rand_engine(15) {}
+
+    template <typename MtxType>
+    std::unique_ptr<MtxType> gen_mtx(const gko::size_type num_batch_items,
+                                     gko::size_type num_rows,
+                                     gko::size_type num_cols,
+                                     int num_elems_per_row)
+    {
+        return gko::test::generate_random_batch_matrix<MtxType>(
+            num_batch_items, num_rows, num_cols,
+            std::uniform_int_distribution<>(num_elems_per_row,
+                                            num_elems_per_row),
+            std::normal_distribution<>(-1.0, 1.0), rand_engine, ref,
+            num_elems_per_row);
+    }
+
+    std::unique_ptr<BMVec> gen_mvec(const gko::size_type num_batch_items,
+                                    gko::size_type num_rows,
+                                    gko::size_type num_cols)
+    {
+        return gko::test::generate_random_batch_matrix<BMVec>(
+            num_batch_items, num_rows, num_cols,
+            std::uniform_int_distribution<>(num_cols, num_cols),
+            std::normal_distribution<>(-1.0, 1.0), rand_engine, ref);
+    }
+
+    void set_up_apply_data(gko::size_type num_vecs = 1,
+                           int num_elems_per_row = 5)
+    {
+        const int num_rows = 252;
+        const int num_cols = 32;
+        GKO_ASSERT(num_elems_per_row <= num_cols);
+        mat = gen_mtx<BMtx>(batch_size, num_rows, num_cols, num_elems_per_row);
+        y = gen_mvec(batch_size, num_cols, num_vecs);
+        alpha = gen_mvec(batch_size, 1, 1);
+        beta = gen_mvec(batch_size, 1, 1);
+        dmat = gko::clone(exec, mat);
+        dy = gko::clone(exec, y);
+        dalpha = gko::clone(exec, alpha);
+        dbeta = gko::clone(exec, beta);
+        expected = BMVec::create(
+            ref,
+            gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs}));
+        expected->fill(gko::one<value_type>());
+        dresult = gko::clone(exec, expected);
+    }
+
+    std::ranlux48 rand_engine;
+
+    const size_t batch_size = 11;
+    std::unique_ptr<BMtx> mat;
+    std::unique_ptr<BMVec> y;
+    std::unique_ptr<BMVec> alpha;
+    std::unique_ptr<BMVec> beta;
+    std::unique_ptr<BMVec> expected;
+    std::unique_ptr<BMVec> dresult;
+    std::unique_ptr<BMtx> dmat;
+    std::unique_ptr<BMVec> dy;
+    std::unique_ptr<BMVec> dalpha;
+    std::unique_ptr<BMVec> dbeta;
+};
+
+
+TEST_F(Ell, SingleVectorApplyIsEquivalentToRef)
+{
+    set_up_apply_data(1);
+
+    mat->apply(y.get(), expected.get());
+    dmat->apply(dy.get(), dresult.get());
+
+    GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r<value_type>::value);
+}
+
+
+TEST_F(Ell, SingleVectorAdvancedApplyIsEquivalentToRef)
+{
+    set_up_apply_data(1);
+
+    mat->apply(alpha.get(), y.get(), beta.get(), expected.get());
+    dmat->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get());
+
+    GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r<value_type>::value);
+}
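For orientation, the user-facing flow these tests exercise looks roughly as follows. The sketch mirrors the fixtures above; the executor choice, sizes, and values are illustrative only, and every batch item must share one sparsity pattern:

    #include <ginkgo/ginkgo.hpp>

    int main()
    {
        auto exec = gko::ReferenceExecutor::create();
        using BMtx = gko::batch::matrix::Ell<double>;
        using BMVec = gko::batch::MultiVector<double>;
        // Two independent 2x3 operators, batched into one object.
        auto mat = gko::batch::initialize<BMtx>(
            {{{1.0, -1.0, 1.5}, {-2.0, 2.0, 3.0}},
             {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}},
            exec);
        auto b = gko::batch::initialize<BMVec>(
            {{{1.0}, {0.0}, {1.0}}, {{-1.0}, {1.0}, {1.0}}}, exec);
        auto x = BMVec::create(exec,
                               gko::batch_dim<2>(2, gko::dim<2>{2, 1}));
        // x_i = A_i * b_i for every batch item i.
        mat->apply(b.get(), x.get());
    }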
diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp
index 7e53ea8f165..c00bb594ecd 100644
--- a/test/test_install/test_install.cpp
+++ b/test/test_install/test_install.cpp
@@ -219,13 +219,20 @@ int main()
         auto test = batch_multi_vector_type::create(exec);
     }
 
-    // core/base/batch_dense.hpp
+    // core/matrix/batch_dense.hpp
     {
         using type1 = float;
         using batch_dense_type = gko::batch::matrix::Dense<type1>;
         auto test = batch_dense_type::create(exec);
     }
 
+    // core/matrix/batch_ell.hpp
+    {
+        using type1 = float;
+        using batch_ell_type = gko::batch::matrix::Ell<type1>;
+        auto test = batch_ell_type::create(exec);
+    }
+
     // core/base/combination.hpp
     {
         using type1 = int;