diff --git a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc new file mode 100644 index 00000000000..24cd24d1bf7 --- /dev/null +++ b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc @@ -0,0 +1,117 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +template +void scale(std::shared_ptr exec, + const BatchMultiVector* const alpha, + BatchMultiVector* const x) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + scale_kernel<<get_stream()>>>( + alpha_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); + + +template +void add_scaled(std::shared_ptr exec, + const BatchMultiVector* const alpha, + const BatchMultiVector* const x, + BatchMultiVector* const y) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const size_type nrhs = x->get_common_size()[1]; + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + add_scaled_kernel<<get_stream()>>>(alpha_ub, x_ub, y_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); + + +template +void compute_dot(std::shared_ptr exec, + const BatchMultiVector* x, + const BatchMultiVector* y, + BatchMultiVector* result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_common_size()[1]; + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + compute_dot_product_kernel<<get_stream()>>>(x_ub, y_ub, res_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); + + +template +void compute_norm2(std::shared_ptr exec, + const BatchMultiVector* const x, + BatchMultiVector>* const result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_common_size()[1]; + const auto x_ub = get_batch_struct(x); + const auto res_ub = get_batch_struct(result); + compute_norm2_kernel<<get_stream()>>>(x_ub, res_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); + + +template +void copy(std::shared_ptr exec, + const BatchMultiVector* x, + BatchMultiVector* result) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto result_ub = get_batch_struct(result); + const auto x_ub = get_batch_struct(x); + copy_kernel<<get_stream()>>>( + x_ub, result_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 8bfb6fc0167..e7c57111463 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -62,90 +62,10 @@ namespace batch_multi_vector { constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; - +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" - -template -void scale(std::shared_ptr exec, - const BatchMultiVector* const alpha, - BatchMultiVector* const x) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - scale_kernel<<>>(alpha_ub, x_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); - - -template -void add_scaled(std::shared_ptr exec, - const BatchMultiVector* const alpha, - const BatchMultiVector* const x, - BatchMultiVector* const y) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const size_type nrhs = x->get_common_size()[1]; - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - add_scaled_kernel<<>>(alpha_ub, x_ub, y_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); - - -template -void compute_dot(std::shared_ptr exec, - const BatchMultiVector* x, - const BatchMultiVector* y, - BatchMultiVector* result) -{ - const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_common_size()[1]; - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - const auto res_ub = get_batch_struct(result); - compute_dot_product_kernel<<>>(x_ub, y_ub, - res_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); - - -template -void compute_norm2(std::shared_ptr exec, - const BatchMultiVector* const x, - BatchMultiVector>* const result) -{ - const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_common_size()[1]; - const auto x_ub = get_batch_struct(x); - const auto res_ub = get_batch_struct(result); - compute_norm2_kernel<<>>(x_ub, res_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); - - -template -void copy(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector* result) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto result_ub = get_batch_struct(result); - const auto x_ub = get_batch_struct(x); - copy_kernel<<>>(x_ub, result_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); +#include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc" } // namespace batch_multi_vector diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index 50f8593ffec..a8f0f8a7cd6 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -66,93 +66,10 @@ constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" - -template -void scale(std::shared_ptr exec, - const BatchMultiVector* const alpha, - BatchMultiVector* const x) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - hipLaunchKernelGGL(scale_kernel, dim3(num_blocks), dim3(default_block_size), - 0, 0, alpha_ub, x_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); - - -template -void add_scaled(std::shared_ptr exec, - const BatchMultiVector* const alpha, - const BatchMultiVector* const x, - BatchMultiVector* const y) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const size_type nrhs = x->get_common_size()[1]; - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - hipLaunchKernelGGL(add_scaled_kernel, dim3(num_blocks), - dim3(default_block_size), 0, 0, alpha_ub, x_ub, y_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); - - -template -void compute_dot(std::shared_ptr exec, - const BatchMultiVector* x, - const BatchMultiVector* y, - BatchMultiVector* result) -{ - const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_common_size()[1]; - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - const auto res_ub = get_batch_struct(result); - hipLaunchKernelGGL(compute_dot_product_kernel, dim3(num_blocks), - dim3(default_block_size), 0, 0, x_ub, y_ub, res_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); - - -template -void compute_norm2(std::shared_ptr exec, - const BatchMultiVector* const x, - BatchMultiVector>* const result) -{ - const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_common_size()[1]; - const auto x_ub = get_batch_struct(x); - const auto res_ub = get_batch_struct(result); - hipLaunchKernelGGL(compute_norm2_kernel, dim3(num_blocks), - dim3(default_block_size), 0, 0, x_ub, res_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); - - -template -void copy(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector* result) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto result_ub = get_batch_struct(result); - const auto x_ub = get_batch_struct(x); - hipLaunchKernelGGL(copy_kernel, dim3(num_blocks), dim3(default_block_size), - 0, 0, x_ub, result_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); +#include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc" } // namespace batch_multi_vector