This repository has been archived by the owner on Jan 13, 2025. It is now read-only.

HipSYCL to AdaptiveCpp update & fixes #493

Merged
Changes from 16 commits

Commits (31)
bbc9df1
Initial update of hipSYCL to adaptiveCpp
OuadiElfarouki Jan 3, 2024
fc0aa39
Updated Readme & fixed warnings
OuadiElfarouki Jan 4, 2024
8d8cc93
removed ComputeCpp as a fallback SYCL compiler & updated hipSYCL sele…
OuadiElfarouki Jan 4, 2024
65e2c9d
Added more doc about AdaptiveCpp comppilation & usage
OuadiElfarouki Jan 5, 2024
e4cc584
Disabled complex data & iamin/iamax tests when using AdaptiveCpp
OuadiElfarouki Jan 8, 2024
fd503a4
workarround iamin/iamax for correctness without tests skipping/throwi…
OuadiElfarouki Jan 9, 2024
d848cbe
disabled tests of unsupported ops when using hipSYCL & doc fixes
OuadiElfarouki Jan 12, 2024
d975ee9
Merge branch 'master' into adaptive_cpp_update
OuadiElfarouki Jan 15, 2024
41385e6
general fixes to 2020 reduction & shuffle operators with hipSYCL
OuadiElfarouki Jan 15, 2024
2dc1502
minor fix to hipsycl sdsdot enablement
OuadiElfarouki Jan 15, 2024
a0ce855
Fixed unittests handling with AdaptiveCpp
OuadiElfarouki Jan 15, 2024
3c37048
Enabled benchmark build with AdaptiveCpp when supported
OuadiElfarouki Jan 17, 2024
e607544
added txsv to skipped benchmarks
OuadiElfarouki Jan 18, 2024
5e36948
Added extended scal operation for matrices to be used in gemm with al…
OuadiElfarouki Jan 19, 2024
ae2cf14
extended gemm batched strided tests to cover previously failing tests
OuadiElfarouki Jan 19, 2024
ab4e141
minor return fix
OuadiElfarouki Jan 19, 2024
96550e7
Merge branch 'master' into adaptive_cpp_update
OuadiElfarouki Jan 25, 2024
aa1fdf1
Fixes & guards for adaptiveCpp usage with sb handler & mem pool feature
OuadiElfarouki Jan 25, 2024
9e06b7d
Disabled spr/spr2 tests and bench with AdaptiveCpp due to unsupported…
OuadiElfarouki Jan 29, 2024
c8d3044
Addressed some PR comments
OuadiElfarouki Jan 29, 2024
199717f
Addressed more PR reviews
OuadiElfarouki Jan 29, 2024
affe6e0
spr workarround for AdaptiveCpp to 'avoid' group broadcast instruction
OuadiElfarouki Jan 29, 2024
f119c9f
updated remaining macros
OuadiElfarouki Jan 29, 2024
009f710
Update cmake/Modules/SYCL.cmake
OuadiElfarouki Jan 30, 2024
27f8afa
Merge branch 'master' into adaptive_cpp_update
OuadiElfarouki Feb 12, 2024
a39a613
minor additions to readme
OuadiElfarouki Feb 12, 2024
f88ee98
Update README.md
OuadiElfarouki Feb 15, 2024
f42fcd0
Merge branch 'master' into adaptive_cpp_update
OuadiElfarouki Feb 27, 2024
8b4f189
Workarround to gemm interleaved with half type on Adaptivecpp
OuadiElfarouki Feb 27, 2024
241a34b
Merge branch 'master' into adaptive_cpp_update
OuadiElfarouki Feb 27, 2024
251e564
Merge branch 'master' into adaptive_cpp_update
OuadiElfarouki Feb 28, 2024
14 changes: 10 additions & 4 deletions CMakeLists.txt
@@ -108,6 +108,12 @@ endif()
option(BLAS_ENABLE_EXTENSIONS "Whether to enable portBLAS extensions" ON)
option(BLAS_ENABLE_COMPLEX "Whether to enable complex data type for supported operators" ON)

if (SYCL_COMPILER MATCHES "adaptivecpp" AND BLAS_ENABLE_COMPLEX)
message(STATUS "SYCL Complex data is not supported on AdaptiveCpp/hipSYCL. Complex
data type is disabled")
set(BLAS_ENABLE_COMPLEX OFF)
endif()

# CmakeFunctionHelper has to be included after any options that it depends on are declared.
# These include:
# * TARGET
@@ -136,17 +142,17 @@ else()
target_link_libraries(portblas PUBLIC ComputeCpp::ComputeCpp)
elseif(is_dpcpp)
target_link_libraries(portblas PUBLIC DPCPP::DPCPP)
elseif(is_hipsycl)
target_link_libraries(portblas PUBLIC hipSYCL::hipSYCL-rt)
elseif(is_adaptivecpp)
target_link_libraries(portblas PUBLIC AdaptiveCpp::acpp-rt)
endif()
endif()
if(is_computecpp)
set(sycl_impl ComputeCpp::ComputeCpp)
elseif(is_dpcpp)
set(sycl_impl DPCPP::DPCPP)
add_sycl_to_target(TARGET portblas SOURCES)
elseif(is_hipsycl)
set(sycl_impl hipSYCL::hipSYCL-rt)
elseif(is_adaptivecpp)
set(sycl_impl AdaptiveCpp::acpp-rt)
add_sycl_to_target(TARGET portblas SOURCES)
endif()
if(IMGDNN_DIR)
39 changes: 32 additions & 7 deletions README.md
@@ -31,7 +31,7 @@ the project.
- [Requirements](#requirements)
- [Setup](#setup)
- [Compile with DPC++](#compile-with-dpc)
- [Compile with hipSYCL](#compile-with-hipsycl)
- [Compile with AdaptiveCpp *(Formerly hipSYCL)*](#compile-with-adaptivecpp)
- [Installing portBLAS](#installing-portblas)
- [Doxygen](#doxygen)
- [CMake options](#cmake-options)
@@ -390,9 +390,9 @@ added to the `CMAKE_PREFIX_PATH` when building portBLAS (see

**IMPORTANT NOTE:** The `TARGET` CMake variable is no longer supported. It has
been replaced by `TUNING_TARGET`, which accepts the same options.
`TUNING_TARGET` affects only the tuning configuration, applicable for some operators such
as GEMM, and has no effect on the target triplet for DPC++ or the hipSYCL target. Please
refer to the sections below for setting them.
`TUNING_TARGET` affects only the tuning configuration and has no effect on the target
triplet for DPC++ or the AdaptiveCpp/hipSYCL target. Please refer to the sections
below for setting them.

1. Clone the portBLAS repository, making sure to pass the `--recursive` option, in order
to clone submodule(s).
@@ -417,13 +417,38 @@ advisable for NVIDIA and **mandatory for AMD** to provide the specific device
architecture through `-DDPCPP_SYCL_ARCH=<arch>`, e.g., `<arch>` can be `sm_80`
for NVIDIA or `gfx908` for AMD.

### Compile with hipSYCL
### Compile with AdaptiveCpp *(Formerly hipSYCL)*
The following instructions concern the **generic** *(clang-based)* flow supported
by AdaptiveCpp.

```bash
cd build
cmake -GNinja ../ -DhipSYCL_DIR=/path/to/hipSYCL/install/lib/cmake/hipSYCL -DSYCL_COMPILER=hipsycl
export CC=[path/to/system/clang]
export CXX=[path/to/AdaptiveCpp/install/bin/syclcc]
export HIPSYCL_TARGETS=[compilation_flow:target] # (e.g. cuda:sm_75)
cmake -GNinja ../ -DAdaptiveCpp_DIR=/path/to/AdaptiveCpp/install/lib/cmake/AdaptiveCpp \
-DSYCL_COMPILER=adaptivecpp -DHIPSYCL_TARGETS=$HIPSYCL_TARGETS
ninja
```
To build for other than the default devices (`omp`), set the `HIPSYCL_TARGETS` environment variable or specify `-DHIPSYCL_TARGETS` as [documented](https://github.com/illuhad/hipSYCL/blob/develop/doc/using-hipsycl.md).
To build for devices other than the default (`omp`), set the `HIPSYCL_TARGETS` environment
variable or specify `-DHIPSYCL_TARGETS` as
[documented](https://github.com/AdaptiveCpp/AdaptiveCpp/blob/develop/doc/using-hipsycl.md).
The available backends are limited to those that AdaptiveCpp itself was built with.

Similarly to DPC++'s `sycl-ls`, AdaptiveCpp's `acpp-info` displays information about the
available backends. When AdaptiveCpp is built against LLVM *(generic flow)*, the
`llvm-to-xxx.so` library files must be visible to the runtime so it can target the
appropriate device, which can be ensured by setting the following environment variable:

```bash
export LD_LIBRARY_PATH=[path/to/AdaptiveCpp/install/lib/hipSYCL]:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=[path/to/AdaptiveCpp/install/lib/hipSYCL/llvm-to-backend]:$LD_LIBRARY_PATH
```

*Note:*
Some operator kernels are implemented using extensions or SYCL 2020 features that are not yet
implemented in AdaptiveCpp, and are therefore not supported when portBLAS is built with it. These
operators include `asum`, `nrm2`, `dot`, `sdsdot`, `rot`, `trsv`, `tbsv` and `tpsv`.

### Installing portBLAS
To install the portBLAS library (see `CMAKE_INSTALL_PREFIX` below)
18 changes: 18 additions & 0 deletions benchmark/portblas/CMakeLists.txt
@@ -76,12 +76,30 @@ if(${BLAS_ENABLE_EXTENSIONS})
list(APPEND sources extension/reduction.cpp)
endif()

# Skip these benchmarks with AdaptiveCpp on SPIR-V/OpenCL targets: they either rely on
# SYCL 2020 features such as reductions or hang during execution
# (https://github.com/AdaptiveCpp/AdaptiveCpp/issues/1309)
set(ADAPTIVE_CPP_SKIP
blas1/asum.cpp
blas1/dot.cpp
blas1/sdsdot.cpp
blas1/nrm2.cpp
blas2/trsv.cpp
blas2/tbsv.cpp
blas2/tpsv.cpp
# Hang during execution (without failing)
blas3/trsm.cpp
)

# Operators supporting COMPLEX types benchmarking
set(CPLX_OPS "gemm" "gemm_batched" "gemm_batched_strided")

# Add individual benchmarks for each method
foreach(portblas_bench ${sources})
get_filename_component(bench_exec ${portblas_bench} NAME_WE)
if(is_adaptivecpp AND ${portblas_bench} IN_LIST ADAPTIVE_CPP_SKIP)
continue()
endif()
add_executable(bench_${bench_exec} ${portblas_bench} main.cpp)
target_link_libraries(bench_${bench_exec} PRIVATE benchmark Clara::Clara portblas bench_info)
target_compile_definitions(bench_${bench_exec} PRIVATE -DBLAS_INDEX_T=${BLAS_BENCHMARK_INDEX_TYPE})
41 changes: 25 additions & 16 deletions cmake/Modules/SYCL.cmake
@@ -25,36 +25,39 @@
include(CheckCXXCompilerFlag)
include(ConfigurePORTBLAS)

# find_package(hipSYCL) requires HIPSYCL_TARGETS to be set, so set it to a default value before find_package(hipSYCL)
if(SYCL_COMPILER MATCHES "hipsycl" AND NOT HIPSYCL_TARGETS AND NOT ENV{HIPSYCL_TARGETS})
# find_package(AdaptiveCpp) requires HIPSYCL_TARGETS to be set, so set it to a default value before find_package(AdaptiveCpp)
if(SYCL_COMPILER MATCHES "adaptivecpp" AND NOT HIPSYCL_TARGETS AND NOT ENV{HIPSYCL_TARGETS})
message(STATUS "Using `omp` as HIPSYCL_TARGETS")
set(HIPSYCL_TARGETS "omp")
else()
message(STATUS "Using ${HIPSYCL_TARGETS} as HIPSYCL_TARGETS")
endif()

check_cxx_compiler_flag("--acpp-targets" has_acpp)
check_cxx_compiler_flag("-fsycl" has_fsycl)

if(NOT SYCL_COMPILER)
if(has_fsycl)
if(has_acpp)
find_package(AdaptiveCpp QUIET)
set(is_adaptivecpp ${AdaptiveCpp_FOUND})
set(SYCL_COMPILER "adaptivecpp")
else()
set(is_dpcpp ON)
set(SYCL_COMPILER "dpcpp")
else()
find_package(hipSYCL QUIET)
set(is_hipsycl ${hipSYCL_FOUND})
set(SYCL_COMPILER "hipsycl")
if(NOT is_hipsycl)
set(is_computecpp ON)
set(SYCL_COMPILER "computecpp")
endif()
endif()
else()
if(SYCL_COMPILER MATCHES "dpcpp")
set(is_dpcpp ON)
if(NOT has_fsycl)
message(WARNING "Selected DPC++ as backend, but -fsycl not supported")
endif()
elseif(SYCL_COMPILER MATCHES "hipsycl")
find_package(hipSYCL REQUIRED CONFIG)
set(is_hipsycl ON)
elseif(SYCL_COMPILER MATCHES "adaptivecpp")
find_package(AdaptiveCpp CONFIG REQUIRED)
set(is_adaptivecpp ${AdaptiveCpp_FOUND})
if(NOT has_acpp)
message(WARNING "Selected AdaptiveCpp as backend, but the compiler is not
fully supported")
endif()
elseif(SYCL_COMPILER MATCHES "computecpp")
set(is_computecpp ON)
else()
@@ -88,8 +91,14 @@ elseif(is_dpcpp)
endif()
find_package(DPCPP REQUIRED)
get_target_property(SYCL_INCLUDE_DIRS DPCPP::DPCPP INTERFACE_INCLUDE_DIRECTORIES)
elseif(is_hipsycl)
elseif(is_adaptivecpp)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
get_target_property(SYCL_INCLUDE_DIRS hipSYCL::hipSYCL-rt INTERFACE_INCLUDE_DIRECTORIES)
get_target_property(SYCL_INCLUDE_DIRS AdaptiveCpp::acpp-rt INTERFACE_INCLUDE_DIRECTORIES)
set(HIP_BENCH_UNSUPPORTED_TARGETS "INTEL_GPU" "DEFAULT_CPU")
if(${BLAS_ENABLE_BENCHMARK} AND ${TUNING_TARGET} IN_LIST HIP_BENCH_UNSUPPORTED_TARGETS)
message(STATUS "Benchmarks are not supported when targetting OpenCL/LevelZero backend
devices. portBLAS Benchmarks are disabled.")
set(BLAS_ENABLE_BENCHMARK OFF)
endif()
endif()
40 changes: 32 additions & 8 deletions include/container/sycl_iterator.h
@@ -194,27 +194,51 @@ template <cl::sycl::access::mode acc_md_t>
inline typename BufferIterator<element_t>::template accessor_t<acc_md_t>
BufferIterator<element_t>::get_range_accessor(cl::sycl::handler& cgh,
size_t size) {
return typename BufferIterator<element_t>::template accessor_t<acc_md_t>(
buffer_, cgh, cl::sycl::range<1>(size),
cl::sycl::id<1>(BufferIterator<element_t>::get_offset()));
if constexpr (acc_md_t == cl::sycl::access::mode::read) {
return typename BufferIterator<element_t>::template accessor_t<acc_md_t>(
buffer_, cgh, cl::sycl::range<1>(size),
cl::sycl::id<1>(BufferIterator<element_t>::get_offset()));
} else {
// Request no_init to skip copying existing buffer data for non-read access modes
return typename BufferIterator<element_t>::template accessor_t<acc_md_t>(
buffer_, cgh, cl::sycl::range<1>(size),
cl::sycl::id<1>(BufferIterator<element_t>::get_offset()),
cl::sycl::property::no_init{});
}
}

template <typename element_t>
template <cl::sycl::access::mode acc_md_t>
inline typename BufferIterator<element_t>::template accessor_t<acc_md_t>
BufferIterator<element_t>::get_range_accessor(cl::sycl::handler& cgh) {
return BufferIterator<element_t>::get_range_accessor<acc_md_t>(
cgh, BufferIterator<element_t>::get_size());
// The two-argument overload above already requests no_init for non-read
// access modes, so simply delegate to it with the full buffer size.
return BufferIterator<element_t>::get_range_accessor<acc_md_t>(
cgh, BufferIterator<element_t>::get_size());
}

template <typename element_t>
template <cl::sycl::access::mode acc_md_t>
inline typename BufferIterator<element_t>::template placeholder_accessor_t<
acc_md_t>
BufferIterator<element_t>::get_range_accessor(size_t size) {
return typename BufferIterator<element_t>::template placeholder_accessor_t<
acc_md_t>(buffer_, cl::sycl::range<1>(size),
cl::sycl::id<1>(BufferIterator<element_t>::get_offset()));
if constexpr (acc_md_t == cl::sycl::access::mode::read) {
return typename BufferIterator<element_t>::template placeholder_accessor_t<
acc_md_t>(buffer_, cl::sycl::range<1>(size),
cl::sycl::id<1>(BufferIterator<element_t>::get_offset()));

} else {
// Request no_init to skip copying existing buffer data for non-read access modes
return typename BufferIterator<element_t>::template placeholder_accessor_t<
acc_md_t>(buffer_, cl::sycl::range<1>(size),
cl::sycl::id<1>(BufferIterator<element_t>::get_offset()),
cl::sycl::property::no_init{});
}
}

template <typename element_t>
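The branches above rely on SYCL 2020's `no_init` accessor property: when the accessor is not
read-only, the runtime is allowed to skip copying the existing buffer contents to the device
before the kernel runs. Below is a minimal standalone sketch of the same idea with a plain
SYCL 2020 accessor; it is not part of this change set, and the queue, buffer and kernel are
assumptions for illustration only.

```cpp
#include <sycl/sycl.hpp>

int main() {
  sycl::queue q;
  sycl::buffer<float, 1> buf{sycl::range<1>{1024}};

  q.submit([&](sycl::handler& cgh) {
    // no_init: the previous contents of buf are not needed, so the runtime
    // may skip transferring them to the device before running the kernel.
    sycl::accessor acc{buf, cgh, sycl::write_only, sycl::no_init};
    cgh.parallel_for(sycl::range<1>{1024}, [=](sycl::id<1> i) { acc[i] = 0.f; });
  });
  q.wait();
  return 0;
}
```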
33 changes: 32 additions & 1 deletion include/interface/blas1_interface.h
@@ -196,7 +196,7 @@ typename sb_handle_t::event_t _swap(
const typename sb_handle_t::event_t &_dependencies);

/**
* \brief SCALAR operation on a vector
* \brief SCALAR operation on a vector
* @param sb_handle_t sb_handle
* @param _vx BufferIterator or USM pointer
* @param _incx Increment for the vector X
@@ -208,6 +208,37 @@ typename sb_handle_t::event_t _scal(
sb_handle_t &sb_handle, index_t _N, element_t _alpha, container_0_t _vx,
increment_t _incx, const typename sb_handle_t::event_t &_dependencies);

/**
* \brief SCALAR operation on a matrix. (This is a generalization of the
* vector-based _scal operator, meant for internal use within the library,
* namely by the GEMM and in-place Matcopy operators.)
* @param sb_handle_t sb_handle
* @param _A Input/Output BufferIterator or USM pointer
* @param _incA Increment for the matrix A
* @param _lda Leading dimension of the matrix A
* @param _M Number of rows
* @param _N Number of columns
* @param _alpha Scaling factor
* @param _dependencies Vector of events
*/
template <typename sb_handle_t, typename element_t, typename container_0_t,
typename index_t, typename increment_t>
typename sb_handle_t::event_t _scal_matrix(
sb_handle_t &sb_handle, index_t _M, index_t _N, element_t _alpha,
container_0_t _A, index_t _lda, increment_t _incA,
const typename sb_handle_t::event_t &_dependencies);

/*!
* \brief Prototype for the internal implementation of the _scal_matrix
* operator.
*/
template <bool has_inc, typename sb_handle_t, typename element_t,
typename container_0_t, typename index_t, typename increment_t>
typename sb_handle_t::event_t _scal_matrix_impl(
sb_handle_t &sb_handle, index_t _M, index_t _N, element_t _alpha,
container_0_t _A, index_t _lda, increment_t _incA,
const typename sb_handle_t::event_t &_dependencies);
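For illustration, below is a hedged usage sketch of the `_scal_matrix` entry point declared
above. The `blas::` namespace qualification, the header name, the `SB_Handle` construction and
the USM allocation mirror typical portBLAS usage but are assumptions of the sketch, not part of
this header.

```cpp
#include <sycl/sycl.hpp>
#include "portblas.hpp"  // main portBLAS header (name assumed for this sketch)

// Scales an M x N column-major matrix in place, A := alpha * A.
void scale_matrix_example(sycl::queue& q) {
  constexpr int M = 256, N = 128;
  const int lda = M;         // leading dimension (column-major)
  const float alpha = 0.5f;

  blas::SB_Handle sb_handle(q);
  float* A = sycl::malloc_device<float>(static_cast<size_t>(lda) * N, q);
  // ... fill A on the device ...

  auto ev = blas::_scal_matrix(sb_handle, M, N, alpha, A, lda,
                               /*_incA=*/1, {} /* no dependency events */);
  q.wait();  // wait on the queue rather than assuming an SB_Handle wait API
  sycl::free(A, q);
}
```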

/**
* \brief NRM2 Returns the euclidian norm of a vector
* @param sb_handle SB_Handle
6 changes: 4 additions & 2 deletions include/operations/blas_constants.h
@@ -265,16 +265,18 @@ struct constant_pair {

} // namespace blas

#ifndef __HIPSYCL__
template <typename ind_t, typename val_t>
struct sycl::is_device_copyable<blas::IndexValueTuple<ind_t, val_t>>
struct cl::sycl::is_device_copyable<blas::IndexValueTuple<ind_t, val_t>>
: std::true_type {};

template <typename ind_t, typename val_t>
struct sycl::is_device_copyable<const blas::IndexValueTuple<ind_t, val_t>>
struct cl::sycl::is_device_copyable<const blas::IndexValueTuple<ind_t, val_t>>
: std::true_type {};

template <typename ind_t, typename val_t>
struct std::is_trivially_copyable<blas::IndexValueTuple<ind_t, val_t>>
: std::true_type {};
#endif

#endif // BLAS_CONSTANTS_H
2 changes: 1 addition & 1 deletion samples/CMakeLists.txt
@@ -18,7 +18,7 @@ foreach(src_file ${SAMPLES_LIST})
get_filename_component(sample_exec ${src_file} NAME_WE)
set(sample_exec "sample_${sample_exec}")
add_executable(${sample_exec} ${src_file})
if(is_hipsycl OR is_dpcpp)
if(is_adaptivecpp OR is_dpcpp)
set_target_properties(${sample_exec} PROPERTIES CXX_STANDARD 17)
else()
set_target_properties(${sample_exec} PROPERTIES CXX_STANDARD 14)
31 changes: 31 additions & 0 deletions src/interface/blas1/backend/default_cpu.hpp
@@ -49,6 +49,7 @@ template <typename sb_handle_t, typename container_0_t, typename container_1_t,
typename sb_handle_t::event_t _iamax(
sb_handle_t& sb_handle, index_t _N, container_0_t _vx, increment_t _incx,
container_1_t _rs, const typename sb_handle_t::event_t& _dependencies) {
#ifndef __HIPSYCL__
constexpr int localSize = 128;
if (_N < 8192) {
return blas::internal::_iamax_iamin_impl<localSize, 0, true, true>(
@@ -59,6 +60,20 @@ typename sb_handle_t::event_t _iamax(
return blas::internal::_iamax_iamin_impl<localSize, 0, true, false>(
sb_handle, _N, _vx, _incx, _rs, nWG, _dependencies);
}
#else
// Temporary work-around to avoid non-local memory implementation of
// iamin/iamax with AdaptiveCpp.
constexpr int localSize = 128;
if (_N < 8192) {
return blas::internal::_iamax_iamin_impl<localSize, localSize, true, true>(
sb_handle, _N, _vx, _incx, _rs, static_cast<index_t>(1), _dependencies);
} else {
const index_t nWG = std::min((_N + localSize - 1) / (localSize * 4),
static_cast<index_t>(512));
return blas::internal::_iamax_iamin_impl<localSize, localSize, true, false>(
sb_handle, _N, _vx, _incx, _rs, nWG, _dependencies);
}
#endif
}
} // namespace backend
} // namespace iamax
@@ -70,6 +85,7 @@ template <typename sb_handle_t, typename container_0_t, typename container_1_t,
typename sb_handle_t::event_t _iamin(
sb_handle_t& sb_handle, index_t _N, container_0_t _vx, increment_t _incx,
container_1_t _rs, const typename sb_handle_t::event_t& _dependencies) {
#ifndef __HIPSYCL__
constexpr int localSize = 128;
if (_N < 8192) {
return blas::internal::_iamax_iamin_impl<localSize, 0, false, true>(
@@ -80,6 +96,21 @@ typename sb_handle_t::event_t _iamin(
return blas::internal::_iamax_iamin_impl<localSize, 0, false, false>(
sb_handle, _N, _vx, _incx, _rs, nWG, _dependencies);
}
#else
// Temporary work-around to avoid non-local memory implementation of
// iamin/iamax with AdaptiveCpp.
constexpr int localSize = 128;
if (_N < 8192) {
return blas::internal::_iamax_iamin_impl<localSize, localSize, false, true>(
sb_handle, _N, _vx, _incx, _rs, static_cast<index_t>(1), _dependencies);
} else {
const index_t nWG = std::min((_N + localSize - 1) / (localSize * 4),
static_cast<index_t>(512));
return blas::internal::_iamax_iamin_impl<localSize, localSize, false,
false>(sb_handle, _N, _vx, _incx,
_rs, nWG, _dependencies);
}
#endif
}
} // namespace backend
} // namespace iamin