diff --git a/CMakeLists.txt b/CMakeLists.txt index 822b1098..3aa36a25 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,14 +17,14 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # * -# * Codeplay's SYCL-FFT +# * Codeplay's portFFT # * # * @filename CMakeLists.txt # * # **************************************************************************/ cmake_minimum_required(VERSION 3.16...3.26) -project(sycl-fft VERSION 0.1.0 LANGUAGES CXX) +project(portFFT VERSION 0.1.0 LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -56,30 +56,30 @@ set(SYCLFFT_SRC_DIR $ $) -add_library(sycl_fft INTERFACE) -target_include_directories(sycl_fft INTERFACE +add_library(portfft INTERFACE) +target_include_directories(portfft INTERFACE ${SYCLFFT_INCLUDE_DIR} ${SYCLFFT_SRC_DIR} ) -target_compile_definitions(sycl_fft INTERFACE SYCLFFT_TARGET_REGS_PER_WI=${SYCLFFT_TARGET_REGS_PER_WI}) -target_compile_definitions(sycl_fft INTERFACE SYCLFFT_SUBGROUP_SIZES=${SYCLFFT_SUBGROUP_SIZES}) -target_compile_definitions(sycl_fft INTERFACE SYCLFFT_TARGET_WI_LOAD=${SYCLFFT_TARGET_WI_LOAD}) -target_compile_definitions(sycl_fft INTERFACE SYCLFFT_SGS_IN_WG=${SYCLFFT_SGS_IN_WG}) +target_compile_definitions(portfft INTERFACE PORTFFT_TARGET_REGS_PER_WI=${SYCLFFT_TARGET_REGS_PER_WI}) +target_compile_definitions(portfft INTERFACE PORTFFT_SUBGROUP_SIZES=${SYCLFFT_SUBGROUP_SIZES}) +target_compile_definitions(portfft INTERFACE PORTFFT_TARGET_WI_LOAD=${SYCLFFT_TARGET_WI_LOAD}) +target_compile_definitions(portfft INTERFACE PORTFFT_SGS_IN_WG=${SYCLFFT_SGS_IN_WG}) if(${SYCLFFT_USE_SG_TRANSFERS}) - target_compile_definitions(sycl_fft INTERFACE SYCLFFT_USE_SG_TRANSFERS) + target_compile_definitions(portfft INTERFACE PORTFFT_USE_SG_TRANSFERS) endif() if(${SYCLFFT_SLOW_SG_SHUFFLES}) - target_compile_definitions(sycl_fft INTERFACE SYCLFFT_SLOW_SG_SHUFFLES=1) + target_compile_definitions(portfft INTERFACE PORTFFT_SLOW_SG_SHUFFLES=1) else() - target_compile_definitions(sycl_fft INTERFACE SYCLFFT_SLOW_SG_SHUFFLES=0) + target_compile_definitions(portfft INTERFACE PORTFFT_SLOW_SG_SHUFFLES=0) endif() -target_compile_options(sycl_fft INTERFACE -fgpu-inline-threshold=1000000) -target_link_options(sycl_fft INTERFACE -fsycl-device-code-split=per_kernel) -target_compile_options(sycl_fft INTERFACE -fsycl-device-code-split=per_kernel) +target_compile_options(portfft INTERFACE -fgpu-inline-threshold=1000000) +target_link_options(portfft INTERFACE -fsycl-device-code-split=per_kernel) +target_compile_options(portfft INTERFACE -fsycl-device-code-split=per_kernel) include(CMakePackageConfigHelpers) -set(version_file "${CMAKE_CURRENT_BINARY_DIR}/cmake/sycl_fft-version.cmake") +set(version_file "${CMAKE_CURRENT_BINARY_DIR}/cmake/portfft-version.cmake") write_basic_package_version_file(${version_file} VERSION ${PROJECT_VERSION} COMPATIBILITY AnyNewerVersion @@ -91,11 +91,11 @@ if(SYCLFFT_COOLEY_TUKEY_OPTIMIZED_SIZES MATCHES "^[0-9]+(,[0-9]+)*$") else() message(FATAL_ERROR " Invalid SYCLFFT_COOLEY_TUKEY_OPTIMIZED_SIZES value: " ${SYCLFFT_COOLEY_TUKEY_OPTIMIZED_SIZES}) endif() -target_compile_definitions(sycl_fft INTERFACE SYCLFFT_COOLEY_TUKEY_OPTIMIZED_SIZES=${SYCLFFT_COOLEY_TUKEY_OPTIMIZED_SIZES}) +target_compile_definitions(portfft INTERFACE PORTFFT_COOLEY_TUKEY_OPTIMIZED_SIZES=${SYCLFFT_COOLEY_TUKEY_OPTIMIZED_SIZES}) include(GNUInstallDirs) -install(TARGETS sycl_fft - EXPORT sycl_fft +install(TARGETS portfft + EXPORT portfft RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} @@ -106,24 +106,24 @@ install( ${SYCLFFT_INCLUDE_DIR} ${SYCLFFT_SRC_DIR} DESTINATION ${CMAKE_INSTALL_PREFIX} - COMPONENT sycl_fft + COMPONENT portfft FILES_MATCHING PATTERN "*.hpp" ) install(FILES ${version_file} DESTINATION ${CMAKE_INSTALL_PREFIX}) -install(EXPORT sycl_fft +install(EXPORT portfft DESTINATION ${CMAKE_INSTALL_PREFIX} - NAMESPACE sycl_fft:: - FILE sycl_fft-config.cmake + NAMESPACE portfft:: + FILE portfft-config.cmake ) -export(EXPORT sycl_fft - NAMESPACE sycl_fft:: - FILE sycl_fft-config.cmake +export(EXPORT portfft + NAMESPACE portfft:: + FILE portfft-config.cmake ) -add_library(sycl_fft_warnings INTERFACE) -target_compile_options(sycl_fft_warnings INTERFACE -Wall -Wextra -Wshadow -Wconversion -Wpedantic) +add_library(portfft_warnings INTERFACE) +target_compile_options(portfft_warnings INTERFACE -Wall -Wextra -Wshadow -Wconversion -Wpedantic) if(${SYCLFFT_BUILD_TESTS}) enable_testing() diff --git a/README.md b/README.md index bc843349..f71b9ecf 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -# SYCL-FFT +# portFFT ## Introduction -SYCL-FFT is a library implementing Fast Fourier Transforms using SYCL and C++. -SYCL-FFT is in early stages of development and will support more options and optimizations in the future. +portFFT is a library implementing Fast Fourier Transforms using SYCL and C++. +portFFT is in early stages of development and will support more options and optimizations in the future. ## Pre-requisites @@ -21,7 +21,7 @@ SYCL-FFT is in early stages of development and will support more options and opt ### Building with CMake -Clone SYCL-FFT and run the following commands from the cloned repository. +Clone portFFT and run the following commands from the cloned repository. Build using DPC++ 2023.1.0 as: @@ -48,7 +48,7 @@ The list of available targets can be found on [DPC++ compiler documentation page Some AOT targets do not support double precision. To disable the building of tests and benchmarks using double precision, set `-DSYCLFFT_ENABLE_DOUBLE_BUILDS=OFF`. -SYCL-FFT currently requires to set the subgroup size at compile time. Multiple sizes can be set and the first one that is supported by the device will be used. Depending on the device used you may need to set the subgroup size with `-DSYCLFFT_SUBGROUP_SIZES=`. By default only size 32 is used. +portFFT currently requires to set the subgroup size at compile time. Multiple sizes can be set and the first one that is supported by the device will be used. Depending on the device used you may need to set the subgroup size with `-DSYCLFFT_SUBGROUP_SIZES=`. By default only size 32 is used. If you run into the exception with the message `None of the compiled subgroup sizes are supported by the device!` then `DSYCLFFT_SUBGROUP_SIZES` must be set to a different value(s) supported by the device. ### Tests @@ -59,7 +59,7 @@ Run the tests from the build folder with: ctest ``` -### SYCL-FFT benchmarks +### portFFT benchmarks Run pre-defined benchmarks from the build folder with: @@ -77,7 +77,7 @@ Use the `--help` flag to print help message on the configuration syntax. ## Supported configurations -SYCL-FFT is still in early development. The supported configurations are: +portFFT is still in early development. The supported configurations are: * complex to complex transforms * single and double precisions @@ -103,7 +103,7 @@ By default the library assumes subgroup size of 32 is used. If that is not suppo ## Known issues -* Specialization constants are currently emulated on Nvidia and AMD backends. SYCL-FFT relies on this feature on Nvidia devices in particular so the performance is not optimal on these devices. +* Specialization constants are currently emulated on Nvidia and AMD backends. portFFT relies on this feature on Nvidia devices in particular so the performance is not optimal on these devices. We are investigating other performance issues that affect all the backends. diff --git a/cmake/Modules/FindDPCPP.cmake b/cmake/Modules/FindDPCPP.cmake index b721fdda..535437f2 100644 --- a/cmake/Modules/FindDPCPP.cmake +++ b/cmake/Modules/FindDPCPP.cmake @@ -17,7 +17,7 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # * -# * Codeplay's SYCL-FFT +# * Codeplay's portFFT # * # * @filename CMakeLists.txt # * diff --git a/cmake/Modules/FindSYCL.cmake b/cmake/Modules/FindSYCL.cmake index 9b248413..9a5253fe 100644 --- a/cmake/Modules/FindSYCL.cmake +++ b/cmake/Modules/FindSYCL.cmake @@ -17,7 +17,7 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # * -# * Codeplay's SYCL-FFT +# * Codeplay's portFFT # * # * @filename CMakeLists.txt # * diff --git a/hooks/clang-format-all.sh b/hooks/clang-format-all.sh index 90844301..c0497f32 100755 --- a/hooks/clang-format-all.sh +++ b/hooks/clang-format-all.sh @@ -1,9 +1,9 @@ #!/bin/bash # # (C) Codeplay Software Ltd -# SYCL-FFT source code formatting script +# portFFT source code formatting script # -# NB: SYCL-FFT uses clang-format 11. +# NB: portFFT uses clang-format 11. # set -euo pipefail diff --git a/hooks/is-clang-formatted.sh b/hooks/is-clang-formatted.sh index ad10a0d0..7879e6df 100755 --- a/hooks/is-clang-formatted.sh +++ b/hooks/is-clang-formatted.sh @@ -1,7 +1,7 @@ #!/bin/bash # # (C) Codeplay Software Ltd -# SYCL-FFT source code formatting check script +# portFFT source code formatting check script set -euo pipefail IFS=$'\n\t' diff --git a/include/sycl_fft.hpp b/include/portfft.hpp similarity index 97% rename from include/sycl_fft.hpp rename to include/portfft.hpp index 4ee9ed06..1af9301d 100644 --- a/include/sycl_fft.hpp +++ b/include/portfft.hpp @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ diff --git a/scripts/benchmark_reference_specification.py b/scripts/benchmark_reference_specification.py index d21af94e..a8c72d2e 100644 --- a/scripts/benchmark_reference_specification.py +++ b/scripts/benchmark_reference_specification.py @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * ************************************************************************""" from pathlib import Path diff --git a/scripts/generate_precomputed_fft_file.py b/scripts/generate_precomputed_fft_file.py index f9a3ade1..593eaef0 100644 --- a/scripts/generate_precomputed_fft_file.py +++ b/scripts/generate_precomputed_fft_file.py @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * ************************************************************************""" import numpy as np diff --git a/scripts/generate_reference_data.py b/scripts/generate_reference_data.py index 9540635e..58d6cfd1 100644 --- a/scripts/generate_reference_data.py +++ b/scripts/generate_reference_data.py @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * * A tool to generate FFT reference data and headers detailing this data. * @@ -29,7 +29,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--build_path", - help="The path of the build directory for SYCL-FFT", + help="The path of the build directory for portFFT", required=True) parser.add_argument("--verbose", help="Verbose", action='store_true') parser.add_argument("--data", diff --git a/scripts/generate_twiddles.py b/scripts/generate_twiddles.py index 8b46a152..08daefd8 100644 --- a/scripts/generate_twiddles.py +++ b/scripts/generate_twiddles.py @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * ************************************************************************""" import math @@ -28,14 +28,14 @@ * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_TWIDDLE_HPP -#define SYCL_FFT_COMMON_TWIDDLE_HPP +#ifndef PORTFFT_COMMON_TWIDDLE_HPP +#define PORTFFT_COMMON_TWIDDLE_HPP #pragma clang diagnostic push // The twiddle precision can be lower than the constants used here when not using double precision. #pragma clang diagnostic ignored "-Wimplicit-float-conversion" -namespace sycl_fft::detail {{ +namespace portfft::detail {{ template struct twiddle {{ @@ -50,7 +50,7 @@ static constexpr T im[{size}][{size}] = {{ {imag_forward} }}; // clang-format on }}; -}} // namespace sycl_fft::detail +}} // namespace portfft::detail #pragma clang diagnostic pop diff --git a/scripts/generate_verification_data_integration_header.py b/scripts/generate_verification_data_integration_header.py index bf00d3ab..0d23c11b 100644 --- a/scripts/generate_verification_data_integration_header.py +++ b/scripts/generate_verification_data_integration_header.py @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * ************************************************************************""" from pathlib import Path @@ -26,8 +26,8 @@ * **************************************************************************/ -#ifndef SYCL_FFT_{include_guard_str}_HPP -#define SYCL_FFT_{include_guard_str}_HPP +#ifndef PORTFFT_{include_guard_str}_HPP +#define PORTFFT_{include_guard_str}_HPP #include "reference_data_wrangler.hpp" const std::vector verification_data({{ @@ -69,8 +69,8 @@ def generate_header_string(config_dicts, output_file_path, header_guard_name): "[", "").replace("]", "") batch_string = str(d["batch"]) file_string = str(d["file_path"]) - domain_bool = "sycl_fft::domain::COMPLEX" if d[ - "transform_type"] == "COMPLEX" else "sycl_fft::domain::REAL" + domain_bool = "portfft::domain::COMPLEX" if d[ + "transform_type"] == "COMPLEX" else "portfft::domain::REAL" f.write( template_per_item.format(dft_sizes_str=dftSizesStr, batch_str=batch_string, diff --git a/scripts/test_reference_specification.py b/scripts/test_reference_specification.py index 11c409f0..d8b8508f 100644 --- a/scripts/test_reference_specification.py +++ b/scripts/test_reference_specification.py @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * ************************************************************************""" from pathlib import Path diff --git a/scripts/verification_data_config.py b/scripts/verification_data_config.py index 59288220..40304022 100644 --- a/scripts/verification_data_config.py +++ b/scripts/verification_data_config.py @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * ************************************************************************""" from pathlib import Path diff --git a/src/common/cooley_tukey_compiled_sizes.hpp b/src/common/cooley_tukey_compiled_sizes.hpp index e290268c..f0832c92 100644 --- a/src/common/cooley_tukey_compiled_sizes.hpp +++ b/src/common/cooley_tukey_compiled_sizes.hpp @@ -14,15 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COOLEY_TUKEY_COMPILED_SIZES_HPP -#define SYCL_FFT_COOLEY_TUKEY_COMPILED_SIZES_HPP +#ifndef PORTFFT_COOLEY_TUKEY_COMPILED_SIZES_HPP +#define PORTFFT_COOLEY_TUKEY_COMPILED_SIZES_HPP #include -namespace sycl_fft::detail { +namespace portfft::detail { /** A list of supported FFT sizes. * @tparam Sizes The supported FFT sizes. @@ -52,8 +52,8 @@ struct size_list { } }; -using cooley_tukey_size_list_t = size_list; +using cooley_tukey_size_list_t = size_list; -} // namespace sycl_fft::detail +} // namespace portfft::detail -#endif // SYCL_FFT_COOLEY_TUKEY_COMPILED_SIZES_HPP +#endif // PORTFFT_COOLEY_TUKEY_COMPILED_SIZES_HPP diff --git a/src/common/helpers.hpp b/src/common/helpers.hpp index c79c967f..67530f48 100644 --- a/src/common/helpers.hpp +++ b/src/common/helpers.hpp @@ -14,17 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_HELPERS_HPP -#define SYCL_FFT_COMMON_HELPERS_HPP +#ifndef PORTFFT_COMMON_HELPERS_HPP +#define PORTFFT_COMMON_HELPERS_HPP #include #include -namespace sycl_fft::detail { +namespace portfft::detail { /** * Implements a loop that will be fully unrolled. @@ -108,6 +108,6 @@ auto get_access(const sycl::buffer& buf, sycl::handler& cgh) { return buf.template reinterpret(2 * buf.size()).template get_access(cgh); } -}; // namespace sycl_fft::detail +}; // namespace portfft::detail #endif diff --git a/src/common/subgroup.hpp b/src/common/subgroup.hpp index cc629333..fe578f47 100644 --- a/src/common/subgroup.hpp +++ b/src/common/subgroup.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_SUBGROUP_HPP -#define SYCL_FFT_COMMON_SUBGROUP_HPP +#ifndef PORTFFT_COMMON_SUBGROUP_HPP +#define PORTFFT_COMMON_SUBGROUP_HPP #include #include @@ -28,7 +28,7 @@ #include #include -namespace sycl_fft { +namespace portfft { namespace detail { /* @@ -221,7 +221,7 @@ __attribute__((always_inline)) inline void cross_sg_dft(T& real, T& imag, sycl:: * @return the factor below or equal to subgroup size */ constexpr int factorize_sg(int N, int sg_size) { - if constexpr (SYCLFFT_SLOW_SG_SHUFFLES) { + if constexpr (PORTFFT_SLOW_SG_SHUFFLES) { return 1; } else { for (int i = sg_size; i > 1; i--) { @@ -305,6 +305,6 @@ void sg_calc_twiddles(int N, int M, int n, int k, T* sg_twiddles) { sg_twiddles[(k + M) * N + n] = twiddle.imag(); } -}; // namespace sycl_fft +}; // namespace portfft #endif diff --git a/src/common/transfers.hpp b/src/common/transfers.hpp index 32365544..3b24762d 100644 --- a/src/common/transfers.hpp +++ b/src/common/transfers.hpp @@ -14,31 +14,31 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_TRANSFERS_HPP -#define SYCL_FFT_COMMON_TRANSFERS_HPP +#ifndef PORTFFT_COMMON_TRANSFERS_HPP +#define PORTFFT_COMMON_TRANSFERS_HPP #include #include #include -#ifndef SYCL_FFT_N_LOCAL_BANKS -#define SYCL_FFT_N_LOCAL_BANKS 32 +#ifndef PORTFFT_N_LOCAL_BANKS +#define PORTFFT_N_LOCAL_BANKS 32 #endif -static_assert((SYCLFFT_TARGET_WI_LOAD & (SYCLFFT_TARGET_WI_LOAD - 1)) == 0, - "SYCLFFT_TARGET_WI_LOAD should be a power of 2!"); +static_assert((PORTFFT_TARGET_WI_LOAD & (PORTFFT_TARGET_WI_LOAD - 1)) == 0, + "PORTFFT_TARGET_WI_LOAD should be a power of 2!"); -namespace sycl_fft { +namespace portfft { namespace detail { /** * If Pad is true transforms an index into local memory to skip one element for every - * SYCL_FFT_N_LOCAL_BANKS elements. Padding in this way avoids bank conflicts when accessing + * PORTFFT_N_LOCAL_BANKS elements. Padding in this way avoids bank conflicts when accessing * elements with a stride that is multiple of (or has any common divisor greater than 1 with) * the number of local banks. Does nothing if Pad is false. * @@ -51,7 +51,7 @@ namespace detail { template __attribute__((always_inline)) inline std::size_t pad_local(std::size_t local_idx) { if constexpr (Pad == detail::pad::DO_PAD) { - local_idx += local_idx / static_cast(SYCL_FFT_N_LOCAL_BANKS); + local_idx += local_idx / static_cast(PORTFFT_N_LOCAL_BANKS); } return local_idx; } @@ -61,7 +61,7 @@ __attribute__((always_inline)) inline std::size_t pad_local(std::size_t local_id /** * Copies data from global memory to local memory. * - * @tparam Pad whether to skip each SYCL_FFT_N_LOCAL_BANKS element in local to allow + * @tparam Pad whether to skip each PORTFFT_N_LOCAL_BANKS element in local to allow * strided reads without bank conflicts * @tparam Level Which level (subgroup or workgroup) does the transfer. * @tparam SubgroupSize size of the subgroup @@ -79,7 +79,7 @@ __attribute__((always_inline)) inline void global2local(sycl::nd_item<1> it, con std::size_t local_offset = 0) { static_assert(Level == detail::level::SUBGROUP || Level == detail::level::WORKGROUP, "Only implemented for subgroup and workgroup levels!"); - constexpr int chunk_size_raw = SYCLFFT_TARGET_WI_LOAD / sizeof(T); + constexpr int chunk_size_raw = PORTFFT_TARGET_WI_LOAD / sizeof(T); constexpr int chunk_size = chunk_size_raw < 1 ? 1 : chunk_size_raw; using T_vec = sycl::vec; @@ -97,7 +97,7 @@ __attribute__((always_inline)) inline void global2local(sycl::nd_item<1> it, con std::size_t stride = local_size * static_cast(chunk_size); std::size_t rounded_down_num_elems = (total_num_elems / stride) * stride; -#ifdef SYCLFFT_USE_SG_TRANSFERS +#ifdef PORTFFT_USE_SG_TRANSFERS if constexpr (Level == detail::level::WORKGROUP) { // recalculate parameters for subgroup transfer std::size_t subgroup_id = sg.get_group_id(); std::size_t elems_per_sg = detail::divideCeil(total_num_elems, local_size / SubgroupSize); @@ -114,7 +114,7 @@ __attribute__((always_inline)) inline void global2local(sycl::nd_item<1> it, con // Each subgroup loads a chunk of `chunk_size * local_size` elements. for (std::size_t i = 0; i < rounded_down_num_elems; i += stride) { T_vec loaded = sg.load(detail::get_global_multi_ptr(&global[global_offset + i])); - if constexpr (SYCL_FFT_N_LOCAL_BANKS % SubgroupSize == 0 || Pad == detail::pad::DONT_PAD) { + if constexpr (PORTFFT_N_LOCAL_BANKS % SubgroupSize == 0 || Pad == detail::pad::DONT_PAD) { detail::unrolled_loop<0, chunk_size, 1>([&](int j) __attribute__((always_inline)) { std::size_t local_idx = detail::pad_local(local_offset + i + static_cast(j) * local_size); sg.store(detail::get_local_multi_ptr(&local[local_idx]), loaded[j]); @@ -169,7 +169,7 @@ __attribute__((always_inline)) inline void global2local(sycl::nd_item<1> it, con /** * Copies data from local memory to global memory. * - * @tparam Pad whether to skip each SYCL_FFT_N_LOCAL_BANKS element in local to allow + * @tparam Pad whether to skip each PORTFFT_N_LOCAL_BANKS element in local to allow * strided reads without bank conflicts * @tparam Level Which level (subgroup or workgroup) does the transfer. * @tparam SubgroupSize size of the subgroup @@ -187,7 +187,7 @@ __attribute__((always_inline)) inline void local2global(sycl::nd_item<1> it, con std::size_t global_offset = 0) { static_assert(Level == detail::level::SUBGROUP || Level == detail::level::WORKGROUP, "Only implemented for subgroup and workgroup levels!"); - constexpr int chunk_size_raw = SYCLFFT_TARGET_WI_LOAD / sizeof(T); + constexpr int chunk_size_raw = PORTFFT_TARGET_WI_LOAD / sizeof(T); constexpr int chunk_size = chunk_size_raw < 1 ? 1 : chunk_size_raw; using T_vec = sycl::vec; @@ -205,7 +205,7 @@ __attribute__((always_inline)) inline void local2global(sycl::nd_item<1> it, con std::size_t stride = local_size * static_cast(chunk_size); std::size_t rounded_down_num_elems = (total_num_elems / stride) * stride; -#ifdef SYCLFFT_USE_SG_TRANSFERS +#ifdef PORTFFT_USE_SG_TRANSFERS if constexpr (Level == detail::level::WORKGROUP) { // recalculate parameters for subgroup transfer std::size_t subgroup_id = sg.get_group_id(); std::size_t elems_per_sg = detail::divideCeil(total_num_elems, local_size / SubgroupSize); @@ -222,7 +222,7 @@ __attribute__((always_inline)) inline void local2global(sycl::nd_item<1> it, con // Each subgroup stores a chunk of `chunk_size * local_size` elements. for (std::size_t i = 0; i < rounded_down_num_elems; i += stride) { T_vec to_store; - if constexpr (SYCL_FFT_N_LOCAL_BANKS % SubgroupSize == 0 || Pad == detail::pad::DONT_PAD) { + if constexpr (PORTFFT_N_LOCAL_BANKS % SubgroupSize == 0 || Pad == detail::pad::DONT_PAD) { detail::unrolled_loop<0, chunk_size, 1>([&](int j) __attribute__((always_inline)) { std::size_t local_idx = detail::pad_local(local_offset + i + static_cast(j) * local_size); to_store[j] = sg.load(detail::get_local_multi_ptr(&local[local_idx])); @@ -280,7 +280,7 @@ __attribute__((always_inline)) inline void local2global(sycl::nd_item<1> it, con * of consecutive values from local memory. * * @tparam NumElemsPerWI Number of elements to copy by each work item - * @tparam Pad whether to skip each SYCL_FFT_N_LOCAL_BANKS element in local avoiding bank conflicts + * @tparam Pad whether to skip each PORTFFT_N_LOCAL_BANKS element in local avoiding bank conflicts * @tparam T type of the scalar used for computations * @param local pointer to local memory * @param priv pointer to private memory @@ -415,7 +415,7 @@ __attribute__((always_inline)) inline void private2local_transposed(const T* pri * chunk of consecutive values to local memory. * * @tparam NumElemsPerWI Number of elements to copy by each work item - * @tparam Pad whether to skip each SYCL_FFT_N_LOCAL_BANKS element in local avoiding bank conflicts + * @tparam Pad whether to skip each PORTFFT_N_LOCAL_BANKS element in local avoiding bank conflicts * @tparam T type of the scalar used for computations * @param priv pointer to private memory * @param local pointer to local memory @@ -438,7 +438,7 @@ __attribute__((always_inline)) inline void private2local(const T* priv, T* local * consecutive elements. The copy is done jointly by a group of threads defined by `local_id` and `workers_in_group`. * * @tparam NumElemsPerWI Number of elements to copy by each work item - * @tparam Pad whether to skip each SYCL_FFT_N_LOCAL_BANKS element in local avoiding bank conflicts + * @tparam Pad whether to skip each PORTFFT_N_LOCAL_BANKS element in local avoiding bank conflicts * @tparam T type of the scalar used for computations * @param priv pointer to private memory * @param destination pointer to destination - local or global memory @@ -467,6 +467,6 @@ __attribute__((always_inline)) inline void store_transposed(const T* priv, T* de } }); } -}; // namespace sycl_fft +}; // namespace portfft #endif diff --git a/src/common/twiddle.hpp b/src/common/twiddle.hpp index a9d1ed9b..acbb44f5 100644 --- a/src/common/twiddle.hpp +++ b/src/common/twiddle.hpp @@ -5,14 +5,14 @@ * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_TWIDDLE_HPP -#define SYCL_FFT_COMMON_TWIDDLE_HPP +#ifndef PORTFFT_COMMON_TWIDDLE_HPP +#define PORTFFT_COMMON_TWIDDLE_HPP #pragma clang diagnostic push // The twiddle precision can be lower than the constants used here when not using double precision. #pragma clang diagnostic ignored "-Wimplicit-float-conversion" -namespace sycl_fft::detail { +namespace portfft::detail { template struct twiddle { @@ -155,7 +155,7 @@ static constexpr T im[65][65] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, {0.0, -0.0980171403295606, -0.19509032201612825, -0.29028467725446233, -0.3826834323650898, -0.47139673682599764, -0.5555702330196022, -0.6343932841636455, -0.7071067811865475, -0.773010453362737, -0.8314696123025452, -0.8819212643483549, -0.9238795325112867, -0.9569403357322089, -0.9807852804032304, -0.9951847266721968, -1.0, -0.9951847266721969, -0.9807852804032304, -0.9569403357322089, -0.9238795325112867, -0.881921264348355, -0.8314696123025455, -0.7730104533627371, -0.7071067811865476, -0.6343932841636455, -0.5555702330196022, -0.47139673682599786, -0.3826834323650899, -0.2902846772544624, -0.1950903220161286, -0.09801714032956083, 0.0, 0.09801714032956059, 0.19509032201612836, 0.2902846772544621, 0.38268343236508967, 0.47139673682599764, 0.555570233019602, 0.6343932841636453, 0.7071067811865475, 0.7730104533627367, 0.8314696123025452, 0.8819212643483549, 0.9238795325112865, 0.9569403357322088, 0.9807852804032303, 0.9951847266721969, 1.0, 0.9951847266721969, 0.9807852804032304, 0.9569403357322089, 0.9238795325112866, 0.881921264348355, 0.8314696123025455, 0.7730104533627369, 0.7071067811865477, 0.6343932841636459, 0.5555702330196022, 0.4713967368259979, 0.3826834323650904, 0.2902846772544625, 0.19509032201612872, 0.0980171403295605, 0} }; // clang-format on }; -} // namespace sycl_fft::detail +} // namespace portfft::detail #pragma clang diagnostic pop diff --git a/src/common/twiddle_calc.hpp b/src/common/twiddle_calc.hpp index 97996ff1..77bb09b6 100644 --- a/src/common/twiddle_calc.hpp +++ b/src/common/twiddle_calc.hpp @@ -14,17 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_TWIDDLE_CALC_HPP -#define SYCL_FFT_COMMON_TWIDDLE_CALC_HPP +#ifndef PORTFFT_COMMON_TWIDDLE_CALC_HPP +#define PORTFFT_COMMON_TWIDDLE_CALC_HPP #include #include -namespace sycl_fft { +namespace portfft { namespace detail { /** @@ -42,6 +42,6 @@ std::complex calculate_twiddle(T_index n, T_index total) { } } // namespace detail -} // namespace sycl_fft +} // namespace portfft #endif diff --git a/src/common/workgroup.hpp b/src/common/workgroup.hpp index 8450194b..5e82c693 100644 --- a/src/common/workgroup.hpp +++ b/src/common/workgroup.hpp @@ -14,18 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_WORKGROUP_HPP -#define SYCL_FFT_COMMON_WORKGROUP_HPP +#ifndef PORTFFT_COMMON_WORKGROUP_HPP +#define PORTFFT_COMMON_WORKGROUP_HPP #include #include #include -namespace sycl_fft { +namespace portfft { /** * Calculates FFT using Bailey 4 step algorithm. @@ -122,6 +122,6 @@ __attribute__((always_inline)) inline void wg_dft(T* loc, T* loc_twiddles, const } } -} // namespace sycl_fft +} // namespace portfft #endif diff --git a/src/common/workitem.hpp b/src/common/workitem.hpp index 43280b01..4a8d309d 100644 --- a/src/common/workitem.hpp +++ b/src/common/workitem.hpp @@ -14,19 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_WORKITEM_HPP -#define SYCL_FFT_COMMON_WORKITEM_HPP +#ifndef PORTFFT_COMMON_WORKITEM_HPP +#define PORTFFT_COMMON_WORKITEM_HPP #include #include #include #include -namespace sycl_fft { +namespace portfft { // forward declaration template @@ -169,7 +169,7 @@ template constexpr bool fits_in_wi(T_index N) { T_index N_complex = N + wi_temps(N); T_index complex_size = 2 * sizeof(Scalar); - T_index register_space = SYCLFFT_TARGET_REGS_PER_WI * 4; + T_index register_space = PORTFFT_TARGET_REGS_PER_WI * 4; return N_complex * complex_size <= register_space; } @@ -204,6 +204,6 @@ __attribute__((always_inline)) inline void wi_dft(const T* in, T* out) { } } -}; // namespace sycl_fft +}; // namespace portfft #endif diff --git a/src/descriptor.hpp b/src/descriptor.hpp index 1c7d4893..7456cecb 100644 --- a/src/descriptor.hpp +++ b/src/descriptor.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_DESCRIPTOR_HPP -#define SYCL_FFT_DESCRIPTOR_HPP +#ifndef PORTFFT_DESCRIPTOR_HPP +#define PORTFFT_DESCRIPTOR_HPP #include #include @@ -33,7 +33,7 @@ #include #include -namespace sycl_fft { +namespace portfft { namespace detail { @@ -70,7 +70,7 @@ compile time constant. The one for subgroup implementation also calls `cross_sg_ cross-subgroup factor of FFT size compile time constant. They do that by using a switch on the FFT size for one workitem, before calling `workitem_impl`, `subgroup_impl` or `workgroup_impl` . The `_impl` functions take the FFT size for one workitem as a template parameter. Only the calls that are determined to fit into available registers (depending -on the value of SYCLFFT_TARGET_REGS_PER_WI macro) are actually instantiated. +on the value of PORTFFT_TARGET_REGS_PER_WI macro) are actually instantiated. The `_impl` functions iterate over the batch of problems, loading data for each first in local memory then from there into private one. This is done in these two steps to avoid non-coalesced global memory @@ -147,22 +147,22 @@ class committed_descriptor { int SubgroupSize> void get_ids(std::vector& ids) { // if not used, some kernels might be optimized away in AOT compilation and not available here -#define SYCL_FFT_GET_ID(DIRECTION, MEMORY, TRANSPOSE) \ +#define PORTFFT_GET_ID(DIRECTION, MEMORY, TRANSPOSE) \ try { \ ids.push_back(sycl::get_kernel_id>()); \ } catch (...) { \ } - SYCL_FFT_GET_ID(direction::FORWARD, detail::memory::BUFFER, detail::transpose::NOT_TRANSPOSED) - SYCL_FFT_GET_ID(direction::BACKWARD, detail::memory::BUFFER, detail::transpose::NOT_TRANSPOSED) - SYCL_FFT_GET_ID(direction::FORWARD, detail::memory::USM, detail::transpose::NOT_TRANSPOSED) - SYCL_FFT_GET_ID(direction::BACKWARD, detail::memory::USM, detail::transpose::NOT_TRANSPOSED) - SYCL_FFT_GET_ID(direction::FORWARD, detail::memory::BUFFER, detail::transpose::TRANSPOSED) - SYCL_FFT_GET_ID(direction::BACKWARD, detail::memory::BUFFER, detail::transpose::TRANSPOSED) - SYCL_FFT_GET_ID(direction::FORWARD, detail::memory::USM, detail::transpose::TRANSPOSED) - SYCL_FFT_GET_ID(direction::BACKWARD, detail::memory::USM, detail::transpose::TRANSPOSED) + PORTFFT_GET_ID(direction::FORWARD, detail::memory::BUFFER, detail::transpose::NOT_TRANSPOSED) + PORTFFT_GET_ID(direction::BACKWARD, detail::memory::BUFFER, detail::transpose::NOT_TRANSPOSED) + PORTFFT_GET_ID(direction::FORWARD, detail::memory::USM, detail::transpose::NOT_TRANSPOSED) + PORTFFT_GET_ID(direction::BACKWARD, detail::memory::USM, detail::transpose::NOT_TRANSPOSED) + PORTFFT_GET_ID(direction::FORWARD, detail::memory::BUFFER, detail::transpose::TRANSPOSED) + PORTFFT_GET_ID(direction::BACKWARD, detail::memory::BUFFER, detail::transpose::TRANSPOSED) + PORTFFT_GET_ID(direction::FORWARD, detail::memory::USM, detail::transpose::TRANSPOSED) + PORTFFT_GET_ID(direction::BACKWARD, detail::memory::USM, detail::transpose::TRANSPOSED) -#undef SYCL_FFT_GET_ID +#undef PORTFFT_GET_ID } /** @@ -326,11 +326,11 @@ class committed_descriptor { n_compute_units(dev.get_info()), supported_sg_sizes(dev.get_info()), // compile the kernels - exec_bundle(build_w_spec_const()), - num_sgs_per_wg(SYCLFFT_SGS_IN_WG) { + exec_bundle(build_w_spec_const()), + num_sgs_per_wg(PORTFFT_SGS_IN_WG) { // TODO: check and support all the parameter values if (params.lengths.size() != 1) { - throw std::runtime_error("SYCL-FFT only supports 1D FFT for now"); + throw std::runtime_error("portFFT only supports 1D FFT for now"); } // get some properties we will use for tuning @@ -513,7 +513,7 @@ class committed_descriptor { */ template sycl::event dispatch_kernel(const T_in in, T_out out, const std::vector& dependencies = {}) { - return dispatch_kernel_helper(in, out, dependencies); + return dispatch_kernel_helper(in, out, dependencies); } /** @@ -600,7 +600,7 @@ class committed_descriptor { } }; -#undef SYCL_FFT_DISPATCH +#undef PORTFFT_DISPATCH /** * A descriptor containing FFT problem parameters. @@ -649,6 +649,6 @@ struct descriptor { } }; -} // namespace sycl_fft +} // namespace portfft #endif diff --git a/src/dispatcher/subgroup_dispatcher.hpp b/src/dispatcher/subgroup_dispatcher.hpp index 43039986..e34ed7ab 100644 --- a/src/dispatcher/subgroup_dispatcher.hpp +++ b/src/dispatcher/subgroup_dispatcher.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_DISPATCHER_SUBGROUP_DISPATCHER_HPP -#define SYCL_FFT_DISPATCHER_SUBGROUP_DISPATCHER_HPP +#ifndef PORTFFT_DISPATCHER_SUBGROUP_DISPATCHER_HPP +#define PORTFFT_DISPATCHER_SUBGROUP_DISPATCHER_HPP #include #include @@ -28,7 +28,7 @@ #include #include -namespace sycl_fft { +namespace portfft { namespace detail { // specialization constants constexpr static sycl::specialization_id factor_wi_spec_const{}; @@ -344,7 +344,7 @@ struct committed_descriptor::num_scalars_in_local_mem_struct::in std::size_t padded_fft_bytes = detail::pad_local(2 * desc.params.lengths[0]) * sizeof(Scalar); std::size_t max_batches_in_local_mem = (desc.local_memory_size - twiddle_bytes) / padded_fft_bytes; std::size_t batches_per_sg = static_cast(desc.used_sg_size) / 2; - std::size_t num_sgs_required = std::min(static_cast(SYCLFFT_SGS_IN_WG), + std::size_t num_sgs_required = std::min(static_cast(PORTFFT_SGS_IN_WG), std::max(1ul, max_batches_in_local_mem / batches_per_sg)); desc.num_sgs_per_wg = num_sgs_required; std::size_t num_batches_in_local_mem = static_cast(desc.used_sg_size) * desc.num_sgs_per_wg / 2; @@ -354,12 +354,12 @@ struct committed_descriptor::num_scalars_in_local_mem_struct::in std::size_t n_ffts_per_sg = static_cast(desc.used_sg_size / factor_sg); std::size_t num_scalars_per_sg = detail::pad_local(2 * desc.params.lengths[0] * n_ffts_per_sg); std::size_t max_n_sgs = desc.local_memory_size / sizeof(Scalar) / num_scalars_per_sg; - desc.num_sgs_per_wg = std::min(static_cast(SYCLFFT_SGS_IN_WG), std::max(1ul, max_n_sgs)); + desc.num_sgs_per_wg = std::min(static_cast(PORTFFT_SGS_IN_WG), std::max(1ul, max_n_sgs)); return num_scalars_per_sg * desc.num_sgs_per_wg; } } }; -} // namespace sycl_fft +} // namespace portfft -#endif // SYCL_FFT_DISPATCHER_SUBGROUP_DISPATCHER_HPP +#endif // PORTFFT_DISPATCHER_SUBGROUP_DISPATCHER_HPP diff --git a/src/dispatcher/workgroup_dispatcher.hpp b/src/dispatcher/workgroup_dispatcher.hpp index 3fe6c8cf..1f329eb0 100644 --- a/src/dispatcher/workgroup_dispatcher.hpp +++ b/src/dispatcher/workgroup_dispatcher.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_DISPATCHER_WORKGROUP_DISPATCHER_HPP -#define SYCL_FFT_DISPATCHER_WORKGROUP_DISPATCHER_HPP +#ifndef PORTFFT_DISPATCHER_WORKGROUP_DISPATCHER_HPP +#define PORTFFT_DISPATCHER_WORKGROUP_DISPATCHER_HPP #include #include @@ -28,7 +28,7 @@ #include #include -namespace sycl_fft { +namespace portfft { namespace detail { // specialization constants constexpr static sycl::specialization_id workgroup_spec_const_fft_size{}; @@ -47,8 +47,8 @@ std::size_t get_global_size_workgroup(std::size_t n_transforms, std::size_t subg std::size_t n_compute_units) { // TODO should this really be just a copy of workitem's? std::size_t maximum_n_sgs = 8 * n_compute_units * 64; - std::size_t maximum_n_wgs = maximum_n_sgs / SYCLFFT_SGS_IN_WG; - std::size_t wg_size = subgroup_size * SYCLFFT_SGS_IN_WG; + std::size_t maximum_n_wgs = maximum_n_sgs / PORTFFT_SGS_IN_WG; + std::size_t wg_size = subgroup_size * PORTFFT_SGS_IN_WG; std::size_t n_wgs_we_can_utilize = divideCeil(n_transforms, wg_size); return wg_size * sycl::min(maximum_n_wgs, n_wgs_we_can_utilize); @@ -188,7 +188,7 @@ struct committed_descriptor::run_kernel_struct(out, cgh); sycl::local_accessor loc(local_elements, cgh); cgh.parallel_for>( - sycl::nd_range<1>{{global_size}, {SubgroupSize * SYCLFFT_SGS_IN_WG}}, [= + sycl::nd_range<1>{{global_size}, {SubgroupSize * PORTFFT_SGS_IN_WG}}, [= ](sycl::nd_item<1> it, sycl::kernel_handler kh) [[sycl::reqd_sub_group_size(SubgroupSize)]] { std::size_t fft_size = kh.get_specialization_constant(); detail::workgroup_dispatch_impl( @@ -217,7 +217,7 @@ struct committed_descriptor::num_scalars_in_local_mem_struct::in std::size_t M = static_cast(desc.factors[2] * desc.factors[3]); // working memory + twiddles for subgroup impl for the two sizes if (TransposeIn == detail::transpose::TRANSPOSED) { - std::size_t num_batches_in_local_mem = static_cast(desc.used_sg_size) * SYCLFFT_SGS_IN_WG / 2; + std::size_t num_batches_in_local_mem = static_cast(desc.used_sg_size) * PORTFFT_SGS_IN_WG / 2; return detail::pad_local(2 * fft_size * num_batches_in_local_mem) + 2 * (M + N); } else { return detail::pad_local(2 * fft_size) + 2 * (M + N); @@ -273,6 +273,6 @@ struct committed_descriptor::calculate_twiddles_struct::inner #include @@ -28,7 +28,7 @@ #include #include -namespace sycl_fft { +namespace portfft { namespace detail { // specialization constants constexpr static sycl::specialization_id workitem_spec_const_fft_size{}; @@ -200,7 +200,7 @@ struct committed_descriptor::num_scalars_in_local_mem_struct::in std::size_t num_scalars_per_sg = detail::pad_local(2 * desc.params.lengths[0] * static_cast(desc.used_sg_size)); std::size_t max_n_sgs = desc.local_memory_size / sizeof(Scalar) / num_scalars_per_sg; - desc.num_sgs_per_wg = std::min(static_cast(SYCLFFT_SGS_IN_WG), std::max(1ul, max_n_sgs)); + desc.num_sgs_per_wg = std::min(static_cast(PORTFFT_SGS_IN_WG), std::max(1ul, max_n_sgs)); return num_scalars_per_sg * desc.num_sgs_per_wg; } }; @@ -211,6 +211,6 @@ struct committed_descriptor::calculate_twiddles_struct::inner -namespace sycl_fft { +namespace portfft { template struct get_real { @@ -47,6 +47,6 @@ struct get_domain> { static constexpr domain value = domain::COMPLEX; }; -} // namespace sycl_fft +} // namespace portfft #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1d011f6b..a5e72a2e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -17,7 +17,7 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # * -# * Codeplay's SYCL-FFT +# * Codeplay's portFFT # * # * @filename CMakeLists.txt # * diff --git a/test/bench/CMakeLists.txt b/test/bench/CMakeLists.txt index b438bccb..f3d26983 100644 --- a/test/bench/CMakeLists.txt +++ b/test/bench/CMakeLists.txt @@ -17,7 +17,7 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # * -# * Codeplay's SYCL-FFT +# * Codeplay's portFFT # * # * @filename CMakeLists.txt # * @@ -75,11 +75,11 @@ function(add_benchmark target source_file) target_link_libraries(${target} PRIVATE benchmark::benchmark Threads::Threads - sycl_fft_warnings + portfft_warnings ) - # get target include directories from sycl-fft for the direction enum - get_target_property(SYCLFFT_INCLUDES sycl_fft INTERFACE_INCLUDE_DIRECTORIES) + # get target include directories from portfft for the direction enum + get_target_property(SYCLFFT_INCLUDES portfft INTERFACE_INCLUDE_DIRECTORIES) target_include_directories(${target} PRIVATE ${PROJECT_SOURCE_DIR}/test/common @@ -87,7 +87,7 @@ function(add_benchmark target source_file) ${SYCLFFT_INCLUDES} ) if(SYCLFFT_VERIFY_BENCHMARK) - target_compile_definitions(${target} PRIVATE SYCLFFT_VERIFY_BENCHMARK) + target_compile_definitions(${target} PRIVATE PORTFFT_VERIFY_BENCHMARK) target_include_directories(${target} PRIVATE ${BENCHMARK_REFERENCE_DATA_HEADER_DIR}) add_dependencies(${target} benchmark_reference_data_header) if(SYCLFFT_GENERATE_BENCH_REFERENCE_AT_BUILD_TIME) @@ -99,4 +99,4 @@ function(add_benchmark target source_file) ) endfunction() -add_subdirectory(sycl-fft) +add_subdirectory(portfft) diff --git a/test/bench/sycl-fft/CMakeLists.txt b/test/bench/portfft/CMakeLists.txt similarity index 93% rename from test/bench/sycl-fft/CMakeLists.txt rename to test/bench/portfft/CMakeLists.txt index 320e1d6d..930cd098 100644 --- a/test/bench/sycl-fft/CMakeLists.txt +++ b/test/bench/portfft/CMakeLists.txt @@ -17,7 +17,7 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # * -# * Codeplay's SYCL-FFT +# * Codeplay's portFFT # * # * @filename CMakeLists.txt # * @@ -37,5 +37,5 @@ foreach(BENCHMARK_FILE ${SYCLFFT_BENCHMARKS}) get_filename_component(BENCHMARK_TARGET ${BENCHMARK_FILE} NAME_WE) add_benchmark(${BENCHMARK_TARGET} ${BENCHMARK_FILE}) add_sycl_to_target(TARGET ${BENCHMARK_TARGET}) - target_link_libraries(${BENCHMARK_TARGET} PRIVATE sycl_fft) + target_link_libraries(${BENCHMARK_TARGET} PRIVATE portfft) endforeach() diff --git a/test/bench/sycl-fft/bench_float.cpp b/test/bench/portfft/bench_float.cpp similarity index 86% rename from test/bench/sycl-fft/bench_float.cpp rename to test/bench/portfft/bench_float.cpp index dcedc5ff..885cbfaa 100644 --- a/test/bench/sycl-fft/bench_float.cpp +++ b/test/bench/portfft/bench_float.cpp @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ @@ -26,10 +26,10 @@ template void bench_dft(sycl::queue q, sycl::queue profiling_q, const std::string& suffix, const std::vector& lengths, std::size_t batch) { - using ftype = typename sycl_fft::get_real::type; - constexpr sycl_fft::domain domain = sycl_fft::get_domain::value; + using ftype = typename portfft::get_real::type; + constexpr portfft::domain domain = portfft::get_domain::value; - sycl_fft::descriptor desc(lengths); + portfft::descriptor desc(lengths); desc.number_of_transforms = batch; register_host_device_benchmark(suffix, q, profiling_q, desc); @@ -45,7 +45,7 @@ int main(int argc, char** argv) { print_device(q); // Benchmark configurations must match with the ones in test/bench/utils/reference_dft_set.hpp - // Configurations are progressively added as SYCL-FFT supports more of them. + // Configurations are progressively added as portFFT supports more of them. bench_dft>(q, profiling_q, "small_1d", {16}, 8 * 1024 * 1024); bench_dft>(q, profiling_q, "medium_small_1d", {256}, 512 * 1024); bench_dft>(q, profiling_q, "medium_large_1d", {4096}, 32 * 1024); diff --git a/test/bench/sycl-fft/bench_manual_double.cpp b/test/bench/portfft/bench_manual_double.cpp similarity index 97% rename from test/bench/sycl-fft/bench_manual_double.cpp rename to test/bench/portfft/bench_manual_double.cpp index 903cbbb6..1b47563f 100644 --- a/test/bench/sycl-fft/bench_manual_double.cpp +++ b/test/bench/portfft/bench_manual_double.cpp @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ diff --git a/test/bench/sycl-fft/bench_manual_float.cpp b/test/bench/portfft/bench_manual_float.cpp similarity index 97% rename from test/bench/sycl-fft/bench_manual_float.cpp rename to test/bench/portfft/bench_manual_float.cpp index b843fbd5..79a59cbb 100644 --- a/test/bench/sycl-fft/bench_manual_float.cpp +++ b/test/bench/portfft/bench_manual_float.cpp @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ diff --git a/test/bench/sycl-fft/launch_bench.hpp b/test/bench/portfft/launch_bench.hpp similarity index 77% rename from test/bench/sycl-fft/launch_bench.hpp rename to test/bench/portfft/launch_bench.hpp index 6517f066..4fd30d2b 100644 --- a/test/bench/sycl-fft/launch_bench.hpp +++ b/test/bench/portfft/launch_bench.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_BENCH_LAUNCH_BENCH_HPP -#define SYCL_FFT_BENCH_LAUNCH_BENCH_HPP +#ifndef PORTFFT_BENCH_LAUNCH_BENCH_HPP +#define PORTFFT_BENCH_LAUNCH_BENCH_HPP #include #include @@ -27,7 +27,7 @@ #include -#include +#include #include "bench_utils.hpp" #include "device_number_generator.hpp" @@ -46,11 +46,11 @@ * @param desc Description of the FFT problem * @param runs Number of asynchronous compute in one GBench iteration */ -template -void bench_dft_average_host_time_impl(benchmark::State& state, sycl::queue q, sycl_fft::descriptor desc, +template +void bench_dft_average_host_time_impl(benchmark::State& state, sycl::queue q, portfft::descriptor desc, std::size_t runs) { using complex_type = std::complex; - using forward_t = std::conditional_t; + using forward_t = std::conditional_t; std::size_t N = desc.get_total_length(); std::size_t N_transforms = desc.number_of_transforms; std::size_t num_elements = N * N_transforms; @@ -59,29 +59,29 @@ void bench_dft_average_host_time_impl(benchmark::State& state, sycl::queue q, sy forward_t* in_dev = sycl::malloc_device(num_elements, q); complex_type* out_dev = - desc.placement == sycl_fft::placement::IN_PLACE ? nullptr : sycl::malloc_device(num_elements, q); + desc.placement == portfft::placement::IN_PLACE ? nullptr : sycl::malloc_device(num_elements, q); auto committed = desc.commit(q); q.wait(); -#ifdef SYCLFFT_VERIFY_BENCHMARK +#ifdef PORTFFT_VERIFY_BENCHMARK auto verifSpec = get_matching_spec(verification_data, desc); auto host_input = verifSpec.load_data_time(desc); q.copy(host_input.data(), in_dev, num_elements).wait(); -#endif // SYCLFFT_VERIFY_BENCHMARK +#endif // PORTFFT_VERIFY_BENCHMARK // warmup - auto event = desc.placement == sycl_fft::placement::IN_PLACE ? committed.compute_forward(in_dev) - : committed.compute_forward(in_dev, out_dev); + auto event = desc.placement == portfft::placement::IN_PLACE ? committed.compute_forward(in_dev) + : committed.compute_forward(in_dev, out_dev); event.wait(); -#ifdef SYCLFFT_VERIFY_BENCHMARK +#ifdef PORTFFT_VERIFY_BENCHMARK std::vector host_output(num_elements); - q.copy(desc.placement == sycl_fft::placement::IN_PLACE ? reinterpret_cast(in_dev) : out_dev, + q.copy(desc.placement == portfft::placement::IN_PLACE ? reinterpret_cast(in_dev) : out_dev, host_output.data(), num_elements) .wait(); - verifSpec.verify_dft(desc, host_output, sycl_fft::direction::FORWARD, 1e-2); -#endif // SYCLFFT_VERIFY_BENCHMARK + verifSpec.verify_dft(desc, host_output, portfft::direction::FORWARD, 1e-2); +#endif // PORTFFT_VERIFY_BENCHMARK std::vector dependencies; dependencies.reserve(1); @@ -92,7 +92,7 @@ void bench_dft_average_host_time_impl(benchmark::State& state, sycl::queue q, sy std::chrono::time_point start; std::chrono::time_point end; - if (desc.placement == sycl_fft::placement::IN_PLACE) { + if (desc.placement == portfft::placement::IN_PLACE) { start = std::chrono::high_resolution_clock::now(); dependencies.emplace_back(committed.compute_forward(in_dev)); for (std::size_t r = 1; r != runs; r += 1) { @@ -123,8 +123,8 @@ void bench_dft_average_host_time_impl(benchmark::State& state, sycl::queue q, sy * Separate impl function to handle catching exceptions * @see bench_dft_average_host_time_impl */ -template -void bench_dft_average_host_time(benchmark::State& state, sycl::queue q, sycl_fft::descriptor desc) { +template +void bench_dft_average_host_time(benchmark::State& state, sycl::queue q, portfft::descriptor desc) { try { bench_dft_average_host_time_impl(state, q, desc, runs_to_average); } catch (std::exception& e) { @@ -143,10 +143,10 @@ void bench_dft_average_host_time(benchmark::State& state, sycl::queue q, sycl_ff * @param q Queue to use, \p enable_profiling property must be set * @param desc Description of the FFT problem */ -template -void bench_dft_device_time_impl(benchmark::State& state, sycl::queue q, sycl_fft::descriptor desc) { +template +void bench_dft_device_time_impl(benchmark::State& state, sycl::queue q, portfft::descriptor desc) { using complex_type = std::complex; - using forward_t = std::conditional_t; + using forward_t = std::conditional_t; if (!q.has_property()) { throw std::runtime_error("Queue does not have the profiling property"); } @@ -159,31 +159,31 @@ void bench_dft_device_time_impl(benchmark::State& state, sycl::queue q, sycl_fft forward_t* in_dev = sycl::malloc_device(num_elements, q); complex_type* out_dev = - desc.placement == sycl_fft::placement::IN_PLACE ? nullptr : sycl::malloc_device(num_elements, q); + desc.placement == portfft::placement::IN_PLACE ? nullptr : sycl::malloc_device(num_elements, q); auto committed = desc.commit(q); q.wait(); -#ifdef SYCLFFT_VERIFY_BENCHMARK +#ifdef PORTFFT_VERIFY_BENCHMARK auto verifSpec = get_matching_spec(verification_data, desc); auto host_input = verifSpec.load_data_time(desc); q.copy(host_input.data(), in_dev, num_elements).wait(); -#endif // SYCLFFT_VERIFY_BENCHMARK +#endif // PORTFFT_VERIFY_BENCHMARK auto compute = [&]() { - return desc.placement == sycl_fft::placement::IN_PLACE ? committed.compute_forward(in_dev) - : committed.compute_forward(in_dev, out_dev); + return desc.placement == portfft::placement::IN_PLACE ? committed.compute_forward(in_dev) + : committed.compute_forward(in_dev, out_dev); }; // warmup compute().wait(); -#ifdef SYCLFFT_VERIFY_BENCHMARK +#ifdef PORTFFT_VERIFY_BENCHMARK std::vector host_output(num_elements); - q.copy(desc.placement == sycl_fft::placement::IN_PLACE ? reinterpret_cast(in_dev) : out_dev, + q.copy(desc.placement == portfft::placement::IN_PLACE ? reinterpret_cast(in_dev) : out_dev, host_output.data(), num_elements) .wait(); - verifSpec.verify_dft(desc, host_output, sycl_fft::direction::FORWARD, 1e-2); -#endif // SYCLFFT_VERIFY_BENCHMARK + verifSpec.verify_dft(desc, host_output, portfft::direction::FORWARD, 1e-2); +#endif // PORTFFT_VERIFY_BENCHMARK for (auto _ : state) { sycl::event e = compute(); @@ -203,8 +203,8 @@ void bench_dft_device_time_impl(benchmark::State& state, sycl::queue q, sycl_fft * Separate impl function to handle catching exceptions * @see bench_dft_device_time_impl */ -template -void bench_dft_device_time(benchmark::State& state, sycl::queue q, sycl_fft::descriptor desc) { +template +void bench_dft_device_time(benchmark::State& state, sycl::queue q, portfft::descriptor desc) { try { bench_dft_device_time_impl(state, q, desc); } catch (std::exception& e) { @@ -223,15 +223,15 @@ void bench_dft_device_time(benchmark::State& state, sycl::queue q, sycl_fft::des * @param profiling_q Queue used for profiling the time on the device * @param desc Description of the FFT problem */ -template +template void register_host_device_benchmark(const std::string& suffix, sycl::queue q, sycl::queue profiling_q, - const sycl_fft::descriptor& desc) { - static_assert(Domain == sycl_fft::domain::REAL || Domain == sycl_fft::domain::COMPLEX, "Unsupported domain"); + const portfft::descriptor& desc) { + static_assert(Domain == portfft::domain::REAL || Domain == portfft::domain::COMPLEX, "Unsupported domain"); static_assert(std::is_same_v || std::is_same_v, "Unsupported precision"); // Print descriptor's parameters relevant for benchmarks // Additional parameters could be added to the suffix if needed auto print_desc = [&](std::ostream& name) { - name << "d=" << (Domain == sycl_fft::domain::REAL ? "re" : "cpx"); + name << "d=" << (Domain == portfft::domain::REAL ? "re" : "cpx"); name << ",prec=" << (std::is_same_v ? "single" : "double"); name << ",n=["; for (std::size_t i = 0; i < desc.lengths.size(); ++i) { @@ -255,4 +255,4 @@ void register_host_device_benchmark(const std::string& suffix, sycl::queue q, sy ->UseManualTime(); } -#endif // SYCL_FFT_BENCH_LAUNCH_BENCH_HPP +#endif // PORTFFT_BENCH_LAUNCH_BENCH_HPP diff --git a/test/bench/sycl-fft/register_manual_bench.hpp b/test/bench/portfft/register_manual_bench.hpp similarity index 95% rename from test/bench/sycl-fft/register_manual_bench.hpp rename to test/bench/portfft/register_manual_bench.hpp index 505f572b..f3387c83 100644 --- a/test/bench/sycl-fft/register_manual_bench.hpp +++ b/test/bench/portfft/register_manual_bench.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_BENCH_REGISTER_MANUAL_BENCH_HPP -#define SYCL_FFT_BENCH_REGISTER_MANUAL_BENCH_HPP +#ifndef PORTFFT_BENCH_REGISTER_MANUAL_BENCH_HPP +#define PORTFFT_BENCH_REGISTER_MANUAL_BENCH_HPP #include #include @@ -157,8 +157,8 @@ std::vector get_vec_unsigned(const std::string_view& key, std::stri return vec; } -template -void fill_descriptor(arg_map_t& arg_map, sycl_fft::descriptor& desc) { +template +void fill_descriptor(arg_map_t& arg_map, portfft::descriptor& desc) { std::string_view arg = get_arg(arg_map, BATCH); if (!arg.empty()) { desc.number_of_transforms = get_unsigned("batch", arg); @@ -193,18 +193,18 @@ void fill_descriptor(arg_map_t& arg_map, sycl_fft::descriptor& de arg = get_arg(arg_map, STORAGE); if (arg == "complex" || arg == "cpx") { - desc.complex_storage = sycl_fft::complex_storage::COMPLEX; + desc.complex_storage = portfft::complex_storage::COMPLEX; } else if (arg == "real_real" || arg == "rr") { - desc.complex_storage = sycl_fft::complex_storage::REAL_REAL; + desc.complex_storage = portfft::complex_storage::REAL_REAL; } else if (!arg.empty()) { throw invalid_value{"storage", arg}; } arg = get_arg(arg_map, PLACEMENT); if (arg == "in_place" || arg == "ip") { - desc.placement = sycl_fft::placement::IN_PLACE; + desc.placement = portfft::placement::IN_PLACE; } else if (arg == "out_of_place" || arg == "oop") { - desc.placement = sycl_fft::placement::OUT_OF_PLACE; + desc.placement = portfft::placement::OUT_OF_PLACE; } else if (!arg.empty()) { throw invalid_value{"placement", arg}; } @@ -212,7 +212,7 @@ void fill_descriptor(arg_map_t& arg_map, sycl_fft::descriptor& de template void register_manual_benchmark(sycl::queue q, sycl::queue profiling_q, const std::string_view& desc_str) { - using namespace sycl_fft; + using namespace portfft; arg_map_t arg_map = get_arg_map(desc_str); // Set the domain and lengths first to create the descriptor @@ -324,4 +324,4 @@ int main_manual_bench(int argc, char** argv) { return 0; } -#endif // SYCL_FFT_BENCH_REGISTER_MANUAL_BENCH_HPP +#endif // PORTFFT_BENCH_REGISTER_MANUAL_BENCH_HPP diff --git a/test/bench/utils/bench_utils.hpp b/test/bench/utils/bench_utils.hpp index 7be6789d..1730a7ee 100644 --- a/test/bench/utils/bench_utils.hpp +++ b/test/bench/utils/bench_utils.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCLFFT_BENCH_BENCH_UTILS_HPP -#define SYCLFFT_BENCH_BENCH_UTILS_HPP +#ifndef PORTFFT_BENCH_BENCH_UTILS_HPP +#define PORTFFT_BENCH_BENCH_UTILS_HPP #include #include @@ -31,11 +31,11 @@ #include "enums.hpp" -#ifdef SYCLFFT_VERIFY_BENCHMARK +#ifdef PORTFFT_VERIFY_BENCHMARK // The following file in generated during the build and located at // ${BUILD_DIR}/ref_data_include/ #include -#endif // SYCLFFT_VERIFY_BENCHMARK +#endif // PORTFFT_VERIFY_BENCHMARK /** * number of runs to do when doing an average of many host runs. @@ -51,4 +51,4 @@ inline void handle_exception(benchmark::State& state, std::exception& e) { state.SkipWithError(msg.c_str()); } -#endif // SYCLFFT_BENCH_BENCH_UTILS_HPP +#endif // PORTFFT_BENCH_BENCH_UTILS_HPP diff --git a/test/bench/utils/device_number_generator.hpp b/test/bench/utils/device_number_generator.hpp index d35098a5..64b12601 100644 --- a/test/bench/utils/device_number_generator.hpp +++ b/test/bench/utils/device_number_generator.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_BENCH_DEVICE_NUMBER_GENERATOR_HPP -#define SYCL_FFT_BENCH_DEVICE_NUMBER_GENERATOR_HPP +#ifndef PORTFFT_BENCH_DEVICE_NUMBER_GENERATOR_HPP +#define PORTFFT_BENCH_DEVICE_NUMBER_GENERATOR_HPP #include #include @@ -57,4 +57,4 @@ void memFill(T* input, sycl::queue& queue, std::size_t num_elements) { queue.wait(); } -#endif // SYCL_FFT_BENCH_DEVICE_NUMBER_GENERATOR_HPP +#endif // PORTFFT_BENCH_DEVICE_NUMBER_GENERATOR_HPP diff --git a/test/bench/utils/ops_estimate.hpp b/test/bench/utils/ops_estimate.hpp index f503f7e0..24564365 100644 --- a/test/bench/utils/ops_estimate.hpp +++ b/test/bench/utils/ops_estimate.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_BENCH_OPS_ESTIMATE_HPP -#define SYCL_FFT_BENCH_OPS_ESTIMATE_HPP +#ifndef PORTFFT_BENCH_OPS_ESTIMATE_HPP +#define PORTFFT_BENCH_OPS_ESTIMATE_HPP #include @@ -49,4 +49,4 @@ inline std::size_t global_mem_transactions(std::size_t batches, std::size_t num_ return batches * (sizeof(TypeIn) * num_in + sizeof(TypeOut) * num_out); } -#endif // SYCL_FFT_BENCH_OPS_ESTIMATE_HPP +#endif // PORTFFT_BENCH_OPS_ESTIMATE_HPP diff --git a/test/bench/utils/reference_dft_set.hpp b/test/bench/utils/reference_dft_set.hpp index 3285c6e0..06fdc0b3 100644 --- a/test/bench/utils/reference_dft_set.hpp +++ b/test/bench/utils/reference_dft_set.hpp @@ -17,8 +17,8 @@ * A set of common reference DFT benchmarks. * **************************************************************************/ -#ifndef SYCL_FFT_REFERENCE_DFT_SET_HPP -#define SYCL_FFT_REFERENCE_DFT_SET_HPP +#ifndef PORTFFT_REFERENCE_DFT_SET_HPP +#define PORTFFT_REFERENCE_DFT_SET_HPP /** * A common set of reference benchmarks. To use, two macros must be defined: @@ -48,7 +48,7 @@ // 10. large real 1D fits in global memory Cooley-Tukey (batch=2*1024 N=128*1024) // 11. small real 3D (batch=1024 N=64x64x64) // -// Configurations must match with the ones in test/bench/sycl-fft/launch_bench.hpp +// Configurations must match with the ones in test/bench/portfft/launch_bench.hpp // clang-format on /** @@ -116,4 +116,4 @@ void register_real_float_benchmark_set(std::string prefix, Args&&... args) { // clang-format on } -#endif // SYCL_FFT_REFERENCE_DFT_SET_HPP +#endif // PORTFFT_REFERENCE_DFT_SET_HPP diff --git a/test/bench/utils/sycl_utils.hpp b/test/bench/utils/sycl_utils.hpp index 005a8df1..21eed6f6 100644 --- a/test/bench/utils/sycl_utils.hpp +++ b/test/bench/utils/sycl_utils.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_TEST_BENCH_UTILS_SYCL_UTILS_HPP -#define SYCL_FFT_TEST_BENCH_UTILS_SYCL_UTILS_HPP +#ifndef PORTFFT_TEST_BENCH_UTILS_SYCL_UTILS_HPP +#define PORTFFT_TEST_BENCH_UTILS_SYCL_UTILS_HPP #include @@ -76,4 +76,4 @@ void print_device(sycl::queue queue) { benchmark::AddCustomContext("Subgroup sizes", subgroup_sizes_str.str()); } -#endif // SYCL_FFT_TEST_BENCH_UTILS_SYCL_UTILS_HPP +#endif // PORTFFT_TEST_BENCH_UTILS_SYCL_UTILS_HPP diff --git a/test/common/number_generators.hpp b/test/common/number_generators.hpp index 8a033c04..7c238c28 100644 --- a/test/common/number_generators.hpp +++ b/test/common/number_generators.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_NUMBER_GENERATORS_HPP -#define SYCL_FFT_COMMON_NUMBER_GENERATORS_HPP +#ifndef PORTFFT_COMMON_NUMBER_GENERATORS_HPP +#define PORTFFT_COMMON_NUMBER_GENERATORS_HPP #include #include @@ -50,4 +50,4 @@ void populate_with_random(std::vector>& in, T lowerLimit = T(-1. } } -#endif // SYCL_FFT_COMMON_NUMBER_GENERATORS_HPP +#endif // PORTFFT_COMMON_NUMBER_GENERATORS_HPP diff --git a/test/common/reference_data_wrangler.hpp b/test/common/reference_data_wrangler.hpp index 55e12929..53302841 100644 --- a/test/common/reference_data_wrangler.hpp +++ b/test/common/reference_data_wrangler.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_COMMON_REFERENCE_DATA_WRANGLER_HPP -#define SYCL_FFT_COMMON_REFERENCE_DATA_WRANGLER_HPP +#ifndef PORTFFT_COMMON_REFERENCE_DATA_WRANGLER_HPP +#define PORTFFT_COMMON_REFERENCE_DATA_WRANGLER_HPP #include #include @@ -41,17 +41,17 @@ class verif_data_spec { /** * Constructor. Should only be needed by the python scripts that generate the reference data. */ - verif_data_spec(std::vector dftSize, std::size_t batch, std::string filePath, sycl_fft::domain domain) + verif_data_spec(std::vector dftSize, std::size_t batch, std::string filePath, portfft::domain domain) : dftSize(dftSize), batch(batch), filePath(filePath), domain(domain){}; - // The DFT real size - aka. sycl_fft::descriptor::lengths + // The DFT real size - aka. portfft::descriptor::lengths std::vector dftSize; // The number of transforms per compute call. std::size_t batch; // The path where the reference data is to be found. std::string filePath; // FFT domain - sycl_fft::domain domain; + portfft::domain domain; /** Load time-domain data from the reference file. * @@ -60,13 +60,12 @@ class verif_data_spec { * @param desc The descriptor that this data will be used for with. * @return Linearised time-domain data with batches equal to the descriptor. **/ - template - auto load_data_time(sycl_fft::descriptor& desc) { - using elem_t = std::conditional_t, Scalar>; + template + auto load_data_time(portfft::descriptor& desc) { + using elem_t = std::conditional_t, Scalar>; if (Domain != domain) { std::string errorStr = "Tried to read data as incorrect type. "; - errorStr = - errorStr + "Ref data is for " + (domain == sycl_fft::domain::COMPLEX ? "COMPLEX" : "REAL") + " domain."; + errorStr = errorStr + "Ref data is for " + (domain == portfft::domain::COMPLEX ? "COMPLEX" : "REAL") + " domain."; throw std::runtime_error(errorStr); } auto rawInputData = load_input_data(desc.number_of_transforms); @@ -81,8 +80,8 @@ class verif_data_spec { * @param desc The descriptor that this data will be used for with. * @return Linearised fourier-domain data with batches equal to the descriptor. **/ - template - std::vector> load_data_fourier(sycl_fft::descriptor& desc) { + template + std::vector> load_data_fourier(portfft::descriptor& desc) { auto rawInputData = load_output_data(desc.number_of_transforms); auto data = cast_data>(rawInputData); return data; @@ -92,7 +91,7 @@ class verif_data_spec { **/ inline std::vector fourier_domain_dims() { auto res = dftSize; - if (domain == sycl_fft::domain::REAL) { + if (domain == portfft::domain::REAL) { res.back() = (res.back() / 2 + 1) * 2; } return res; @@ -108,15 +107,15 @@ class verif_data_spec { * @param dir The DFT direction. * @param comparisonTolerance The tolerance for error. **/ - template - void verify_dft(sycl_fft::descriptor& desc, std::vector& hostOutput, sycl_fft::direction dir, + template + void verify_dft(portfft::descriptor& desc, std::vector& hostOutput, portfft::direction dir, const double comparisonTolerance) { if ((desc.lengths != dftSize) || (desc.number_of_transforms > batch) || (Domain != domain)) { throw std::runtime_error("Can't use this verification data to verify this DFT!"); } using complex_type = std::complex; - using forward_type = std::conditional_t; - const bool isForward = dir == sycl_fft::direction::FORWARD; + using forward_type = std::conditional_t; + const bool isForward = dir == portfft::direction::FORWARD; std::size_t descBatches = desc.number_of_transforms; auto dataShape = isForward ? dftSize : fourier_domain_dims(); std::size_t dftLen = std::accumulate(dataShape.cbegin(), dataShape.cend(), std::size_t(1), std::multiplies<>()); @@ -145,7 +144,7 @@ class verif_data_spec { // The number of doubles in the input data. inline std::size_t input_double_count() { return batch * std::accumulate(dftSize.cbegin(), dftSize.cend(), std::size_t(1), std::multiplies<>()) * - (domain == sycl_fft::domain::COMPLEX ? 2 : 1); + (domain == portfft::domain::COMPLEX ? 2 : 1); } // Cast double data read from file to [float, complex, double, complex] @@ -198,7 +197,7 @@ class verif_data_spec { } return load_file_data( 0, batchCount * std::accumulate(dftSize.cbegin(), dftSize.cend(), std::size_t(1), std::multiplies<>()) * - (domain == sycl_fft::domain::COMPLEX ? 2 : 1)); + (domain == portfft::domain::COMPLEX ? 2 : 1)); } /** Load fourier-domain data from the input file to a double vector. @@ -246,15 +245,15 @@ class verif_data_spec { } }; -/** Find a verif_data_spec that can be used to check an DFT described by a sycl_fft::descriptor. +/** Find a verif_data_spec that can be used to check an DFT described by a portfft::descriptor. * @tparam Scalar The descriptor scalar type. * @tparam Domain The descriptor domain. * @param verifData The generated verification data array - usually named "verification_data" * @param desc The descriptor we want data relevant for. */ -template +template verif_data_spec get_matching_spec(const std::vector& verifData, - sycl_fft::descriptor& desc) { + portfft::descriptor& desc) { for (auto& spec : verifData) { if ((desc.lengths == spec.dftSize) && (desc.number_of_transforms <= spec.batch) && (Domain == spec.domain)) { return spec; @@ -263,4 +262,4 @@ verif_data_spec get_matching_spec(const std::vector& verifData, throw std::runtime_error("Couldn't find matching specification."); } -#endif // SYCL_FFT_COMMON_REFERENCE_DATA_WRANGLER_HPP +#endif // PORTFFT_COMMON_REFERENCE_DATA_WRANGLER_HPP diff --git a/test/unit_test/CMakeLists.txt b/test/unit_test/CMakeLists.txt index 730d83b7..5003b44a 100644 --- a/test/unit_test/CMakeLists.txt +++ b/test/unit_test/CMakeLists.txt @@ -17,7 +17,7 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # * -# * Codeplay's SYCL-FFT +# * Codeplay's portFFT # * # * @filename CMakeLists.txt # * @@ -79,8 +79,8 @@ foreach(UNIT_TEST_FILE ${SYCLFFT_UNIT_TESTS}) target_link_libraries( ${TEST_TARGET} PRIVATE - sycl_fft - sycl_fft_warnings + portfft + portfft_warnings GTest::gtest_main Threads::Threads ) diff --git a/test/unit_test/fft_double.cpp b/test/unit_test/fft_double.cpp index 302e6e07..9867015c 100644 --- a/test/unit_test/fft_double.cpp +++ b/test/unit_test/fft_double.cpp @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ diff --git a/test/unit_test/fft_float.cpp b/test/unit_test/fft_float.cpp index 1870e544..d0fc5988 100644 --- a/test/unit_test/fft_float.cpp +++ b/test/unit_test/fft_float.cpp @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ diff --git a/test/unit_test/fft_test_utils.hpp b/test/unit_test/fft_test_utils.hpp index eb715f12..3f85ff82 100644 --- a/test/unit_test/fft_test_utils.hpp +++ b/test/unit_test/fft_test_utils.hpp @@ -14,21 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_UNIT_TEST_FFT_TEST_UTILS -#define SYCL_FFT_UNIT_TEST_FFT_TEST_UTILS +#ifndef PORTFFT_UNIT_TEST_FFT_TEST_UTILS +#define PORTFFT_UNIT_TEST_FFT_TEST_UTILS #include "instantiate_fft_tests.hpp" #include "utils.hpp" -#include +#include #include #include -using namespace sycl_fft; +using namespace portfft; using param_tuple = std::tuple; @@ -51,7 +51,7 @@ void transpose(TypeIn in, TypeOut& out, std::size_t FFT_size, std::size_t batch_ } } -template +template std::pair>, std::string> get_committed_descriptor( descriptor& desc, sycl::queue& queue) { try { @@ -66,7 +66,7 @@ template 0); { - std::vector instantiated_sizes{SYCLFFT_COOLEY_TUKEY_OPTIMIZED_SIZES}; + std::vector instantiated_sizes{PORTFFT_COOLEY_TUKEY_OPTIMIZED_SIZES}; if (!std::count(instantiated_sizes.cbegin(), instantiated_sizes.cend(), params.length)) { GTEST_SKIP(); } @@ -102,7 +102,7 @@ void check_fft_usm(test_params& params, sycl::queue& queue) { auto committed_descriptor = potential_committed_descriptor.first.value(); auto verifSpec = get_matching_spec(verification_data, desc); - if constexpr (Dir == sycl_fft::direction::FORWARD) { + if constexpr (Dir == portfft::direction::FORWARD) { host_input = verifSpec.template load_data_time(desc); } else { host_input = verifSpec.template load_data_fourier(desc); @@ -146,7 +146,7 @@ template 0); { - std::vector instantiated_sizes{SYCLFFT_COOLEY_TUKEY_OPTIMIZED_SIZES}; + std::vector instantiated_sizes{PORTFFT_COOLEY_TUKEY_OPTIMIZED_SIZES}; if (!std::count(instantiated_sizes.cbegin(), instantiated_sizes.cend(), params.length)) { GTEST_SKIP(); } @@ -176,7 +176,7 @@ void check_fft_buffer(test_params& params, sycl::queue& queue) { auto committed_descriptor = potential_committed_descriptor.first.value(); auto verifSpec = get_matching_spec(verification_data, desc); - if constexpr (Dir == sycl_fft::direction::FORWARD) { + if constexpr (Dir == portfft::direction::FORWARD) { host_input = verifSpec.template load_data_time(desc); } else { host_input = verifSpec.template load_data_fourier(desc); diff --git a/test/unit_test/instantiate_fft_tests.hpp b/test/unit_test/instantiate_fft_tests.hpp index 536e36ad..620ca3ac 100644 --- a/test/unit_test/instantiate_fft_tests.hpp +++ b/test/unit_test/instantiate_fft_tests.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_UNIT_TEST_INSTANTIATE_FFT_TESTS_HPP -#define SYCL_FFT_UNIT_TEST_INSTANTIATE_FFT_TESTS_HPP +#ifndef PORTFFT_UNIT_TEST_INSTANTIATE_FFT_TESTS_HPP +#define PORTFFT_UNIT_TEST_INSTANTIATE_FFT_TESTS_HPP #include diff --git a/test/unit_test/transfers.cpp b/test/unit_test/transfers.cpp index aacf5161..9f809c55 100644 --- a/test/unit_test/transfers.cpp +++ b/test/unit_test/transfers.cpp @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ @@ -25,8 +25,8 @@ #include constexpr int N = 4; -constexpr int sg_size = (SYCLFFT_SUBGROUP_SIZES); // turn the list into the last value using commma operator -constexpr int wg_size = sg_size * SYCLFFT_SGS_IN_WG; +constexpr int sg_size = (PORTFFT_SUBGROUP_SIZES); // turn the list into the last value using commma operator +constexpr int wg_size = sg_size * PORTFFT_SGS_IN_WG; constexpr int N_sentinel_values = 64; using ftype = float; constexpr ftype sentinel_a = -999; @@ -34,10 +34,10 @@ constexpr ftype sentinel_b = -888; constexpr ftype sentinel_loc1 = -777; constexpr ftype sentinel_loc2 = -666; -template +template class test_transfers_kernel; -template +template void test() { std::vector a, b; a.resize(N * wg_size); @@ -62,7 +62,7 @@ void test() { q.fill(b_dev, sentinel_b, N * wg_size + 2 * N_sentinel_values); q.wait(); - std::size_t padded_local_size = sycl_fft::detail::pad_local(N * wg_size); + std::size_t padded_local_size = portfft::detail::pad_local(N * wg_size); q.submit([&](sycl::handler& h) { sycl::local_accessor loc1(padded_local_size + 2 * N_sentinel_values, h); @@ -80,12 +80,12 @@ void test() { } } group_barrier(it.get_group()); - sycl_fft::global2local(it, a_dev_work, loc1_work, N * wg_size); + portfft::global2local(it, a_dev_work, loc1_work, N * wg_size); group_barrier(it.get_group()); - sycl_fft::local2private(loc1_work, priv, local_id, N); - sycl_fft::private2local(priv, loc2_work, local_id, N); + portfft::local2private(loc1_work, priv, local_id, N); + portfft::private2local(priv, loc2_work, local_id, N); group_barrier(it.get_group()); - sycl_fft::local2global(it, loc2_work, b_dev_work, N * wg_size); + portfft::local2global(it, loc2_work, b_dev_work, N * wg_size); group_barrier(it.get_group()); if (local_id == 0) { for (std::size_t i = 0; i < N_sentinel_values; i++) { @@ -128,6 +128,6 @@ void test() { sycl::free(sentinels_loc2_dev, q); } -TEST(transfers, unpadded) { test(); } +TEST(transfers, unpadded) { test(); } -TEST(transfers, padded) { test(); } +TEST(transfers, padded) { test(); } diff --git a/test/unit_test/utils.hpp b/test/unit_test/utils.hpp index 31323d2d..032c8fce 100644 --- a/test/unit_test/utils.hpp +++ b/test/unit_test/utils.hpp @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * Codeplay's SYCL-FFT + * Codeplay's portFFT * **************************************************************************/ -#ifndef SYCL_FFT_UNIT_TEST_UTILS_HPP -#define SYCL_FFT_UNIT_TEST_UTILS_HPP +#ifndef PORTFFT_UNIT_TEST_UTILS_HPP +#define PORTFFT_UNIT_TEST_UTILS_HPP #include "common/subgroup.hpp" #include "common/transfers.hpp" @@ -36,7 +36,7 @@ #include using namespace std::complex_literals; -using namespace sycl_fft; +using namespace portfft; #define CHECK_QUEUE(queue) \ if (!queue.first) GTEST_SKIP() << queue.second;