diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index 63e7acd1..d60fff60 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -130,9 +130,9 @@ jobs: -DCMAKE_C_COMPILER=${{ matrix.backend.c_compiler }} \ -DCMAKE_CXX_COMPILER=${{ matrix.backend.cxx_compiler }} \ -DCMAKE_CXX_STANDARD=17 \ - -DBUILD_TESTING=ON \ + -DKokkosFFT_ENABLE_TESTS=ON \ -DKokkosFFT_ENABLE_BENCHMARK=ON \ - -DKokkosFFT_INTERNAL_Kokkos=ON \ + -DKokkosFFT_ENABLE_INTERNAL_KOKKOS=ON \ ${{ matrix.backend.cmake_flags }} \ ${{ matrix.target.cmake_flags }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 02679701..73df4643 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,11 +13,12 @@ project( list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") # Options -option(BUILD_EXAMPLES "Build KokkosFFT examples" ON) option(KokkosFFT_ENABLE_HOST_AND_DEVICE "Enable FFT on both host and device" OFF) -option(KokkosFFT_INTERNAL_Kokkos "Build internal Kokkos instead of relying on external one" OFF) -option(KokkosFFT_ENABLE_DOCS "Build KokkosFFT documentaion/website" OFF) +option(KokkosFFT_ENABLE_INTERNAL_KOKKOS "Build internal Kokkos instead of relying on external one" OFF) +option(KokkosFFT_ENABLE_EXAMPLES "Build KokkosFFT examples" ON) +option(KokkosFFT_ENABLE_TESTS "Build KokkosFFT tests" OFF) option(KokkosFFT_ENABLE_BENCHMARK "Build benchmarks for KokkosFFT" OFF) +option(KokkosFFT_ENABLE_DOCS "Build KokkosFFT documentaion/website" OFF) # Version information set(KOKKOSFFT_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) @@ -26,7 +27,7 @@ set(KOKKOSFFT_VERSION_PATCH ${PROJECT_VERSION_PATCH}) set(KOKKOS_REQUIRED_VERSION "4.2.00") -if (NOT KokkosFFT_INTERNAL_Kokkos) +if (NOT KokkosFFT_ENABLE_INTERNAL_KOKKOS) # First check, Kokkos is added as subdirectory or not if(NOT TARGET Kokkos::kokkos) find_package(Kokkos REQUIRED) @@ -62,8 +63,8 @@ endif() message("") # Googletest -include(CTest) -if(BUILD_TESTING) +if(KokkosFFT_ENABLE_TESTS) + 
include(CTest) find_package(GTest CONFIG) if(NOT GTest_FOUND) add_subdirectory(tpls/googletest) @@ -114,7 +115,7 @@ set(KokkosFFT_EXPORT_TARGET "${PROJECT_NAME}-Targets") add_subdirectory(common) add_subdirectory(fft) -if(BUILD_EXAMPLES) +if(KokkosFFT_ENABLE_EXAMPLES) add_subdirectory(examples) endif() diff --git a/README.md b/README.md index 43a5fa4d..7bd7e798 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@ UNOFFICIAL FFT interfaces for Kokkos C++ Performance Portability Programming EcoSystem -KokkosFFT implements local interfaces Kokkos and de facto standard FFT libraries, including [fftw](http://www.fftw.org), [cufft](https://developer.nvidia.com/cufft), [hipfft](https://github.com/ROCm/hipFFT), and [oneMKL](https://spec.oneapi.io/versions/latest/elements/oneMKL/source/index.html). "Local" means not using MPI, or running within a single MPI process without knowing about MPI. We are inclined to implement the [numpy.fft](https://numpy.org/doc/stable/reference/routines.fft.html)-like interfaces adapted for [Kokkos](https://github.com/kokkos/kokkos). -A key concept is that "As easy as numpy, as fast as vendor libraries". Accordingly, our API follows the API by [numpy.fft](https://numpy.org/doc/stable/reference/routines.fft.html) with minor differences. A fft library dedicated to Kokkos Device backend (e.g. [cufft](https://developer.nvidia.com/cufft) for CUDA backend) is automatically used. If something is wrong with runtime values (say `View` extents), it will raise runtime errors (C++ exceptions or assertions). See [documentations](https://kokkosfft.readthedocs.io/) for more information. +KokkosFFT implements local interfaces between [Kokkos](https://github.com/kokkos/kokkos) and de facto standard FFT libraries, including [fftw](http://www.fftw.org), [cufft](https://developer.nvidia.com/cufft), [hipfft](https://github.com/ROCm/hipFFT), and [oneMKL](https://spec.oneapi.io/versions/latest/elements/oneMKL/source/index.html). 
"Local" means not using MPI, or running within a single MPI process without knowing about MPI. We are inclined to implement the [numpy.fft](https://numpy.org/doc/stable/reference/routines.fft.html)-like interfaces adapted for [Kokkos](https://github.com/kokkos/kokkos). +A key concept is that **"As easy as numpy, as fast as vendor libraries"**. Accordingly, our API follows the API by [numpy.fft](https://numpy.org/doc/stable/reference/routines.fft.html) with minor differences. A fft library dedicated to Kokkos Device backend (e.g. [cufft](https://developer.nvidia.com/cufft) for CUDA backend) is automatically used. If something is wrong with runtime values (say `View` extents), it will raise runtime errors (C++ exceptions or assertions). See [documentations](https://kokkosfft.readthedocs.io/) for more information. Here is an example for 1D real to complex transform with `rfft` in python and KokkosFFT. ```python3 @@ -103,13 +103,10 @@ target_link_libraries(hello-kokkos-fft PUBLIC Kokkos::kokkos KokkosFFT::fft) For compilation, we basically rely on the CMake options for Kokkos. For example, the configure options for A100 GPU is as follows. ``` -cmake -DBUILD_TESTING=ON \ - -DCMAKE_CXX_COMPILER=/tpls/kokkos/bin/nvcc_wrapper \ +cmake -DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_BUILD_TYPE=Release \ -DKokkos_ENABLE_CUDA=ON \ - -DKokkos_ENABLE_CUDA_CONSTEXPR=ON \ - -DKokkos_ARCH_AMPERE80=ON \ - -DKokkos_ENABLE_CUDA_LAMBDA=On .. + -DKokkos_ARCH_AMPERE80=ON .. ``` This way, all the functionalities are executed on A100 GPUs. @@ -121,8 +118,7 @@ export KOKKOSFFT_INSTALL_PREFIX=/kokkosFFT export KokkosFFT_DIR=/kokkosFFT/lib64/cmake/kokkos-fft mkdir build_KokkosFFT && cd build_KokkosFFT -cmake -DBUILD_TESTING=OFF \ - -DCMAKE_CXX_COMPILER=icpx \ +cmake -DCMAKE_CXX_COMPILER=icpx \ -DCMAKE_INSTALL_PREFIX=${KOKKOSFFT_INSTALL_PREFIX} .. cmake --build . -j 8 cmake --install . 
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index faf5524b..52e4d48c 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -1,4 +1,4 @@ add_subdirectory(src) -if(BUILD_TESTING) +if(KokkosFFT_ENABLE_TESTS) add_subdirectory(unit_test) endif() diff --git a/common/unit_test/Test_Types.hpp b/common/unit_test/Test_Types.hpp index 0ebd33d4..6c03ac27 100644 --- a/common/unit_test/Test_Types.hpp +++ b/common/unit_test/Test_Types.hpp @@ -1,5 +1,5 @@ -#ifndef __TEST_TYPES_HPP__ -#define __TEST_TYPES_HPP__ +#ifndef TEST_TYPES_HPP +#define TEST_TYPES_HPP #include using execution_space = Kokkos::DefaultExecutionSpace; diff --git a/common/unit_test/Test_Utils.hpp b/common/unit_test/Test_Utils.hpp index e3ddeba4..1553fa44 100644 --- a/common/unit_test/Test_Utils.hpp +++ b/common/unit_test/Test_Utils.hpp @@ -1,5 +1,5 @@ -#ifndef __TEST_UTILS_HPP__ -#define __TEST_UTILS_HPP__ +#ifndef TEST_UTILS_HPP +#define TEST_UTILS_HPP #include "Test_Types.hpp" diff --git a/docs/finding_libraries.rst b/docs/finding_libraries.rst index c2c5be44..f7884559 100644 --- a/docs/finding_libraries.rst +++ b/docs/finding_libraries.rst @@ -8,8 +8,8 @@ Some tips to find FFT libraries for each backend. `fftw `_ ----------------------------- -If ``fftw`` is offered as a module, our cmake helper would likely find fftw. -Assuming fftw is installed in ````, it is expected that ```` would be found under ``LIBRARY_PATH``, ``LD_LIBRARY_PATH``, and ``PATH``. +If ``fftw`` is offered as a module, our cmake helper would likely find ``fftw``. +Assuming ``fftw`` is installed in ````, it is expected that ```` would be found under ``LIBRARY_PATH``, ``LD_LIBRARY_PATH``, and ``PATH``. It would look like .. code-block:: bash diff --git a/docs/index.rst b/docs/index.rst index 2d37664f..d0fcbbb6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,12 +9,12 @@ including `fftw `_, `hipfft `_, and `oneMKL `_. "Local" means not using MPI, or running within a single MPI process without knowing about MPI. 
We are inclined to implement the `numpy.fft `_-like interfaces adapted for Kokkos. -A key concept is that "As easy as numpy, as fast as vendor libraries". Accordingly, our API follows the API by numpy.fft with minor differences. -A fft library dedicated to Kokkos Device backend (e.g. cufft for CUDA backend) is automatically used. +A key concept is that *"As easy as numpy, as fast as vendor libraries"*. Accordingly, our API follows the API by ``numpy.fft`` with minor differences. +An FFT library dedicated to Kokkos Device backend (e.g. cufft for CUDA backend) is automatically used. KokkosFFT is open source and available on `GitHub `_. -Here is an example for 1D real to complex transform with rfft in python and KokkosFFT. +Here is an example for 1D real to complex transform with ``rfft`` in python and KokkosFFT. .. code-block:: python @@ -43,7 +43,7 @@ Here is an example for 1D real to complex transform with rfft in python and Kokk .. note:: - We assume that the backend FFT libraries are appropriately installed on the system. + It is assumed that backend FFT libraries are appropriately installed on the system. .. toctree:: diff --git a/docs/intro/building.rst b/docs/intro/building.rst index b9024dab..d963fc5a 100644 --- a/docs/intro/building.rst +++ b/docs/intro/building.rst @@ -29,8 +29,7 @@ Is is assumed that the Kokkos is installed under ``/kokkos`` with O export KOKKOSFFT_INSTALL_PREFIX=/kokkosFFT mkdir build_KokkosFFT && cd build_KokkosFFT - cmake -DBUILD_TESTING=OFF \ - -DCMAKE_CXX_COMPILER=icpx \ + cmake -DCMAKE_CXX_COMPILER=icpx \ -DCMAKE_PREFIX_PATH=/kokkos \ -DCMAKE_INSTALL_PREFIX=${KOKKOSFFT_INSTALL_PREFIX} .. cmake --build . -j 8 @@ -70,12 +69,12 @@ The code can be built as CMake options ------------- -We rely on CMake to build KokkosFFT, more specifically ``CMake3.22+``. Here are the lists of CMake option. +We rely on CMake to build KokkosFFT, more specifically ``CMake 3.22+``. Here is the list of CMake options.
For FFTs on Kokkos device only, we do not need to add extra compile options but for Kokkos ones. -In order to use KokkosFFT from both host and device, we need to add ``KokkosFFT_ENABLE_HOST_AND_DEVICE=ON``. +In order to use KokkosFFT from both host and device, it is necessary to add ``KokkosFFT_ENABLE_HOST_AND_DEVICE=ON``. This option may be useful, for example FFT is used for initialization at host. -However, to enable this option, we need a pre-installed ``fftw`` for FFT on host, so it is disabled in default. -(see :doc:`minimum working example<../samples/05_1DFFT_HOST_DEVICE>`) +However, to enable this option, we need a pre-installed ``fftw`` for FFT on host, so it is disabled by default +(see :doc:`minimum working example<../samples/05_1DFFT_HOST_DEVICE>`). .. list-table:: CMake options :widths: 25 25 50 diff --git a/docs/intro/quick_start.rst b/docs/intro/quick_start.rst index 73e34b6e..8fdffdbe 100644 --- a/docs/intro/quick_start.rst +++ b/docs/intro/quick_start.rst @@ -16,8 +16,8 @@ If CMake fails to find a backend FFT library, see :doc:`How to find fft librarie Requirements ------------ -KokkosFFT requieres Kokkos 4.2+ and dedicated compilers for CPUs or GPUs. -It employs CMake3.22+ for building. +KokkosFFT requires ``Kokkos 4.2+`` and dedicated compilers for CPUs or GPUs. +It employs ``CMake 3.22+`` for building. Here are list of compilers we frequently use for testing. @@ -64,8 +64,7 @@ For compilation, we basically rely on the CMake options for Kokkos. For example, .. code-block:: bash mkdir build && cd build - cmake -DBUILD_TESTING=OFF \ - -DCMAKE_CXX_COMPILER=g++ \ + cmake -DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_BUILD_TYPE=Release \ -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ARCH_AMPERE80=ON ..
diff --git a/docs/intro/using.rst b/docs/intro/using.rst index a811a779..5d6300c5 100644 --- a/docs/intro/using.rst +++ b/docs/intro/using.rst @@ -11,14 +11,14 @@ Brief introduction Most of the numpy.fft APIs (``numpy.fft.``) are available in KokkosFFT (``KokkosFFT::``) on the Kokkos device. In fact, these are the only APIs available in KokkosFFT (see :doc:`API reference<../api_reference>` for detail). KokkosFFT support 1D to 3D FFT over choosen axes. -Inside FFT APIs, we first create a FFT plan for backend FFT library based on the Views and choosen axes. -Then, we execute the FFT using the created plan on the given Views. Finally, we destroy the plan. -Depending on the View Layout and choosen axes, we may need transpose operations to make data contiguous. +Inside FFT APIs, we first create an FFT plan for a backend FFT library based on the Views and chosen axes. +Then, we execute the FFT using the created plan on the given Views. Then, we may perform normalization based on the users' choice. +Finally, we destroy the plan. Depending on the View Layout and chosen axes, we may need transpose operations to make data contiguous. In that case, we perform the transpose operations internally which impose overheads in both memory and computations. .. note:: - ``KokkosFFT::Impl`` namespace is for implementation details and should not be accessed by users. + ``KokkosFFT::Impl`` namespace represents implementation details and should not be accessed by users. Basic Instruction ----------------- @@ -27,7 +27,7 @@ We have Standard and Real FFTs as APIs. Standard FFTs can be used for complex to Real FFTs perform real to complex transform. As well as ``numpy.fft``, numbers after ``fft`` represents the dimension of FFT. For example, ``KokkosFFT::fft2`` performs 2D (potentially batched) FFT in forward direction. If the rank of Views is higher than the dimension of FFT, a batched FFT plan is created. -APIs start from ``i`` are for inverse transform.
+APIs starting with ``i`` represent inverse transforms. For Real FFTs, users have to pay attention to the input and output data types as well as their extents. Inconsistent data types are suppressed by compilation errors. If extents are inconsistent, it will raise runtime errors (C++ exceptions or assertions). @@ -57,7 +57,7 @@ The following listing shows good and bad examples of Real FFTs. .. note:: - We have to use the same precision (either ``float`` or ``double``) for input and ouptut Views. + Input and output views must have the same precision (either ``float`` or ``double``). Supported data types -------------------- @@ -65,7 +65,7 @@ Supported data types Firstly, the input and output Views must have the same LayoutType and rank. For the moment, we accept Kokkos Views with some restriction in data types and Layout. Here are the list of available types for Views. We recommend to use dynamic allocation for Views, -since we have not tested with static shaped Views. In addition, we have not tested with non-default `MemoryTraits`. +since we have not tested with static shaped Views. In addition, we have not tested with non-default `Memory Traits`_. * DataType: ``float``, ``double``, ``Kokkos::complex``, ``Kokkos::complex`` * LayoutType: ``Kokkos::LayoutLeft``, ``Kokkos::LayoutRight`` diff --git a/docs/samples/01_1DFFT.rst b/docs/samples/01_1DFFT.rst index d9de5e18..eb9020b8 100644 --- a/docs/samples/01_1DFFT.rst +++ b/docs/samples/01_1DFFT.rst @@ -1,10 +1,16 @@ .. _01_1DFFT: One dimensional FFT -------------------- +=================== + +numpy +----- .. literalinclude:: ../../examples/01_1DFFT/numpy_1DFFT.py :language: python +KokkosFFT +--------- + .. literalinclude:: ../../examples/01_1DFFT/01_1DFFT.cpp - :language: C++ + :language: C++ \ No newline at end of file diff --git a/docs/samples/02_2DFFT.rst b/docs/samples/02_2DFFT.rst index 663fa649..521eece8 100644 --- a/docs/samples/02_2DFFT.rst +++ b/docs/samples/02_2DFFT.rst @@ -1,10 +1,16 @@ ..
_02_2DFFT: Two dimensional FFT -------------------- +=================== + +numpy +----- .. literalinclude:: ../../examples/02_2DFFT/numpy_2DFFT.py :language: python +KokkosFFT +--------- + .. literalinclude:: ../../examples/02_2DFFT/02_2DFFT.cpp :language: C++ diff --git a/docs/samples/03_NDFFT.rst b/docs/samples/03_NDFFT.rst index 1d819ab7..2fb9a0e9 100644 --- a/docs/samples/03_NDFFT.rst +++ b/docs/samples/03_NDFFT.rst @@ -1,10 +1,16 @@ .. _03_NDFFT: N-dimensional FFT ------------------ +================= + +numpy +----- .. literalinclude:: ../../examples/03_NDFFT/numpy_NDFFT.py :language: python +KokkosFFT +--------- + .. literalinclude:: ../../examples/03_NDFFT/03_NDFFT.cpp :language: C++ diff --git a/docs/samples/04_batchedFFT.rst b/docs/samples/04_batchedFFT.rst index 12dc9d64..88d73e1c 100644 --- a/docs/samples/04_batchedFFT.rst +++ b/docs/samples/04_batchedFFT.rst @@ -1,10 +1,16 @@ .. _04_batchedFFT: One-dimensional batched FFT ---------------------------- +=========================== + +numpy +----- .. literalinclude:: ../../examples/04_batchedFFT/numpy_batchedFFT.py :language: python +KokkosFFT +--------- + .. literalinclude:: ../../examples/04_batchedFFT/04_batchedFFT.cpp :language: C++ diff --git a/docs/samples/05_1DFFT_HOST_DEVICE.rst b/docs/samples/05_1DFFT_HOST_DEVICE.rst index 98658e97..0e26e6df 100644 --- a/docs/samples/05_1DFFT_HOST_DEVICE.rst +++ b/docs/samples/05_1DFFT_HOST_DEVICE.rst @@ -1,10 +1,16 @@ .. _05_1DFFT_HOST_DEVICE: FFT on host and device ----------------------- +====================== + +numpy +----- .. literalinclude:: ../../examples/05_1DFFT_HOST_DEVICE/numpy_1DFFT.py :language: python +KokkosFFT +--------- + .. literalinclude:: ../../examples/05_1DFFT_HOST_DEVICE/05_1DFFT_HOST_DEVICE.cpp :language: C++ diff --git a/docs/samples/06_1DFFT_reuse_plans.rst b/docs/samples/06_1DFFT_reuse_plans.rst index d561ecf7..0a3e9d73 100644 --- a/docs/samples/06_1DFFT_reuse_plans.rst +++ b/docs/samples/06_1DFFT_reuse_plans.rst @@ -1,10 +1,16 @@ .. 
_06_1DFFT_reuse_plans: -Reuse fft plan --------------- +Reuse FFT plan +============== + +numpy +----- .. literalinclude:: ../../examples/06_1DFFT_reuse_plans/numpy_1DFFT.py :language: python +KokkosFFT +--------- + .. literalinclude:: ../../examples/06_1DFFT_reuse_plans/06_1DFFT_reuse_plans.cpp :language: C++ diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt index e2b19192..99029cfc 100644 --- a/fft/CMakeLists.txt +++ b/fft/CMakeLists.txt @@ -1,5 +1,5 @@ add_subdirectory(src) -if(BUILD_TESTING) +if(KokkosFFT_ENABLE_TESTS) add_subdirectory(unit_test) endif() diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index 23f0642f..6c0593f8 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -10,13 +10,11 @@ #include "KokkosFFT_Plans.hpp" #if defined(KOKKOS_ENABLE_CUDA) -using default_device = Kokkos::Cuda; #include "KokkosFFT_Cuda_transform.hpp" #ifdef ENABLE_HOST_AND_DEVICE #include "KokkosFFT_OpenMP_transform.hpp" #endif #elif defined(KOKKOS_ENABLE_HIP) -using default_device = Kokkos::HIP; #include "KokkosFFT_HIP_transform.hpp" #ifdef ENABLE_HOST_AND_DEVICE #include "KokkosFFT_OpenMP_transform.hpp" @@ -27,13 +25,10 @@ using default_device = Kokkos::HIP; #include "KokkosFFT_OpenMP_transform.hpp" #endif #elif defined(KOKKOS_ENABLE_OPENMP) -using default_device = Kokkos::OpenMP; #include "KokkosFFT_OpenMP_transform.hpp" #elif defined(KOKKOS_ENABLE_THREADS) -using default_device = Kokkos::Threads; #include "KokkosFFT_OpenMP_transform.hpp" #else -using default_device = Kokkos::Serial; #include "KokkosFFT_OpenMP_transform.hpp" #endif diff --git a/fft/unit_test/Test_Types.hpp b/fft/unit_test/Test_Types.hpp index b973593d..6eb277f4 100644 --- a/fft/unit_test/Test_Types.hpp +++ b/fft/unit_test/Test_Types.hpp @@ -1,5 +1,5 @@ -#ifndef __TEST_TYPES_HPP__ -#define __TEST_TYPES_HPP__ +#ifndef TEST_TYPES_HPP +#define TEST_TYPES_HPP #include using execution_space = Kokkos::DefaultExecutionSpace; diff --git 
a/fft/unit_test/Test_Utils.hpp b/fft/unit_test/Test_Utils.hpp index 3faeed5a..b7e6c016 100644 --- a/fft/unit_test/Test_Utils.hpp +++ b/fft/unit_test/Test_Utils.hpp @@ -1,5 +1,5 @@ -#ifndef __TEST_UTILS_HPP__ -#define __TEST_UTILS_HPP__ +#ifndef TEST_UTILS_HPP +#define TEST_UTILS_HPP #include #include