diff --git a/CMakeLists.txt b/CMakeLists.txt index 67805b8..519aeb0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(dtFFT VERSION 0.2.0 HOMEPAGE_URL "https://github.com/ShatrovOA/dtFFT" LANGUAGES Fortran C CXX) -option(DTFFT_WITHOUT_FFTW "Build dtFFT without FFTW support" OFF) +option(DTFFT_WITH_FFTW "Build dtFFT with FFTW support" OFF) option(DTFFT_WITH_MKL "Build dtFFT with MKL DFTI support" OFF) option(DTFFT_WITH_CUFFT "Build dtFFT with cufft support" OFF) # option(DTFFT_WITH_KFR "Build dtFFT with KFR support" OFF) @@ -64,11 +64,11 @@ end program" HAVE_BLOCK_STATEMENT SRC_EXT .F90) -if( DTFFT_WITHOUT_FFTW +if( NOT DTFFT_WITH_FFTW AND NOT DTFFT_WITH_MKL # AND NOT DTFFT_WITH_KFR - AND NOT DTFFT_WITH_CUFFT - AND NOT DTFFT_WITH_VKFFT + # AND NOT DTFFT_WITH_CUFFT + # AND NOT DTFFT_WITH_VKFFT ) set ( DTFFT_TRANSPOSE_ONLY ON ) endif() diff --git a/README.md b/README.md index b246736..479f7df 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # dtFFT - DataTyped Fast Fourier Transform [![Status](https://img.shields.io/badge/status-stable-brightgreen.svg)]() -![Build Status](https://github.com/ShatrovOA/dtFFT/actions/workflows/main.yml/badge.svg) +[![dtfft workflow](https://github.com/ShatrovOA/dtFFT/actions/workflows/main.yml/badge.svg)](https://github.com/ShatrovOA/dtFFT/actions/workflows/main.yml) [![codecov](https://codecov.io/gh/ShatrovOA/dtFFT/graph/badge.svg?token=6BI4AQVH7Z)](https://codecov.io/gh/ShatrovOA/dtFFT) [![License](https://img.shields.io/github/license/ShatrovOA/dtFFT?color=brightgreen&logo=License)]() @@ -60,7 +60,7 @@ To build this library modern (2008+) Fortran compiler is required. This library | Option | Possible values | Default value | Description | | -------- | ------- | -------- | ------- | -| DTFFT_WITHOUT_FFTW | on / off | off | Build dtFFT without FFTW support. When `OFF` user need to set `FFTWDIR` environmental variable in order to find FFTW3. 
Both single and double precision versions are required | +| DTFFT_WITH_FFTW | on / off | off | Build dtFFT with FFTW support. When enabled, the user needs to set the `FFTWDIR` environment variable in order to find FFTW3 located in a custom directory. Both single and double precision versions of the library are required | | DTFFT_WITH_MKL | on / off | off | Build dtFFT with MKL DFTI support | | DTFFT_BUILD_TESTS | on / off | off | Build tests | | DTFFT_ENABLE_COVERAGE | on / off | off | Build coverage of library. Only possible with gfortran | @@ -68,7 +68,7 @@ To build this library modern (2008+) Fortran compiler is required. This library | DTFFT_USE_MPI | on / off | on | Use Fortran `mpi` module instead of `mpi_f08` | | DTFFT_BUILD_C_CXX_API | on / off | on | Build C/C++ API | | DTFFT_ENABLE_PERSISTENT_COMM | on / off | off | In case you are planning to execute plan multiple times then it can be very beneficial to use persistent communications. But user must aware that such communications are created at first call to `execute` or `transpose` subroutines and pointers are saved internally inside MPI. All other plan executions will use those pointers. Take care not to free them. | -| DTFFT_WITH_CALIPER | on / off | off | Enable library profiler via Caliper. Additional parameter is required to find caliper: `Dcaliper_DIR` | +| DTFFT_WITH_CALIPER | on / off | off | Enable library profiler via Caliper. 
Additional parameter is required to find caliper: `caliper_DIR` | | DTFFT_MEASURE_ITERS | positive integer | 2 | Number of iterations to run in order to find best plan when passing `DTFFT_MEASURE` or `DTFFT_PATIENT` to effort_flag parameter during plan creation | | DTFFT_FORWARD_X_Y | 1 / 2 | 2 | Default id of transposition plan for X -> Y transpose which will be used if plan created with `DTFFT_ESTIMATE` and `DTFFT_MEASURE` effort_flags | | DTFFT_BACKWARD_X_Y | 1 / 2 | 2 | Default id of transposition plan for Y -> X transpose which will be used if plan created with `DTFFT_ESTIMATE` and `DTFFT_MEASURE` effort_flags | diff --git a/include/dtfft.h b/include/dtfft.h index 1f0eb59..b11e085 100644 --- a/include/dtfft.h +++ b/include/dtfft.h @@ -65,6 +65,7 @@ typedef struct dtfft_plan_t *dtfft_plan; #define DTFFT_ERROR_INVALID_R2R_KINDS CONF_DTFFT_ERROR_INVALID_R2R_KINDS #define DTFFT_ERROR_R2C_TRANSPOSE_PLAN CONF_DTFFT_ERROR_R2C_TRANSPOSE_PLAN #define DTFFT_ERROR_INPLACE_TRANSPOSE CONF_DTFFT_ERROR_INPLACE_TRANSPOSE +#define DTFFT_ERROR_INVALID_AUX CONF_DTFFT_ERROR_INVALID_AUX #define DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED CONF_DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED #define DTFFT_ERROR_CUFFTMP_2D_PLAN CONF_DTFFT_ERROR_CUFFTMP_2D_PLAN @@ -82,9 +83,9 @@ do { \ // dtFFT transpose_type flags -// Perform XYZ --> YXZ --> ZXY transposition +// Perform XYZ --> YXZ --> ZXY plan execution #define DTFFT_TRANSPOSE_OUT CONF_DTFFT_TRANSPOSE_OUT -// Perform ZXY --> YXZ --> XYZ transposition +// Perform ZXY --> YXZ --> XYZ plan execution #define DTFFT_TRANSPOSE_IN CONF_DTFFT_TRANSPOSE_IN // Flags for transpose only plans @@ -128,7 +129,7 @@ do { \ */ // Create transpose only plan, no executor needed #define DTFFT_EXECUTOR_NONE CONF_DTFFT_EXECUTOR_NONE -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW // Use FFTW3 #define DTFFT_EXECUTOR_FFTW3 CONF_DTFFT_EXECUTOR_FFTW3 #endif @@ -279,6 +280,8 @@ dtfft_execute(dtfft_plan plan, void *in, void *out, const int transpose_type, vo * - 
`DTFFT_TRANSPOSE_Y_TO_X` * - `DTFFT_TRANSPOSE_Y_TO_Z` (3d plan only) * - `DTFFT_TRANSPOSE_Z_TO_Y` (3d plan only) + * - `DTFFT_TRANSPOSE_X_TO_Z` (3d plan only) + * - `DTFFT_TRANSPOSE_Z_TO_X` (3d plan only) * * \return `DTFFT_SUCCESS` if plan was executed, error code otherwise */ @@ -309,11 +312,8 @@ dtfft_destroy(dtfft_plan *plan); * \param[out] out_starts Starts of local portion of data in 'fourier' space in reversed order * \param[out] out_counts Sizes of local portion of data in 'fourier' space in reversed order * \param[out] alloc_size Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers: - * * - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex) - * * - R2R plan: `alloc_size` * sizeof(double/float) - * * - R2C plan: `alloc_size` * sizeof(double/float) * \return `DTFFT_SUCCESS` if call was successfull, error code otherwise */ @@ -328,11 +328,8 @@ dtfft_get_local_sizes(dtfft_plan plan, int *in_starts, int *in_counts, int *out_ * * \param[in] plan Plan handle * \param[out] alloc_size Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers: - * * - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex) - * * - R2R plan: `alloc_size` * sizeof(double/float) - * * - R2C plan: `alloc_size` * sizeof(double/float) * \return `DTFFT_SUCCESS` if call was successfull, error code otherwise */ diff --git a/include/dtfft.hpp b/include/dtfft.hpp index 7790d6e..72086e7 100644 --- a/include/dtfft.hpp +++ b/include/dtfft.hpp @@ -119,8 +119,10 @@ namespace dtfft * \param[in] transpose_type Type of transpose: * - `DTFFT_TRANSPOSE_X_TO_Y` * - `DTFFT_TRANSPOSE_Y_TO_X` - * - `DTFFT_TRANSPOSE_Y_TO_Z` - * - `DTFFT_TRANSPOSE_Z_TO_Y` + * - `DTFFT_TRANSPOSE_Y_TO_Z` (3d plan only) + * - `DTFFT_TRANSPOSE_Z_TO_Y` (3d plan only) + * - `DTFFT_TRANSPOSE_X_TO_Z` (3d plan only) + * - `DTFFT_TRANSPOSE_Z_TO_X` (3d plan only) * * \return Status 
code of method execution */ @@ -139,8 +141,10 @@ namespace dtfft * \param[in] transpose_type Type of transpose: * - `DTFFT_TRANSPOSE_X_TO_Y` * - `DTFFT_TRANSPOSE_Y_TO_X` - * - `DTFFT_TRANSPOSE_Y_TO_Z` - * - `DTFFT_TRANSPOSE_Z_TO_Y` + * - `DTFFT_TRANSPOSE_Y_TO_Z` (3d plan only) + * - `DTFFT_TRANSPOSE_Z_TO_Y` (3d plan only) + * - `DTFFT_TRANSPOSE_X_TO_Z` (3d plan only) + * - `DTFFT_TRANSPOSE_Z_TO_X` (3d plan only) * * \return Status code of method execution */ @@ -153,11 +157,8 @@ namespace dtfft /** \brief Wrapper around `get_local_sizes` * * \param[out] alloc_size Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers: - * * - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex) - * * - R2R plan: `alloc_size` * sizeof(double/float) - * * - R2C plan: `alloc_size` * sizeof(double/float) * * \return Status code of method execution @@ -175,11 +176,8 @@ namespace dtfft * \param[out] out_starts Starts of local portion of data in 'fourier' space in reversed order * \param[out] out_counts Sizes of local portion of data in 'fourier' space in reversed order * \param[out] alloc_size Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers: - * * - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex) - * * - R2R plan: `alloc_size` * sizeof(double/float) - * * - R2C plan: `alloc_size` * sizeof(double/float) * * \return Status code of method execution @@ -197,11 +195,8 @@ namespace dtfft * \param[out] out_starts Starts of local portion of data in 'fourier' space in reversed order * \param[out] out_counts Sizes of local portion of data in 'fourier' space in reversed order * \param[out] alloc_size Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers: - * * - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex) - * * - R2R plan: `alloc_size` * 
sizeof(double/float) - * * - R2C plan: `alloc_size` * sizeof(double/float) * * \return Status code of method execution diff --git a/include/dtfft_config.h.in b/include/dtfft_config.h.in index 6547f0a..2a9a873 100644 --- a/include/dtfft_config.h.in +++ b/include/dtfft_config.h.in @@ -1,11 +1,11 @@ #ifndef DTFFT_CONFIG_H #define DTFFT_CONFIG_H -#cmakedefine DTFFT_WITHOUT_FFTW +#cmakedefine DTFFT_WITH_FFTW #cmakedefine DTFFT_WITH_MKL -#cmakedefine DTFFT_WITH_CUFFT -#cmakedefine DTFFT_WITH_KFR -// #cmakedefine DTFFT_WITH_VKFFT +/* #cmakedefine DTFFT_WITH_CUFFT */ +/* #cmakedefine DTFFT_WITH_KFR */ +/* #cmakedefine DTFFT_WITH_VKFFT */ #cmakedefine DTFFT_TRANSPOSE_ONLY @@ -40,9 +40,9 @@ #define CONF_DTFFT_EXECUTOR_NONE 0 #define CONF_DTFFT_EXECUTOR_FFTW3 +1 #define CONF_DTFFT_EXECUTOR_MKL +2 -#define CONF_DTFFT_EXECUTOR_CUFFT +3 +/* #define CONF_DTFFT_EXECUTOR_CUFFT +3 */ /* #define CONF_DTFFT_EXECUTOR_KFR +4 */ -#define CONF_DTFFT_EXECUTOR_VKFFT +5 +/* #define CONF_DTFFT_EXECUTOR_VKFFT +5 */ #define CONF_DTFFT_SUCCESS 0 #define CONF_DTFFT_ERROR_MPI_FINALIZED (-1) @@ -60,6 +60,7 @@ #define CONF_DTFFT_ERROR_INVALID_R2R_KINDS 12 #define CONF_DTFFT_ERROR_R2C_TRANSPOSE_PLAN 13 #define CONF_DTFFT_ERROR_INPLACE_TRANSPOSE 14 +#define CONF_DTFFT_ERROR_INVALID_AUX 15 #define CONF_DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED 101 #define CONF_DTFFT_ERROR_CUFFTMP_2D_PLAN 200 diff --git a/src/dtfft.F90 b/src/dtfft.F90 index 7553c05..b90240d 100644 --- a/src/dtfft.F90 +++ b/src/dtfft.F90 @@ -47,7 +47,7 @@ module dtfft ! 
1d FFT External Executor types public :: DTFFT_EXECUTOR_NONE -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW public :: DTFFT_EXECUTOR_FFTW3 #endif #ifdef DTFFT_WITH_MKL diff --git a/src/dtfft_core_m.F90 b/src/dtfft_core_m.F90 index fcd0d92..ffc80b4 100644 --- a/src/dtfft_core_m.F90 +++ b/src/dtfft_core_m.F90 @@ -29,7 +29,7 @@ module dtfft_core_m use dtfft_precisions use dtfft_transpose_m use dtfft_abstract_executor_m -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW use dtfft_executor_fftw_m #endif #ifdef DTFFT_WITH_MKL @@ -232,17 +232,20 @@ subroutine transpose(self, in, out, transpose_type, error_code) ierr = DTFFT_SUCCESS if ( .not. self%is_created ) & ierr = DTFFT_ERROR_PLAN_NOT_CREATED + CHECK_ERROR_AND_RETURN if ( .not.any(transpose_type == VALID_TRANSPOSES) & .or. ( self%ndims == 2 .and. abs(transpose_type) > 1 ) & .or. abs(transpose_type) == 3 .and..not.self%is_z_slab) & ierr = DTFFT_ERROR_INVALID_TRANSPOSE_TYPE + CHECK_ERROR_AND_RETURN if ( is_same_ptr(LOC_FUN(in), LOC_FUN(out)) ) & ierr = DTFFT_ERROR_INPLACE_TRANSPOSE - if ( present( error_code ) ) error_code = ierr; if ( ierr /= DTFFT_SUCCESS ) return + CHECK_ERROR_AND_RETURN REGION_BEGIN("dtfft_transpose") call self%transpose_private(in, out, transpose_type) REGION_END("dtfft_transpose") + if ( present( error_code ) ) error_code = DTFFT_SUCCESS end subroutine transpose subroutine execute(self, in, out, transpose_type, aux, error_code) @@ -252,12 +255,12 @@ subroutine execute(self, in, out, transpose_type, aux, error_code) #ifdef DTFFT_WITH_CUDA , device & #endif - :: in(..) !< Incoming buffer of any rank and kind + , target :: in(..) !< Incoming buffer of any rank and kind type(*), intent(inout) & #ifdef DTFFT_WITH_CUDA , device & #endif - :: out(..) !< Resulting buffer of any rank and kind + , target :: out(..) !< Resulting buffer of any rank and kind integer(IP), intent(in) :: transpose_type !< Type of transposition. 
One of the: !< - `DTFFT_TRANSPOSE_OUT` !< - `DTFFT_TRANSPOSE_IN` @@ -267,28 +270,42 @@ subroutine execute(self, in, out, transpose_type, aux, error_code) #ifdef DTFFT_WITH_CUDA , device & #endif - :: aux(..) !< Optional auxiliary buffer. + , target :: aux(..) !< Optional auxiliary buffer. !< Size of buffer must be greater than value !< returned by `alloc_size` parameter of `get_local_sizes` subroutine integer(IP), optional, intent(out) :: error_code !< Optional error code returned to user integer(IP) :: ierr + logical :: inplace + inplace = is_same_ptr(LOC_FUN(in), LOC_FUN(out)) ierr = DTFFT_SUCCESS - if ( .not. self%is_created ) ierr = DTFFT_ERROR_PLAN_NOT_CREATED - if ( .not.any(transpose_type == VALID_FULL_TRANSPOSES) ) ierr = DTFFT_ERROR_INVALID_TRANSPOSE_TYPE - if ( present( error_code ) ) error_code = ierr; if ( ierr /= DTFFT_SUCCESS ) return + if ( .not. self%is_created ) & + ierr = DTFFT_ERROR_PLAN_NOT_CREATED + CHECK_ERROR_AND_RETURN + if ( .not.any(transpose_type == VALID_FULL_TRANSPOSES) ) & + ierr = DTFFT_ERROR_INVALID_TRANSPOSE_TYPE + CHECK_ERROR_AND_RETURN + if ( self%is_transpose_plan .and. self%ndims == 2 .and. inplace ) & + ierr = DTFFT_ERROR_INPLACE_TRANSPOSE + CHECK_ERROR_AND_RETURN + if ( present( aux ) ) then + if ( is_same_ptr(LOC_FUN(in), LOC_FUN(aux)) .or. 
is_same_ptr(LOC_FUN(out), LOC_FUN(aux)) ) & + ierr = DTFFT_ERROR_INVALID_AUX + CHECK_ERROR_AND_RETURN + endif REGION_BEGIN("dtfft_execute") call self%check_aux(aux=aux) if ( present( aux ) ) then - call self%execute_private( in, out, transpose_type, aux ) + call self%execute_private( in, out, transpose_type, aux, inplace ) else - call self%execute_private( in, out, transpose_type, self%aux ) + call self%execute_private( in, out, transpose_type, self%aux, inplace ) endif REGION_END("dtfft_execute") + if ( present( error_code ) ) error_code = DTFFT_SUCCESS end subroutine execute - subroutine execute_private(self, in, out, transpose_type, aux) + subroutine execute_private(self, in, out, transpose_type, aux, inplace) class(dtfft_core), intent(inout) :: self !< Abstract plan type(*), intent(inout) & #ifdef DTFFT_WITH_CUDA @@ -312,6 +329,8 @@ subroutine execute_private(self, in, out, transpose_type, aux) :: aux(..) !< Auxiliary buffer. !< Size of buffer must be greater than value !< returned by `alloc_size` parameter of `get_local_sizes` subroutine + logical, intent(in) :: inplace + if ( self%is_transpose_plan ) then select case ( self%ndims ) case (2) @@ -324,68 +343,69 @@ subroutine execute_private(self, in, out, transpose_type, aux) case (3) select case( transpose_type ) case ( DTFFT_TRANSPOSE_OUT ) - if ( self%is_z_slab ) then - call self%transpose_private(in, out, DTFFT_TRANSPOSE_X_TO_Z) - else + if ( inplace .or. .not. self%is_z_slab ) then call self%transpose_private(in, aux, DTFFT_TRANSPOSE_X_TO_Y) call self%transpose_private(aux, out, DTFFT_TRANSPOSE_Y_TO_Z) + return endif + call self%transpose_private(in, out, DTFFT_TRANSPOSE_X_TO_Z) case ( DTFFT_TRANSPOSE_IN ) - if ( self%is_z_slab ) then - call self%transpose_private(in, out, DTFFT_TRANSPOSE_Z_TO_X) - else + if ( inplace .or. .not. 
self%is_z_slab ) then call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Z_TO_Y) call self%transpose_private(aux, out, DTFFT_TRANSPOSE_Y_TO_X) + return endif + call self%transpose_private(in, out, DTFFT_TRANSPOSE_Z_TO_X) endselect endselect - else ! self%is_transpose_plan - select case ( transpose_type ) - case ( DTFFT_TRANSPOSE_OUT ) - ! 1d direct FFT X direction || 2d X-Y FFT - call self%fft(1)%fft%execute(in, aux, DTFFT_FORWARD) - if ( self%is_z_slab ) then - ! Transpose X -> Z - call self%transpose_private(aux, out, DTFFT_TRANSPOSE_X_TO_Z) - ! 1d direct FFT Z direction - call self%fft(3)%fft%execute(out, out, DTFFT_FORWARD) - return - endif - ! Transpose X -> Y - call self%transpose_private(aux, out, DTFFT_TRANSPOSE_X_TO_Y) - ! 1d FFT Y direction - call self%fft(self%fft_mapping(2))%fft%execute(out, out, DTFFT_FORWARD) - if ( self%ndims == 2 ) then - return - endif - ! Transpose Y -> Z - call self%transpose_private(out, aux, DTFFT_TRANSPOSE_Y_TO_Z) + return + endif ! self%is_transpose_plan + + select case ( transpose_type ) + case ( DTFFT_TRANSPOSE_OUT ) + ! 1d direct FFT X direction || 2d X-Y FFT + call self%fft(1)%fft%execute(in, aux, DTFFT_FORWARD) + if ( self%is_z_slab ) then + ! Transpose X -> Z + call self%transpose_private(aux, out, DTFFT_TRANSPOSE_X_TO_Z) ! 1d direct FFT Z direction - call self%fft(self%fft_mapping(3))%fft%execute(aux, out, DTFFT_FORWARD) - case ( DTFFT_TRANSPOSE_IN ) - if ( self%is_z_slab ) then - ! 1d inverse FFT Z direction - call self%fft(3)%fft%execute(in, in, DTFFT_BACKWARD) - ! Transpose Z -> X - call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Z_TO_X) - ! 2d inverse FFT X-Y direction - call self%fft(1)%fft%execute(aux, out, DTFFT_BACKWARD) - return - endif - if ( self%ndims == 3 ) then - ! 1d inverse FFT Z direction - call self%fft(self%fft_mapping(3))%fft%execute(in, aux, DTFFT_BACKWARD) - ! Transpose Z -> Y - call self%transpose_private(aux, in, DTFFT_TRANSPOSE_Z_TO_Y) - endif - ! 
1d inverse FFT Y direction - call self%fft(self%fft_mapping(2))%fft%execute(in, in, DTFFT_BACKWARD) - ! Transpose Y -> X - call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Y_TO_X) - ! 1d inverse FFT X direction + call self%fft(3)%fft%execute(out, out, DTFFT_FORWARD) + return + endif + ! Transpose X -> Y + call self%transpose_private(aux, out, DTFFT_TRANSPOSE_X_TO_Y) + ! 1d FFT Y direction + call self%fft(self%fft_mapping(2))%fft%execute(out, out, DTFFT_FORWARD) + if ( self%ndims == 2 ) then + return + endif + ! Transpose Y -> Z + call self%transpose_private(out, aux, DTFFT_TRANSPOSE_Y_TO_Z) + ! 1d direct FFT Z direction + call self%fft(self%fft_mapping(3))%fft%execute(aux, out, DTFFT_FORWARD) + case ( DTFFT_TRANSPOSE_IN ) + if ( self%is_z_slab ) then + ! 1d inverse FFT Z direction + call self%fft(3)%fft%execute(in, in, DTFFT_BACKWARD) + ! Transpose Z -> X + call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Z_TO_X) + ! 2d inverse FFT X-Y direction call self%fft(1)%fft%execute(aux, out, DTFFT_BACKWARD) - endselect - endif + return + endif + if ( self%ndims == 3 ) then + ! 1d inverse FFT Z direction + call self%fft(self%fft_mapping(3))%fft%execute(in, aux, DTFFT_BACKWARD) + ! Transpose Z -> Y + call self%transpose_private(aux, in, DTFFT_TRANSPOSE_Z_TO_Y) + endif + ! 1d inverse FFT Y direction + call self%fft(self%fft_mapping(2))%fft%execute(in, in, DTFFT_BACKWARD) + ! Transpose Y -> X + call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Y_TO_X) + ! 1d inverse FFT X direction + call self%fft(1)%fft%execute(aux, out, DTFFT_BACKWARD) + endselect end subroutine execute_private subroutine destroy(self, error_code) @@ -473,7 +493,8 @@ subroutine get_local_sizes(self, in_starts, in_counts, out_starts, out_counts, a ierr = DTFFT_SUCCESS if ( .not. 
self%is_created ) ierr = DTFFT_ERROR_PLAN_NOT_CREATED - if ( present( error_code ) ) error_code = ierr; if ( ierr /= DTFFT_SUCCESS ) return + if ( present( error_code ) ) error_code = ierr + if ( ierr /= DTFFT_SUCCESS ) return select type ( self ) class is (dtfft_plan_r2c) @@ -829,8 +850,8 @@ subroutine test_grid_decomposition(self, base_comm, ny, nz, dims, transposed_dim endif decomps(1, latest_timer_id) = ny decomps(2, latest_timer_id) = nz - DEBUG(repeat("=", 50)) - DEBUG(" Average execution time: "//double_to_str(timers(latest_timer_id))) + ! DEBUG(repeat("=", 50)) + ! DEBUG(" Average execution time: "//double_to_str(timers(latest_timer_id))) latest_timer_id = latest_timer_id + 1 deallocate(a, b) @@ -1089,7 +1110,7 @@ subroutine alloc_fft_plans(self, kinds) self%fft_mapping(dim) = dim select case(self%executor_type) -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW case (DTFFT_EXECUTOR_FFTW3) if ( dim == 1 ) then DEBUG("Using FFTW3 executor") diff --git a/src/dtfft_helpers.c b/src/dtfft_helpers.c index ad8bc37..f6da460 100644 --- a/src/dtfft_helpers.c +++ b/src/dtfft_helpers.c @@ -1,5 +1,5 @@ #include -bool is_same_ptr(void *ptr1, void *ptr2) { +bool is_same_ptr(const void *ptr1, const void *ptr2) { return ptr1 == ptr2; } \ No newline at end of file diff --git a/src/dtfft_parameters.F90 b/src/dtfft_parameters.F90 index 34a30ff..2e5b2bb 100644 --- a/src/dtfft_parameters.F90 +++ b/src/dtfft_parameters.F90 @@ -53,7 +53,7 @@ module dtfft_parameters integer(IP), parameter, public :: DTFFT_EXECUTOR_NONE = CONF_DTFFT_EXECUTOR_NONE !< Do not setup any executor. If this type is provided, then execute method cannot be called. 
!< Use transpose method instead -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW integer(IP), parameter, public :: DTFFT_EXECUTOR_FFTW3 = CONF_DTFFT_EXECUTOR_FFTW3 !< FFTW3 executor #endif @@ -140,7 +140,7 @@ module dtfft_parameters integer(IP), parameter, public :: VALID_R2R_FFTS(*) = [DTFFT_DCT_1, DTFFT_DCT_2, DTFFT_DCT_3, DTFFT_DCT_4, DTFFT_DST_1, DTFFT_DST_2, DTFFT_DST_3, DTFFT_DST_4] integer(IP), parameter, public :: VALID_EXECUTORS(*) = [ & DTFFT_EXECUTOR_NONE & -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW ,DTFFT_EXECUTOR_FFTW3 & #endif #ifdef DTFFT_WITH_MKL @@ -173,6 +173,7 @@ module dtfft_parameters integer(IP), parameter, public :: DTFFT_ERROR_INVALID_R2R_KINDS = CONF_DTFFT_ERROR_INVALID_R2R_KINDS integer(IP), parameter, public :: DTFFT_ERROR_R2C_TRANSPOSE_PLAN = CONF_DTFFT_ERROR_R2C_TRANSPOSE_PLAN integer(IP), parameter, public :: DTFFT_ERROR_INPLACE_TRANSPOSE = CONF_DTFFT_ERROR_INPLACE_TRANSPOSE + integer(IP), parameter, public :: DTFFT_ERROR_INVALID_AUX = CONF_DTFFT_ERROR_INVALID_AUX integer(IP), parameter, public :: DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED = CONF_DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED integer(IP), parameter, public :: DTFFT_ERROR_CUFFTMP_2D_PLAN = CONF_DTFFT_ERROR_CUFFTMP_2D_PLAN @@ -227,6 +228,8 @@ pure function dtfft_get_error_string(error_code) result(error_string) allocate(error_string, source="Invalid values detected in `kinds` parameter") case ( DTFFT_ERROR_R2C_TRANSPOSE_PLAN ) allocate(error_string, source="Transpose plan is not supported in R2C, use R2R or C2C plan instead") + case ( DTFFT_ERROR_INVALID_AUX ) + allocate(error_string, source="Invalid `aux` buffer provided") case ( DTFFT_ERROR_INPLACE_TRANSPOSE ) allocate(error_string, source="Inplace transpose is not supported") case ( DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED ) diff --git a/src/dtfft_utils.F90 b/src/dtfft_utils.F90 index 5619d11..5430692 100644 --- a/src/dtfft_utils.F90 +++ b/src/dtfft_utils.F90 @@ -15,9 +15,9 @@ module dtfft_utils public :: dtfft_string_f2c, 
dtfft_astring_f2c public :: int_to_str, double_to_str public :: write_debug, dtfft_init -public :: suppress_unused public :: get_inverse_kind #ifdef DTFFT_WITH_CUDA +public :: suppress_unused public :: cufftGetErrorString public :: CUFFT_SUCCESS public :: dtfft_get_stream, dtfft_set_stream @@ -98,13 +98,6 @@ function double_to_str(n) result(string) allocate( string, source= trim(adjustl(temp))) end function double_to_str - ! Suppress warnings from linter - subroutine suppress_unused(x) - type(*) :: x(..) - integer :: i_size(1) - i_size = shape(x) - end subroutine suppress_unused - subroutine write_debug(msg) character(len=*), intent(in) :: msg integer(IP) :: comm_rank, ierr @@ -138,6 +131,13 @@ elemental integer(IP) function get_inverse_kind(r2r_kind) end function get_inverse_kind #ifdef DTFFT_WITH_CUDA + ! Suppress warnings from linter + subroutine suppress_unused(x) + type(*) :: x(..) + integer :: i_size(1) + i_size = shape(x) + end subroutine suppress_unused + integer(cuda_stream_kind) function dtfft_get_stream() integer :: ierr if (.not.is_stream_created) then diff --git a/src/interfaces/api/c/dtfft_api_c.c b/src/interfaces/api/c/dtfft_api_c.c index 3eb6d19..11cb0f5 100644 --- a/src/interfaces/api/c/dtfft_api_c.c +++ b/src/interfaces/api/c/dtfft_api_c.c @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/src/interfaces/fft/CMakeLists.txt b/src/interfaces/fft/CMakeLists.txt index 8ac3731..8b2c2f7 100644 --- a/src/interfaces/fft/CMakeLists.txt +++ b/src/interfaces/fft/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT DTFFT_WITHOUT_FFTW) +if(DTFFT_WITH_FFTW) add_subdirectory(fftw) endif() @@ -6,9 +6,9 @@ if(DTFFT_WITH_MKL) add_subdirectory(mkl) endif() -if(DTFFT_WITH_CUFFT) - add_subdirectory(cufft) -endif() +# if(DTFFT_WITH_CUFFT) +# add_subdirectory(cufft) +# endif() # if(DTFFT_WITH_KFR) # add_subdirectory(kfr) diff --git a/src/interfaces/fft/mkl/dtfft_executor_mkl_m.F90 b/src/interfaces/fft/mkl/dtfft_executor_mkl_m.F90 index fab7d0a..2599321 
100644 --- a/src/interfaces/fft/mkl/dtfft_executor_mkl_m.F90 +++ b/src/interfaces/fft/mkl/dtfft_executor_mkl_m.F90 @@ -28,7 +28,7 @@ module dtfft_executor_mkl_m use dtfft_interface_mkl_native_m use dtfft_precisions, only: IP use dtfft_parameters, only: DTFFT_SUCCESS, DTFFT_FORWARD, DTFFT_BACKWARD, DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED, DTFFT_DOUBLE -use dtfft_utils, only: suppress_unused, int_to_str +use dtfft_utils, only: int_to_str #include "dtfft_mpi.h" implicit none private diff --git a/tests/c/test_c2c_2d_cxx.cpp b/tests/c/test_c2c_2d_cxx.cpp index f828fdd..70a9d21 100644 --- a/tests/c/test_c2c_2d_cxx.cpp +++ b/tests/c/test_c2c_2d_cxx.cpp @@ -46,7 +46,8 @@ int main(int argc, char *argv[]) } // Create plan const vector dims = {ny, nx}; - dtfft::PlanC2C plan(dims, MPI_COMM_WORLD, DTFFT_DOUBLE, DTFFT_MEASURE, DTFFT_EXECUTOR_NONE); + + dtfft::PlanC2C plan = dtfft::PlanC2C(dims, MPI_COMM_WORLD, DTFFT_DOUBLE, DTFFT_MEASURE, DTFFT_EXECUTOR_NONE); int local_size[2]; size_t alloc_size; @@ -67,7 +68,7 @@ int main(int argc, char *argv[]) check[i] = in[i]; } - plan.transpose(in, out, DTFFT_TRANSPOSE_X_TO_Y); + DTFFT_CALL( plan.transpose(in, out, DTFFT_TRANSPOSE_X_TO_Y) ) for ( auto & element: in) { element = complex(-1., -1.); diff --git a/tests/c/test_c2c_2d_float_c.c b/tests/c/test_c2c_2d_float_c.c index 55960d2..7d4df94 100644 --- a/tests/c/test_c2c_2d_float_c.c +++ b/tests/c/test_c2c_2d_float_c.c @@ -48,7 +48,7 @@ int main(int argc, char *argv[]) // Create plan int n[2] = {ny, nx}; -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW int executor_type = DTFFT_EXECUTOR_FFTW3; #else int executor_type = DTFFT_EXECUTOR_NONE; @@ -72,9 +72,9 @@ int main(int argc, char *argv[]) double tf = 0.0 - MPI_Wtime(); #ifdef DTFFT_TRANSPOSE_ONLY - dtfft_transpose(plan, in, out, DTFFT_TRANSPOSE_X_TO_Y); + DTFFT_CALL( dtfft_transpose(plan, in, out, DTFFT_TRANSPOSE_X_TO_Y) ) #else - dtfft_execute(plan, in, out, DTFFT_TRANSPOSE_OUT, NULL); + DTFFT_CALL( dtfft_execute(plan, in, out, 
DTFFT_TRANSPOSE_OUT, NULL) ) #endif tf += MPI_Wtime(); @@ -90,9 +90,9 @@ int main(int argc, char *argv[]) double tb = 0.0 - MPI_Wtime(); #ifdef DTFFT_TRANSPOSE_ONLY - dtfft_transpose(plan, out, in, DTFFT_TRANSPOSE_Y_TO_X); + DTFFT_CALL( dtfft_transpose(plan, out, in, DTFFT_TRANSPOSE_Y_TO_X) ) #else - dtfft_execute(plan, out, in, DTFFT_TRANSPOSE_IN, NULL); + DTFFT_CALL( dtfft_execute(plan, out, in, DTFFT_TRANSPOSE_IN, NULL) ) #endif tb += MPI_Wtime(); diff --git a/tests/c/test_c2c_3d_c.c b/tests/c/test_c2c_3d_c.c index 479b4f5..da2516c 100644 --- a/tests/c/test_c2c_3d_c.c +++ b/tests/c/test_c2c_3d_c.c @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) #ifdef DTFFT_WITH_MKL int executor_type = DTFFT_EXECUTOR_MKL; -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) int executor_type = DTFFT_EXECUTOR_FFTW3; #else int executor_type = DTFFT_EXECUTOR_NONE; diff --git a/tests/c/test_c2c_3d_float_cxx.cpp b/tests/c/test_c2c_3d_float_cxx.cpp index 1477cbe..99f268a 100644 --- a/tests/c/test_c2c_3d_float_cxx.cpp +++ b/tests/c/test_c2c_3d_float_cxx.cpp @@ -61,13 +61,13 @@ int main(int argc, char *argv[]) #ifdef DTFFT_WITH_VKFFT int executor_type = DTFFT_EXECUTOR_VKFFT; -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) int executor_type = DTFFT_EXECUTOR_FFTW3; #else int executor_type = DTFFT_EXECUTOR_NONE; #endif - dtfft::PlanC2C plan(dims, grid_comm, DTFFT_SINGLE, DTFFT_MEASURE, executor_type); + dtfft::PlanC2C plan(dims, grid_comm, DTFFT_SINGLE, DTFFT_PATIENT, executor_type); vector in_counts(3); plan.get_local_sizes(NULL, in_counts.data()); @@ -90,9 +90,7 @@ int main(int argc, char *argv[]) plan.execute(in, out, DTFFT_TRANSPOSE_OUT); tf += MPI_Wtime(); - for ( auto & element: in) { - element = complex(-1., -1.); - } + std::fill(in.begin(), in.end(), complex(-1., -1.)); #ifndef DTFFT_TRANSPOSE_ONLY float scaler = 1. 
/ (float) (nx * ny * nz); for ( auto & element: out) { diff --git a/tests/c/test_r2c_2d_cxx.cpp b/tests/c/test_r2c_2d_cxx.cpp index e4afcf5..375d31d 100644 --- a/tests/c/test_r2c_2d_cxx.cpp +++ b/tests/c/test_r2c_2d_cxx.cpp @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) int executor_type = DTFFT_EXECUTOR_MKL; #elif defined(DTFFT_WITH_VKFFT) int executor_type = DTFFT_EXECUTOR_VKFFT; -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) int executor_type = DTFFT_EXECUTOR_FFTW3; #else if(comm_rank == 0) { diff --git a/tests/c/test_r2c_2d_float_c.c b/tests/c/test_r2c_2d_float_c.c index 6aab7dd..d2e5de4 100644 --- a/tests/c/test_r2c_2d_float_c.c +++ b/tests/c/test_r2c_2d_float_c.c @@ -52,7 +52,7 @@ int main(int argc, char *argv[]) int executor_type = DTFFT_EXECUTOR_MKL; #elif defined(DTFFT_WITH_VKFFT) int executor_type = DTFFT_EXECUTOR_VKFFT; -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) int executor_type = DTFFT_EXECUTOR_FFTW3; #else if(comm_rank == 0) { diff --git a/tests/c/test_r2c_3d_c.c b/tests/c/test_r2c_3d_c.c index eb4415d..fe86e25 100644 --- a/tests/c/test_r2c_3d_c.c +++ b/tests/c/test_r2c_3d_c.c @@ -52,7 +52,7 @@ int main(int argc, char *argv[]) int executor_type = DTFFT_EXECUTOR_MKL; #elif defined(DTFFT_WITH_VKFFT) int executor_type = DTFFT_EXECUTOR_VKFFT; -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) int executor_type = DTFFT_EXECUTOR_FFTW3; #else if(comm_rank == 0) { diff --git a/tests/c/test_r2c_3d_float_cxx.cpp b/tests/c/test_r2c_3d_float_cxx.cpp index 20aaf50..0cbee12 100644 --- a/tests/c/test_r2c_3d_float_cxx.cpp +++ b/tests/c/test_r2c_3d_float_cxx.cpp @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) int executor_type = DTFFT_EXECUTOR_MKL; #elif defined(DTFFT_WITH_VKFFT) int executor_type = DTFFT_EXECUTOR_VKFFT; -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) int executor_type = DTFFT_EXECUTOR_FFTW3; #else if(comm_rank == 0) { diff --git a/tests/c/test_r2r_2d_cxx.cpp 
b/tests/c/test_r2r_2d_cxx.cpp index e54bf9f..77646b4 100644 --- a/tests/c/test_r2r_2d_cxx.cpp +++ b/tests/c/test_r2r_2d_cxx.cpp @@ -46,7 +46,7 @@ int main(int argc, char *argv[]) cout << "----------------------------------------" << endl; } -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW int executor_type = DTFFT_EXECUTOR_FFTW3; #elif defined(DTFFT_WITH_VKFFT) int executor_type = DTFFT_EXECUTOR_VKFFT; diff --git a/tests/c/test_r2r_2d_float_c.c b/tests/c/test_r2r_2d_float_c.c index ee118ab..099481b 100644 --- a/tests/c/test_r2r_2d_float_c.c +++ b/tests/c/test_r2r_2d_float_c.c @@ -47,7 +47,7 @@ int main(int argc, char *argv[]) { printf("----------------------------------------\n"); } -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW int executor_type = DTFFT_EXECUTOR_FFTW3; #else int executor_type = DTFFT_EXECUTOR_NONE; diff --git a/tests/c/test_r2r_3d_c.c b/tests/c/test_r2r_3d_c.c index 4600060..9685cba 100644 --- a/tests/c/test_r2r_3d_c.c +++ b/tests/c/test_r2r_3d_c.c @@ -46,7 +46,7 @@ int main(int argc, char *argv[]) printf("----------------------------------------\n"); } -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW int executor_type = DTFFT_EXECUTOR_FFTW3; #else int executor_type = DTFFT_EXECUTOR_NONE; diff --git a/tests/c/test_r2r_3d_float_cxx.cpp b/tests/c/test_r2r_3d_float_cxx.cpp index 0fbb748..9c1b465 100644 --- a/tests/c/test_r2r_3d_float_cxx.cpp +++ b/tests/c/test_r2r_3d_float_cxx.cpp @@ -49,7 +49,7 @@ int main(int argc, char *argv[]) cout << "----------------------------------------" << endl; } -#ifndef DTFFT_WITHOUT_FFTW +#ifdef DTFFT_WITH_FFTW int executor_type = DTFFT_EXECUTOR_FFTW3; #elif defined(DTFFT_WITH_VKFFT) int executor_type = DTFFT_EXECUTOR_VKFFT; diff --git a/tests/fortran/test_c2c_2d_f.F90 b/tests/fortran/test_c2c_2d_f.F90 index 703ed0d..2980a24 100644 --- a/tests/fortran/test_c2c_2d_f.F90 +++ b/tests/fortran/test_c2c_2d_f.F90 @@ -49,7 +49,7 @@ program test_c2c_2d executor_type = DTFFT_EXECUTOR_MKL ! 
#elif defined(DTFFT_WITH_KFR) ! executor_type = DTFFT_EXECUTOR_KFR -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #endif diff --git a/tests/fortran/test_c2c_2d_float_f.F90 b/tests/fortran/test_c2c_2d_float_f.F90 index f8aba6c..2e1cff7 100644 --- a/tests/fortran/test_c2c_2d_float_f.F90 +++ b/tests/fortran/test_c2c_2d_float_f.F90 @@ -46,7 +46,7 @@ program test_c2c_2d_float write(output_unit, '(a)') "----------------------------------------" endif -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #elif defined(DTFFT_WITH_MKL) executor_type = DTFFT_EXECUTOR_MKL diff --git a/tests/fortran/test_c2c_3d_f.F90 b/tests/fortran/test_c2c_3d_f.F90 index 686f9cd..03dd7e3 100644 --- a/tests/fortran/test_c2c_3d_f.F90 +++ b/tests/fortran/test_c2c_3d_f.F90 @@ -47,7 +47,7 @@ program test_c2c_3d #if defined(DTFFT_WITH_MKL) executor_type = DTFFT_EXECUTOR_MKL -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 ! #elif defined(DTFFT_WITH_KFR) ! executor_type = DTFFT_EXECUTOR_KFR diff --git a/tests/fortran/test_c2c_3d_float_f.F90 b/tests/fortran/test_c2c_3d_float_f.F90 index ac9ec52..f87b4e2 100644 --- a/tests/fortran/test_c2c_3d_float_f.F90 +++ b/tests/fortran/test_c2c_3d_float_f.F90 @@ -48,7 +48,7 @@ program test_c2c_3d_float ! #ifdef DTFFT_WITH_KFR ! 
executor_type = DTFFT_EXECUTOR_KFR -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #else executor_type = DTFFT_EXECUTOR_NONE diff --git a/tests/fortran/test_r2c_2d_f.F90 b/tests/fortran/test_r2c_2d_f.F90 index 0e2ef8a..64e552d 100644 --- a/tests/fortran/test_r2c_2d_f.F90 +++ b/tests/fortran/test_r2c_2d_f.F90 @@ -53,7 +53,7 @@ program test_r2c_2d stop #endif -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #elif defined(DTFFT_WITH_MKL) executor_type = DTFFT_EXECUTOR_MKL diff --git a/tests/fortran/test_r2c_2d_float_f.F90 b/tests/fortran/test_r2c_2d_float_f.F90 index 39d096d..32dc002 100644 --- a/tests/fortran/test_r2c_2d_float_f.F90 +++ b/tests/fortran/test_r2c_2d_float_f.F90 @@ -54,7 +54,7 @@ program test_r2c_2d_float stop #endif -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #elif defined(DTFFT_WITH_MKL) executor_type = DTFFT_EXECUTOR_MKL diff --git a/tests/fortran/test_r2c_3d_f.F90 b/tests/fortran/test_r2c_3d_f.F90 index 730a836..5830502 100644 --- a/tests/fortran/test_r2c_3d_f.F90 +++ b/tests/fortran/test_r2c_3d_f.F90 @@ -58,7 +58,7 @@ program test_r2c_3d executor_type = DTFFT_EXECUTOR_MKL ! #elif defined(DTFFT_WITH_KFR) ! executor_type = DTFFT_EXECUTOR_KFR -#elif !defined(DTFFT_WITHOUT_FFTW) +#elif defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #endif diff --git a/tests/fortran/test_r2c_3d_float_f.F90 b/tests/fortran/test_r2c_3d_float_f.F90 index b353820..88b831f 100644 --- a/tests/fortran/test_r2c_3d_float_f.F90 +++ b/tests/fortran/test_r2c_3d_float_f.F90 @@ -55,7 +55,7 @@ program test_r2c_3d_float ! #if defined(DTFFT_WITH_KFR) ! 
executor_type = DTFFT_EXECUTOR_KFR -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #elif defined(DTFFT_WITH_MKL) executor_type = DTFFT_EXECUTOR_MKL diff --git a/tests/fortran/test_r2r_2d_f.F90 b/tests/fortran/test_r2r_2d_f.F90 index 97e7510..5cb5633 100644 --- a/tests/fortran/test_r2r_2d_f.F90 +++ b/tests/fortran/test_r2r_2d_f.F90 @@ -48,7 +48,7 @@ program test_r2r_2d ! #ifdef DTFFT_WITH_KFR ! executor_type = DTFFT_EXECUTOR_KFR ! scaler = 4._R8P / real(nx * ny, R8P) -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 scaler = 1._R8P / real(4 * (nx - 1) * (ny - 1), R8P) #else diff --git a/tests/fortran/test_r2r_2d_float_f.F90 b/tests/fortran/test_r2r_2d_float_f.F90 index f42512a..7d5f019 100644 --- a/tests/fortran/test_r2r_2d_float_f.F90 +++ b/tests/fortran/test_r2r_2d_float_f.F90 @@ -18,7 +18,7 @@ !------------------------------------------------------------------------------------------------ #include "dtfft_config.h" program test_r2r_2d_float -use iso_fortran_env, only: R8P => real64, R4P => real32, I4P => int32, output_unit, error_unit +use iso_fortran_env, only: R8P => real64, R4P => real32, IP => int32, I4P => int32, output_unit, error_unit use dtfft #include "dtfft_mpi.h" implicit none @@ -29,6 +29,7 @@ program test_r2r_2d_float type(dtfft_plan_r2r) :: plan integer(I4P) :: in_starts(2), in_counts(2), out_starts(2), out_counts(2) real(R8P) :: tf, tb, t_sum + TYPE_MPI_COMM :: comm_1d call MPI_Init(ierr) call MPI_Comm_size(MPI_COMM_WORLD, comm_size, ierr) @@ -44,13 +45,15 @@ program test_r2r_2d_float endif ! #ifdef DTFFT_WITH_KFR ! 
executor_type = DTFFT_EXECUTOR_KFR -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #else executor_type = DTFFT_EXECUTOR_NONE #endif - call plan%create([nx, ny], [DTFFT_DST_2, DTFFT_DST_3], precision=DTFFT_SINGLE, executor_type=executor_type) + call MPI_Cart_create(MPI_COMM_WORLD, 1, [comm_size], [.false.], .true., comm_1d, ierr) + + call plan%create([nx, ny], [DTFFT_DST_2, DTFFT_DST_3], comm=comm_1d, precision=DTFFT_SINGLE, executor_type=executor_type) call plan%get_local_sizes(in_starts, in_counts, out_starts, out_counts) allocate(in(in_starts(1):in_starts(1) + in_counts(1) - 1, & diff --git a/tests/fortran/test_r2r_3d_f.F90 b/tests/fortran/test_r2r_3d_f.F90 index 21bc5fb..4be7383 100644 --- a/tests/fortran/test_r2r_3d_f.F90 +++ b/tests/fortran/test_r2r_3d_f.F90 @@ -30,10 +30,9 @@ program test_r2r_3d real(R8P) :: local_error, global_error, rnd integer(I4P), parameter :: nx = 512, ny = 32, nz = 8 integer(I4P) :: comm_size, comm_rank, i, j, k, out_size - type(dtfft_plan_r2r) :: plan + class(dtfft_core), allocatable :: plan integer(I4P) :: in_starts(3), in_counts(3), out_counts(3), ierr, executor_type real(R8P) :: tf, tb, t_sum - TYPE_MPI_COMM :: comm_1d integer(I8P) :: alloc_size call MPI_Init(ierr) @@ -48,17 +47,19 @@ program test_r2r_3d write(output_unit, '(a, i0)') 'Number of processors: ', comm_size endif - call MPI_Cart_create(MPI_COMM_WORLD, 1, [comm_size], [.false.], .true., comm_1d, ierr) - ! #ifdef DTFFT_WITH_KFR ! 
executor_type = DTFFT_EXECUTOR_KFR -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 #else executor_type = DTFFT_EXECUTOR_NONE #endif - call plan%create([nx, ny, nz], [DTFFT_DCT_2, DTFFT_DCT_2, DTFFT_DCT_2], comm=comm_1d, effort_flag=DTFFT_PATIENT, executor_type=executor_type) + allocate( dtfft_plan_r2r :: plan ) + select type (plan) + class is ( dtfft_plan_r2r ) + call plan%create([nx, ny, nz], [DTFFT_DCT_2, DTFFT_DCT_2, DTFFT_DST_4], effort_flag=DTFFT_MEASURE, executor_type=executor_type) + endselect call plan%get_local_sizes(in_starts, in_counts, out_counts=out_counts, alloc_size=alloc_size) @@ -119,5 +120,6 @@ program test_r2r_3d deallocate(in, out, check) call plan%destroy() + deallocate( plan ) call MPI_Finalize(ierr) end program test_r2r_3d \ No newline at end of file diff --git a/tests/fortran/test_r2r_3d_float_f.F90 b/tests/fortran/test_r2r_3d_float_f.F90 index 8f549ef..1095953 100644 --- a/tests/fortran/test_r2r_3d_float_f.F90 +++ b/tests/fortran/test_r2r_3d_float_f.F90 @@ -46,7 +46,7 @@ program test_r2r_3d_float ! #ifdef DTFFT_WITH_KFR ! executor_type = DTFFT_EXECUTOR_KFR ! scaler = 8._R4P / real(nx * ny * nz, R4P) -#if !defined(DTFFT_WITHOUT_FFTW) +#if defined (DTFFT_WITH_FFTW) executor_type = DTFFT_EXECUTOR_FFTW3 scaler = 1._R4P / real(8 * (nx - 1) * ny * nz, R4P) #else