diff --git a/CMakeLists.txt b/CMakeLists.txt
index 67805b8..519aeb0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(dtFFT VERSION 0.2.0
               HOMEPAGE_URL "https://github.com/ShatrovOA/dtFFT"
               LANGUAGES Fortran C CXX)
 
-option(DTFFT_WITHOUT_FFTW "Build dtFFT without FFTW support" OFF)
+option(DTFFT_WITH_FFTW "Build dtFFT with FFTW support" OFF)
 option(DTFFT_WITH_MKL "Build dtFFT with MKL DFTI support" OFF)
 option(DTFFT_WITH_CUFFT "Build dtFFT with cufft support" OFF)
 # option(DTFFT_WITH_KFR "Build dtFFT with KFR support" OFF)
@@ -64,11 +64,11 @@ end program"
 HAVE_BLOCK_STATEMENT
 SRC_EXT .F90)
 
-if( DTFFT_WITHOUT_FFTW
+if( NOT DTFFT_WITH_FFTW
     AND NOT DTFFT_WITH_MKL
     # AND NOT DTFFT_WITH_KFR
-    AND NOT DTFFT_WITH_CUFFT
-    AND NOT DTFFT_WITH_VKFFT
+    # AND NOT DTFFT_WITH_CUFFT
+    # AND NOT DTFFT_WITH_VKFFT
 )
   set ( DTFFT_TRANSPOSE_ONLY ON )
 endif()
diff --git a/README.md b/README.md
index b246736..479f7df 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # dtFFT -  DataTyped Fast Fourier Transform
 
 [![Status](https://img.shields.io/badge/status-stable-brightgreen.svg)]()
-![Build Status](https://github.com/ShatrovOA/dtFFT/actions/workflows/main.yml/badge.svg)
+[![dtfft workflow](https://github.com/ShatrovOA/dtFFT/actions/workflows/main.yml/badge.svg)](https://github.com/ShatrovOA/dtFFT/actions/workflows/main.yml)
 [![codecov](https://codecov.io/gh/ShatrovOA/dtFFT/graph/badge.svg?token=6BI4AQVH7Z)](https://codecov.io/gh/ShatrovOA/dtFFT)
 [![License](https://img.shields.io/github/license/ShatrovOA/dtFFT?color=brightgreen&logo=License)]()
 
@@ -60,7 +60,7 @@ To build this library modern (2008+) Fortran compiler is required. This library
 
 | Option   | Possible values | Default value | Description |
 | -------- | ------- | -------- | ------- |
-| DTFFT_WITHOUT_FFTW | on / off | off | Build dtFFT without FFTW support. When `OFF` user need to set `FFTWDIR` environmental variable in order to find FFTW3. Both single and double precision versions are required |
+| DTFFT_WITH_FFTW | on / off | off | Build dtFFT with FFTW support. When enabled, the user needs to set the `FFTWDIR` environment variable in order to find FFTW3 located in a custom directory. Both single and double precision versions of the library are required |
 | DTFFT_WITH_MKL | on / off | off | Build dtFFT with MKL DFTI support |
 | DTFFT_BUILD_TESTS | on / off | off | Build tests |
 | DTFFT_ENABLE_COVERAGE | on / off | off | Build coverage of library. Only possible with gfortran |
@@ -68,7 +68,7 @@ To build this library modern (2008+) Fortran compiler is required. This library
 | DTFFT_USE_MPI | on / off | on | Use Fortran `mpi` module instead of `mpi_f08` |
 | DTFFT_BUILD_C_CXX_API | on / off | on | Build C/C++ API |
 | DTFFT_ENABLE_PERSISTENT_COMM | on / off | off | In case you are planning to execute plan multiple times then it can be very beneficial to use persistent communications. But user must aware that such communications are created at first call to `execute` or `transpose` subroutines and pointers are saved internally inside MPI. All other plan executions will use those pointers. Take care not to free them. |
-| DTFFT_WITH_CALIPER | on / off | off | Enable library profiler via Caliper. Additional parameter is required to find caliper: `Dcaliper_DIR` |
+| DTFFT_WITH_CALIPER | on / off | off | Enable library profiler via Caliper. An additional parameter is required to find Caliper: `caliper_DIR` |
 | DTFFT_MEASURE_ITERS | positive integer | 2 | Number of iterations to run in order to find best plan when passing `DTFFT_MEASURE` or `DTFFT_PATIENT` to effort_flag parameter during plan creation |
 | DTFFT_FORWARD_X_Y | 1 / 2 | 2 | Default id of transposition plan for X -> Y transpose which will be used if plan created with `DTFFT_ESTIMATE` and `DTFFT_MEASURE` effort_flags |
 | DTFFT_BACKWARD_X_Y | 1 / 2 | 2 | Default id of transposition plan for Y -> X transpose which will be used if plan created with `DTFFT_ESTIMATE` and `DTFFT_MEASURE` effort_flags |
diff --git a/include/dtfft.h b/include/dtfft.h
index 1f0eb59..b11e085 100644
--- a/include/dtfft.h
+++ b/include/dtfft.h
@@ -65,6 +65,7 @@ typedef struct dtfft_plan_t *dtfft_plan;
 #define DTFFT_ERROR_INVALID_R2R_KINDS CONF_DTFFT_ERROR_INVALID_R2R_KINDS
 #define DTFFT_ERROR_R2C_TRANSPOSE_PLAN CONF_DTFFT_ERROR_R2C_TRANSPOSE_PLAN
 #define DTFFT_ERROR_INPLACE_TRANSPOSE CONF_DTFFT_ERROR_INPLACE_TRANSPOSE
+#define DTFFT_ERROR_INVALID_AUX CONF_DTFFT_ERROR_INVALID_AUX
 #define DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED CONF_DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED
 #define DTFFT_ERROR_CUFFTMP_2D_PLAN CONF_DTFFT_ERROR_CUFFTMP_2D_PLAN
 
@@ -82,9 +83,9 @@ do {                                                                          \
 
 // dtFFT transpose_type flags
 
-// Perform XYZ --> YXZ --> ZXY transposition
+// Perform XYZ --> YXZ --> ZXY plan execution
 #define DTFFT_TRANSPOSE_OUT CONF_DTFFT_TRANSPOSE_OUT
-// Perform ZXY --> YXZ --> XYZ transposition
+// Perform ZXY --> YXZ --> XYZ plan execution
 #define DTFFT_TRANSPOSE_IN CONF_DTFFT_TRANSPOSE_IN
 
 // Flags for transpose only plans
@@ -128,7 +129,7 @@ do {                                                                          \
 */
 // Create transpose only plan, no executor needed
 #define DTFFT_EXECUTOR_NONE CONF_DTFFT_EXECUTOR_NONE
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
 // Use FFTW3
 #define DTFFT_EXECUTOR_FFTW3 CONF_DTFFT_EXECUTOR_FFTW3
 #endif
@@ -279,6 +280,8 @@ dtfft_execute(dtfft_plan plan, void *in, void *out, const int transpose_type, vo
   *                                   - `DTFFT_TRANSPOSE_Y_TO_X`
   *                                   - `DTFFT_TRANSPOSE_Y_TO_Z` (3d plan only)
   *                                   - `DTFFT_TRANSPOSE_Z_TO_Y` (3d plan only)
+  *                                   - `DTFFT_TRANSPOSE_X_TO_Z` (3d plan only)
+  *                                   - `DTFFT_TRANSPOSE_Z_TO_X` (3d plan only)
   *
   * \return `DTFFT_SUCCESS` if plan was executed, error code otherwise
 */
@@ -309,11 +312,8 @@ dtfft_destroy(dtfft_plan *plan);
   * \param[out]     out_starts      Starts of local portion of data in 'fourier' space in reversed order
   * \param[out]     out_counts      Sizes  of local portion of data in 'fourier' space in reversed order
   * \param[out]     alloc_size      Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers:
-  *
   *                                   - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex)
-  *
   *                                   - R2R plan: `alloc_size` * sizeof(double/float)
-  *
   *                                   - R2C plan: `alloc_size` * sizeof(double/float)
   * \return `DTFFT_SUCCESS` if call was successfull, error code otherwise
 */
@@ -328,11 +328,8 @@ dtfft_get_local_sizes(dtfft_plan plan, int *in_starts, int *in_counts, int *out_
   *
   * \param[in]      plan            Plan handle
   * \param[out]     alloc_size      Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers:
-  *
   *                                   - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex)
-  *
   *                                   - R2R plan: `alloc_size` * sizeof(double/float)
-  *
   *                                   - R2C plan: `alloc_size` * sizeof(double/float)
   * \return `DTFFT_SUCCESS` if call was successfull, error code otherwise
 */
diff --git a/include/dtfft.hpp b/include/dtfft.hpp
index 7790d6e..72086e7 100644
--- a/include/dtfft.hpp
+++ b/include/dtfft.hpp
@@ -119,8 +119,10 @@ namespace dtfft
   * \param[in]      transpose_type  Type of transpose:
   *                                   - `DTFFT_TRANSPOSE_X_TO_Y`
   *                                   - `DTFFT_TRANSPOSE_Y_TO_X`
-  *                                   - `DTFFT_TRANSPOSE_Y_TO_Z`
-  *                                   - `DTFFT_TRANSPOSE_Z_TO_Y`
+  *                                   - `DTFFT_TRANSPOSE_Y_TO_Z` (3d plan only)
+  *                                   - `DTFFT_TRANSPOSE_Z_TO_Y` (3d plan only)
+  *                                   - `DTFFT_TRANSPOSE_X_TO_Z` (3d plan only)
+  *                                   - `DTFFT_TRANSPOSE_Z_TO_X` (3d plan only)
   *
   * \return Status code of method execution
 */
@@ -139,8 +141,10 @@ namespace dtfft
   * \param[in]      transpose_type  Type of transpose:
   *                                   - `DTFFT_TRANSPOSE_X_TO_Y`
   *                                   - `DTFFT_TRANSPOSE_Y_TO_X`
-  *                                   - `DTFFT_TRANSPOSE_Y_TO_Z`
-  *                                   - `DTFFT_TRANSPOSE_Z_TO_Y`
+  *                                   - `DTFFT_TRANSPOSE_Y_TO_Z` (3d plan only)
+  *                                   - `DTFFT_TRANSPOSE_Z_TO_Y` (3d plan only)
+  *                                   - `DTFFT_TRANSPOSE_X_TO_Z` (3d plan only)
+  *                                   - `DTFFT_TRANSPOSE_Z_TO_X` (3d plan only)
   *
   * \return Status code of method execution
 */
@@ -153,11 +157,8 @@ namespace dtfft
 /** \brief Wrapper around `get_local_sizes`
   *
   * \param[out]     alloc_size      Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers:
-  *
   *                                   - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex)
-  *
   *                                   - R2R plan: `alloc_size` * sizeof(double/float)
-  *
   *                                   - R2C plan: `alloc_size` * sizeof(double/float)
   *
   * \return Status code of method execution
@@ -175,11 +176,8 @@ namespace dtfft
   * \param[out]   out_starts            Starts of local portion of data in 'fourier' space in reversed order
   * \param[out]   out_counts            Sizes  of local portion of data in 'fourier' space in reversed order
   * \param[out]   alloc_size            Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers:
-  *
   *                                       - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex)
-  *
   *                                       - R2R plan: `alloc_size` * sizeof(double/float)
-  *
   *                                       - R2C plan: `alloc_size` * sizeof(double/float)
   *
   * \return Status code of method execution
@@ -197,11 +195,8 @@ namespace dtfft
   * \param[out]   out_starts            Starts of local portion of data in 'fourier' space in reversed order
   * \param[out]   out_counts            Sizes  of local portion of data in 'fourier' space in reversed order
   * \param[out]   alloc_size            Minimum number of elements needs to be allocated for `in`, `out` or `aux` buffers:
-  *
   *                                       - C2C plan: 2 * `alloc_size` * sizeof(double/float) or `alloc_size` * sizeof(dtfft_complex/dtfftf_complex)
-  *
   *                                       - R2R plan: `alloc_size` * sizeof(double/float)
-  *
   *                                       - R2C plan: `alloc_size` * sizeof(double/float)
   *
   * \return Status code of method execution
diff --git a/include/dtfft_config.h.in b/include/dtfft_config.h.in
index 6547f0a..2a9a873 100644
--- a/include/dtfft_config.h.in
+++ b/include/dtfft_config.h.in
@@ -1,11 +1,11 @@
 #ifndef DTFFT_CONFIG_H
 #define DTFFT_CONFIG_H
 
-#cmakedefine DTFFT_WITHOUT_FFTW
+#cmakedefine DTFFT_WITH_FFTW
 #cmakedefine DTFFT_WITH_MKL
-#cmakedefine DTFFT_WITH_CUFFT
-#cmakedefine DTFFT_WITH_KFR
-// #cmakedefine DTFFT_WITH_VKFFT
+/* #cmakedefine DTFFT_WITH_CUFFT */
+/* #cmakedefine DTFFT_WITH_KFR */
+/* #cmakedefine DTFFT_WITH_VKFFT */
 #cmakedefine DTFFT_TRANSPOSE_ONLY
 
 
@@ -40,9 +40,9 @@
 #define CONF_DTFFT_EXECUTOR_NONE 0
 #define CONF_DTFFT_EXECUTOR_FFTW3 +1
 #define CONF_DTFFT_EXECUTOR_MKL +2
-#define CONF_DTFFT_EXECUTOR_CUFFT +3
+/* #define CONF_DTFFT_EXECUTOR_CUFFT +3 */
 /* #define CONF_DTFFT_EXECUTOR_KFR +4 */
-#define CONF_DTFFT_EXECUTOR_VKFFT +5
+/* #define CONF_DTFFT_EXECUTOR_VKFFT +5 */
 
 #define CONF_DTFFT_SUCCESS 0
 #define CONF_DTFFT_ERROR_MPI_FINALIZED (-1)
@@ -60,6 +60,7 @@
 #define CONF_DTFFT_ERROR_INVALID_R2R_KINDS 12
 #define CONF_DTFFT_ERROR_R2C_TRANSPOSE_PLAN 13
 #define CONF_DTFFT_ERROR_INPLACE_TRANSPOSE 14
+#define CONF_DTFFT_ERROR_INVALID_AUX 15
 #define CONF_DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED 101
 #define CONF_DTFFT_ERROR_CUFFTMP_2D_PLAN 200
 
diff --git a/src/dtfft.F90 b/src/dtfft.F90
index 7553c05..b90240d 100644
--- a/src/dtfft.F90
+++ b/src/dtfft.F90
@@ -47,7 +47,7 @@ module dtfft
 
 ! 1d FFT External Executor types
 public :: DTFFT_EXECUTOR_NONE
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
 public  :: DTFFT_EXECUTOR_FFTW3
 #endif
 #ifdef DTFFT_WITH_MKL
diff --git a/src/dtfft_core_m.F90 b/src/dtfft_core_m.F90
index fcd0d92..ffc80b4 100644
--- a/src/dtfft_core_m.F90
+++ b/src/dtfft_core_m.F90
@@ -29,7 +29,7 @@ module dtfft_core_m
 use dtfft_precisions
 use dtfft_transpose_m
 use dtfft_abstract_executor_m
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
 use dtfft_executor_fftw_m
 #endif
 #ifdef DTFFT_WITH_MKL
@@ -232,17 +232,20 @@ subroutine transpose(self, in, out, transpose_type, error_code)
     ierr = DTFFT_SUCCESS
     if ( .not. self%is_created )                                  &
       ierr = DTFFT_ERROR_PLAN_NOT_CREATED
+    CHECK_ERROR_AND_RETURN
     if ( .not.any(transpose_type == VALID_TRANSPOSES)             &
          .or. ( self%ndims == 2 .and. abs(transpose_type) > 1 )   &
          .or. abs(transpose_type) == 3 .and..not.self%is_z_slab)  &
       ierr = DTFFT_ERROR_INVALID_TRANSPOSE_TYPE
+    CHECK_ERROR_AND_RETURN
     if ( is_same_ptr(LOC_FUN(in), LOC_FUN(out)) )                 &
       ierr = DTFFT_ERROR_INPLACE_TRANSPOSE
-    if ( present( error_code ) ) error_code = ierr; if ( ierr /= DTFFT_SUCCESS ) return
+    CHECK_ERROR_AND_RETURN
 
     REGION_BEGIN("dtfft_transpose")
     call self%transpose_private(in, out, transpose_type)
     REGION_END("dtfft_transpose")
+    if ( present( error_code ) ) error_code = DTFFT_SUCCESS
   end subroutine transpose
 
   subroutine execute(self, in, out, transpose_type, aux, error_code)
@@ -252,12 +255,12 @@ subroutine execute(self, in, out, transpose_type, aux, error_code)
 #ifdef DTFFT_WITH_CUDA
       , device                            &
 #endif
-                                          :: in(..)                 !< Incoming buffer of any rank and kind
+      , target                            :: in(..)                 !< Incoming buffer of any rank and kind
     type(*),                intent(inout) &
 #ifdef DTFFT_WITH_CUDA
       , device                            &
 #endif
-                                          :: out(..)                !< Resulting buffer of any rank and kind
+      , target                            :: out(..)                !< Resulting buffer of any rank and kind
     integer(IP),            intent(in)    :: transpose_type         !< Type of transposition. One of the:
                                                                     !< - `DTFFT_TRANSPOSE_OUT`
                                                                     !< - `DTFFT_TRANSPOSE_IN`
@@ -267,28 +270,42 @@ subroutine execute(self, in, out, transpose_type, aux, error_code)
 #ifdef DTFFT_WITH_CUDA
       , device                            &
 #endif
-                                          :: aux(..)                !< Optional auxiliary buffer.
+      , target                            :: aux(..)                !< Optional auxiliary buffer.
                                                                     !< Size of buffer must be greater than value 
                                                                     !< returned by `alloc_size` parameter of `get_local_sizes` subroutine
     integer(IP),  optional, intent(out)   :: error_code             !< Optional error code returned to user
     integer(IP) :: ierr
+    logical     :: inplace
 
+    inplace = is_same_ptr(LOC_FUN(in), LOC_FUN(out))
     ierr = DTFFT_SUCCESS
-    if ( .not. self%is_created ) ierr = DTFFT_ERROR_PLAN_NOT_CREATED
-    if ( .not.any(transpose_type == VALID_FULL_TRANSPOSES) ) ierr = DTFFT_ERROR_INVALID_TRANSPOSE_TYPE
-    if ( present( error_code ) ) error_code = ierr; if ( ierr /= DTFFT_SUCCESS ) return
+    if ( .not. self%is_created )                                                                      &
+      ierr = DTFFT_ERROR_PLAN_NOT_CREATED
+    CHECK_ERROR_AND_RETURN
+    if ( .not.any(transpose_type == VALID_FULL_TRANSPOSES) )                                          &
+      ierr = DTFFT_ERROR_INVALID_TRANSPOSE_TYPE
+    CHECK_ERROR_AND_RETURN
+    if ( self%is_transpose_plan .and. self%ndims == 2 .and. inplace )                                 &
+      ierr = DTFFT_ERROR_INPLACE_TRANSPOSE
+    CHECK_ERROR_AND_RETURN
+    if ( present( aux ) ) then
+      if ( is_same_ptr(LOC_FUN(in), LOC_FUN(aux)) .or. is_same_ptr(LOC_FUN(out), LOC_FUN(aux)) )      &
+        ierr = DTFFT_ERROR_INVALID_AUX
+      CHECK_ERROR_AND_RETURN
+    endif
 
     REGION_BEGIN("dtfft_execute")
     call self%check_aux(aux=aux)
     if ( present( aux ) ) then
-      call self%execute_private( in, out, transpose_type, aux )
+      call self%execute_private( in, out, transpose_type, aux, inplace )
     else
-      call self%execute_private( in, out, transpose_type, self%aux )
+      call self%execute_private( in, out, transpose_type, self%aux, inplace )
     endif
     REGION_END("dtfft_execute")
+    if ( present( error_code ) ) error_code = DTFFT_SUCCESS
   end subroutine execute
 
-  subroutine execute_private(self, in, out, transpose_type, aux)
+  subroutine execute_private(self, in, out, transpose_type, aux, inplace)
     class(dtfft_core),      intent(inout) :: self                   !< Abstract plan
     type(*),                intent(inout) &
 #ifdef DTFFT_WITH_CUDA
@@ -312,6 +329,8 @@ subroutine execute_private(self, in, out, transpose_type, aux)
                                           :: aux(..)                !< Auxiliary buffer.
                                                                     !< Size of buffer must be greater than value
                                                                     !< returned by `alloc_size` parameter of `get_local_sizes` subroutine
+    logical,                intent(in)    :: inplace
+
     if ( self%is_transpose_plan ) then
       select case ( self%ndims )
       case (2)
@@ -324,68 +343,69 @@ subroutine execute_private(self, in, out, transpose_type, aux)
       case (3)
         select case( transpose_type )
         case ( DTFFT_TRANSPOSE_OUT )
-          if ( self%is_z_slab ) then
-            call self%transpose_private(in, out, DTFFT_TRANSPOSE_X_TO_Z)
-          else
+          if ( inplace .or. .not. self%is_z_slab ) then
             call self%transpose_private(in, aux, DTFFT_TRANSPOSE_X_TO_Y)
             call self%transpose_private(aux, out, DTFFT_TRANSPOSE_Y_TO_Z)
+            return
           endif
+          call self%transpose_private(in, out, DTFFT_TRANSPOSE_X_TO_Z)
         case ( DTFFT_TRANSPOSE_IN )
-          if ( self%is_z_slab ) then
-            call self%transpose_private(in, out, DTFFT_TRANSPOSE_Z_TO_X)
-          else
+          if ( inplace .or. .not. self%is_z_slab ) then
             call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Z_TO_Y)
             call self%transpose_private(aux, out, DTFFT_TRANSPOSE_Y_TO_X)
+            return
           endif
+          call self%transpose_private(in, out, DTFFT_TRANSPOSE_Z_TO_X)
         endselect
       endselect
-    else ! self%is_transpose_plan
-      select case ( transpose_type )
-      case ( DTFFT_TRANSPOSE_OUT )
-        ! 1d direct FFT X direction || 2d X-Y FFT
-        call self%fft(1)%fft%execute(in, aux, DTFFT_FORWARD)
-        if ( self%is_z_slab ) then
-          ! Transpose X -> Z
-          call self%transpose_private(aux, out, DTFFT_TRANSPOSE_X_TO_Z)
-          ! 1d direct FFT Z direction
-          call self%fft(3)%fft%execute(out, out, DTFFT_FORWARD)
-          return
-        endif
-        ! Transpose X -> Y
-        call self%transpose_private(aux, out, DTFFT_TRANSPOSE_X_TO_Y)
-        ! 1d FFT Y direction
-        call self%fft(self%fft_mapping(2))%fft%execute(out, out, DTFFT_FORWARD)
-        if ( self%ndims == 2 ) then
-          return
-        endif
-        ! Transpose Y -> Z
-        call self%transpose_private(out, aux, DTFFT_TRANSPOSE_Y_TO_Z)
+      return
+    endif ! self%is_transpose_plan
+
+    select case ( transpose_type )
+    case ( DTFFT_TRANSPOSE_OUT )
+      ! 1d direct FFT X direction || 2d X-Y FFT
+      call self%fft(1)%fft%execute(in, aux, DTFFT_FORWARD)
+      if ( self%is_z_slab ) then
+        ! Transpose X -> Z
+        call self%transpose_private(aux, out, DTFFT_TRANSPOSE_X_TO_Z)
         ! 1d direct FFT Z direction
-        call self%fft(self%fft_mapping(3))%fft%execute(aux, out, DTFFT_FORWARD)
-      case ( DTFFT_TRANSPOSE_IN )
-        if ( self%is_z_slab ) then
-          ! 1d inverse FFT Z direction
-          call self%fft(3)%fft%execute(in, in, DTFFT_BACKWARD)
-          ! Transpose Z -> X
-          call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Z_TO_X)
-          ! 2d inverse FFT X-Y direction
-          call self%fft(1)%fft%execute(aux, out, DTFFT_BACKWARD)
-          return
-        endif
-        if ( self%ndims == 3 ) then
-          ! 1d inverse FFT Z direction
-          call self%fft(self%fft_mapping(3))%fft%execute(in, aux, DTFFT_BACKWARD)
-          ! Transpose Z -> Y
-          call self%transpose_private(aux, in, DTFFT_TRANSPOSE_Z_TO_Y)
-        endif
-        ! 1d inverse FFT Y direction
-        call self%fft(self%fft_mapping(2))%fft%execute(in, in, DTFFT_BACKWARD)
-        ! Transpose Y -> X
-        call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Y_TO_X)
-        ! 1d inverse FFT X direction
+        call self%fft(3)%fft%execute(out, out, DTFFT_FORWARD)
+        return
+      endif
+      ! Transpose X -> Y
+      call self%transpose_private(aux, out, DTFFT_TRANSPOSE_X_TO_Y)
+      ! 1d FFT Y direction
+      call self%fft(self%fft_mapping(2))%fft%execute(out, out, DTFFT_FORWARD)
+      if ( self%ndims == 2 ) then
+        return
+      endif
+      ! Transpose Y -> Z
+      call self%transpose_private(out, aux, DTFFT_TRANSPOSE_Y_TO_Z)
+      ! 1d direct FFT Z direction
+      call self%fft(self%fft_mapping(3))%fft%execute(aux, out, DTFFT_FORWARD)
+    case ( DTFFT_TRANSPOSE_IN )
+      if ( self%is_z_slab ) then
+        ! 1d inverse FFT Z direction
+        call self%fft(3)%fft%execute(in, in, DTFFT_BACKWARD)
+        ! Transpose Z -> X
+        call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Z_TO_X)
+        ! 2d inverse FFT X-Y direction
         call self%fft(1)%fft%execute(aux, out, DTFFT_BACKWARD)
-      endselect
-    endif
+        return
+      endif
+      if ( self%ndims == 3 ) then
+        ! 1d inverse FFT Z direction
+        call self%fft(self%fft_mapping(3))%fft%execute(in, aux, DTFFT_BACKWARD)
+        ! Transpose Z -> Y
+        call self%transpose_private(aux, in, DTFFT_TRANSPOSE_Z_TO_Y)
+      endif
+      ! 1d inverse FFT Y direction
+      call self%fft(self%fft_mapping(2))%fft%execute(in, in, DTFFT_BACKWARD)
+      ! Transpose Y -> X
+      call self%transpose_private(in, aux, DTFFT_TRANSPOSE_Y_TO_X)
+      ! 1d inverse FFT X direction
+      call self%fft(1)%fft%execute(aux, out, DTFFT_BACKWARD)
+    endselect
   end subroutine execute_private
 
   subroutine destroy(self, error_code)
@@ -473,7 +493,8 @@ subroutine get_local_sizes(self, in_starts, in_counts, out_starts, out_counts, a
 
     ierr = DTFFT_SUCCESS
     if ( .not. self%is_created ) ierr = DTFFT_ERROR_PLAN_NOT_CREATED
-    if ( present( error_code ) ) error_code = ierr; if ( ierr /= DTFFT_SUCCESS ) return
+    if ( present( error_code ) ) error_code = ierr
+    if ( ierr /= DTFFT_SUCCESS ) return
 
     select type ( self )
     class is (dtfft_plan_r2c)
@@ -829,8 +850,8 @@ subroutine test_grid_decomposition(self, base_comm, ny, nz, dims, transposed_dim
     endif
     decomps(1, latest_timer_id) = ny
     decomps(2, latest_timer_id) = nz
-    DEBUG(repeat("=", 50))
-    DEBUG("    Average execution time: "//double_to_str(timers(latest_timer_id)))
+    ! DEBUG(repeat("=", 50))
+    ! DEBUG("    Average execution time: "//double_to_str(timers(latest_timer_id)))
     latest_timer_id = latest_timer_id + 1
 
     deallocate(a, b)
@@ -1089,7 +1110,7 @@ subroutine alloc_fft_plans(self, kinds)
       self%fft_mapping(dim) = dim
 
       select case(self%executor_type)
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
       case (DTFFT_EXECUTOR_FFTW3)
         if ( dim == 1 ) then
           DEBUG("Using FFTW3 executor")
diff --git a/src/dtfft_helpers.c b/src/dtfft_helpers.c
index ad8bc37..f6da460 100644
--- a/src/dtfft_helpers.c
+++ b/src/dtfft_helpers.c
@@ -1,5 +1,5 @@
 #include <stdbool.h>
 
-bool is_same_ptr(void *ptr1, void *ptr2) {
+bool is_same_ptr(const void *ptr1, const void *ptr2) {
   return ptr1 == ptr2;
 }
\ No newline at end of file
diff --git a/src/dtfft_parameters.F90 b/src/dtfft_parameters.F90
index 34a30ff..2e5b2bb 100644
--- a/src/dtfft_parameters.F90
+++ b/src/dtfft_parameters.F90
@@ -53,7 +53,7 @@ module dtfft_parameters
   integer(IP),  parameter,  public :: DTFFT_EXECUTOR_NONE          = CONF_DTFFT_EXECUTOR_NONE
   !< Do not setup any executor. If this type is provided, then execute method cannot be called.
   !< Use transpose method instead
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
   integer(IP),  parameter,  public :: DTFFT_EXECUTOR_FFTW3         = CONF_DTFFT_EXECUTOR_FFTW3
   !< FFTW3 executor
 #endif
@@ -140,7 +140,7 @@ module dtfft_parameters
   integer(IP),  parameter,  public :: VALID_R2R_FFTS(*) = [DTFFT_DCT_1, DTFFT_DCT_2, DTFFT_DCT_3, DTFFT_DCT_4, DTFFT_DST_1, DTFFT_DST_2, DTFFT_DST_3, DTFFT_DST_4]
   integer(IP),  parameter,  public :: VALID_EXECUTORS(*) = [   &
     DTFFT_EXECUTOR_NONE                               &
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
     ,DTFFT_EXECUTOR_FFTW3                             &
 #endif
 #ifdef DTFFT_WITH_MKL
@@ -173,6 +173,7 @@ module dtfft_parameters
   integer(IP),  parameter,  public  :: DTFFT_ERROR_INVALID_R2R_KINDS = CONF_DTFFT_ERROR_INVALID_R2R_KINDS
   integer(IP),  parameter,  public  :: DTFFT_ERROR_R2C_TRANSPOSE_PLAN = CONF_DTFFT_ERROR_R2C_TRANSPOSE_PLAN
   integer(IP),  parameter,  public  :: DTFFT_ERROR_INPLACE_TRANSPOSE = CONF_DTFFT_ERROR_INPLACE_TRANSPOSE
+  integer(IP),  parameter,  public  :: DTFFT_ERROR_INVALID_AUX = CONF_DTFFT_ERROR_INVALID_AUX
   integer(IP),  parameter,  public  :: DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED = CONF_DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED
   integer(IP),  parameter,  public  :: DTFFT_ERROR_CUFFTMP_2D_PLAN = CONF_DTFFT_ERROR_CUFFTMP_2D_PLAN
 
@@ -227,6 +228,8 @@ pure function dtfft_get_error_string(error_code) result(error_string)
       allocate(error_string, source="Invalid values detected in `kinds` parameter")
     case ( DTFFT_ERROR_R2C_TRANSPOSE_PLAN )
       allocate(error_string, source="Transpose plan is not supported in R2C, use R2R or C2C plan instead")
+    case ( DTFFT_ERROR_INVALID_AUX )
+      allocate(error_string, source="Invalid `aux` buffer provided")
     case ( DTFFT_ERROR_INPLACE_TRANSPOSE )
       allocate(error_string, source="Inplace transpose is not supported")
     case ( DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED )
diff --git a/src/dtfft_utils.F90 b/src/dtfft_utils.F90
index 5619d11..5430692 100644
--- a/src/dtfft_utils.F90
+++ b/src/dtfft_utils.F90
@@ -15,9 +15,9 @@ module dtfft_utils
 public :: dtfft_string_f2c, dtfft_astring_f2c
 public :: int_to_str, double_to_str
 public :: write_debug, dtfft_init
-public :: suppress_unused
 public :: get_inverse_kind
 #ifdef DTFFT_WITH_CUDA
+public :: suppress_unused
 public :: cufftGetErrorString
 public :: CUFFT_SUCCESS
 public :: dtfft_get_stream, dtfft_set_stream
@@ -98,13 +98,6 @@ function double_to_str(n) result(string)
     allocate( string, source= trim(adjustl(temp)))
   end function double_to_str
 
-  ! Suppress warnings from linter
-  subroutine suppress_unused(x)
-    type(*)   :: x(..)
-    integer   :: i_size(1)
-    i_size = shape(x)
-  end subroutine suppress_unused
-
   subroutine write_debug(msg)
     character(len=*), intent(in)  :: msg
     integer(IP) :: comm_rank, ierr
@@ -138,6 +131,13 @@ elemental integer(IP) function get_inverse_kind(r2r_kind)
   end function get_inverse_kind
 
 #ifdef DTFFT_WITH_CUDA
+  ! Suppress warnings from linter
+  subroutine suppress_unused(x)
+    type(*)   :: x(..)
+    integer   :: i_size(1)
+    i_size = shape(x)
+  end subroutine suppress_unused
+
   integer(cuda_stream_kind) function dtfft_get_stream()
     integer :: ierr
     if (.not.is_stream_created) then
diff --git a/src/interfaces/api/c/dtfft_api_c.c b/src/interfaces/api/c/dtfft_api_c.c
index 3eb6d19..11cb0f5 100644
--- a/src/interfaces/api/c/dtfft_api_c.c
+++ b/src/interfaces/api/c/dtfft_api_c.c
@@ -19,7 +19,6 @@
 
 #include <mpi.h>
 #include <stdlib.h>
-#include <stdio.h>
 
 #include <dtfft.h>
 #include <dtfft_api.h>
diff --git a/src/interfaces/fft/CMakeLists.txt b/src/interfaces/fft/CMakeLists.txt
index 8ac3731..8b2c2f7 100644
--- a/src/interfaces/fft/CMakeLists.txt
+++ b/src/interfaces/fft/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(NOT DTFFT_WITHOUT_FFTW)
+if(DTFFT_WITH_FFTW)
   add_subdirectory(fftw)
 endif()
 
@@ -6,9 +6,9 @@ if(DTFFT_WITH_MKL)
   add_subdirectory(mkl)
 endif()
 
-if(DTFFT_WITH_CUFFT)
-  add_subdirectory(cufft)
-endif()
+# if(DTFFT_WITH_CUFFT)
+#   add_subdirectory(cufft)
+# endif()
 
 # if(DTFFT_WITH_KFR)
 #   add_subdirectory(kfr)
diff --git a/src/interfaces/fft/mkl/dtfft_executor_mkl_m.F90 b/src/interfaces/fft/mkl/dtfft_executor_mkl_m.F90
index fab7d0a..2599321 100644
--- a/src/interfaces/fft/mkl/dtfft_executor_mkl_m.F90
+++ b/src/interfaces/fft/mkl/dtfft_executor_mkl_m.F90
@@ -28,7 +28,7 @@ module dtfft_executor_mkl_m
 use dtfft_interface_mkl_native_m
 use dtfft_precisions,          only: IP
 use dtfft_parameters,          only: DTFFT_SUCCESS, DTFFT_FORWARD, DTFFT_BACKWARD, DTFFT_ERROR_R2R_FFT_NOT_SUPPORTED, DTFFT_DOUBLE
-use dtfft_utils,               only: suppress_unused, int_to_str
+use dtfft_utils,               only: int_to_str
 #include "dtfft_mpi.h"
 implicit none
 private
diff --git a/tests/c/test_c2c_2d_cxx.cpp b/tests/c/test_c2c_2d_cxx.cpp
index f828fdd..70a9d21 100644
--- a/tests/c/test_c2c_2d_cxx.cpp
+++ b/tests/c/test_c2c_2d_cxx.cpp
@@ -46,7 +46,8 @@ int main(int argc, char *argv[])
   }
   // Create plan
   const vector<int> dims = {ny, nx};
-  dtfft::PlanC2C plan(dims, MPI_COMM_WORLD, DTFFT_DOUBLE, DTFFT_MEASURE, DTFFT_EXECUTOR_NONE);
+
+  dtfft::PlanC2C plan = dtfft::PlanC2C(dims, MPI_COMM_WORLD, DTFFT_DOUBLE, DTFFT_MEASURE, DTFFT_EXECUTOR_NONE);
 
   int local_size[2];
   size_t alloc_size;
@@ -67,7 +68,7 @@ int main(int argc, char *argv[])
     check[i] = in[i];
   }
 
-  plan.transpose(in, out, DTFFT_TRANSPOSE_X_TO_Y);
+  DTFFT_CALL( plan.transpose(in, out, DTFFT_TRANSPOSE_X_TO_Y) )
 
   for ( auto & element: in) {
     element = complex<double>(-1., -1.);
diff --git a/tests/c/test_c2c_2d_float_c.c b/tests/c/test_c2c_2d_float_c.c
index 55960d2..7d4df94 100644
--- a/tests/c/test_c2c_2d_float_c.c
+++ b/tests/c/test_c2c_2d_float_c.c
@@ -48,7 +48,7 @@ int main(int argc, char *argv[])
   // Create plan
   int n[2] = {ny, nx};
 
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   int executor_type = DTFFT_EXECUTOR_NONE;
@@ -72,9 +72,9 @@ int main(int argc, char *argv[])
 
   double tf = 0.0 - MPI_Wtime();
 #ifdef DTFFT_TRANSPOSE_ONLY
-  dtfft_transpose(plan, in, out, DTFFT_TRANSPOSE_X_TO_Y);
+  DTFFT_CALL( dtfft_transpose(plan, in, out, DTFFT_TRANSPOSE_X_TO_Y) )
 #else
-  dtfft_execute(plan, in, out, DTFFT_TRANSPOSE_OUT, NULL);
+  DTFFT_CALL( dtfft_execute(plan, in, out, DTFFT_TRANSPOSE_OUT, NULL) )
 #endif
   tf += MPI_Wtime();
 
@@ -90,9 +90,9 @@ int main(int argc, char *argv[])
 
   double tb = 0.0 - MPI_Wtime();
 #ifdef DTFFT_TRANSPOSE_ONLY
-  dtfft_transpose(plan, out, in, DTFFT_TRANSPOSE_Y_TO_X);
+  DTFFT_CALL( dtfft_transpose(plan, out, in, DTFFT_TRANSPOSE_Y_TO_X) )
 #else
-  dtfft_execute(plan, out, in, DTFFT_TRANSPOSE_IN, NULL);
+  DTFFT_CALL( dtfft_execute(plan, out, in, DTFFT_TRANSPOSE_IN, NULL) )
 #endif
   tb += MPI_Wtime();
 
diff --git a/tests/c/test_c2c_3d_c.c b/tests/c/test_c2c_3d_c.c
index 479b4f5..da2516c 100644
--- a/tests/c/test_c2c_3d_c.c
+++ b/tests/c/test_c2c_3d_c.c
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
 
 #ifdef DTFFT_WITH_MKL
   int executor_type = DTFFT_EXECUTOR_MKL;
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   int executor_type = DTFFT_EXECUTOR_NONE;
diff --git a/tests/c/test_c2c_3d_float_cxx.cpp b/tests/c/test_c2c_3d_float_cxx.cpp
index 1477cbe..99f268a 100644
--- a/tests/c/test_c2c_3d_float_cxx.cpp
+++ b/tests/c/test_c2c_3d_float_cxx.cpp
@@ -61,13 +61,13 @@ int main(int argc, char *argv[])
 
 #ifdef DTFFT_WITH_VKFFT
   int executor_type = DTFFT_EXECUTOR_VKFFT;
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   int executor_type = DTFFT_EXECUTOR_NONE;
 #endif
 
-  dtfft::PlanC2C plan(dims, grid_comm, DTFFT_SINGLE, DTFFT_MEASURE, executor_type);
+  dtfft::PlanC2C plan(dims, grid_comm, DTFFT_SINGLE, DTFFT_PATIENT, executor_type);
   vector<int> in_counts(3);
   plan.get_local_sizes(NULL, in_counts.data());
 
@@ -90,9 +90,7 @@ int main(int argc, char *argv[])
   plan.execute(in, out, DTFFT_TRANSPOSE_OUT);
   tf += MPI_Wtime();
 
-  for ( auto & element: in) {
-    element = complex<float>(-1., -1.);
-  }
+  std::fill(in.begin(), in.end(), complex<float>(-1., -1.));
 #ifndef DTFFT_TRANSPOSE_ONLY
   float scaler = 1. / (float) (nx * ny * nz);
   for ( auto & element: out) {
diff --git a/tests/c/test_r2c_2d_cxx.cpp b/tests/c/test_r2c_2d_cxx.cpp
index e4afcf5..375d31d 100644
--- a/tests/c/test_r2c_2d_cxx.cpp
+++ b/tests/c/test_r2c_2d_cxx.cpp
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
   int executor_type = DTFFT_EXECUTOR_MKL;
 #elif defined(DTFFT_WITH_VKFFT)
   int executor_type = DTFFT_EXECUTOR_VKFFT;
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   if(comm_rank == 0) {
diff --git a/tests/c/test_r2c_2d_float_c.c b/tests/c/test_r2c_2d_float_c.c
index 6aab7dd..d2e5de4 100644
--- a/tests/c/test_r2c_2d_float_c.c
+++ b/tests/c/test_r2c_2d_float_c.c
@@ -52,7 +52,7 @@ int main(int argc, char *argv[])
   int executor_type = DTFFT_EXECUTOR_MKL;
 #elif defined(DTFFT_WITH_VKFFT)
   int executor_type = DTFFT_EXECUTOR_VKFFT;
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   if(comm_rank == 0) {
diff --git a/tests/c/test_r2c_3d_c.c b/tests/c/test_r2c_3d_c.c
index eb4415d..fe86e25 100644
--- a/tests/c/test_r2c_3d_c.c
+++ b/tests/c/test_r2c_3d_c.c
@@ -52,7 +52,7 @@ int main(int argc, char *argv[])
   int executor_type = DTFFT_EXECUTOR_MKL;
 #elif defined(DTFFT_WITH_VKFFT)
   int executor_type = DTFFT_EXECUTOR_VKFFT;
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   if(comm_rank == 0) {
diff --git a/tests/c/test_r2c_3d_float_cxx.cpp b/tests/c/test_r2c_3d_float_cxx.cpp
index 20aaf50..0cbee12 100644
--- a/tests/c/test_r2c_3d_float_cxx.cpp
+++ b/tests/c/test_r2c_3d_float_cxx.cpp
@@ -50,7 +50,7 @@ int main(int argc, char *argv[])
   int executor_type = DTFFT_EXECUTOR_MKL;
 #elif defined(DTFFT_WITH_VKFFT)
   int executor_type = DTFFT_EXECUTOR_VKFFT;
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   if(comm_rank == 0) {
diff --git a/tests/c/test_r2r_2d_cxx.cpp b/tests/c/test_r2r_2d_cxx.cpp
index e54bf9f..77646b4 100644
--- a/tests/c/test_r2r_2d_cxx.cpp
+++ b/tests/c/test_r2r_2d_cxx.cpp
@@ -46,7 +46,7 @@ int main(int argc, char *argv[])
     cout << "----------------------------------------" << endl;
   }
 
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #elif defined(DTFFT_WITH_VKFFT)
   int executor_type = DTFFT_EXECUTOR_VKFFT;
diff --git a/tests/c/test_r2r_2d_float_c.c b/tests/c/test_r2r_2d_float_c.c
index ee118ab..099481b 100644
--- a/tests/c/test_r2r_2d_float_c.c
+++ b/tests/c/test_r2r_2d_float_c.c
@@ -47,7 +47,7 @@ int main(int argc, char *argv[]) {
     printf("----------------------------------------\n");
   }
 
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   int executor_type = DTFFT_EXECUTOR_NONE;
diff --git a/tests/c/test_r2r_3d_c.c b/tests/c/test_r2r_3d_c.c
index 4600060..9685cba 100644
--- a/tests/c/test_r2r_3d_c.c
+++ b/tests/c/test_r2r_3d_c.c
@@ -46,7 +46,7 @@ int main(int argc, char *argv[])
     printf("----------------------------------------\n");
   }
 
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #else
   int executor_type = DTFFT_EXECUTOR_NONE;
diff --git a/tests/c/test_r2r_3d_float_cxx.cpp b/tests/c/test_r2r_3d_float_cxx.cpp
index 0fbb748..9c1b465 100644
--- a/tests/c/test_r2r_3d_float_cxx.cpp
+++ b/tests/c/test_r2r_3d_float_cxx.cpp
@@ -49,7 +49,7 @@ int main(int argc, char *argv[])
     cout << "----------------------------------------"          << endl;
   }
 
-#ifndef DTFFT_WITHOUT_FFTW
+#ifdef DTFFT_WITH_FFTW
   int executor_type = DTFFT_EXECUTOR_FFTW3;
 #elif defined(DTFFT_WITH_VKFFT)
   int executor_type = DTFFT_EXECUTOR_VKFFT;
diff --git a/tests/fortran/test_c2c_2d_f.F90 b/tests/fortran/test_c2c_2d_f.F90
index 703ed0d..2980a24 100644
--- a/tests/fortran/test_c2c_2d_f.F90
+++ b/tests/fortran/test_c2c_2d_f.F90
@@ -49,7 +49,7 @@ program test_c2c_2d
   executor_type = DTFFT_EXECUTOR_MKL
 ! #elif defined(DTFFT_WITH_KFR)
 !   executor_type = DTFFT_EXECUTOR_KFR
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #endif
 
diff --git a/tests/fortran/test_c2c_2d_float_f.F90 b/tests/fortran/test_c2c_2d_float_f.F90
index f8aba6c..2e1cff7 100644
--- a/tests/fortran/test_c2c_2d_float_f.F90
+++ b/tests/fortran/test_c2c_2d_float_f.F90
@@ -46,7 +46,7 @@ program test_c2c_2d_float
     write(output_unit, '(a)') "----------------------------------------"
   endif
 
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #elif defined(DTFFT_WITH_MKL)
   executor_type = DTFFT_EXECUTOR_MKL
diff --git a/tests/fortran/test_c2c_3d_f.F90 b/tests/fortran/test_c2c_3d_f.F90
index 686f9cd..03dd7e3 100644
--- a/tests/fortran/test_c2c_3d_f.F90
+++ b/tests/fortran/test_c2c_3d_f.F90
@@ -47,7 +47,7 @@ program test_c2c_3d
 
 #if defined(DTFFT_WITH_MKL)
   executor_type = DTFFT_EXECUTOR_MKL
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 ! #elif defined(DTFFT_WITH_KFR)
 !   executor_type = DTFFT_EXECUTOR_KFR
diff --git a/tests/fortran/test_c2c_3d_float_f.F90 b/tests/fortran/test_c2c_3d_float_f.F90
index ac9ec52..f87b4e2 100644
--- a/tests/fortran/test_c2c_3d_float_f.F90
+++ b/tests/fortran/test_c2c_3d_float_f.F90
@@ -48,7 +48,7 @@ program test_c2c_3d_float
 
 ! #ifdef DTFFT_WITH_KFR
 !   executor_type = DTFFT_EXECUTOR_KFR
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #else
   executor_type = DTFFT_EXECUTOR_NONE
diff --git a/tests/fortran/test_r2c_2d_f.F90 b/tests/fortran/test_r2c_2d_f.F90
index 0e2ef8a..64e552d 100644
--- a/tests/fortran/test_r2c_2d_f.F90
+++ b/tests/fortran/test_r2c_2d_f.F90
@@ -53,7 +53,7 @@ program test_r2c_2d
   stop
 #endif
 
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #elif defined(DTFFT_WITH_MKL)
   executor_type = DTFFT_EXECUTOR_MKL
diff --git a/tests/fortran/test_r2c_2d_float_f.F90 b/tests/fortran/test_r2c_2d_float_f.F90
index 39d096d..32dc002 100644
--- a/tests/fortran/test_r2c_2d_float_f.F90
+++ b/tests/fortran/test_r2c_2d_float_f.F90
@@ -54,7 +54,7 @@ program test_r2c_2d_float
   stop
 #endif
 
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #elif defined(DTFFT_WITH_MKL)
   executor_type = DTFFT_EXECUTOR_MKL
diff --git a/tests/fortran/test_r2c_3d_f.F90 b/tests/fortran/test_r2c_3d_f.F90
index 730a836..5830502 100644
--- a/tests/fortran/test_r2c_3d_f.F90
+++ b/tests/fortran/test_r2c_3d_f.F90
@@ -58,7 +58,7 @@ program test_r2c_3d
   executor_type = DTFFT_EXECUTOR_MKL
 ! #elif defined(DTFFT_WITH_KFR)
 !   executor_type = DTFFT_EXECUTOR_KFR
-#elif !defined(DTFFT_WITHOUT_FFTW)
+#elif defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #endif
 
diff --git a/tests/fortran/test_r2c_3d_float_f.F90 b/tests/fortran/test_r2c_3d_float_f.F90
index b353820..88b831f 100644
--- a/tests/fortran/test_r2c_3d_float_f.F90
+++ b/tests/fortran/test_r2c_3d_float_f.F90
@@ -55,7 +55,7 @@ program test_r2c_3d_float
 
 ! #if defined(DTFFT_WITH_KFR)
 !   executor_type = DTFFT_EXECUTOR_KFR
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #elif defined(DTFFT_WITH_MKL)
   executor_type = DTFFT_EXECUTOR_MKL
diff --git a/tests/fortran/test_r2r_2d_f.F90 b/tests/fortran/test_r2r_2d_f.F90
index 97e7510..5cb5633 100644
--- a/tests/fortran/test_r2r_2d_f.F90
+++ b/tests/fortran/test_r2r_2d_f.F90
@@ -48,7 +48,7 @@ program test_r2r_2d
 ! #ifdef DTFFT_WITH_KFR
 !   executor_type = DTFFT_EXECUTOR_KFR
 !   scaler = 4._R8P / real(nx * ny, R8P)
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
   scaler = 1._R8P / real(4 * (nx - 1) * (ny - 1), R8P)
 #else
diff --git a/tests/fortran/test_r2r_2d_float_f.F90 b/tests/fortran/test_r2r_2d_float_f.F90
index f42512a..7d5f019 100644
--- a/tests/fortran/test_r2r_2d_float_f.F90
+++ b/tests/fortran/test_r2r_2d_float_f.F90
@@ -18,7 +18,7 @@
 !------------------------------------------------------------------------------------------------
 #include "dtfft_config.h"
 program test_r2r_2d_float
-use iso_fortran_env, only: R8P => real64, R4P => real32, I4P => int32, output_unit, error_unit
+use iso_fortran_env, only: R8P => real64, R4P => real32, IP => int32, I4P => int32, output_unit, error_unit
 use dtfft
 #include "dtfft_mpi.h"
 implicit none
@@ -29,6 +29,7 @@ program test_r2r_2d_float
   type(dtfft_plan_r2r) :: plan
   integer(I4P) :: in_starts(2), in_counts(2), out_starts(2), out_counts(2)
   real(R8P) :: tf, tb, t_sum
+  TYPE_MPI_COMM :: comm_1d
 
   call MPI_Init(ierr)
   call MPI_Comm_size(MPI_COMM_WORLD, comm_size, ierr)
@@ -44,13 +45,15 @@ program test_r2r_2d_float
   endif
 ! #ifdef DTFFT_WITH_KFR
 !   executor_type = DTFFT_EXECUTOR_KFR
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #else
   executor_type = DTFFT_EXECUTOR_NONE
 #endif
 
-  call plan%create([nx, ny], [DTFFT_DST_2, DTFFT_DST_3], precision=DTFFT_SINGLE, executor_type=executor_type)
+  call MPI_Cart_create(MPI_COMM_WORLD, 1, [comm_size], [.false.], .true., comm_1d, ierr)
+
+  call plan%create([nx, ny], [DTFFT_DST_2, DTFFT_DST_3], comm=comm_1d, precision=DTFFT_SINGLE, executor_type=executor_type)
   call plan%get_local_sizes(in_starts, in_counts, out_starts, out_counts)
 
   allocate(in(in_starts(1):in_starts(1) + in_counts(1) - 1,                     &
diff --git a/tests/fortran/test_r2r_3d_f.F90 b/tests/fortran/test_r2r_3d_f.F90
index 21bc5fb..4be7383 100644
--- a/tests/fortran/test_r2r_3d_f.F90
+++ b/tests/fortran/test_r2r_3d_f.F90
@@ -30,10 +30,9 @@ program test_r2r_3d
   real(R8P) :: local_error, global_error, rnd
   integer(I4P), parameter :: nx = 512, ny = 32, nz = 8
   integer(I4P) :: comm_size, comm_rank, i, j, k, out_size
-  type(dtfft_plan_r2r) :: plan
+  class(dtfft_core), allocatable :: plan
   integer(I4P) :: in_starts(3), in_counts(3), out_counts(3), ierr, executor_type
   real(R8P) :: tf, tb, t_sum
-  TYPE_MPI_COMM :: comm_1d
   integer(I8P) :: alloc_size
 
   call MPI_Init(ierr)
@@ -48,17 +47,19 @@ program test_r2r_3d
     write(output_unit, '(a, i0)') 'Number of processors: ', comm_size
   endif
 
-  call MPI_Cart_create(MPI_COMM_WORLD, 1, [comm_size], [.false.], .true., comm_1d, ierr)
-
 ! #ifdef DTFFT_WITH_KFR
 !   executor_type = DTFFT_EXECUTOR_KFR
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
 #else
   executor_type = DTFFT_EXECUTOR_NONE
 #endif
 
-  call plan%create([nx, ny, nz], [DTFFT_DCT_2, DTFFT_DCT_2, DTFFT_DCT_2], comm=comm_1d, effort_flag=DTFFT_PATIENT, executor_type=executor_type)
+  allocate( dtfft_plan_r2r :: plan )
+  select type (plan)
+  class is ( dtfft_plan_r2r )
+    call plan%create([nx, ny, nz], [DTFFT_DCT_2, DTFFT_DCT_2, DTFFT_DST_4], effort_flag=DTFFT_MEASURE, executor_type=executor_type)
+  endselect
 
   call plan%get_local_sizes(in_starts, in_counts, out_counts=out_counts, alloc_size=alloc_size)
 
@@ -119,5 +120,6 @@ program test_r2r_3d
 
   deallocate(in, out, check)
   call plan%destroy()
+  deallocate( plan )
   call MPI_Finalize(ierr)
 end program test_r2r_3d
\ No newline at end of file
diff --git a/tests/fortran/test_r2r_3d_float_f.F90 b/tests/fortran/test_r2r_3d_float_f.F90
index 8f549ef..1095953 100644
--- a/tests/fortran/test_r2r_3d_float_f.F90
+++ b/tests/fortran/test_r2r_3d_float_f.F90
@@ -46,7 +46,7 @@ program test_r2r_3d_float
 ! #ifdef DTFFT_WITH_KFR
 !   executor_type = DTFFT_EXECUTOR_KFR
 !   scaler = 8._R4P / real(nx * ny * nz, R4P)
-#if !defined(DTFFT_WITHOUT_FFTW)
+#if defined (DTFFT_WITH_FFTW)
   executor_type = DTFFT_EXECUTOR_FFTW3
   scaler = 1._R4P / real(8 * (nx - 1) * ny * nz, R4P)
 #else