Fix Wake Function Eval and Simplify Padding

BLAST-ImpactX · Aug 23, 2024 · 65658a2 · 65658a2
1 parent 15a5d77
commit 65658a2
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 57 deletions.
diff --git a/src/particles/wakefields/ExecuteWakefield.H b/src/particles/wakefields/ExecuteWakefield.H
@@ -58,14 +58,13 @@ namespace impactx::particles::wakefields
             bool const is_unity_particle_weight = false; // Only true if w = 1
             bool const GetNumberDensity = true;
 
-            int const padding_factor = 1; // Set amount of zero-padding
             int const num_bins = csr_bins;  // Set resolution
             amrex::Real const bin_min = t_min;
             amrex::Real const bin_max = t_max;
-            amrex::Real const bin_size = (bin_max - bin_min) / (num_bins - 1);
+            amrex::Real const bin_size = (bin_max - bin_min) / (num_bins - 1);  // spacing between the num_bins evaluation points
 
             // Allocate memory for the charge profile
-            amrex::Gpu::DeviceVector<amrex::Real> charge_distribution(num_bins, 0.0);
+            amrex::Gpu::DeviceVector<amrex::Real> charge_distribution(num_bins + 1, 0.0);
             amrex::Gpu::DeviceVector<amrex::Real> mean_x(num_bins, 0.0);
             amrex::Gpu::DeviceVector<amrex::Real> mean_y(num_bins, 0.0);
 
@@ -88,21 +87,32 @@ namespace impactx::particles::wakefields
                                                                        bin_size, is_unity_particle_weight);
 
                 // Call charge density derivative function
-                amrex::Gpu::DeviceVector<amrex::Real> slopes(num_bins - 1, 0.0);
+                amrex::Gpu::DeviceVector<amrex::Real> slopes(charge_distribution.size() - 1, 0.0);
                 impactx::particles::wakefields::DerivativeCharge1D(charge_distribution, slopes, bin_size,
                                                                    GetNumberDensity); // Use number derivatives for convolution with CSR
 
-                // Call CSR wake function
-                amrex::Gpu::DeviceVector<amrex::Real> wake_function(num_bins, 0.0);
+                // Construct CSR wake function on 2N support
+                amrex::Gpu::DeviceVector<amrex::Real> wake_function(num_bins*2, 0.0);
                 amrex::Real *const dptr_wake_function = wake_function.data();
                 auto const dR = R;  // for NVCC capture
-                amrex::ParallelFor(num_bins, [=] AMREX_GPU_DEVICE(int i) {
-                    amrex::Real const s = bin_min + i * bin_size;
-                    dptr_wake_function[i] = impactx::particles::wakefields::w_l_csr(s, dR);
+                amrex::ParallelFor(num_bins*2, [=] AMREX_GPU_DEVICE(int i) {
+                    if (i == 0) {
+                        amrex::Real const s = bin_size;
+                        dptr_wake_function[i] = impactx::particles::wakefields::w_l_csr(s, dR);
+                        // dptr_wake_function[i] = 0.0;  // TODO: which value to use for this divergent point?
+                    }
+                    else if (i < num_bins) {
+                        amrex::Real const s = static_cast<amrex::Real>(i) * bin_size;
+                        dptr_wake_function[i] = impactx::particles::wakefields::w_l_csr(s, dR);
+                    }
+                    else if (i > num_bins) {
+                        amrex::Real const s = static_cast<amrex::Real>(i - 2*num_bins) * bin_size;
+                        dptr_wake_function[i] = impactx::particles::wakefields::w_l_csr(s, dR);
+                    }
                 });
 
                 // Call convolution function
-                convoluted_wakefield = impactx::particles::wakefields::convolve_fft(slopes, wake_function, bin_size, padding_factor);
+                convoluted_wakefield = impactx::particles::wakefields::convolve_fft(slopes, wake_function, bin_size);
             }
 
             // Broadcast the global wakefield to every MPI rank
@@ -126,7 +136,7 @@ namespace impactx::particles::wakefields
             }
 
             // Call function to kick particles with wake
-            impactx::particles::wakefields::WakePush(particle_container, convoluted_wakefield, slice_ds, bin_size, t_min, padding_factor);
+            impactx::particles::wakefields::WakePush(particle_container, convoluted_wakefield, slice_ds, bin_size, t_min);
         }
     }
 

diff --git a/src/particles/wakefields/WakeConvolution.H b/src/particles/wakefields/WakeConvolution.H
@@ -116,17 +116,15 @@ namespace impactx::particles::wakefields
     /** Perform FFT-based convolution
      *
      * @param[in] beam_profile_slope number density slope along s [1/m]
-     * @param[in] wake_func wake functions in along s [V*pc/mm]
+     * @param[in] wake_func wake function along s [V*pc/mm]
      * @param[in] delta_t size of a bin in wake_func [m]
-     * @param[in] padding_factor N times zero-padding of the input arrays' lengths
-     * @return FFT convolution of beam_profile_slope & wake_func
+     * @return FFT convolution of beam_profile_slope & wake_func, cropped to the first N values (N = len(beam_profile_slope) = len(wake_func)/2)
      */
     amrex::Gpu::DeviceVector<amrex::Real>
     convolve_fft (
         amrex::Gpu::DeviceVector<amrex::Real> const & beam_profile_slope,
         amrex::Gpu::DeviceVector<amrex::Real> const & wake_func,
-        amrex::Real delta_t,
-        int padding_factor = 1
+        amrex::Real delta_t
     );
 
 } // namespace impactx::particles::wakefields

diff --git a/src/particles/wakefields/WakeConvolution.cpp b/src/particles/wakefields/WakeConvolution.cpp
@@ -42,7 +42,7 @@ namespace impactx::particles::wakefields
 
         amrex::Real const s0 = (0.169_rt * std::pow(a, 1.79_rt) * std::pow(g, 0.38_rt)) / std::pow(L, 1.17_rt);
         amrex::Real const term = std::sqrt(std::abs(s) / s0) * std::exp(-std::sqrt(std::abs(s) / s0));
-        return (4 * impactx::particles::wakefields::Z0 * ablastr::constant::SI::c * s0 * unit_step(s)) / (amrex::Real(M_PI) * std::pow(a, 4)) * term;
+        return (4_rt * impactx::particles::wakefields::Z0 * ablastr::constant::SI::c * s0 * unit_step(s)) / (amrex::Real(M_PI) * std::pow(a, 4)) * term;
     }
 
     amrex::Real w_l_rf (
@@ -62,48 +62,40 @@ namespace impactx::particles::wakefields
     convolve_fft (
         amrex::Gpu::DeviceVector<amrex::Real> const & beam_profile_slope,
         amrex::Gpu::DeviceVector<amrex::Real> const & wake_func,
-        amrex::Real delta_t,
-        int padding_factor
+        amrex::Real delta_t
     )
     {
-    #ifdef ImpactX_USE_FFT
+#ifdef ImpactX_USE_FFT
         int const beam_profile_slope_size = beam_profile_slope.size();
         int const wake_func_size = wake_func.size();
 
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(beam_profile_slope_size * 2 == wake_func_size, "Signal sizes don't match"); // TODO: exact 2N later on
+
         // Length of convolution result (complex) is N/2+1
         // e.g., https://www.fftw.org/fftw3_doc/One_002dDimensional-DFTs-of-Real-Data.html
-        int const signal_size = padding_factor * std::max(beam_profile_slope_size, wake_func_size);  // zero-pad slope by one element
-        int const complex_size = signal_size / 2 + 1;
+        int const padded_signal_size = wake_func_size;  // zero-pad slope to the length of the wake function (2N)
+        int const complex_size = padded_signal_size / 2 + 1;
 
         // Allocate memory for FFT inputs and outputs
         using ablastr::math::anyfft::Complex;
 
-        // Zero-pad the input (signal) arrays to equal length
-        amrex::Gpu::DeviceVector<amrex::Real> in1(signal_size, 0.0);
-        amrex::Gpu::DeviceVector<amrex::Real> in2(signal_size, 0.0);
+        // Signal padding for periodicity of FFTs:
+        // - slope starts/ends on zero; the remaining elements are zero-padded
+        // - wake function is copied as-is (it is already constructed on the full padded support)
+        amrex::Gpu::DeviceVector<amrex::Real> in1(padded_signal_size, 0.0);
+        amrex::Gpu::DeviceVector<amrex::Real> in2(padded_signal_size, 0.0);
         amrex::Real * const dptr_in1 = in1.data();
         amrex::Real * const dptr_in2 = in2.data();
         amrex::Real const * const dptr_beam_profile_slope = beam_profile_slope.data();
         amrex::Real const * const dptr_wake_func = wake_func.data();
-        amrex::ParallelFor(signal_size, [=] AMREX_GPU_DEVICE(int i)
+        amrex::ParallelFor(padded_signal_size, [=] AMREX_GPU_DEVICE(int i)
         {
             if (i < beam_profile_slope_size)
             {
                 dptr_in1[i] = dptr_beam_profile_slope[i];
             }
-            else
-            {
-                dptr_in1[i] = 0;
-            }
 
-            if (i < wake_func_size)
-            {
-                dptr_in2[i] = dptr_wake_func[i];
-            }
-            else
-            {
-                dptr_in2[i] = 0;
-            }
+            dptr_in2[i] = dptr_wake_func[i];
         });
 
         // Define Forward FFT
@@ -112,11 +104,10 @@ namespace impactx::particles::wakefields
         // TODO: n does not change usually, so we can keep the plans alive over the simulation
         //       runtime. To do that, we can make this function a functor class.
         auto p1 = ablastr::math::anyfft::CreatePlan(
-            amrex::IntVect{signal_size}, in1.data(), out1.data(), ablastr::math::anyfft::direction::R2C, 1
-
+                amrex::IntVect(padded_signal_size), in1.data(), out1.data(), ablastr::math::anyfft::direction::R2C, 1
         );
         auto p2 = ablastr::math::anyfft::CreatePlan(
-            amrex::IntVect{signal_size}, in2.data(), out2.data(), ablastr::math::anyfft::direction::R2C, 1
+                amrex::IntVect(padded_signal_size), in2.data(), out2.data(), ablastr::math::anyfft::direction::R2C, 1
         );
 
         // Perform Forward FFT - Convert inputs into frequency domain
@@ -136,21 +127,23 @@ namespace impactx::particles::wakefields
         });
 
         // Define Backward FFT - Revert from frequency domain to time/space domain
-        amrex::Gpu::DeviceVector<amrex::Real> result(signal_size, 0.0);
+        amrex::Gpu::DeviceVector<amrex::Real> out3(padded_signal_size, 0.0);
         // TODO: n does not change usually, so we can keep the plans alive over the simulation
         //       runtime. To do that, we can make this function a functor class.
-        amrex::Real * const dptr_result = result.data();
+        amrex::Real * const dptr_out3 = out3.data();
         auto p3 = ablastr::math::anyfft::CreatePlan(
-            amrex::IntVect{signal_size}, dptr_result, dptr_conv_result, ablastr::math::anyfft::direction::C2R, 1
+                amrex::IntVect(padded_signal_size), dptr_out3, dptr_conv_result, ablastr::math::anyfft::direction::C2R, 1
         );
 
         // Perform Backward FFT
         ablastr::math::anyfft::Execute(p3);
 
-        // Normalize result by the output size and multiply result by bin size
-        amrex::ParallelFor(signal_size, [=] AMREX_GPU_DEVICE (int i) noexcept
+        // Crop to first N values, normalize result by the output size and multiply result by bin size
+        amrex::Gpu::DeviceVector<amrex::Real> result(beam_profile_slope_size, 0.0);
+        amrex::Real * const dptr_result = result.data();
+        amrex::ParallelFor(beam_profile_slope_size, [=] AMREX_GPU_DEVICE (int i) noexcept
         {
-            dptr_result[i] = dptr_result[i] / signal_size * delta_t;
+            dptr_result[i] = dptr_out3[i] / padded_signal_size * delta_t;  // TODO: padded_signal_size or beam_profile_slope_size (2N or N)?
         });
 
         // Clean up intermediate declarations
@@ -161,10 +154,10 @@ namespace impactx::particles::wakefields
         ablastr::math::anyfft::DestroyPlan(p3);
 
         return result;
-    #else
+#else
         throw std::runtime_error("convolve_fft: To use this function, recompile with ImpactX_FFT=ON.");
 
         return amrex::Gpu::DeviceVector<amrex::Real>();
-    #endif
+#endif
     }
 }
diff --git a/src/particles/wakefields/WakePush.H b/src/particles/wakefields/WakePush.H
@@ -26,15 +26,13 @@ namespace impactx::particles::wakefields
      * @param[in] slice_ds slice spacing along s
      * @param[in] bin_size size of the beam in s divided by num_bins
      * @param[in] bin_min lower end of the beam in s
-     * @param[in] padding_factor N times zero-padding of the input arrays' lengths
      */
     void WakePush (
         ImpactXParticleContainer & pc,
         amrex::Gpu::DeviceVector<amrex::Real> const & convoluted_wakefield,
         amrex::ParticleReal slice_ds,
         amrex::Real bin_size,
-        amrex::Real bin_min,
-        int padding_factor
+        amrex::Real bin_min
     );
 
 } // namespace impactx::particles::wakefields

diff --git a/src/particles/wakefields/WakePush.cpp b/src/particles/wakefields/WakePush.cpp
@@ -23,15 +23,14 @@ namespace impactx::particles::wakefields
         amrex::Gpu::DeviceVector<amrex::Real> const & convoluted_wakefield,
         amrex::ParticleReal slice_ds,
         amrex::Real bin_size,
-        amrex::Real bin_min,
-        int padding_factor
+        amrex::Real bin_min
     )
     {
         BL_PROFILE("impactx::particles::wakefields::WakePush")
 
         using namespace amrex::literals;
 
-        int const cw_size = convoluted_wakefield.size();
+        int const cw_size = convoluted_wakefield.size(); // no padding anymore
 
         // Loop over refinement levels
         int const nLevel = pc.finestLevel();
@@ -70,8 +69,7 @@ namespace impactx::particles::wakefields
                     amrex::ParticleReal & AMREX_RESTRICT pt = part_pt[i];
 
                     // Update longitudinal momentum with the convoluted wakefield force
-                    amrex::Real const lower_bound = padding_factor * bin_min;
-                    int const idx = static_cast<int>((t - lower_bound) / bin_size);  // Find index position along t
+                    int const idx = static_cast<int>((t - bin_min) / bin_size);  // Find index position along t
 
                     if (idx < 0 || idx >= cw_size)
                     {