Skip to content

Commit

Permalink
Fix Wake Function Eval and Simplify Padding
Browse files Browse the repository at this point in the history
  • Loading branch information
ax3l committed Aug 23, 2024
1 parent 15a5d77 commit 65658a2
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 57 deletions.
32 changes: 21 additions & 11 deletions src/particles/wakefields/ExecuteWakefield.H
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,13 @@ namespace impactx::particles::wakefields
bool const is_unity_particle_weight = false; // Only true if w = 1
bool const GetNumberDensity = true;

int const padding_factor = 1; // Set amount of zero-padding
int const num_bins = csr_bins; // Set resolution
amrex::Real const bin_min = t_min;
amrex::Real const bin_max = t_max;
amrex::Real const bin_size = (bin_max - bin_min) / (num_bins - 1);
amrex::Real const bin_size = (bin_max - bin_min) / (num_bins - 1); // number of evaluation points

// Allocate memory for the charge profile
amrex::Gpu::DeviceVector<amrex::Real> charge_distribution(num_bins, 0.0);
amrex::Gpu::DeviceVector<amrex::Real> charge_distribution(num_bins + 1, 0.0);
amrex::Gpu::DeviceVector<amrex::Real> mean_x(num_bins, 0.0);
amrex::Gpu::DeviceVector<amrex::Real> mean_y(num_bins, 0.0);

Expand All @@ -88,21 +87,32 @@ namespace impactx::particles::wakefields
bin_size, is_unity_particle_weight);

// Call charge density derivative function
amrex::Gpu::DeviceVector<amrex::Real> slopes(num_bins - 1, 0.0);
amrex::Gpu::DeviceVector<amrex::Real> slopes(charge_distribution.size() - 1, 0.0);
impactx::particles::wakefields::DerivativeCharge1D(charge_distribution, slopes, bin_size,
GetNumberDensity); // Use number derivatives for convolution with CSR

// Call CSR wake function
amrex::Gpu::DeviceVector<amrex::Real> wake_function(num_bins, 0.0);
// Construct CSR wake function on 2N support
amrex::Gpu::DeviceVector<amrex::Real> wake_function(num_bins*2, 0.0);
amrex::Real *const dptr_wake_function = wake_function.data();
auto const dR = R; // for NVCC capture
amrex::ParallelFor(num_bins, [=] AMREX_GPU_DEVICE(int i) {
amrex::Real const s = bin_min + i * bin_size;
dptr_wake_function[i] = impactx::particles::wakefields::w_l_csr(s, dR);
amrex::ParallelFor(num_bins*2, [=] AMREX_GPU_DEVICE(int i) {
if (i == 0) {
amrex::Real const s = bin_size;
dptr_wake_function[i] = impactx::particles::wakefields::w_l_csr(s, dR);
// dptr_wake_function[i] = 0.0; // TODO: which value to use for this divergent point?
}
else if (i < num_bins) {
amrex::Real const s = static_cast<amrex::Real>(i) * bin_size;
dptr_wake_function[i] = impactx::particles::wakefields::w_l_csr(s, dR);
}
else if (i > num_bins) {
amrex::Real const s = static_cast<amrex::Real>(i - 2*num_bins) * bin_size;
dptr_wake_function[i] = impactx::particles::wakefields::w_l_csr(s, dR);
}
});

// Call convolution function
convoluted_wakefield = impactx::particles::wakefields::convolve_fft(slopes, wake_function, bin_size, padding_factor);
convoluted_wakefield = impactx::particles::wakefields::convolve_fft(slopes, wake_function, bin_size);
}

// Broadcast the global wakefield to every MPI rank
Expand All @@ -126,7 +136,7 @@ namespace impactx::particles::wakefields
}

// Call function to kick particles with wake
impactx::particles::wakefields::WakePush(particle_container, convoluted_wakefield, slice_ds, bin_size, t_min, padding_factor);
impactx::particles::wakefields::WakePush(particle_container, convoluted_wakefield, slice_ds, bin_size, t_min);
}
}

Expand Down
8 changes: 3 additions & 5 deletions src/particles/wakefields/WakeConvolution.H
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,15 @@ namespace impactx::particles::wakefields
/** Perform FFT-based convolution
*
* @param[in] beam_profile_slope number density slope along s [1/m]
* @param[in] wake_func wake functions in along s [V*pc/mm]
* @param[in] wake_func wake function along s [V*pc/mm]
* @param[in] delta_t size of a bin in wake_func [m]
* @param[in] padding_factor N times zero-padding of the input arrays' lengths
* @return FFT convolution of beam_profile_slope & wake_func
* @return FFT convolution of beam_profile_slope & wake_func, cropped to the first N values (N = len(beam_profile_slope) = len(wake_func)/2)
*/
amrex::Gpu::DeviceVector<amrex::Real>
convolve_fft (
amrex::Gpu::DeviceVector<amrex::Real> const & beam_profile_slope,
amrex::Gpu::DeviceVector<amrex::Real> const & wake_func,
amrex::Real delta_t,
int padding_factor = 1
amrex::Real delta_t
);

} // namespace impactx::particles::wakefields
Expand Down
59 changes: 26 additions & 33 deletions src/particles/wakefields/WakeConvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace impactx::particles::wakefields

amrex::Real const s0 = (0.169_rt * std::pow(a, 1.79_rt) * std::pow(g, 0.38_rt)) / std::pow(L, 1.17_rt);
amrex::Real const term = std::sqrt(std::abs(s) / s0) * std::exp(-std::sqrt(std::abs(s) / s0));
return (4 * impactx::particles::wakefields::Z0 * ablastr::constant::SI::c * s0 * unit_step(s)) / (amrex::Real(M_PI) * std::pow(a, 4)) * term;
return (4_rt * impactx::particles::wakefields::Z0 * ablastr::constant::SI::c * s0 * unit_step(s)) / (amrex::Real(M_PI) * std::pow(a, 4)) * term;
}

amrex::Real w_l_rf (
Expand All @@ -62,48 +62,40 @@ namespace impactx::particles::wakefields
convolve_fft (
amrex::Gpu::DeviceVector<amrex::Real> const & beam_profile_slope,
amrex::Gpu::DeviceVector<amrex::Real> const & wake_func,
amrex::Real delta_t,
int padding_factor
amrex::Real delta_t
)
{
#ifdef ImpactX_USE_FFT
#ifdef ImpactX_USE_FFT
int const beam_profile_slope_size = beam_profile_slope.size();
int const wake_func_size = wake_func.size();

AMREX_ALWAYS_ASSERT_WITH_MESSAGE(beam_profile_slope_size * 2 == wake_func_size, "Signal sizes don't match"); // TODO: exact 2N later on

// Length of the complex (R2C) spectrum of a real signal of length N is N/2+1
// e.g., https://www.fftw.org/fftw3_doc/One_002dDimensional-DFTs-of-Real-Data.html
int const signal_size = padding_factor * std::max(beam_profile_slope_size, wake_func_size); // zero-pad slope by one element
int const complex_size = signal_size / 2 + 1;
int const padded_signal_size = wake_func_size; // zero-pad slope by one element
int const complex_size = padded_signal_size / 2 + 1;

// Allocate memory for FFT inputs and outputs
using ablastr::math::anyfft::Complex;

// Zero-pad the input (signal) arrays to equal length
amrex::Gpu::DeviceVector<amrex::Real> in1(signal_size, 0.0);
amrex::Gpu::DeviceVector<amrex::Real> in2(signal_size, 0.0);
// Signal padding for periodicity of FFTs:
// - slope starts/ends on zero; zero-pad the rest up to the wake function's length (2N)
// - wake function is copied as-is (it is already constructed on the full 2N support)
amrex::Gpu::DeviceVector<amrex::Real> in1(padded_signal_size, 0.0);
amrex::Gpu::DeviceVector<amrex::Real> in2(padded_signal_size, 0.0);
amrex::Real * const dptr_in1 = in1.data();
amrex::Real * const dptr_in2 = in2.data();
amrex::Real const * const dptr_beam_profile_slope = beam_profile_slope.data();
amrex::Real const * const dptr_wake_func = wake_func.data();
amrex::ParallelFor(signal_size, [=] AMREX_GPU_DEVICE(int i)
amrex::ParallelFor(padded_signal_size, [=] AMREX_GPU_DEVICE(int i)
{
if (i < beam_profile_slope_size)
{
dptr_in1[i] = dptr_beam_profile_slope[i];
}
else
{
dptr_in1[i] = 0;
}

if (i < wake_func_size)
{
dptr_in2[i] = dptr_wake_func[i];
}
else
{
dptr_in2[i] = 0;
}
dptr_in2[i] = dptr_wake_func[i];
});

// Define Forward FFT
Expand All @@ -112,11 +104,10 @@ namespace impactx::particles::wakefields
// TODO: n does not change usually, so we can keep the plans alive over the simulation
// runtime. To do that, we can make this function a functor class.
auto p1 = ablastr::math::anyfft::CreatePlan(
amrex::IntVect{signal_size}, in1.data(), out1.data(), ablastr::math::anyfft::direction::R2C, 1

amrex::IntVect(padded_signal_size), in1.data(), out1.data(), ablastr::math::anyfft::direction::R2C, 1
);
auto p2 = ablastr::math::anyfft::CreatePlan(
amrex::IntVect{signal_size}, in2.data(), out2.data(), ablastr::math::anyfft::direction::R2C, 1
amrex::IntVect(padded_signal_size), in2.data(), out2.data(), ablastr::math::anyfft::direction::R2C, 1
);

// Perform Forward FFT - Convert inputs into frequency domain
Expand All @@ -136,21 +127,23 @@ namespace impactx::particles::wakefields
});

// Define Backward FFT - Revert from frequency domain to time/space domain
amrex::Gpu::DeviceVector<amrex::Real> result(signal_size, 0.0);
amrex::Gpu::DeviceVector<amrex::Real> out3(padded_signal_size, 0.0);
// TODO: n does not change usually, so we can keep the plans alive over the simulation
// runtime. To do that, we can make this function a functor class.
amrex::Real * const dptr_result = result.data();
amrex::Real * const dptr_out3 = out3.data();
auto p3 = ablastr::math::anyfft::CreatePlan(
amrex::IntVect{signal_size}, dptr_result, dptr_conv_result, ablastr::math::anyfft::direction::C2R, 1
amrex::IntVect(padded_signal_size), dptr_out3, dptr_conv_result, ablastr::math::anyfft::direction::C2R, 1
);

// Perform Backward FFT
ablastr::math::anyfft::Execute(p3);

// Normalize result by the output size and multiply result by bin size
amrex::ParallelFor(signal_size, [=] AMREX_GPU_DEVICE (int i) noexcept
// Crop to first N values, normalize result by the output size and multiply result by bin size
amrex::Gpu::DeviceVector<amrex::Real> result(beam_profile_slope_size, 0.0);
amrex::Real * const dptr_result = result.data();
amrex::ParallelFor(beam_profile_slope_size, [=] AMREX_GPU_DEVICE (int i) noexcept
{
dptr_result[i] = dptr_result[i] / signal_size * delta_t;
dptr_result[i] = dptr_out3[i] / padded_signal_size * delta_t; // TODO: padded_signal_size or beam_profile_slope_size (2N or N)?
});

// Clean up intermediate declarations
Expand All @@ -161,10 +154,10 @@ namespace impactx::particles::wakefields
ablastr::math::anyfft::DestroyPlan(p3);

return result;
#else
#else
throw std::runtime_error("convolve_fft: To use this function, recompile with ImpactX_FFT=ON.");

return amrex::Gpu::DeviceVector<amrex::Real>();
#endif
#endif
}
}
4 changes: 1 addition & 3 deletions src/particles/wakefields/WakePush.H
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,13 @@ namespace impactx::particles::wakefields
* @param[in] slice_ds slice spacing along s
* @param[in] bin_size size of the beam in s divided by num_bins
* @param[in] bin_min lower end of the beam in s
* @param[in] padding_factor N times zero-padding of the input arrays' lengths
*/
void WakePush (
ImpactXParticleContainer & pc,
amrex::Gpu::DeviceVector<amrex::Real> const & convoluted_wakefield,
amrex::ParticleReal slice_ds,
amrex::Real bin_size,
amrex::Real bin_min,
int padding_factor
amrex::Real bin_min
);

} // namespace impactx::particles::wakefields
Expand Down
8 changes: 3 additions & 5 deletions src/particles/wakefields/WakePush.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,14 @@ namespace impactx::particles::wakefields
amrex::Gpu::DeviceVector<amrex::Real> const & convoluted_wakefield,
amrex::ParticleReal slice_ds,
amrex::Real bin_size,
amrex::Real bin_min,
int padding_factor
amrex::Real bin_min
)
{
BL_PROFILE("impactx::particles::wakefields::WakePush")

using namespace amrex::literals;

int const cw_size = convoluted_wakefield.size();
int const cw_size = convoluted_wakefield.size(); // no padding anymore

// Loop over refinement levels
int const nLevel = pc.finestLevel();
Expand Down Expand Up @@ -70,8 +69,7 @@ namespace impactx::particles::wakefields
amrex::ParticleReal & AMREX_RESTRICT pt = part_pt[i];

// Update longitudinal momentum with the convoluted wakefield force
amrex::Real const lower_bound = padding_factor * bin_min;
int const idx = static_cast<int>((t - lower_bound) / bin_size); // Find index position along t
int const idx = static_cast<int>((t - bin_min) / bin_size); // Find index position along t

if (idx < 0 || idx >= cw_size)
{
Expand Down

0 comments on commit 65658a2

Please sign in to comment.