Skip to content

Commit

Permalink
update comments
Browse files Browse the repository at this point in the history
  • Loading branch information
FMarno committed Feb 6, 2024
1 parent f8a8661 commit dee36f5
Showing 1 changed file with 2 additions and 4 deletions.
6 changes: 2 additions & 4 deletions src/portfft/dispatcher/workitem_dispatcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,10 @@ PORTFFT_INLINE void workitem_impl(const T* input, T* output, const T* input_imag
IdxGlobal global_input_offset = static_cast<IdxGlobal>(input_distance_in_reals) * leader_i;
IdxGlobal global_output_offset = static_cast<IdxGlobal>(output_distance_in_reals) * leader_i;

- // This is checking for LayoutIn is PACKED or UNPACKED but we don't actually ever launch kernels with LayoutIn
- // as UNPACKED.
if (is_packed_input) {
// copy into local memory cooperatively as a subgroup, allowing coalesced memory access for when elements of a
- // single FFT are sequential. BATCH_INTERLEAVED skips this step and loads straight from global to registers since
- // the sequential work-items already access sequential elements.
+ // single FFT are sequential. When distance < stride, skip this step and load straight from global to registers
+ // since the sequential work-items already access sequential elements.
if (storage == complex_storage::INTERLEAVED_COMPLEX) {
global_data.log_message_global(__func__, "loading packed data from global to local memory");
global2local<level::SUBGROUP, SubgroupSize>(global_data, input, loc_view, n_reals * n_working, global_offset,
Expand Down

0 comments on commit dee36f5

Please sign in to comment.