diff --git a/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf b/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf
index 0cf2629b92..30b7bdf8eb 100644
Binary files a/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf and b/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf differ
diff --git a/amrex/docs_html/doxygen/AMReX__NeighborParticlesI_8H_source.html b/amrex/docs_html/doxygen/AMReX__NeighborParticlesI_8H_source.html
index ac106782ee..64b899193d 100644
--- a/amrex/docs_html/doxygen/AMReX__NeighborParticlesI_8H_source.html
+++ b/amrex/docs_html/doxygen/AMReX__NeighborParticlesI_8H_source.html
@@ -1320,7 +1320,7 @@
int MyProcSub() noexcept
my sub-rank in current frame
Definition: AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition: AMReX_ParallelContext.H:98
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(Box const &box) noexcept
Definition: AMReX_Box.H:1634
-void EnsureThreadSafeTiles(PC &pc)
Definition: AMReX_ParticleUtil.H:580
+void EnsureThreadSafeTiles(PC &pc)
Definition: AMReX_ParticleUtil.H:576
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(Box const &box) noexcept
Definition: AMReX_Box.H:1620
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21
@@ -1328,7 +1328,7 @@
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVect getParticleCell(P const &p, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &dxi, const Box &domain) noexcept
Definition: AMReX_ParticleUtil.H:362
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:272
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:107
-bool SameIteratorsOK(const PC1 &pc1, const PC2 &pc2)
Definition: AMReX_ParticleUtil.H:568
+bool SameIteratorsOK(const PC1 &pc1, const PC2 &pc2)
Definition: AMReX_ParticleUtil.H:564
int numParticlesOutOfRange(Iterator const &pti, int nGrow)
Returns the number of particles that are more than nGrow cells from the box correspond to the input i...
Definition: AMReX_ParticleUtil.H:34
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box shift(const Box &b, int dir, int nzones) noexcept
Return a Box with indices shifted by nzones in dir direction.
Definition: AMReX_Box.H:1294
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:212
diff --git a/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html b/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html
index 8bfa879c9a..f850f5b482 100644
--- a/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html
+++ b/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html
@@ -2750,7 +2750,7 @@
static int f(sunrealtype t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:42
@ max
Definition: AMReX_ParallelReduce.H:17
static constexpr int P
Definition: AMReX_OpenBC.H:14
-void clearEmptyEntries(C &c)
Definition: AMReX_ParticleUtil.H:596
+void clearEmptyEntries(C &c)
Definition: AMReX_ParticleUtil.H:592
int verbose
Definition: AMReX.cpp:101
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void amrex_deposit_cic(P const &p, int nc, amrex::Array4< amrex::Real > const &rho, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &dxi)
Definition: AMReX_Particle_mod_K.H:14
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, const UnpackPolicy &&policy)
Definition: AMReX_ParticleCommunication.H:393
diff --git a/amrex/docs_html/doxygen/AMReX__ParticleUtil_8H_source.html b/amrex/docs_html/doxygen/AMReX__ParticleUtil_8H_source.html
index 9ef9b257f4..776812da6f 100644
--- a/amrex/docs_html/doxygen/AMReX__ParticleUtil_8H_source.html
+++ b/amrex/docs_html/doxygen/AMReX__ParticleUtil_8H_source.html
@@ -515,253 +515,249 @@
-
-
- 492 p_prime.
pos(1) = src_data.pos(1, i+this_offset);,
- 493 p_prime.
pos(2) = src_data.pos(2, i+this_offset););
-
-
- 496 auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
- 497 assigned_grid = amrex::get<0>(tup_prime);
- 498 assigned_lev = amrex::get<1>(tup_prime);
- 499 if (assigned_grid >= 0)
-
-
- 502 src_data.pos(1, i+this_offset) = p_prime.
pos(1);,
- 503 src_data.pos(2, i+this_offset) = p_prime.
pos(2););
-
- 505 else if (lev_min > 0)
-
-
- 508 p_prime.
pos(1) = src_data.pos(1, i+this_offset);,
- 509 p_prime.
pos(2) = src_data.pos(2, i+this_offset););
- 510 auto tup = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
- 511 assigned_grid = amrex::get<0>(tup);
- 512 assigned_lev = amrex::get<1>(tup);
-
+ 490 auto p_prime = src_data.getSuperParticle(i+this_offset);
+
+ 492 auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
+ 493 assigned_grid = amrex::get<0>(tup_prime);
+ 494 assigned_lev = amrex::get<1>(tup_prime);
+ 495 if (assigned_grid >= 0)
+
+ 497 AMREX_D_TERM(src_data.pos(0, i+this_offset) = p_prime.pos(0);,
+ 498 src_data.pos(1, i+this_offset) = p_prime.pos(1);,
+ 499 src_data.pos(2, i+this_offset) = p_prime.pos(2););
+
+ 501 else if (lev_min > 0)
+
+ 503 AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);,
+ 504 p_prime.pos(1) = src_data.pos(1, i+this_offset);,
+ 505 p_prime.pos(2) = src_data.pos(2, i+this_offset););
+ 506 auto tup = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
+ 507 assigned_grid = amrex::get<0>(tup);
+ 508 assigned_lev = amrex::get<1>(tup);
+
+
+
+ 512 if ((remove_negative ==
false) && (src_data.id(i+this_offset) < 0)) {
+
- 516 if ((remove_negative ==
false) && (src_data.id(i+this_offset) < 0)) {
-
-
-
- 520 return ((assigned_grid == gid) && (assigned_lev == lev) && (getPID(lev, gid) == pid));
-
-
- 523 num_stay = Scan::PrefixSum<int> (this_chunk_size,
-
+ 516 return ((assigned_grid == gid) && (assigned_lev == lev) && (getPID(lev, gid) == pid));
+
+
+ 519 num_stay = Scan::PrefixSum<int> (this_chunk_size,
+
+
+ 522 return particle_stays(i);
+
+
- 526 return particle_stays(i);
-
-
-
- 530 int src_i = i + this_offset;
- 531 int dst_i = particle_stays(i) ? s : this_chunk_size-1-(i-s);
-
-
-
-
-
-
+ 526 int src_i = i + this_offset;
+ 527 int dst_i = particle_stays(i) ? s : this_chunk_size-1-(i-s);
+
+
+
+
+
+
+
+ 535 ptile.swap(ptile_tmp);
+
+
- 539 ptile.swap(ptile_tmp);
-
-
-
-
-
-
-
-
-
-
-
- 551 int num_swap =
std::min(this_offset - last_offset, num_stay);
-
-
-
- 555 this_offset + num_stay - 1 - i);
-
-
-
- 559 last_offset += num_stay;
-
-
-
-
-
-
-
- 567 template <
class PC1,
class PC2>
-
- 569 if (pc1.numLevels() != pc2.numLevels()) {
return false;}
- 570 if (pc1.do_tiling != pc2.do_tiling) {
return false;}
- 571 if (pc1.tile_size != pc2.tile_size) {
return false;}
- 572 for (
int lev = 0; lev < pc1.numLevels(); ++lev) {
- 573 if (pc1.ParticleBoxArray(lev) != pc2.ParticleBoxArray(lev)) {
return false;}
- 574 if (pc1.ParticleDistributionMap(lev) != pc2.ParticleDistributionMap(lev)) {
return false;}
-
-
-
-
-
-
- 581 using Iter =
typename PC::ParIterType;
- 582 for (
int lev = 0; lev < pc.numLevels(); ++lev) {
- 583 for (Iter pti(pc, lev); pti.isValid(); ++pti) {
- 584 pc.DefineAndReturnParticleTile(lev, pti);
-
-
-
+
+
+
+
+
+
+
+
+ 547 int num_swap =
std::min(this_offset - last_offset, num_stay);
+
+
+
+ 551 this_offset + num_stay - 1 - i);
+
+
+
+ 555 last_offset += num_stay;
+
+
+
+
+
+
+
+ 563 template <
class PC1,
class PC2>
+
+ 565 if (pc1.numLevels() != pc2.numLevels()) {
return false;}
+ 566 if (pc1.do_tiling != pc2.do_tiling) {
return false;}
+ 567 if (pc1.tile_size != pc2.tile_size) {
return false;}
+ 568 for (
int lev = 0; lev < pc1.numLevels(); ++lev) {
+ 569 if (pc1.ParticleBoxArray(lev) != pc2.ParticleBoxArray(lev)) {
return false;}
+ 570 if (pc1.ParticleDistributionMap(lev) != pc2.ParticleDistributionMap(lev)) {
return false;}
+
+
+
+
+
+
+ 577 using Iter =
typename PC::ParIterType;
+ 578 for (
int lev = 0; lev < pc.numLevels(); ++lev) {
+ 579 for (Iter pti(pc, lev); pti.isValid(); ++pti) {
+ 580 pc.DefineAndReturnParticleTile(lev, pti);
+
+
+
+
+ 585 IntVect
computeRefFac (
const ParGDBBase* a_gdb,
int src_lev,
int lev);
+
+
- 589 IntVect
computeRefFac (
const ParGDBBase* a_gdb,
int src_lev,
int lev);
-
-
-
-
-
- 595 template <
typename C>
-
-
- 598 for (
auto c_it = c.begin(); c_it != c.end(); )
-
- 600 if (c_it->second.empty()) { c.erase(c_it++); }
-
-
-
-
-
- 606 template <
class index_type,
typename F>
-
- 608 index_type nbins, F&&
f)
-
-
+
+
+ 591 template <
typename C>
+
+
+ 594 for (
auto c_it = c.begin(); c_it != c.end(); )
+
+ 596 if (c_it->second.empty()) { c.erase(c_it++); }
+
+
+
+
+
+ 602 template <
class index_type,
typename F>
+
+ 604 index_type nbins, F&&
f)
+
+
+
+ 608 constexpr index_type gpu_block_size = 1024;
+ 609 constexpr index_type gpu_block_size_m1 = gpu_block_size - 1;
+
- 612 constexpr index_type gpu_block_size = 1024;
- 613 constexpr index_type gpu_block_size_m1 = gpu_block_size - 1;
-
-
-
- 617 nbins = (nbins + gpu_block_size_m1) / gpu_block_size * gpu_block_size;
-
-
-
-
-
-
- 624 index_type* pllist_start = llist_start.
dataPtr();
- 625 index_type* pllist_next = llist_next.
dataPtr();
- 626 index_type* pperm = perm.
dataPtr();
- 627 index_type* pglobal_idx = global_idx.
dataPtr();
-
-
-
-
-
-
-
- 635 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
- 636 amrex::launch<gpu_block_size>(nbins / gpu_block_size,
Gpu::gpuStream(),
-
- 638 __shared__ index_type sdata[gpu_block_size];
- 639 index_type current_idx = pllist_start[threadIdx.x + gpu_block_size * blockIdx.x];
+
+ 613 nbins = (nbins + gpu_block_size_m1) / gpu_block_size * gpu_block_size;
+
+
+
+
+
+
+ 620 index_type* pllist_start = llist_start.
dataPtr();
+ 621 index_type* pllist_next = llist_next.
dataPtr();
+ 622 index_type* pperm = perm.
dataPtr();
+ 623 index_type* pglobal_idx = global_idx.
dataPtr();
+
+
+
+
+
+
+
+ 631 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
+ 632 amrex::launch<gpu_block_size>(nbins / gpu_block_size,
Gpu::gpuStream(),
+
+ 634 __shared__ index_type sdata[gpu_block_size];
+ 635 index_type current_idx = pllist_start[threadIdx.x + gpu_block_size * blockIdx.x];
+
+
+ 638 sdata[threadIdx.x] = index_type(current_idx != llist_guard);
+
-
- 642 sdata[threadIdx.x] = index_type(current_idx != llist_guard);
-
-
-
- 646 for (index_type i = 1; i<gpu_block_size; i*=2) {
+
+ 642 for (index_type i = 1; i<gpu_block_size; i*=2) {
+
+ 644 if (threadIdx.x >= i) {
+ 645 x = sdata[threadIdx.x - i];
+
648 if (threadIdx.x >= i) {
- 649 x = sdata[threadIdx.x - i];
+ 649 sdata[threadIdx.x] += x;
-
- 652 if (threadIdx.x >= i) {
- 653 sdata[threadIdx.x] += x;
-
+
+
+ 653 if (sdata[gpu_block_size_m1] == 0) {
+
- 657 if (sdata[gpu_block_size_m1] == 0) {
-
-
-
- 661 if (threadIdx.x == gpu_block_size_m1) {
- 662 x = sdata[gpu_block_size_m1];
-
+ 657 if (threadIdx.x == gpu_block_size_m1) {
+ 658 x = sdata[gpu_block_size_m1];
+
+
+
+ 662 if (threadIdx.x < gpu_block_size_m1) {
+ 663 sdata[threadIdx.x] += sdata[gpu_block_size_m1];
- 666 if (threadIdx.x < gpu_block_size_m1) {
- 667 sdata[threadIdx.x] += sdata[gpu_block_size_m1];
+ 666 if (threadIdx.x == gpu_block_size_m1) {
+ 667 sdata[gpu_block_size_m1] += x;
- 670 if (threadIdx.x == gpu_block_size_m1) {
- 671 sdata[gpu_block_size_m1] += x;
-
-
-
- 675 if (current_idx != llist_guard) {
- 676 pperm[sdata[threadIdx.x] - 1] = current_idx;
- 677 current_idx = pllist_next[current_idx];
-
-
-
-
- 682 Abort(
"Not implemented");
-
-
-
-
-
- 688 template <
class index_type,
class PTile>
-
-
-
-
-
-
- 695 const IntVect type_vect = idx_type - idx_type / 2 * 2;
+
+ 671 if (current_idx != llist_guard) {
+ 672 pperm[sdata[threadIdx.x] - 1] = current_idx;
+ 673 current_idx = pllist_next[current_idx];
+
+
+
+
+ 678 Abort(
"Not implemented");
+
+
+
+
+
+ 684 template <
class index_type,
class PTile>
+
+
+
+
+
+
+ 691 const IntVect type_vect = idx_type - idx_type / 2 * 2;
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
- 708 const int ref_product =
AMREX_D_TERM(refine_vect[0], * refine_vect[1], * refine_vect[2]);
- 709 const IntVect ref_offset(
AMREX_D_DECL(1, refine_vect[0], refine_vect[0] * refine_vect[1]));
-
- 711 auto ptd = ptile.getConstParticleTileData();
- 712 using ParticleType =
typename PTile::ParticleType::ConstType;
- 713 PermutationForDeposition<index_type>(perm, nitems, bx.
numPts() * ref_product,
-
-
-
-
- 718 IntVect iv = ((p.pos() - pos_offset) * dxi).round();
-
- 720 IntVect iv_coarse = iv / refine_vect;
- 721 IntVect iv_remainder = iv - iv_coarse * refine_vect;
-
-
-
- 725 return bx.
index(iv_coarse) + bx.
numPts() * (iv_remainder * ref_offset).
sum();
-
-
-
-
- 730 #ifdef AMREX_USE_HDF5_ASYNC
- 731 void async_vol_es_wait_particle();
- 732 void async_vol_es_wait_close_particle();
-
-
-
-
+ 704 const int ref_product =
AMREX_D_TERM(refine_vect[0], * refine_vect[1], * refine_vect[2]);
+ 705 const IntVect ref_offset(
AMREX_D_DECL(1, refine_vect[0], refine_vect[0] * refine_vect[1]));
+
+ 707 auto ptd = ptile.getConstParticleTileData();
+ 708 using ParticleType =
typename PTile::ParticleType::ConstType;
+ 709 PermutationForDeposition<index_type>(perm, nitems, bx.
numPts() * ref_product,
+
+
+
+
+ 714 IntVect iv = ((p.pos() - pos_offset) * dxi).round();
+
+ 716 IntVect iv_coarse = iv / refine_vect;
+ 717 IntVect iv_remainder = iv - iv_coarse * refine_vect;
+
+
+
+ 721 return bx.
index(iv_coarse) + bx.
numPts() * (iv_remainder * ref_offset).
sum();
+
+
+
+
+ 726 #ifdef AMREX_USE_HDF5_ASYNC
+ 727 void async_vol_es_wait_particle();
+ 728 void async_vol_es_wait_close_particle();
+
+
+
+
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:558
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition: AMReX_BLassert.H:50
@@ -829,11 +825,11 @@
@ max
Definition: AMReX_ParallelReduce.H:17
@ sum
Definition: AMReX_ParallelReduce.H:19
static constexpr int P
Definition: AMReX_OpenBC.H:14
-void clearEmptyEntries(C &c)
Definition: AMReX_ParticleUtil.H:596
+void clearEmptyEntries(C &c)
Definition: AMReX_ParticleUtil.H:592
Definition: AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void swapParticle(const ParticleTileData< T_ParticleType, NAR, NAI > &dst, const ParticleTileData< T_ParticleType, NAR, NAI > &src, int src_i, int dst_i) noexcept
A general single particle swapping routine that can run on the GPU.
Definition: AMReX_ParticleTransformation.H:111
-void EnsureThreadSafeTiles(PC &pc)
Definition: AMReX_ParticleUtil.H:580
-void PermutationForDeposition(Gpu::DeviceVector< index_type > &perm, index_type nitems, index_type nbins, F &&f)
Definition: AMReX_ParticleUtil.H:607
+void EnsureThreadSafeTiles(PC &pc)
Definition: AMReX_ParticleUtil.H:576
+void PermutationForDeposition(Gpu::DeviceVector< index_type > &perm, index_type nitems, index_type nbins, F &&f)
Definition: AMReX_ParticleUtil.H:603
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVect getParticleCell(P const &p, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &dxi, const Box &domain) noexcept
Definition: AMReX_ParticleUtil.H:362
@@ -842,7 +838,7 @@
std::enable_if_t< std::is_integral< T >::value > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:97
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool enforcePeriodic(P &p, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &phi, amrex::GpuArray< amrex::ParticleReal, AMREX_SPACEDIM > const &rlo, amrex::GpuArray< amrex::ParticleReal, AMREX_SPACEDIM > const &rhi, amrex::GpuArray< int, AMREX_SPACEDIM > const &is_per) noexcept
Definition: AMReX_ParticleUtil.H:403
Vector< int > computeNeighborProcs(const ParGDBBase *a_gdb, int ngrow)
Definition: AMReX_ParticleUtil.cpp:22
-bool SameIteratorsOK(const PC1 &pc1, const PC2 &pc2)
Definition: AMReX_ParticleUtil.H:568
+bool SameIteratorsOK(const PC1 &pc1, const PC2 &pc2)
Definition: AMReX_ParticleUtil.H:564
int numParticlesOutOfRange(Iterator const &pti, int nGrow)
Returns the number of particles that are more than nGrow cells from the box correspond to the input i...
Definition: AMReX_ParticleUtil.H:34
int partitionParticlesByDest(PTile &ptile, const PLocator &ploc, CellAssignor &&assignor, const ParticleBufferMap &pmap, const GpuArray< Real, AMREX_SPACEDIM > &plo, const GpuArray< Real, AMREX_SPACEDIM > &phi, const GpuArray< ParticleReal, AMREX_SPACEDIM > &rlo, const GpuArray< ParticleReal, AMREX_SPACEDIM > &rhi, const GpuArray< int, AMREX_SPACEDIM > &is_per, int lev, int gid, int, int lev_min, int lev_max, int nGrow, bool remove_negative)
Definition: AMReX_ParticleUtil.H:444
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:212
@@ -876,8 +872,6 @@
Definition: AMReX_GpuMemory.H:56
T * dataPtr()
Definition: AMReX_GpuMemory.H:90
-The struct used to store particles.
Definition: AMReX_Particle.H:240
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE RealVect pos() const &
Definition: AMReX_Particle.H:283
Definition: AMReX_MakeParticle.H:16
diff --git a/amrex/docs_xml/doxygen/AMReX__ParticleUtil_8H.xml b/amrex/docs_xml/doxygen/AMReX__ParticleUtil_8H.xml
index 9865d8abf4..7f431aa9eb 100644
--- a/amrex/docs_xml/doxygen/AMReX__ParticleUtil_8H.xml
+++ b/amrex/docs_xml/doxygen/AMReX__ParticleUtil_8H.xml
@@ -2688,253 +2688,249 @@
}
else
{
-[amrex::Particle<0>]p_prime;
-[AMREX_D_TERM](p_prime.[pos](0)=src_data.pos(0,i+this_offset);,
-p_prime.[pos](1)=src_data.pos(1,i+this_offset);,
-p_prime.[pos](2)=src_data.pos(2,i+this_offset););
-
-[enforcePeriodic](p_prime,plo,phi,rlo,rhi,is_per);
-autotup_prime=ploc(p_prime,lev_min,lev_max,nGrow,assignor);
-assigned_grid=amrex::get<0>(tup_prime);
-assigned_lev=amrex::get<1>(tup_prime);
-if(assigned_grid>=0)
-{
-[AMREX_D_TERM](src_data.pos(0,i+this_offset)=p_prime.[pos](0);,
-src_data.pos(1,i+this_offset)=p_prime.[pos](1);,
-src_data.pos(2,i+this_offset)=p_prime.[pos](2););
-}
-elseif(lev_min>0)
-{
-[AMREX_D_TERM](p_prime.[pos](0)=src_data.pos(0,i+this_offset);,
-p_prime.[pos](1)=src_data.pos(1,i+this_offset);,
-p_prime.[pos](2)=src_data.pos(2,i+this_offset););
-autotup=ploc(p_prime,lev_min,lev_max,nGrow,assignor);
-assigned_grid=amrex::get<0>(tup);
-assigned_lev=amrex::get<1>(tup);
-}
+autop_prime=src_data.getSuperParticle(i+this_offset);
+[enforcePeriodic](p_prime,plo,phi,rlo,rhi,is_per);
+autotup_prime=ploc(p_prime,lev_min,lev_max,nGrow,assignor);
+assigned_grid=amrex::get<0>(tup_prime);
+assigned_lev=amrex::get<1>(tup_prime);
+if(assigned_grid>=0)
+{
+[AMREX_D_TERM](src_data.pos(0,i+this_offset)=p_prime.pos(0);,
+src_data.pos(1,i+this_offset)=p_prime.pos(1);,
+src_data.pos(2,i+this_offset)=p_prime.pos(2););
+}
+elseif(lev_min>0)
+{
+[AMREX_D_TERM](p_prime.pos(0)=src_data.pos(0,i+this_offset);,
+p_prime.pos(1)=src_data.pos(1,i+this_offset);,
+p_prime.pos(2)=src_data.pos(2,i+this_offset););
+autotup=ploc(p_prime,lev_min,lev_max,nGrow,assignor);
+assigned_grid=amrex::get<0>(tup);
+assigned_lev=amrex::get<1>(tup);
+}
+}
+
+if((remove_negative==false)&&(src_data.id(i+this_offset)<0)){
+returntrue;
}
-if((remove_negative==false)&&(src_data.id(i+this_offset)<0)){
-returntrue;
-}
-
-return((assigned_grid==gid)&&(assigned_lev==lev)&&(getPID(lev,gid)==pid));
-};
-
-num_stay=Scan::PrefixSum<int>(this_chunk_size,
-[=][AMREX_GPU_DEVICE](inti)->int
+return((assigned_grid==gid)&&(assigned_lev==lev)&&(getPID(lev,gid)==pid));
+};
+
+num_stay=Scan::PrefixSum<int>(this_chunk_size,
+[=][AMREX_GPU_DEVICE](inti)->int
+{
+returnparticle_stays(i);
+},
+[=][AMREX_GPU_DEVICE](inti,intconst&s)
{
-returnparticle_stays(i);
-},
-[=][AMREX_GPU_DEVICE](inti,intconst&s)
-{
-intsrc_i=i+this_offset;
-intdst_i=particle_stays(i)?s:this_chunk_size-1-(i-s);
-[copyParticle](dst_data,src_data,src_i,dst_i);
-},
-[Scan::Type::exclusive]);
-}
-
-if(num_chunks==1)
+intsrc_i=i+this_offset;
+intdst_i=particle_stays(i)?s:this_chunk_size-1-(i-s);
+[copyParticle](dst_data,src_data,src_i,dst_i);
+},
+[Scan::Type::exclusive]);
+}
+
+if(num_chunks==1)
+{
+ptile.swap(ptile_tmp);
+}
+else
{
-ptile.swap(ptile_tmp);
-}
-else
-{
-[AMREX_FOR_1D](this_chunk_size,i,
-{
-[copyParticle](src_data,dst_data,i,i+this_offset);
-});
-}
-
-if(ichunk>0)
-{
-intnum_swap=[std::min](this_offset-last_offset,num_stay);
-[AMREX_FOR_1D](num_swap,i,
-{
-[swapParticle](src_data,src_data,last_offset+i,
-this_offset+num_stay-1-i);
-});
-}
-
-last_offset+=num_stay;
-}
-
-returnlast_offset;
-}
-
-#endif
-
-template<classPC1,classPC2>
-bool[SameIteratorsOK](constPC1&pc1,constPC2&pc2){
-if(pc1.numLevels()!=pc2.numLevels()){returnfalse;}
-if(pc1.do_tiling!=pc2.do_tiling){returnfalse;}
-if(pc1.tile_size!=pc2.tile_size){returnfalse;}
-for(intlev=0;lev<pc1.numLevels();++lev){
-if(pc1.ParticleBoxArray(lev)!=pc2.ParticleBoxArray(lev)){returnfalse;}
-if(pc1.ParticleDistributionMap(lev)!=pc2.ParticleDistributionMap(lev)){returnfalse;}
-}
-returntrue;
-}
-
-template<classPC>
-void[EnsureThreadSafeTiles](PC&pc){
-usingIter=typenamePC::ParIterType;
-for(intlev=0;lev<pc.numLevels();++lev){
-for(Iterpti(pc,lev);pti.isValid();++pti){
-pc.DefineAndReturnParticleTile(lev,pti);
-}
-}
-}
+[AMREX_FOR_1D](this_chunk_size,i,
+{
+[copyParticle](src_data,dst_data,i,i+this_offset);
+});
+}
+
+if(ichunk>0)
+{
+intnum_swap=[std::min](this_offset-last_offset,num_stay);
+[AMREX_FOR_1D](num_swap,i,
+{
+[swapParticle](src_data,src_data,last_offset+i,
+this_offset+num_stay-1-i);
+});
+}
+
+last_offset+=num_stay;
+}
+
+returnlast_offset;
+}
+
+#endif
+
+template<classPC1,classPC2>
+bool[SameIteratorsOK](constPC1&pc1,constPC2&pc2){
+if(pc1.numLevels()!=pc2.numLevels()){returnfalse;}
+if(pc1.do_tiling!=pc2.do_tiling){returnfalse;}
+if(pc1.tile_size!=pc2.tile_size){returnfalse;}
+for(intlev=0;lev<pc1.numLevels();++lev){
+if(pc1.ParticleBoxArray(lev)!=pc2.ParticleBoxArray(lev)){returnfalse;}
+if(pc1.ParticleDistributionMap(lev)!=pc2.ParticleDistributionMap(lev)){returnfalse;}
+}
+returntrue;
+}
+
+template<classPC>
+void[EnsureThreadSafeTiles](PC&pc){
+usingIter=typenamePC::ParIterType;
+for(intlev=0;lev<pc.numLevels();++lev){
+for(Iterpti(pc,lev);pti.isValid();++pti){
+pc.DefineAndReturnParticleTile(lev,pti);
+}
+}
+}
+
+IntVect[computeRefFac](constParGDBBase*a_gdb,intsrc_lev,intlev);
+
+Vector<int>[computeNeighborProcs](constParGDBBase*a_gdb,intngrow);
-IntVect[computeRefFac](constParGDBBase*a_gdb,intsrc_lev,intlev);
-
-Vector<int>[computeNeighborProcs](constParGDBBase*a_gdb,intngrow);
-
-namespace[particle_detail]
-{
-template<typenameC>
-void[clearEmptyEntries](C&c)
-{
-for(autoc_it=c.begin();c_it!=c.end();)
-{
-if(c_it->second.empty()){c.erase(c_it++);}
-else{++c_it;}
-}
-}
-}
-
-template<classindex_type,typenameF>
-void[PermutationForDeposition]([Gpu::DeviceVector<index_type>]&perm,index_typenitems,
-index_typenbins,F&&[f])
-{
-[BL_PROFILE]("PermutationForDeposition()");
+namespace[particle_detail]
+{
+template<typenameC>
+void[clearEmptyEntries](C&c)
+{
+for(autoc_it=c.begin();c_it!=c.end();)
+{
+if(c_it->second.empty()){c.erase(c_it++);}
+else{++c_it;}
+}
+}
+}
+
+template<classindex_type,typenameF>
+void[PermutationForDeposition]([Gpu::DeviceVector<index_type>]&perm,index_typenitems,
+index_typenbins,F&&[f])
+{
+[BL_PROFILE]("PermutationForDeposition()");
+
+constexprindex_typegpu_block_size=1024;
+constexprindex_typegpu_block_size_m1=gpu_block_size-1;
+constexprindex_typellist_guard=[std::numeric_limits<index_type>::max]();
-constexprindex_typegpu_block_size=1024;
-constexprindex_typegpu_block_size_m1=gpu_block_size-1;
-constexprindex_typellist_guard=[std::numeric_limits<index_type>::max]();
-
-
-nbins=(nbins+gpu_block_size_m1)/gpu_block_size*gpu_block_size;
-
-[Gpu::DeviceVector<index_type>]llist_start(nbins,llist_guard);
-[Gpu::DeviceVector<index_type>]llist_next(nitems);
-perm.[resize](nitems);
-[Gpu::DeviceScalar<index_type>]global_idx(0);
-
-index_type*pllist_start=llist_start.[dataPtr]();
-index_type*pllist_next=llist_next.[dataPtr]();
-index_type*pperm=perm.[dataPtr]();
-index_type*pglobal_idx=global_idx.[dataPtr]();
-
-[amrex::ParallelFor](nitems,[=][AMREX_GPU_DEVICE](index_typei)noexcept
-{
-i=nitems-i-1;
-pllist_next[i]=[Gpu::Atomic::Exch](pllist_start+[f](i),i);
-});
-
-#ifdefined(AMREX_USE_CUDA)||defined(AMREX_USE_HIP)
-amrex::launch<gpu_block_size>(nbins/gpu_block_size,[Gpu::gpuStream](),
-[=][AMREX_GPU_DEVICE](){
-__shared__index_typesdata[gpu_block_size];
-index_typecurrent_idx=pllist_start[threadIdx.x+gpu_block_size*blockIdx.x];
+
+nbins=(nbins+gpu_block_size_m1)/gpu_block_size*gpu_block_size;
+
+[Gpu::DeviceVector<index_type>]llist_start(nbins,llist_guard);
+[Gpu::DeviceVector<index_type>]llist_next(nitems);
+perm.[resize](nitems);
+[Gpu::DeviceScalar<index_type>]global_idx(0);
+
+index_type*pllist_start=llist_start.[dataPtr]();
+index_type*pllist_next=llist_next.[dataPtr]();
+index_type*pperm=perm.[dataPtr]();
+index_type*pglobal_idx=global_idx.[dataPtr]();
+
+[amrex::ParallelFor](nitems,[=][AMREX_GPU_DEVICE](index_typei)noexcept
+{
+i=nitems-i-1;
+pllist_next[i]=[Gpu::Atomic::Exch](pllist_start+[f](i),i);
+});
+
+#ifdefined(AMREX_USE_CUDA)||defined(AMREX_USE_HIP)
+amrex::launch<gpu_block_size>(nbins/gpu_block_size,[Gpu::gpuStream](),
+[=][AMREX_GPU_DEVICE](){
+__shared__index_typesdata[gpu_block_size];
+index_typecurrent_idx=pllist_start[threadIdx.x+gpu_block_size*blockIdx.x];
+
+while(true){
+sdata[threadIdx.x]=index_type(current_idx!=llist_guard);
+index_typex=0;
-while(true){
-sdata[threadIdx.x]=index_type(current_idx!=llist_guard);
-index_typex=0;
-
-
-for(index_typei=1;i<gpu_block_size;i*=2){
+
+for(index_typei=1;i<gpu_block_size;i*=2){
+__syncthreads();
+if(threadIdx.x>=i){
+x=sdata[threadIdx.x-i];
+}
__syncthreads();
if(threadIdx.x>=i){
-x=sdata[threadIdx.x-i];
+sdata[threadIdx.x]+=x;
}
-__syncthreads();
-if(threadIdx.x>=i){
-sdata[threadIdx.x]+=x;
-}
+}
+__syncthreads();
+if(sdata[gpu_block_size_m1]==0){
+break;
}
__syncthreads();
-if(sdata[gpu_block_size_m1]==0){
-break;
-}
-__syncthreads();
-if(threadIdx.x==gpu_block_size_m1){
-x=sdata[gpu_block_size_m1];
-sdata[gpu_block_size_m1]=[Gpu::Atomic::Add](pglobal_idx,x);
+if(threadIdx.x==gpu_block_size_m1){
+x=sdata[gpu_block_size_m1];
+sdata[gpu_block_size_m1]=[Gpu::Atomic::Add](pglobal_idx,x);
+}
+__syncthreads();
+if(threadIdx.x<gpu_block_size_m1){
+sdata[threadIdx.x]+=sdata[gpu_block_size_m1];
}
__syncthreads();
-if(threadIdx.x<gpu_block_size_m1){
-sdata[threadIdx.x]+=sdata[gpu_block_size_m1];
+if(threadIdx.x==gpu_block_size_m1){
+sdata[gpu_block_size_m1]+=x;
}
__syncthreads();
-if(threadIdx.x==gpu_block_size_m1){
-sdata[gpu_block_size_m1]+=x;
-}
-__syncthreads();
-
-if(current_idx!=llist_guard){
-pperm[sdata[threadIdx.x]-1]=current_idx;
-current_idx=pllist_next[current_idx];
-}
-}
-});
-#else
-[Abort]("Notimplemented");
-#endif
-
-[Gpu::Device::streamSynchronize]();
-}
-
-template<classindex_type,classPTile>
-void[PermutationForDeposition]([Gpu::DeviceVector<index_type>]&perm,index_typenitems,
-constPTile&ptile,[Box]bx,[Geometry]geom,const[IntVect]idx_type)
-{
-[AMREX_ALWAYS_ASSERT](idx_type.[allGE]([IntVect](0))&&idx_type.[allLE]([IntVect](2)));
-
-const[IntVect]refine_vect=[max](idx_type,[IntVect](1)).min([IntVect](2));
-const[IntVect]type_vect=idx_type-idx_type/2*2;
+
+if(current_idx!=llist_guard){
+pperm[sdata[threadIdx.x]-1]=current_idx;
+current_idx=pllist_next[current_idx];
+}
+}
+});
+#else
+[Abort]("Notimplemented");
+#endif
+
+[Gpu::Device::streamSynchronize]();
+}
+
+template<classindex_type,classPTile>
+void[PermutationForDeposition]([Gpu::DeviceVector<index_type>]&perm,index_typenitems,
+constPTile&ptile,[Box]bx,[Geometry]geom,const[IntVect]idx_type)
+{
+[AMREX_ALWAYS_ASSERT](idx_type.[allGE]([IntVect](0))&&idx_type.[allLE]([IntVect](2)));
+
+const[IntVect]refine_vect=[max](idx_type,[IntVect](1)).min([IntVect](2));
+const[IntVect]type_vect=idx_type-idx_type/2*2;
+
+geom.[refine](refine_vect);
+
+[Box]domain=geom.[Domain]();
-geom.[refine](refine_vect);
-
-[Box]domain=geom.[Domain]();
-
-bx.[convert](type_vect);
-domain.[convert](type_vect);
+bx.[convert](type_vect);
+domain.[convert](type_vect);
+
+const[RealVect]dxi(geom.[InvCellSize]());
+const[RealVect]pos_offset=Real(0.5)*([RealVect](geom.[ProbLo]())+[RealVect](geom.[ProbHi]())
+-[RealVect](geom.[CellSize]())*[RealVect](domain.[smallEnd]()+domain.[bigEnd]()));
-const[RealVect]dxi(geom.[InvCellSize]());
-const[RealVect]pos_offset=Real(0.5)*([RealVect](geom.[ProbLo]())+[RealVect](geom.[ProbHi]())
--[RealVect](geom.[CellSize]())*[RealVect](domain.[smallEnd]()+domain.[bigEnd]()));
-
-constintref_product=[AMREX_D_TERM](refine_vect[0],*refine_vect[1],*refine_vect[2]);
-const[IntVect]ref_offset([AMREX_D_DECL](1,refine_vect[0],refine_vect[0]*refine_vect[1]));
-
-autoptd=ptile.getConstParticleTileData();
-usingParticleType=typenamePTile::ParticleType::ConstType;
-PermutationForDeposition<index_type>(perm,nitems,bx.[numPts]()*ref_product,
-[=][AMREX_GPU_DEVICE](index_typeidx)noexcept
-{
-constauto&p=[make_particle<ParticleType>]{}(ptd,idx);
-
-[IntVect]iv=((p.pos()-pos_offset)*dxi).round();
-
-[IntVect]iv_coarse=iv/refine_vect;
-[IntVect]iv_remainder=iv-iv_coarse*refine_vect;
-
-iv_coarse=iv_coarse.[max](bx.[smallEnd]());
-iv_coarse=iv_coarse.[min](bx.[bigEnd]());
-returnbx.[index](iv_coarse)+bx.[numPts]()*(iv_remainder*ref_offset).[sum]();
-});
-}
-
-
-#ifdefAMREX_USE_HDF5_ASYNC
-voidasync_vol_es_wait_particle();
-voidasync_vol_es_wait_close_particle();
-#endif
-}
-
-#endif
+constintref_product=[AMREX_D_TERM](refine_vect[0],*refine_vect[1],*refine_vect[2]);
+const[IntVect]ref_offset([AMREX_D_DECL](1,refine_vect[0],refine_vect[0]*refine_vect[1]));
+
+autoptd=ptile.getConstParticleTileData();
+usingParticleType=typenamePTile::ParticleType::ConstType;
+PermutationForDeposition<index_type>(perm,nitems,bx.[numPts]()*ref_product,
+[=][AMREX_GPU_DEVICE](index_typeidx)noexcept
+{
+constauto&p=[make_particle<ParticleType>]{}(ptd,idx);
+
+[IntVect]iv=((p.pos()-pos_offset)*dxi).round();
+
+[IntVect]iv_coarse=iv/refine_vect;
+[IntVect]iv_remainder=iv-iv_coarse*refine_vect;
+
+iv_coarse=iv_coarse.[max](bx.[smallEnd]());
+iv_coarse=iv_coarse.[min](bx.[bigEnd]());
+returnbx.[index](iv_coarse)+bx.[numPts]()*(iv_remainder*ref_offset).[sum]();
+});
+}
+
+
+#ifdefAMREX_USE_HDF5_ASYNC
+voidasync_vol_es_wait_particle();
+voidasync_vol_es_wait_close_particle();
+#endif
+}
+
+#endif
diff --git a/amrex/docs_xml/doxygen/namespaceamrex.xml b/amrex/docs_xml/doxygen/namespaceamrex.xml
index 71e7bc6992..43568ad1d8 100644
--- a/amrex/docs_xml/doxygen/namespaceamrex.xml
+++ b/amrex/docs_xml/doxygen/namespaceamrex.xml
@@ -69326,7 +69326,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu
-
+
[Vector]< [int] >
@@ -69347,7 +69347,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu
-
+
@@ -70018,7 +70018,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu
-
+
@@ -70047,7 +70047,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu
-
+
@@ -70069,7 +70069,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu
-
+
@@ -70106,7 +70106,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu
-
+
@@ -70151,7 +70151,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu
-
+
diff --git a/amrex/docs_xml/doxygen/namespaceamrex_1_1particle__detail.xml b/amrex/docs_xml/doxygen/namespaceamrex_1_1particle__detail.xml
index 6cb594fccb..c8adec0068 100644
--- a/amrex/docs_xml/doxygen/namespaceamrex_1_1particle__detail.xml
+++ b/amrex/docs_xml/doxygen/namespaceamrex_1_1particle__detail.xml
@@ -129,7 +129,7 @@
-
+