diff --git a/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf b/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf index 0cf2629b92..30b7bdf8eb 100644 Binary files a/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf and b/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf differ diff --git a/amrex/docs_html/doxygen/AMReX__NeighborParticlesI_8H_source.html b/amrex/docs_html/doxygen/AMReX__NeighborParticlesI_8H_source.html index ac106782ee..64b899193d 100644 --- a/amrex/docs_html/doxygen/AMReX__NeighborParticlesI_8H_source.html +++ b/amrex/docs_html/doxygen/AMReX__NeighborParticlesI_8H_source.html @@ -1320,7 +1320,7 @@
amrex::ParallelContext::MyProcSub
int MyProcSub() noexcept
my sub-rank in current frame
Definition: AMReX_ParallelContext.H:76
amrex::ParallelContext::global_to_local_rank
int global_to_local_rank(int rank) noexcept
Definition: AMReX_ParallelContext.H:98
amrex::end
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(Box const &box) noexcept
Definition: AMReX_Box.H:1634
-
amrex::EnsureThreadSafeTiles
void EnsureThreadSafeTiles(PC &pc)
Definition: AMReX_ParticleUtil.H:580
+
amrex::EnsureThreadSafeTiles
void EnsureThreadSafeTiles(PC &pc)
Definition: AMReX_ParticleUtil.H:576
amrex::max
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35
amrex::begin
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(Box const &box) noexcept
Definition: AMReX_Box.H:1620
amrex::min
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21
@@ -1328,7 +1328,7 @@
amrex::getParticleCell
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVect getParticleCell(P const &p, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &dxi, const Box &domain) noexcept
Definition: AMReX_ParticleUtil.H:362
amrex::lbound
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:272
amrex::ignore_unused
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:107
-
amrex::SameIteratorsOK
bool SameIteratorsOK(const PC1 &pc1, const PC2 &pc2)
Definition: AMReX_ParticleUtil.H:568
+
amrex::SameIteratorsOK
bool SameIteratorsOK(const PC1 &pc1, const PC2 &pc2)
Definition: AMReX_ParticleUtil.H:564
amrex::numParticlesOutOfRange
int numParticlesOutOfRange(Iterator const &pti, int nGrow)
Returns the number of particles that are more than nGrow cells from the box correspond to the input i...
Definition: AMReX_ParticleUtil.H:34
amrex::shift
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box shift(const Box &b, int dir, int nzones) noexcept
Return a Box with indices shifted by nzones in dir direction.
Definition: AMReX_Box.H:1294
amrex::Abort
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:212
diff --git a/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html b/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html index 8bfa879c9a..f850f5b482 100644 --- a/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html +++ b/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html @@ -2750,7 +2750,7 @@
amrex::SundialsUserFun::f
static int f(sunrealtype t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:42
amrex::detail::max
@ max
Definition: AMReX_ParallelReduce.H:17
amrex::openbc::P
static constexpr int P
Definition: AMReX_OpenBC.H:14
-
amrex::particle_detail::clearEmptyEntries
void clearEmptyEntries(C &c)
Definition: AMReX_ParticleUtil.H:596
+
amrex::particle_detail::clearEmptyEntries
void clearEmptyEntries(C &c)
Definition: AMReX_ParticleUtil.H:592
amrex::system::verbose
int verbose
Definition: AMReX.cpp:101
amrex::amrex_deposit_cic
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void amrex_deposit_cic(P const &p, int nc, amrex::Array4< amrex::Real > const &rho, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &dxi)
Definition: AMReX_Particle_mod_K.H:14
amrex::unpackBuffer
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, const UnpackPolicy &&policy)
Definition: AMReX_ParticleCommunication.H:393
diff --git a/amrex/docs_html/doxygen/AMReX__ParticleUtil_8H_source.html b/amrex/docs_html/doxygen/AMReX__ParticleUtil_8H_source.html index 9ef9b257f4..776812da6f 100644 --- a/amrex/docs_html/doxygen/AMReX__ParticleUtil_8H_source.html +++ b/amrex/docs_html/doxygen/AMReX__ParticleUtil_8H_source.html @@ -515,253 +515,249 @@
487  }
488  else
489  {
-
490  amrex::Particle<0> p_prime;
-
491  AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);,
-
492  p_prime.pos(1) = src_data.pos(1, i+this_offset);,
-
493  p_prime.pos(2) = src_data.pos(2, i+this_offset););
-
494 
-
495  enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per);
-
496  auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
-
497  assigned_grid = amrex::get<0>(tup_prime);
-
498  assigned_lev = amrex::get<1>(tup_prime);
-
499  if (assigned_grid >= 0)
-
500  {
-
501  AMREX_D_TERM(src_data.pos(0, i+this_offset) = p_prime.pos(0);,
-
502  src_data.pos(1, i+this_offset) = p_prime.pos(1);,
-
503  src_data.pos(2, i+this_offset) = p_prime.pos(2););
-
504  }
-
505  else if (lev_min > 0)
-
506  {
-
507  AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);,
-
508  p_prime.pos(1) = src_data.pos(1, i+this_offset);,
-
509  p_prime.pos(2) = src_data.pos(2, i+this_offset););
-
510  auto tup = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
-
511  assigned_grid = amrex::get<0>(tup);
-
512  assigned_lev = amrex::get<1>(tup);
-
513  }
+
490  auto p_prime = src_data.getSuperParticle(i+this_offset);
+
491  enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per);
+
492  auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
+
493  assigned_grid = amrex::get<0>(tup_prime);
+
494  assigned_lev = amrex::get<1>(tup_prime);
+
495  if (assigned_grid >= 0)
+
496  {
+
497  AMREX_D_TERM(src_data.pos(0, i+this_offset) = p_prime.pos(0);,
+
498  src_data.pos(1, i+this_offset) = p_prime.pos(1);,
+
499  src_data.pos(2, i+this_offset) = p_prime.pos(2););
+
500  }
+
501  else if (lev_min > 0)
+
502  {
+
503  AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);,
+
504  p_prime.pos(1) = src_data.pos(1, i+this_offset);,
+
505  p_prime.pos(2) = src_data.pos(2, i+this_offset););
+
506  auto tup = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
+
507  assigned_grid = amrex::get<0>(tup);
+
508  assigned_lev = amrex::get<1>(tup);
+
509  }
+
510  }
+
511 
+
512  if ((remove_negative == false) && (src_data.id(i+this_offset) < 0)) {
+
513  return true;
514  }
515 
-
516  if ((remove_negative == false) && (src_data.id(i+this_offset) < 0)) {
-
517  return true;
-
518  }
-
519 
-
520  return ((assigned_grid == gid) && (assigned_lev == lev) && (getPID(lev, gid) == pid));
-
521  };
-
522 
-
523  num_stay = Scan::PrefixSum<int> (this_chunk_size,
-
524  [=] AMREX_GPU_DEVICE (int i) -> int
+
516  return ((assigned_grid == gid) && (assigned_lev == lev) && (getPID(lev, gid) == pid));
+
517  };
+
518 
+
519  num_stay = Scan::PrefixSum<int> (this_chunk_size,
+
520  [=] AMREX_GPU_DEVICE (int i) -> int
+
521  {
+
522  return particle_stays(i);
+
523  },
+
524  [=] AMREX_GPU_DEVICE (int i, int const& s)
525  {
-
526  return particle_stays(i);
-
527  },
-
528  [=] AMREX_GPU_DEVICE (int i, int const& s)
-
529  {
-
530  int src_i = i + this_offset;
-
531  int dst_i = particle_stays(i) ? s : this_chunk_size-1-(i-s);
-
532  copyParticle(dst_data, src_data, src_i, dst_i);
-
533  },
-
534  Scan::Type::exclusive);
-
535  }
-
536 
-
537  if (num_chunks == 1)
+
526  int src_i = i + this_offset;
+
527  int dst_i = particle_stays(i) ? s : this_chunk_size-1-(i-s);
+
528  copyParticle(dst_data, src_data, src_i, dst_i);
+
529  },
+
530  Scan::Type::exclusive);
+
531  }
+
532 
+
533  if (num_chunks == 1)
+
534  {
+
535  ptile.swap(ptile_tmp);
+
536  }
+
537  else
538  {
-
539  ptile.swap(ptile_tmp);
-
540  }
-
541  else
-
542  {
-
543  AMREX_FOR_1D(this_chunk_size, i,
-
544  {
-
545  copyParticle(src_data, dst_data, i, i + this_offset);
-
546  });
-
547  }
-
548 
-
549  if ( ichunk > 0 )
-
550  {
-
551  int num_swap = std::min(this_offset - last_offset, num_stay);
-
552  AMREX_FOR_1D( num_swap, i,
-
553  {
-
554  swapParticle(src_data, src_data, last_offset + i,
-
555  this_offset + num_stay - 1 - i);
-
556  });
-
557  }
-
558 
-
559  last_offset += num_stay;
-
560  }
-
561 
-
562  return last_offset;
-
563 }
-
564 
-
565 #endif
-
566 
-
567 template <class PC1, class PC2>
-
568 bool SameIteratorsOK (const PC1& pc1, const PC2& pc2) {
-
569  if (pc1.numLevels() != pc2.numLevels()) {return false;}
-
570  if (pc1.do_tiling != pc2.do_tiling) {return false;}
-
571  if (pc1.tile_size != pc2.tile_size) {return false;}
-
572  for (int lev = 0; lev < pc1.numLevels(); ++lev) {
-
573  if (pc1.ParticleBoxArray(lev) != pc2.ParticleBoxArray(lev)) {return false;}
-
574  if (pc1.ParticleDistributionMap(lev) != pc2.ParticleDistributionMap(lev)) {return false;}
-
575  }
-
576  return true;
-
577 }
-
578 
-
579 template <class PC>
-
580 void EnsureThreadSafeTiles(PC& pc) {
-
581  using Iter = typename PC::ParIterType;
-
582  for (int lev = 0; lev < pc.numLevels(); ++lev) {
-
583  for (Iter pti(pc, lev); pti.isValid(); ++pti) {
-
584  pc.DefineAndReturnParticleTile(lev, pti);
-
585  }
-
586  }
-
587 }
+
539  AMREX_FOR_1D(this_chunk_size, i,
+
540  {
+
541  copyParticle(src_data, dst_data, i, i + this_offset);
+
542  });
+
543  }
+
544 
+
545  if ( ichunk > 0 )
+
546  {
+
547  int num_swap = std::min(this_offset - last_offset, num_stay);
+
548  AMREX_FOR_1D( num_swap, i,
+
549  {
+
550  swapParticle(src_data, src_data, last_offset + i,
+
551  this_offset + num_stay - 1 - i);
+
552  });
+
553  }
+
554 
+
555  last_offset += num_stay;
+
556  }
+
557 
+
558  return last_offset;
+
559 }
+
560 
+
561 #endif
+
562 
+
563 template <class PC1, class PC2>
+
564 bool SameIteratorsOK (const PC1& pc1, const PC2& pc2) {
+
565  if (pc1.numLevels() != pc2.numLevels()) {return false;}
+
566  if (pc1.do_tiling != pc2.do_tiling) {return false;}
+
567  if (pc1.tile_size != pc2.tile_size) {return false;}
+
568  for (int lev = 0; lev < pc1.numLevels(); ++lev) {
+
569  if (pc1.ParticleBoxArray(lev) != pc2.ParticleBoxArray(lev)) {return false;}
+
570  if (pc1.ParticleDistributionMap(lev) != pc2.ParticleDistributionMap(lev)) {return false;}
+
571  }
+
572  return true;
+
573 }
+
574 
+
575 template <class PC>
+
576 void EnsureThreadSafeTiles(PC& pc) {
+
577  using Iter = typename PC::ParIterType;
+
578  for (int lev = 0; lev < pc.numLevels(); ++lev) {
+
579  for (Iter pti(pc, lev); pti.isValid(); ++pti) {
+
580  pc.DefineAndReturnParticleTile(lev, pti);
+
581  }
+
582  }
+
583 }
+
584 
+
585 IntVect computeRefFac (const ParGDBBase* a_gdb, int src_lev, int lev);
+
586 
+
587 Vector<int> computeNeighborProcs (const ParGDBBase* a_gdb, int ngrow);
588 
-
589 IntVect computeRefFac (const ParGDBBase* a_gdb, int src_lev, int lev);
-
590 
-
591 Vector<int> computeNeighborProcs (const ParGDBBase* a_gdb, int ngrow);
-
592 
-
593 namespace particle_detail
-
594 {
-
595 template <typename C>
-
596 void clearEmptyEntries (C& c)
-
597 {
-
598  for (auto c_it = c.begin(); c_it != c.end(); /* no ++ */)
-
599  {
-
600  if (c_it->second.empty()) { c.erase(c_it++); }
-
601  else { ++c_it; }
-
602  }
-
603 }
-
604 }
-
605 
-
606 template <class index_type, typename F>
-
607 void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
-
608  index_type nbins, F&& f)
-
609 {
-
610  BL_PROFILE("PermutationForDeposition()");
+
589 namespace particle_detail
+
590 {
+
591 template <typename C>
+
592 void clearEmptyEntries (C& c)
+
593 {
+
594  for (auto c_it = c.begin(); c_it != c.end(); /* no ++ */)
+
595  {
+
596  if (c_it->second.empty()) { c.erase(c_it++); }
+
597  else { ++c_it; }
+
598  }
+
599 }
+
600 }
+
601 
+
602 template <class index_type, typename F>
+
603 void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
+
604  index_type nbins, F&& f)
+
605 {
+
606  BL_PROFILE("PermutationForDeposition()");
+
607 
+
608  constexpr index_type gpu_block_size = 1024;
+
609  constexpr index_type gpu_block_size_m1 = gpu_block_size - 1;
+
610  constexpr index_type llist_guard = std::numeric_limits<index_type>::max();
611 
-
612  constexpr index_type gpu_block_size = 1024;
-
613  constexpr index_type gpu_block_size_m1 = gpu_block_size - 1;
-
614  constexpr index_type llist_guard = std::numeric_limits<index_type>::max();
-
615 
-
616  // round up to gpu_block_size
-
617  nbins = (nbins + gpu_block_size_m1) / gpu_block_size * gpu_block_size;
-
618 
-
619  Gpu::DeviceVector<index_type> llist_start(nbins, llist_guard);
-
620  Gpu::DeviceVector<index_type> llist_next(nitems);
-
621  perm.resize(nitems);
-
622  Gpu::DeviceScalar<index_type> global_idx(0);
-
623 
-
624  index_type* pllist_start = llist_start.dataPtr();
-
625  index_type* pllist_next = llist_next.dataPtr();
-
626  index_type* pperm = perm.dataPtr();
-
627  index_type* pglobal_idx = global_idx.dataPtr();
-
628 
-
629  amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (index_type i) noexcept
-
630  {
-
631  i = nitems - i - 1;
-
632  pllist_next[i] = Gpu::Atomic::Exch(pllist_start + f(i), i);
-
633  });
-
634 
-
635 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
-
636  amrex::launch<gpu_block_size>(nbins / gpu_block_size, Gpu::gpuStream(),
-
637  [=] AMREX_GPU_DEVICE () {
-
638  __shared__ index_type sdata[gpu_block_size];
-
639  index_type current_idx = pllist_start[threadIdx.x + gpu_block_size * blockIdx.x];
+
612  // round up to gpu_block_size
+
613  nbins = (nbins + gpu_block_size_m1) / gpu_block_size * gpu_block_size;
+
614 
+
615  Gpu::DeviceVector<index_type> llist_start(nbins, llist_guard);
+
616  Gpu::DeviceVector<index_type> llist_next(nitems);
+
617  perm.resize(nitems);
+
618  Gpu::DeviceScalar<index_type> global_idx(0);
+
619 
+
620  index_type* pllist_start = llist_start.dataPtr();
+
621  index_type* pllist_next = llist_next.dataPtr();
+
622  index_type* pperm = perm.dataPtr();
+
623  index_type* pglobal_idx = global_idx.dataPtr();
+
624 
+
625  amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (index_type i) noexcept
+
626  {
+
627  i = nitems - i - 1;
+
628  pllist_next[i] = Gpu::Atomic::Exch(pllist_start + f(i), i);
+
629  });
+
630 
+
631 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
+
632  amrex::launch<gpu_block_size>(nbins / gpu_block_size, Gpu::gpuStream(),
+
633  [=] AMREX_GPU_DEVICE () {
+
634  __shared__ index_type sdata[gpu_block_size];
+
635  index_type current_idx = pllist_start[threadIdx.x + gpu_block_size * blockIdx.x];
+
636 
+
637  while (true) {
+
638  sdata[threadIdx.x] = index_type(current_idx != llist_guard);
+
639  index_type x = 0;
640 
-
641  while (true) {
-
642  sdata[threadIdx.x] = index_type(current_idx != llist_guard);
-
643  index_type x = 0;
-
644 
-
645  // simple block wide prefix sum
-
646  for (index_type i = 1; i<gpu_block_size; i*=2) {
+
641  // simple block wide prefix sum
+
642  for (index_type i = 1; i<gpu_block_size; i*=2) {
+
643  __syncthreads();
+
644  if (threadIdx.x >= i) {
+
645  x = sdata[threadIdx.x - i];
+
646  }
647  __syncthreads();
648  if (threadIdx.x >= i) {
-
649  x = sdata[threadIdx.x - i];
+
649  sdata[threadIdx.x] += x;
650  }
-
651  __syncthreads();
-
652  if (threadIdx.x >= i) {
-
653  sdata[threadIdx.x] += x;
-
654  }
+
651  }
+
652  __syncthreads();
+
653  if (sdata[gpu_block_size_m1] == 0) {
+
654  break;
655  }
656  __syncthreads();
-
657  if (sdata[gpu_block_size_m1] == 0) {
-
658  break;
-
659  }
-
660  __syncthreads();
-
661  if (threadIdx.x == gpu_block_size_m1) {
-
662  x = sdata[gpu_block_size_m1];
-
663  sdata[gpu_block_size_m1] = Gpu::Atomic::Add(pglobal_idx, x);
+
657  if (threadIdx.x == gpu_block_size_m1) {
+
658  x = sdata[gpu_block_size_m1];
+
659  sdata[gpu_block_size_m1] = Gpu::Atomic::Add(pglobal_idx, x);
+
660  }
+
661  __syncthreads();
+
662  if (threadIdx.x < gpu_block_size_m1) {
+
663  sdata[threadIdx.x] += sdata[gpu_block_size_m1];
664  }
665  __syncthreads();
-
666  if (threadIdx.x < gpu_block_size_m1) {
-
667  sdata[threadIdx.x] += sdata[gpu_block_size_m1];
+
666  if (threadIdx.x == gpu_block_size_m1) {
+
667  sdata[gpu_block_size_m1] += x;
668  }
669  __syncthreads();
-
670  if (threadIdx.x == gpu_block_size_m1) {
-
671  sdata[gpu_block_size_m1] += x;
-
672  }
-
673  __syncthreads();
-
674 
-
675  if (current_idx != llist_guard) {
-
676  pperm[sdata[threadIdx.x] - 1] = current_idx;
-
677  current_idx = pllist_next[current_idx];
-
678  }
-
679  }
-
680  });
-
681 #else
-
682  Abort("Not implemented");
-
683 #endif
-
684 
-
685  Gpu::Device::streamSynchronize();
-
686 }
-
687 
-
688 template <class index_type, class PTile>
-
689 void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
-
690  const PTile& ptile, Box bx, Geometry geom, const IntVect idx_type)
-
691 {
-
692  AMREX_ALWAYS_ASSERT(idx_type.allGE(IntVect(0)) && idx_type.allLE(IntVect(2)));
-
693 
-
694  const IntVect refine_vect = max(idx_type, IntVect(1)).min(IntVect(2));
-
695  const IntVect type_vect = idx_type - idx_type / 2 * 2;
+
670 
+
671  if (current_idx != llist_guard) {
+
672  pperm[sdata[threadIdx.x] - 1] = current_idx;
+
673  current_idx = pllist_next[current_idx];
+
674  }
+
675  }
+
676  });
+
677 #else
+
678  Abort("Not implemented");
+
679 #endif
+
680 
+
681  Gpu::Device::streamSynchronize();
+
682 }
+
683 
+
684 template <class index_type, class PTile>
+
685 void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
+
686  const PTile& ptile, Box bx, Geometry geom, const IntVect idx_type)
+
687 {
+
688  AMREX_ALWAYS_ASSERT(idx_type.allGE(IntVect(0)) && idx_type.allLE(IntVect(2)));
+
689 
+
690  const IntVect refine_vect = max(idx_type, IntVect(1)).min(IntVect(2));
+
691  const IntVect type_vect = idx_type - idx_type / 2 * 2;
+
692 
+
693  geom.refine(refine_vect);
+
694 
+
695  Box domain = geom.Domain();
696 
-
697  geom.refine(refine_vect);
-
698 
-
699  Box domain = geom.Domain();
-
700 
-
701  bx.convert(type_vect);
-
702  domain.convert(type_vect);
+
697  bx.convert(type_vect);
+
698  domain.convert(type_vect);
+
699 
+
700  const RealVect dxi(geom.InvCellSize());
+
701  const RealVect pos_offset = Real(0.5) * (RealVect(geom.ProbLo()) + RealVect(geom.ProbHi())
+
702  - RealVect(geom.CellSize()) * RealVect(domain.smallEnd() + domain.bigEnd()));
703 
-
704  const RealVect dxi(geom.InvCellSize());
-
705  const RealVect pos_offset = Real(0.5) * (RealVect(geom.ProbLo()) + RealVect(geom.ProbHi())
-
706  - RealVect(geom.CellSize()) * RealVect(domain.smallEnd() + domain.bigEnd()));
-
707 
-
708  const int ref_product = AMREX_D_TERM(refine_vect[0], * refine_vect[1], * refine_vect[2]);
-
709  const IntVect ref_offset(AMREX_D_DECL(1, refine_vect[0], refine_vect[0] * refine_vect[1]));
-
710 
-
711  auto ptd = ptile.getConstParticleTileData();
-
712  using ParticleType = typename PTile::ParticleType::ConstType;
-
713  PermutationForDeposition<index_type>(perm, nitems, bx.numPts() * ref_product,
-
714  [=] AMREX_GPU_DEVICE (index_type idx) noexcept
-
715  {
-
716  const auto& p = make_particle<ParticleType>{}(ptd,idx);
-
717 
-
718  IntVect iv = ((p.pos() - pos_offset) * dxi).round();
-
719 
-
720  IntVect iv_coarse = iv / refine_vect;
-
721  IntVect iv_remainder = iv - iv_coarse * refine_vect;
-
722 
-
723  iv_coarse = iv_coarse.max(bx.smallEnd());
-
724  iv_coarse = iv_coarse.min(bx.bigEnd());
-
725  return bx.index(iv_coarse) + bx.numPts() * (iv_remainder * ref_offset).sum();
-
726  });
-
727 }
-
728 
-
729 
-
730 #ifdef AMREX_USE_HDF5_ASYNC
-
731 void async_vol_es_wait_particle();
-
732 void async_vol_es_wait_close_particle();
-
733 #endif
-
734 }
-
735 
-
736 #endif // include guard
+
704  const int ref_product = AMREX_D_TERM(refine_vect[0], * refine_vect[1], * refine_vect[2]);
+
705  const IntVect ref_offset(AMREX_D_DECL(1, refine_vect[0], refine_vect[0] * refine_vect[1]));
+
706 
+
707  auto ptd = ptile.getConstParticleTileData();
+
708  using ParticleType = typename PTile::ParticleType::ConstType;
+
709  PermutationForDeposition<index_type>(perm, nitems, bx.numPts() * ref_product,
+
710  [=] AMREX_GPU_DEVICE (index_type idx) noexcept
+
711  {
+
712  const auto& p = make_particle<ParticleType>{}(ptd,idx);
+
713 
+
714  IntVect iv = ((p.pos() - pos_offset) * dxi).round();
+
715 
+
716  IntVect iv_coarse = iv / refine_vect;
+
717  IntVect iv_remainder = iv - iv_coarse * refine_vect;
+
718 
+
719  iv_coarse = iv_coarse.max(bx.smallEnd());
+
720  iv_coarse = iv_coarse.min(bx.bigEnd());
+
721  return bx.index(iv_coarse) + bx.numPts() * (iv_remainder * ref_offset).sum();
+
722  });
+
723 }
+
724 
+
725 
+
726 #ifdef AMREX_USE_HDF5_ASYNC
+
727 void async_vol_es_wait_particle();
+
728 void async_vol_es_wait_close_particle();
+
729 #endif
+
730 }
+
731 
+
732 #endif // include guard
BL_PROFILE
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:558
AMREX_ASSERT
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
AMREX_ALWAYS_ASSERT
#define AMREX_ALWAYS_ASSERT(EX)
Definition: AMReX_BLassert.H:50
@@ -829,11 +825,11 @@
amrex::detail::max
@ max
Definition: AMReX_ParallelReduce.H:17
amrex::detail::sum
@ sum
Definition: AMReX_ParallelReduce.H:19
amrex::openbc::P
static constexpr int P
Definition: AMReX_OpenBC.H:14
-
amrex::particle_detail::clearEmptyEntries
void clearEmptyEntries(C &c)
Definition: AMReX_ParticleUtil.H:596
+
amrex::particle_detail::clearEmptyEntries
void clearEmptyEntries(C &c)
Definition: AMReX_ParticleUtil.H:592
amrex
Definition: AMReX_Amr.cpp:49
amrex::swapParticle
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void swapParticle(const ParticleTileData< T_ParticleType, NAR, NAI > &dst, const ParticleTileData< T_ParticleType, NAR, NAI > &src, int src_i, int dst_i) noexcept
A general single particle swapping routine that can run on the GPU.
Definition: AMReX_ParticleTransformation.H:111
-
amrex::EnsureThreadSafeTiles
void EnsureThreadSafeTiles(PC &pc)
Definition: AMReX_ParticleUtil.H:580
-
amrex::PermutationForDeposition
void PermutationForDeposition(Gpu::DeviceVector< index_type > &perm, index_type nitems, index_type nbins, F &&f)
Definition: AMReX_ParticleUtil.H:607
+
amrex::EnsureThreadSafeTiles
void EnsureThreadSafeTiles(PC &pc)
Definition: AMReX_ParticleUtil.H:576
+
amrex::PermutationForDeposition
void PermutationForDeposition(Gpu::DeviceVector< index_type > &perm, index_type nitems, index_type nbins, F &&f)
Definition: AMReX_ParticleUtil.H:603
amrex::max
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35
amrex::min
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21
amrex::getParticleCell
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVect getParticleCell(P const &p, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &dxi, const Box &domain) noexcept
Definition: AMReX_ParticleUtil.H:362
@@ -842,7 +838,7 @@
amrex::ParallelFor
std::enable_if_t< std::is_integral< T >::value > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:97
amrex::enforcePeriodic
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool enforcePeriodic(P &p, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &phi, amrex::GpuArray< amrex::ParticleReal, AMREX_SPACEDIM > const &rlo, amrex::GpuArray< amrex::ParticleReal, AMREX_SPACEDIM > const &rhi, amrex::GpuArray< int, AMREX_SPACEDIM > const &is_per) noexcept
Definition: AMReX_ParticleUtil.H:403
amrex::computeNeighborProcs
Vector< int > computeNeighborProcs(const ParGDBBase *a_gdb, int ngrow)
Definition: AMReX_ParticleUtil.cpp:22
-
amrex::SameIteratorsOK
bool SameIteratorsOK(const PC1 &pc1, const PC2 &pc2)
Definition: AMReX_ParticleUtil.H:568
+
amrex::SameIteratorsOK
bool SameIteratorsOK(const PC1 &pc1, const PC2 &pc2)
Definition: AMReX_ParticleUtil.H:564
amrex::numParticlesOutOfRange
int numParticlesOutOfRange(Iterator const &pti, int nGrow)
Returns the number of particles that are more than nGrow cells from the box correspond to the input i...
Definition: AMReX_ParticleUtil.H:34
amrex::partitionParticlesByDest
int partitionParticlesByDest(PTile &ptile, const PLocator &ploc, CellAssignor &&assignor, const ParticleBufferMap &pmap, const GpuArray< Real, AMREX_SPACEDIM > &plo, const GpuArray< Real, AMREX_SPACEDIM > &phi, const GpuArray< ParticleReal, AMREX_SPACEDIM > &rlo, const GpuArray< ParticleReal, AMREX_SPACEDIM > &rhi, const GpuArray< int, AMREX_SPACEDIM > &is_per, int lev, int gid, int, int lev_min, int lev_max, int nGrow, bool remove_negative)
Definition: AMReX_ParticleUtil.H:444
amrex::Abort
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:212
@@ -876,8 +872,6 @@
amrex::GpuArray< Real, AMREX_SPACEDIM >
amrex::Gpu::DeviceScalar
Definition: AMReX_GpuMemory.H:56
amrex::Gpu::DeviceScalar::dataPtr
T * dataPtr()
Definition: AMReX_GpuMemory.H:90
-
amrex::Particle
The struct used to store particles.
Definition: AMReX_Particle.H:240
-
amrex::Particle::pos
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE RealVect pos() const &
Definition: AMReX_Particle.H:283
make_particle
Definition: AMReX_MakeParticle.H:16
diff --git a/amrex/docs_xml/doxygen/AMReX__ParticleUtil_8H.xml b/amrex/docs_xml/doxygen/AMReX__ParticleUtil_8H.xml index 9865d8abf4..7f431aa9eb 100644 --- a/amrex/docs_xml/doxygen/AMReX__ParticleUtil_8H.xml +++ b/amrex/docs_xml/doxygen/AMReX__ParticleUtil_8H.xml @@ -2688,253 +2688,249 @@ } else { -amrex::Particle<0>p_prime; -AMREX_D_TERM(p_prime.pos(0)=src_data.pos(0,i+this_offset);, -p_prime.pos(1)=src_data.pos(1,i+this_offset);, -p_prime.pos(2)=src_data.pos(2,i+this_offset);); - -enforcePeriodic(p_prime,plo,phi,rlo,rhi,is_per); -autotup_prime=ploc(p_prime,lev_min,lev_max,nGrow,assignor); -assigned_grid=amrex::get<0>(tup_prime); -assigned_lev=amrex::get<1>(tup_prime); -if(assigned_grid>=0) -{ -AMREX_D_TERM(src_data.pos(0,i+this_offset)=p_prime.pos(0);, -src_data.pos(1,i+this_offset)=p_prime.pos(1);, -src_data.pos(2,i+this_offset)=p_prime.pos(2);); -} -elseif(lev_min>0) -{ -AMREX_D_TERM(p_prime.pos(0)=src_data.pos(0,i+this_offset);, -p_prime.pos(1)=src_data.pos(1,i+this_offset);, -p_prime.pos(2)=src_data.pos(2,i+this_offset);); -autotup=ploc(p_prime,lev_min,lev_max,nGrow,assignor); -assigned_grid=amrex::get<0>(tup); -assigned_lev=amrex::get<1>(tup); -} +autop_prime=src_data.getSuperParticle(i+this_offset); +enforcePeriodic(p_prime,plo,phi,rlo,rhi,is_per); +autotup_prime=ploc(p_prime,lev_min,lev_max,nGrow,assignor); +assigned_grid=amrex::get<0>(tup_prime); +assigned_lev=amrex::get<1>(tup_prime); +if(assigned_grid>=0) +{ +AMREX_D_TERM(src_data.pos(0,i+this_offset)=p_prime.pos(0);, +src_data.pos(1,i+this_offset)=p_prime.pos(1);, +src_data.pos(2,i+this_offset)=p_prime.pos(2);); +} +elseif(lev_min>0) +{ +AMREX_D_TERM(p_prime.pos(0)=src_data.pos(0,i+this_offset);, +p_prime.pos(1)=src_data.pos(1,i+this_offset);, +p_prime.pos(2)=src_data.pos(2,i+this_offset);); +autotup=ploc(p_prime,lev_min,lev_max,nGrow,assignor); +assigned_grid=amrex::get<0>(tup); +assigned_lev=amrex::get<1>(tup); +} +} + +if((remove_negative==false)&&(src_data.id(i+this_offset)<0)){ +returntrue; } -if((remove_negative==false)&&(src_data.id(i+this_offset)<0)){ -returntrue; -} - -return((assigned_grid==gid)&&(assigned_lev==lev)&&(getPID(lev,gid)==pid)); -}; - -num_stay=Scan::PrefixSum<int>(this_chunk_size, -[=]AMREX_GPU_DEVICE(inti)->int +return((assigned_grid==gid)&&(assigned_lev==lev)&&(getPID(lev,gid)==pid)); +}; + +num_stay=Scan::PrefixSum<int>(this_chunk_size, +[=]AMREX_GPU_DEVICE(inti)->int +{ +returnparticle_stays(i); +}, +[=]AMREX_GPU_DEVICE(inti,intconst&s) { -returnparticle_stays(i); -}, -[=]AMREX_GPU_DEVICE(inti,intconst&s) -{ -intsrc_i=i+this_offset; -intdst_i=particle_stays(i)?s:this_chunk_size-1-(i-s); -copyParticle(dst_data,src_data,src_i,dst_i); -}, -Scan::Type::exclusive); -} - -if(num_chunks==1) +intsrc_i=i+this_offset; +intdst_i=particle_stays(i)?s:this_chunk_size-1-(i-s); +copyParticle(dst_data,src_data,src_i,dst_i); +}, +Scan::Type::exclusive); +} + +if(num_chunks==1) +{ +ptile.swap(ptile_tmp); +} +else { -ptile.swap(ptile_tmp); -} -else -{ -AMREX_FOR_1D(this_chunk_size,i, -{ -copyParticle(src_data,dst_data,i,i+this_offset); -}); -} - -if(ichunk>0) -{ -intnum_swap=std::min(this_offset-last_offset,num_stay); -AMREX_FOR_1D(num_swap,i, -{ -swapParticle(src_data,src_data,last_offset+i, -this_offset+num_stay-1-i); -}); -} - -last_offset+=num_stay; -} - -returnlast_offset; -} - -#endif - -template<classPC1,classPC2> -boolSameIteratorsOK(constPC1&pc1,constPC2&pc2){ -if(pc1.numLevels()!=pc2.numLevels()){returnfalse;} -if(pc1.do_tiling!=pc2.do_tiling){returnfalse;} -if(pc1.tile_size!=pc2.tile_size){returnfalse;} -for(intlev=0;lev<pc1.numLevels();++lev){ -if(pc1.ParticleBoxArray(lev)!=pc2.ParticleBoxArray(lev)){returnfalse;} -if(pc1.ParticleDistributionMap(lev)!=pc2.ParticleDistributionMap(lev)){returnfalse;} -} -returntrue; -} - -template<classPC> -voidEnsureThreadSafeTiles(PC&pc){ -usingIter=typenamePC::ParIterType; -for(intlev=0;lev<pc.numLevels();++lev){ -for(Iterpti(pc,lev);pti.isValid();++pti){ -pc.DefineAndReturnParticleTile(lev,pti); -} -} -} +AMREX_FOR_1D(this_chunk_size,i, +{ +copyParticle(src_data,dst_data,i,i+this_offset); +}); +} + +if(ichunk>0) +{ +intnum_swap=std::min(this_offset-last_offset,num_stay); +AMREX_FOR_1D(num_swap,i, +{ +swapParticle(src_data,src_data,last_offset+i, +this_offset+num_stay-1-i); +}); +} + +last_offset+=num_stay; +} + +returnlast_offset; +} + +#endif + +template<classPC1,classPC2> +boolSameIteratorsOK(constPC1&pc1,constPC2&pc2){ +if(pc1.numLevels()!=pc2.numLevels()){returnfalse;} +if(pc1.do_tiling!=pc2.do_tiling){returnfalse;} +if(pc1.tile_size!=pc2.tile_size){returnfalse;} +for(intlev=0;lev<pc1.numLevels();++lev){ +if(pc1.ParticleBoxArray(lev)!=pc2.ParticleBoxArray(lev)){returnfalse;} +if(pc1.ParticleDistributionMap(lev)!=pc2.ParticleDistributionMap(lev)){returnfalse;} +} +returntrue; +} + +template<classPC> +voidEnsureThreadSafeTiles(PC&pc){ +usingIter=typenamePC::ParIterType; +for(intlev=0;lev<pc.numLevels();++lev){ +for(Iterpti(pc,lev);pti.isValid();++pti){ +pc.DefineAndReturnParticleTile(lev,pti); +} +} +} + +IntVectcomputeRefFac(constParGDBBase*a_gdb,intsrc_lev,intlev); + +Vector<int>computeNeighborProcs(constParGDBBase*a_gdb,intngrow); -IntVectcomputeRefFac(constParGDBBase*a_gdb,intsrc_lev,intlev); - -Vector<int>computeNeighborProcs(constParGDBBase*a_gdb,intngrow); - -namespaceparticle_detail -{ -template<typenameC> -voidclearEmptyEntries(C&c) -{ -for(autoc_it=c.begin();c_it!=c.end();/*no++*/) -{ -if(c_it->second.empty()){c.erase(c_it++);} -else{++c_it;} -} -} -} - -template<classindex_type,typenameF> -voidPermutationForDeposition(Gpu::DeviceVector<index_type>&perm,index_typenitems, -index_typenbins,F&&f) -{ -BL_PROFILE("PermutationForDeposition()"); +namespaceparticle_detail +{ +template<typenameC> +voidclearEmptyEntries(C&c) +{ +for(autoc_it=c.begin();c_it!=c.end();/*no++*/) +{ +if(c_it->second.empty()){c.erase(c_it++);} +else{++c_it;} +} +} +} + +template<classindex_type,typenameF> +voidPermutationForDeposition(Gpu::DeviceVector<index_type>&perm,index_typenitems, +index_typenbins,F&&f) +{ +BL_PROFILE("PermutationForDeposition()"); + +constexprindex_typegpu_block_size=1024; +constexprindex_typegpu_block_size_m1=gpu_block_size-1; +constexprindex_typellist_guard=std::numeric_limits<index_type>::max(); -constexprindex_typegpu_block_size=1024; -constexprindex_typegpu_block_size_m1=gpu_block_size-1; -constexprindex_typellist_guard=std::numeric_limits<index_type>::max(); - -//rounduptogpu_block_size -nbins=(nbins+gpu_block_size_m1)/gpu_block_size*gpu_block_size; - -Gpu::DeviceVector<index_type>llist_start(nbins,llist_guard); -Gpu::DeviceVector<index_type>llist_next(nitems); -perm.resize(nitems); -Gpu::DeviceScalar<index_type>global_idx(0); - -index_type*pllist_start=llist_start.dataPtr(); -index_type*pllist_next=llist_next.dataPtr(); -index_type*pperm=perm.dataPtr(); -index_type*pglobal_idx=global_idx.dataPtr(); - -amrex::ParallelFor(nitems,[=]AMREX_GPU_DEVICE(index_typei)noexcept -{ -i=nitems-i-1; -pllist_next[i]=Gpu::Atomic::Exch(pllist_start+f(i),i); -}); - -#ifdefined(AMREX_USE_CUDA)||defined(AMREX_USE_HIP) -amrex::launch<gpu_block_size>(nbins/gpu_block_size,Gpu::gpuStream(), -[=]AMREX_GPU_DEVICE(){ -__shared__index_typesdata[gpu_block_size]; -index_typecurrent_idx=pllist_start[threadIdx.x+gpu_block_size*blockIdx.x]; +//rounduptogpu_block_size +nbins=(nbins+gpu_block_size_m1)/gpu_block_size*gpu_block_size; + +Gpu::DeviceVector<index_type>llist_start(nbins,llist_guard); +Gpu::DeviceVector<index_type>llist_next(nitems); +perm.resize(nitems); +Gpu::DeviceScalar<index_type>global_idx(0); + +index_type*pllist_start=llist_start.dataPtr(); +index_type*pllist_next=llist_next.dataPtr(); +index_type*pperm=perm.dataPtr(); +index_type*pglobal_idx=global_idx.dataPtr(); + +amrex::ParallelFor(nitems,[=]AMREX_GPU_DEVICE(index_typei)noexcept +{ +i=nitems-i-1; +pllist_next[i]=Gpu::Atomic::Exch(pllist_start+f(i),i); +}); + +#ifdefined(AMREX_USE_CUDA)||defined(AMREX_USE_HIP) +amrex::launch<gpu_block_size>(nbins/gpu_block_size,Gpu::gpuStream(), +[=]AMREX_GPU_DEVICE(){ +__shared__index_typesdata[gpu_block_size]; +index_typecurrent_idx=pllist_start[threadIdx.x+gpu_block_size*blockIdx.x]; + +while(true){ +sdata[threadIdx.x]=index_type(current_idx!=llist_guard); +index_typex=0; -while(true){ -sdata[threadIdx.x]=index_type(current_idx!=llist_guard); -index_typex=0; - -//simpleblockwideprefixsum -for(index_typei=1;i<gpu_block_size;i*=2){ +//simpleblockwideprefixsum +for(index_typei=1;i<gpu_block_size;i*=2){ +__syncthreads(); +if(threadIdx.x>=i){ +x=sdata[threadIdx.x-i]; +} __syncthreads(); if(threadIdx.x>=i){ -x=sdata[threadIdx.x-i]; +sdata[threadIdx.x]+=x; } -__syncthreads(); -if(threadIdx.x>=i){ -sdata[threadIdx.x]+=x; -} +} +__syncthreads(); +if(sdata[gpu_block_size_m1]==0){ +break; } __syncthreads(); -if(sdata[gpu_block_size_m1]==0){ -break; -} -__syncthreads(); -if(threadIdx.x==gpu_block_size_m1){ -x=sdata[gpu_block_size_m1]; -sdata[gpu_block_size_m1]=Gpu::Atomic::Add(pglobal_idx,x); +if(threadIdx.x==gpu_block_size_m1){ +x=sdata[gpu_block_size_m1]; +sdata[gpu_block_size_m1]=Gpu::Atomic::Add(pglobal_idx,x); +} +__syncthreads(); +if(threadIdx.x<gpu_block_size_m1){ +sdata[threadIdx.x]+=sdata[gpu_block_size_m1]; } __syncthreads(); -if(threadIdx.x<gpu_block_size_m1){ -sdata[threadIdx.x]+=sdata[gpu_block_size_m1]; +if(threadIdx.x==gpu_block_size_m1){ +sdata[gpu_block_size_m1]+=x; } __syncthreads(); -if(threadIdx.x==gpu_block_size_m1){ -sdata[gpu_block_size_m1]+=x; -} -__syncthreads(); - -if(current_idx!=llist_guard){ -pperm[sdata[threadIdx.x]-1]=current_idx; -current_idx=pllist_next[current_idx]; -} -} -}); -#else -Abort("Notimplemented"); -#endif - -Gpu::Device::streamSynchronize(); -} - -template<classindex_type,classPTile> -voidPermutationForDeposition(Gpu::DeviceVector<index_type>&perm,index_typenitems, -constPTile&ptile,Boxbx,Geometrygeom,constIntVectidx_type) -{ -AMREX_ALWAYS_ASSERT(idx_type.allGE(IntVect(0))&&idx_type.allLE(IntVect(2))); - -constIntVectrefine_vect=max(idx_type,IntVect(1)).min(IntVect(2)); -constIntVecttype_vect=idx_type-idx_type/2*2; + +if(current_idx!=llist_guard){ +pperm[sdata[threadIdx.x]-1]=current_idx; +current_idx=pllist_next[current_idx]; +} +} +}); +#else +Abort("Notimplemented"); +#endif + +Gpu::Device::streamSynchronize(); +} + +template<classindex_type,classPTile> +voidPermutationForDeposition(Gpu::DeviceVector<index_type>&perm,index_typenitems, +constPTile&ptile,Boxbx,Geometrygeom,constIntVectidx_type) +{ +AMREX_ALWAYS_ASSERT(idx_type.allGE(IntVect(0))&&idx_type.allLE(IntVect(2))); + +constIntVectrefine_vect=max(idx_type,IntVect(1)).min(IntVect(2)); +constIntVecttype_vect=idx_type-idx_type/2*2; + +geom.refine(refine_vect); + +Boxdomain=geom.Domain(); -geom.refine(refine_vect); - -Boxdomain=geom.Domain(); - -bx.convert(type_vect); -domain.convert(type_vect); +bx.convert(type_vect); +domain.convert(type_vect); + +constRealVectdxi(geom.InvCellSize()); +constRealVectpos_offset=Real(0.5)*(RealVect(geom.ProbLo())+RealVect(geom.ProbHi()) +-RealVect(geom.CellSize())*RealVect(domain.smallEnd()+domain.bigEnd())); -constRealVectdxi(geom.InvCellSize()); -constRealVectpos_offset=Real(0.5)*(RealVect(geom.ProbLo())+RealVect(geom.ProbHi()) --RealVect(geom.CellSize())*RealVect(domain.smallEnd()+domain.bigEnd())); - -constintref_product=AMREX_D_TERM(refine_vect[0],*refine_vect[1],*refine_vect[2]); -constIntVectref_offset(AMREX_D_DECL(1,refine_vect[0],refine_vect[0]*refine_vect[1])); - -autoptd=ptile.getConstParticleTileData(); -usingParticleType=typenamePTile::ParticleType::ConstType; -PermutationForDeposition<index_type>(perm,nitems,bx.numPts()*ref_product, -[=]AMREX_GPU_DEVICE(index_typeidx)noexcept -{ -constauto&p=make_particle<ParticleType>{}(ptd,idx); - -IntVectiv=((p.pos()-pos_offset)*dxi).round(); - -IntVectiv_coarse=iv/refine_vect; -IntVectiv_remainder=iv-iv_coarse*refine_vect; - -iv_coarse=iv_coarse.max(bx.smallEnd()); -iv_coarse=iv_coarse.min(bx.bigEnd()); -returnbx.index(iv_coarse)+bx.numPts()*(iv_remainder*ref_offset).sum(); -}); -} - - -#ifdefAMREX_USE_HDF5_ASYNC -voidasync_vol_es_wait_particle(); -voidasync_vol_es_wait_close_particle(); -#endif -} - -#endif//includeguard +constintref_product=AMREX_D_TERM(refine_vect[0],*refine_vect[1],*refine_vect[2]); +constIntVectref_offset(AMREX_D_DECL(1,refine_vect[0],refine_vect[0]*refine_vect[1])); + +autoptd=ptile.getConstParticleTileData(); +usingParticleType=typenamePTile::ParticleType::ConstType; +PermutationForDeposition<index_type>(perm,nitems,bx.numPts()*ref_product, +[=]AMREX_GPU_DEVICE(index_typeidx)noexcept +{ +constauto&p=make_particle<ParticleType>{}(ptd,idx); + +IntVectiv=((p.pos()-pos_offset)*dxi).round(); + +IntVectiv_coarse=iv/refine_vect; +IntVectiv_remainder=iv-iv_coarse*refine_vect; + +iv_coarse=iv_coarse.max(bx.smallEnd()); +iv_coarse=iv_coarse.min(bx.bigEnd()); +returnbx.index(iv_coarse)+bx.numPts()*(iv_remainder*ref_offset).sum(); +}); +} + + +#ifdefAMREX_USE_HDF5_ASYNC +voidasync_vol_es_wait_particle(); +voidasync_vol_es_wait_close_particle(); +#endif +} + +#endif//includeguard diff --git a/amrex/docs_xml/doxygen/namespaceamrex.xml b/amrex/docs_xml/doxygen/namespaceamrex.xml index 71e7bc6992..43568ad1d8 100644 --- a/amrex/docs_xml/doxygen/namespaceamrex.xml +++ b/amrex/docs_xml/doxygen/namespaceamrex.xml @@ -69326,7 +69326,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu - + Vector< int > @@ -69347,7 +69347,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu - + @@ -70018,7 +70018,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu - + @@ -70047,7 +70047,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu - + @@ -70069,7 +70069,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu - + @@ -70106,7 +70106,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu - + @@ -70151,7 +70151,7 @@ Example usage: using PType = typename PC::ParticleType; amrex::ReduceOps<Redu - + diff --git a/amrex/docs_xml/doxygen/namespaceamrex_1_1particle__detail.xml b/amrex/docs_xml/doxygen/namespaceamrex_1_1particle__detail.xml index 6cb594fccb..c8adec0068 100644 --- a/amrex/docs_xml/doxygen/namespaceamrex_1_1particle__detail.xml +++ b/amrex/docs_xml/doxygen/namespaceamrex_1_1particle__detail.xml @@ -129,7 +129,7 @@ - +