From de5215bcdd9b4c70fcf4dd42c43f21a1f1b61a32 Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Thu, 1 Feb 2024 18:29:37 -0800
Subject: [PATCH 1/2] Doc: Compute Pure SoA Layout

Add an explicit, GPU/CPU-portable example of how to compute
on the new, pure SoA particle layout.
---
 docs/source/usage/compute.rst   | 24 +++++++++---
 tests/test_particleContainer.py | 69 +++++++++++++++++++++++++++++++--
 2 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/docs/source/usage/compute.rst b/docs/source/usage/compute.rst
index e14a838f..32f5acd6 100644
--- a/docs/source/usage/compute.rst
+++ b/docs/source/usage/compute.rst
@@ -61,15 +61,27 @@ AMReX `Particles <https://amrex-codes.github.io/amrex/docs_html/Particle_Chapter
 There are a few small differences to the `iteration over a ParticleContainer <https://amrex-codes.github.io/amrex/docs_html/Particle.html#iterating-over-particles>`__ compared to a ``MultiFab``:
 
 * ``ParticleContainer`` is aware of mesh-refinement levels,
-* AMReX supports a variety of data layouts for particles (AoS and SoA + runtime SoA attributes), which requires a few more calls to access.
+* AMReX supports a variety of data layouts for particles: the modern pure SoA + runtime SoA attributes layout and the legacy AoS + SoA + runtime SoA attributes layout.
 
 Here is the general structure for computing on particles:
 
-.. literalinclude:: ../../../tests/test_particleContainer.py
-   :language: python3
-   :dedent: 4
-   :start-after: # Manual: Compute PC START
-   :end-before: # Manual: Compute PC END
+.. tab-set::
+
+   .. tab-item:: Modern (pure SoA) Layout
+
+      .. literalinclude:: ../../../tests/test_particleContainer.py
+         :language: python3
+         :dedent: 4
+         :start-after: # Manual: Pure SoA Compute PC START
+         :end-before: # Manual: Pure SoA Compute PC END
+
+   .. tab-item:: Legacy (AoS + SoA) Layout
+
+      .. literalinclude:: ../../../tests/test_particleContainer.py
+         :language: python3
+         :dedent: 4
+         :start-after: # Manual: Legacy Compute PC START
+         :end-before: # Manual: Legacy Compute PC END
 
 For many small CPU and GPU examples on how to compute on particles, see the following test cases:
 
diff --git a/tests/test_particleContainer.py b/tests/test_particleContainer.py
index 341d202b..1b974ebc 100644
--- a/tests/test_particleContainer.py
+++ b/tests/test_particleContainer.py
@@ -43,6 +43,18 @@ def particle_container(Npart, std_geometry, distmap, boxarr, std_real_box):
     return pc
 
 
+@pytest.fixture(scope="function")
+def soa_particle_container(Npart, std_geometry, distmap, boxarr, std_real_box):
+    pc = amr.ParticleContainer_pureSoA_8_0_default(std_geometry, distmap, boxarr)
+    myt = amr.ParticleInitType_pureSoA_8_0()
+    myt.real_array_data = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
+    myt.int_array_data = []
+
+    iseed = 1
+    pc.InitRandom(Npart, iseed, myt, False, std_real_box)
+    return pc
+
+
 def test_particleInitType():
     myt = amr.ParticleInitType_1_1_2_1()
     print(myt.real_struct_data)
@@ -276,13 +288,58 @@ def test_per_cell(empty_particle_container, std_geometry, std_particle):
     assert ncells * std_particle.real_array_data[1] == sum_1
 
 
+def test_soa_pc_numpy(soa_particle_container, Npart):
+    """Used in docs/source/usage/compute.rst"""
+    pc = soa_particle_container
+
+    class Config:
+        have_gpu = False
+
+    # Manual: Pure SoA Compute PC START
+    # code-specific getter function, e.g.:
+    # pc = sim.get_particles()
+    # Config = sim.extension.Config
+
+    # iterate over every mesh-refinement levels (no MR: lev=0)
+    for lvl in range(pc.finest_level + 1):
+        # get every local chunk of particles
+        for pti in pc.iterator(pc, level=lvl):
+            # additional compile-time and runtime attributes in SoA format
+            soa = pti.soa().to_cupy() if Config.have_gpu else pti.soa().to_numpy()
+
+            # notes:
+            # Only the next lines are the "HOT LOOP" of the computation.
+            # For efficiency, use numpy array operation for speed.
+
+            # write to all particles in the chunk
+            # note: careful, if you change particle positions, you need to
+            #       redistribute particles before continuing the simulation step
+            print(soa.real)
+            soa.real[0][()] = 0.30  # x
+            soa.real[1][()] = 0.35  # y
+            soa.real[2][()] = 0.40  # z
+
+            # all other real attributes
+            for soa_real in soa.real[3:]:
+                soa_real[()] = 42.0
+
+            # all int attributes
+            for soa_int in soa.int:
+                soa_int[()] = 12
+    # Manual: Pure SoA Compute PC END
+
+
 def test_pc_numpy(particle_container, Npart):
     """Used in docs/source/usage/compute.rst"""
     pc = particle_container
 
-    # Manual: Compute PC START
+    class Config:
+        have_gpu = False
+
+    # Manual: Legacy Compute PC START
     # code-specific getter function, e.g.:
     # pc = sim.get_particles()
+    # Config = sim.extension.Config
 
     # iterate over every mesh-refinement levels (no MR: lev=0)
     for lvl in range(pc.finest_level + 1):
@@ -290,10 +347,14 @@ def test_pc_numpy(particle_container, Npart):
         for pti in pc.iterator(pc, level=lvl):
             # default layout: AoS with positions and cpuid
             # note: not part of the new PureSoA particle container layout
-            aos = pti.aos().to_numpy()
+            aos = (
+                pti.aos().to_numpy(copy=True)
+                if Config.have_gpu
+                else pti.aos().to_numpy()
+            )
 
             # additional compile-time and runtime attributes in SoA format
-            soa = pti.soa().to_numpy()
+            soa = pti.soa().to_cupy() if Config.have_gpu else pti.soa().to_numpy()
 
             # notes:
             # Only the next lines are the "HOT LOOP" of the computation.
@@ -313,7 +374,7 @@ def test_pc_numpy(particle_container, Npart):
 
             for soa_int in soa.int:
                 soa_int[()] = 12
-    # Manual: Compute PC END
+    # Manual: Legacy Compute PC END
 
 
 @pytest.mark.skipif(

From 58be049f0febe1ad89e7da5426b6b9b997429035 Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Thu, 1 Feb 2024 18:55:47 -0800
Subject: [PATCH 2/2] `ParticleInitData` for PureSoA

---
 src/Particle/ParticleContainer.H           | 15 ++++++++-------
 src/Particle/ParticleContainer_ImpactX.cpp |  1 -
 src/Particle/ParticleContainer_WarpX.cpp   |  3 ---
 tests/test_particleContainer.py            | 10 +++++-----
 4 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/Particle/ParticleContainer.H b/src/Particle/ParticleContainer.H
index 5dfcdc36..76e23a80 100644
--- a/src/Particle/ParticleContainer.H
+++ b/src/Particle/ParticleContainer.H
@@ -380,13 +380,17 @@ void make_ParticleContainer_and_Iterators (py::module &m, std::string allocstr)
         // }
     ;
 
+    py_pc
+        .def("InitRandom", py::overload_cast<Long, ULong, const ParticleInitData&, bool, RealBox>(&ParticleContainerType::InitRandom))
+    ;
+
     // TODO for pure SoA
     // depends on https://github.com/AMReX-Codes/amrex/pull/3280
     if constexpr (!T_ParticleType::is_soa_particle) {
         py_pc
-            .def("InitRandom", py::overload_cast<Long, ULong, const ParticleInitData&, bool, RealBox>(&ParticleContainerType::InitRandom)) // TODO pure SoA
-            .def("InitRandomPerBox", py::overload_cast<Long, ULong, const ParticleInitData&>(&ParticleContainerType::InitRandomPerBox)) // TODO pure SoA
-            .def("InitOnePerCell", &ParticleContainerType::InitOnePerCell);
+            .def("InitRandomPerBox", py::overload_cast<Long, ULong, const ParticleInitData&>(&ParticleContainerType::InitRandomPerBox))
+            .def("InitOnePerCell", &ParticleContainerType::InitOnePerCell)
+        ;
     }
 
     using iterator = amrex::ParIter_impl<ParticleType, T_NArrayReal, T_NArrayInt, Allocator>;
@@ -408,10 +412,7 @@ void make_ParticleContainer_and_Iterators (py::module &m, std::string allocstr)
 template <typename T_ParticleType, int T_NArrayReal=0, int T_NArrayInt=0>
 void make_ParticleContainer_and_Iterators (py::module &m)
 {
-    // TODO for pure SoA
-    // depends on https://github.com/AMReX-Codes/amrex/pull/3280
-    if constexpr (!T_ParticleType::is_soa_particle)
-        make_ParticleInitData<T_ParticleType, T_NArrayReal, T_NArrayInt>(m);
+    make_ParticleInitData<T_ParticleType, T_NArrayReal, T_NArrayInt>(m);
 
     // first, because used as copy target in methods in containers with other allocators
     make_ParticleContainer_and_Iterators<T_ParticleType, T_NArrayReal, T_NArrayInt,
diff --git a/src/Particle/ParticleContainer_ImpactX.cpp b/src/Particle/ParticleContainer_ImpactX.cpp
index 43862b10..0b48bcf5 100644
--- a/src/Particle/ParticleContainer_ImpactX.cpp
+++ b/src/Particle/ParticleContainer_ImpactX.cpp
@@ -14,6 +14,5 @@ void init_ParticleContainer_ImpactX(py::module& m) {
 
     // TODO: we might need to move all or most of the defines in here into a
     //       test/example submodule, so they do not collide with downstream projects
-    make_ParticleContainer_and_Iterators<Particle<0, 0>, 5, 0>(m);     // ImpactX 22.07 - 24.02
     make_ParticleContainer_and_Iterators<SoAParticle<8, 0>, 8, 0>(m);  // ImpactX 24.03+
 }
diff --git a/src/Particle/ParticleContainer_WarpX.cpp b/src/Particle/ParticleContainer_WarpX.cpp
index 36f75e45..b2c7bd3d 100644
--- a/src/Particle/ParticleContainer_WarpX.cpp
+++ b/src/Particle/ParticleContainer_WarpX.cpp
@@ -13,9 +13,6 @@ void init_ParticleContainer_WarpX(py::module& m) {
 
     // TODO: we might need to move all or most of the defines in here into a
     //       test/example submodule, so they do not collide with downstream projects
-    make_ParticleContainer_and_Iterators<Particle<0, 0>, 4, 0>(m);   // WarpX 22.07 - 24.02 1D-3D
-    //make_ParticleContainer_and_Iterators<Particle<0, 0>, 5, 0> (m);   // WarpX 22.07 - 24.02 RZ
-
 #if AMREX_SPACEDIM == 1
     make_ParticleContainer_and_Iterators<SoAParticle<5, 0>, 5, 0>(m);  // WarpX 24.03+ 1D
 #elif AMREX_SPACEDIM == 2
diff --git a/tests/test_particleContainer.py b/tests/test_particleContainer.py
index 1b974ebc..3742a7ce 100644
--- a/tests/test_particleContainer.py
+++ b/tests/test_particleContainer.py
@@ -300,19 +300,19 @@ class Config:
     # pc = sim.get_particles()
     # Config = sim.extension.Config
 
-    # iterate over every mesh-refinement levels (no MR: lev=0)
+    # iterate over all mesh-refinement levels (no MR: lev=0)
     for lvl in range(pc.finest_level + 1):
         # get every local chunk of particles
         for pti in pc.iterator(pc, level=lvl):
-            # additional compile-time and runtime attributes in SoA format
+            # compile-time and runtime attributes in SoA format
             soa = pti.soa().to_cupy() if Config.have_gpu else pti.soa().to_numpy()
 
             # notes:
             # Only the next lines are the "HOT LOOP" of the computation.
-            # For efficiency, use numpy array operation for speed.
+            # For speed, use array operations.
 
             # write to all particles in the chunk
-            # note: careful, if you change particle positions, you need to
+            # note: careful, if you change particle positions, you might need to
             #       redistribute particles before continuing the simulation step
             print(soa.real)
             soa.real[0][()] = 0.30  # x
@@ -341,7 +341,7 @@ class Config:
     # pc = sim.get_particles()
     # Config = sim.extension.Config
 
-    # iterate over every mesh-refinement levels (no MR: lev=0)
+    # iterate over all mesh-refinement levels (no MR: lev=0)
     for lvl in range(pc.finest_level + 1):
         # get every local chunk of particles
         for pti in pc.iterator(pc, level=lvl):