Fix SyncCurrent and SyncRho for multi-levels (ECP-WarpX#3777)

* Fix SyncCurrent and SyncRho for multi-levels Previously these two function did not work when max_level > 1. For example, when max_level = 2, the coarse patch data from level 2 were coarsened and added to the fine patch on level 1. However, the coarse patch on level 1 did not have any contribution from level 2 as it should have, because the coarsening was performed before the fine patch on level 1 received the level 2 contribution. The order has been fixed in this commit. * Improve inline comment Co-authored-by: Weiqun Zhang <[email protected]> * Improve inline comments * Reset CI benchmark * Add comments * Update Source/Parallelization/WarpXComm.cpp Co-authored-by: Axel Huebl <[email protected]> * Update Source/Parallelization/WarpXComm.cpp Co-authored-by: Axel Huebl <[email protected]> * Update Source/Parallelization/WarpXComm.cpp Co-authored-by: Axel Huebl <[email protected]> --------- Co-authored-by: Edoardo Zoni <[email protected]> Co-authored-by: Edoardo Zoni <[email protected]> Co-authored-by: Axel Huebl <[email protected]>
jlvay · Jun 9, 2023 · f6cda9a · f6cda9a
1 parent 8ec6b19
commit f6cda9a
Show file tree

Hide file tree

Showing 2 changed files with 213 additions and 54 deletions.
diff --git a/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_psatd.json b/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_psatd.json
@@ -1,40 +1,40 @@
 {
   "electrons": {
-    "particle_momentum_x": 4.238116934276727e-20,
+    "particle_momentum_x": 4.2387000553111037e-20,
     "particle_momentum_y": 0.0,
-    "particle_momentum_z": 4.238116918188906e-20,
-    "particle_position_x": 0.6553600150461454,
-    "particle_position_y": 0.6553600150453582,
+    "particle_momentum_z": 4.238700039223283e-20,
+    "particle_position_x": 0.6553599659707846,
+    "particle_position_y": 0.6553599659699976,
     "particle_weight": 3200000000000000.5
   },
   "lev=0": {
     "Bx": 0.0,
-    "By": 48.481624783762086,
+    "By": 46.86760020196344,
     "Bz": 0.0,
-    "Ex": 7589841440058.995,
+    "Ex": 7589330186947.288,
     "Ey": 0.0,
-    "Ez": 7589841451955.45,
-    "jx": 7282092285786011.0,
+    "Ez": 7589330198843.742,
+    "jx": 7283102663077210.0,
     "jy": 0.0,
-    "jz": 7282092252988784.0
+    "jz": 7283102630279985.0
   },
   "lev=1": {
     "Bx": 0.0,
-    "By": 376.07845874193026,
+    "By": 369.2209081024592,
     "Bz": 0.0,
-    "Ex": 7595314624325.873,
+    "Ex": 7593801203430.788,
     "Ey": 0.0,
-    "Ez": 7595314408918.084,
-    "jx": 6594445628310054.0,
+    "Ez": 7593800988021.711,
+    "jx": 6597471633118175.0,
     "jy": 0.0,
-    "jz": 6594445442498818.0
+    "jz": 6597471447306984.0
   },
   "positrons": {
-    "particle_momentum_x": 4.2380886359427404e-20,
+    "particle_momentum_x": 4.238670783273887e-20,
     "particle_momentum_y": 0.0,
-    "particle_momentum_z": 4.2380886149634417e-20,
-    "particle_position_x": 0.6553596427220252,
-    "particle_position_y": 0.6553596427227213,
+    "particle_momentum_z": 4.238670762294589e-20,
+    "particle_position_x": 0.6553596917214832,
+    "particle_position_y": 0.6553596917221793,
     "particle_weight": 3200000000000000.5
   }
 }
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
@@ -863,6 +863,8 @@ WarpX::SyncCurrent (
     // If warpx.do_current_centering = 1, center currents from nodal grid to staggered grid
     if (WarpX::do_current_centering)
     {
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(finest_level <= 1,
+                                         "warpx.do_current_centering=1 not supported with more than one fine levels");
         for (int lev = 0; lev <= finest_level; lev++)
         {
             WarpX::UpdateCurrentNodalToStag(*J_fp[lev][0], *current_fp_nodal[lev][0]);
@@ -871,33 +873,144 @@ WarpX::SyncCurrent (
         }
     }
 
-    // Restrict fine patch current onto the coarse patch, before
-    // summing the guard cells of the fine patch
-    for (int lev = 1; lev <= finest_level; ++lev)
+    // If there is a single level, we apply the filter on the fp data and
+    // then call SumBoundary that adds data from different boxes. This needs
+    // to be done because a particle near a box boundary may deposit current
+    // at a given (i,j,k) that is on the edge or in the ghost region of that
+    // box while at the same time that (i,j,k) is also in the valid region
+    // of another box. After SumBoundary, the result is as if there is only
+    // a single box on a single process. Also note we need to call
+    // SumBoundary even if there is only a single process, because a process
+    // may have multiple boxes. Furthermore, even if there is only a single
+    // box on a single process, SumBoundary should also be called if there
+    // are periodic boundaries. So we always call SumBounary even if it
+    // might be a no-op in some cases, because the function does not perform
+    // any communication if not necessary.
+    //
+    // When there are multiple levels, we need to send data from fine levels
+    // to coarse levels. In the implementation below, we loop over levels
+    // from the finest to the coarsest. On each level, filtering and
+    // SumBoundary are done as the last two things. So the communication
+    // data on the sender side are always unfiltered and unsummed. The
+    // receivers are responsible for filtering that is dependent on the grid
+    // resolution. On the finest level, we coarsen the unsummed fp data onto
+    // cp grids, which are the coarsened version of the fp grids with the
+    // same DistributionMapping. Then on the level below, We use ParallelAdd
+    // to add the finer level's cp data onto the current level's fp
+    // data. After that, we apply filter and SumBoundary to the finer
+    // level's cp MultiFab. At this time, the finer level's fp and cp data
+    // have all been properly filtered and summed. For the current level, if
+    // there are levels below this, we need to process this level's cp data
+    // just like we have done for the finer level. The iteration continues
+    // until we reache level 0. There are however two additional
+    // complications.
+    //
+    // The first complication is that simply calling ParallelAdd to add the
+    // finer level's cp data to the current level's fp data does not
+    // work. Suppose there are multiple boxes on the current level (or just
+    // a single box with periodic bounaries). A given (i,j,k) can be present
+    // in more than one box for nodal data in AMReX.
+    // At the time of calling ParallelAdd, the current
+    // level's fp data have not been summed. Because of how ParallelAdd
+    // works, all boxes with that (i,j,k) will receive the data. So there is
+    // a double counting issue of those data points existing on multiple boxes. Note
+    // that at this time, the current level's fp data have not been summed
+    // and we will call SumBoundary on the fp data. That would overcount the
+    // finer level's cp data. So we fix this issue by creating a temporary
+    // MultiFab to receive the finer level's cp data. We also create a mask
+    // that can mark only one instance of the data as the owner if there are
+    // overlapping points among boxes. Using the mask, we can add the
+    // temporary MultiFab's data to the fp MultiFab only if the source owns
+    // the data.
+    //
+    // The other complication is there might be a current buffer depending a
+    // runtime parameter. The current buffer data, if they exist, need to be
+    // communicated to the coarser level's fp MultiFab just like the cp
+    // data. A simple approach would be to call another ParallelAdd in
+    // additional the ParallelAdd for the cp data. But we like to minimize
+    // parallel communication. So we add the cp data to the current buffer
+    // MultiFab and use the latter as the source of ParallelAdd
+    // communication to the coarser level. Note that we can do it this way
+    // but not the other way around of adding the current buffer data to the
+    // cp MultiFab because we still need to use the original cp data whereas
+    // the buffer data are no longer needed once they have been sent to the
+    // coarser level. So there are two cases. If there is no current buffer,
+    // the cp MultiFab is the source of communication. If there is a current
+    // buffer, the buffer MultiFab is the source instead. In the
+    // implementation below, we use an alias MultiFab to manage this.
+
+    std::unique_ptr<MultiFab> mf_comm; // for communication between levels
+    for (int idim = 0; idim < 3; ++idim)
     {
-        J_cp[lev][0]->setVal(0.0);
-        J_cp[lev][1]->setVal(0.0);
-        J_cp[lev][2]->setVal(0.0);
+        for (int lev = finest_level; lev >= 0; --lev)
+        {
+            const int ncomp = J_fp[lev][idim]->nComp();
+            auto const& period = Geom(lev).periodicity();
 
-        const IntVect& refinement_ratio = refRatio(lev-1);
+            if (lev < finest_level)
+            {
+                // On a coarse level, the data in mf_comm comes from the
+                // coarse patch of the fine level. They are unfiltered and uncommunicated.
+                // We need to add it to the fine patch of the current level.
+                MultiFab fine_lev_cp(J_fp[lev][idim]->boxArray(),
+                                     J_fp[lev][idim]->DistributionMap(),
+                                     ncomp, 0);
+                fine_lev_cp.setVal(0.0);
+                fine_lev_cp.ParallelAdd(*mf_comm, 0, 0, ncomp, mf_comm->nGrowVect(),
+                                        IntVect(0), period);
+                // We now need to create a mask to fix the double counting.
+                auto owner_mask = amrex::OwnerMask(fine_lev_cp, period);
+                auto const& mma = owner_mask->const_arrays();
+                auto const& sma = fine_lev_cp.const_arrays();
+                auto const& dma = J_fp[lev][idim]->arrays();
+                amrex::ParallelFor(fine_lev_cp, IntVect(0), ncomp,
+                [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k, int n)
+                {
+                    if (mma[bno](i,j,k) && sma[bno](i,j,k,n) != 0.0_rt) {
+                        dma[bno](i,j,k,n) += sma[bno](i,j,k,n);
+                    }
+                });
+                // Now it's safe to apply filter and sumboundary on J_cp
+                if (use_filter)
+                {
+                    ApplyFilterJ(J_cp, lev+1, idim);
+                }
+                SumBoundaryJ(J_cp, lev+1, idim, period);
+            }
 
-        std::array<const MultiFab*,3> fine { J_fp[lev][0].get(),
-                                             J_fp[lev][1].get(),
-                                             J_fp[lev][2].get() };
-        std::array<      MultiFab*,3> crse { J_cp[lev][0].get(),
-                                             J_cp[lev][1].get(),
-                                             J_cp[lev][2].get() };
-        ablastr::coarsen::average::Coarsen(*crse[0], *fine[0], refinement_ratio );
-        ablastr::coarsen::average::Coarsen(*crse[1], *fine[1], refinement_ratio );
-        ablastr::coarsen::average::Coarsen(*crse[2], *fine[2], refinement_ratio );
-    }
+            if (lev > 0)
+            {
+                // On a fine level, we need to coarsen the current onto the
+                // coarse level. This needs to be done before filtering because
+                // filtering depends on the level. This is also done before any
+                // same-level communication because it's easier this way to
+                // avoid double counting.
+                J_cp[lev][idim]->setVal(0.0);
+                ablastr::coarsen::average::Coarsen(*J_cp[lev][idim],
+                                                   *J_fp[lev][idim],
+                                                   refRatio(lev-1));
+                if (current_buf[lev][idim])
+                {
+                    IntVect const& ng = J_cp[lev][idim]->nGrowVect();
+                    AMREX_ASSERT(ng.allLE(current_buf[lev][idim]->nGrowVect()));
+                    MultiFab::Add(*current_buf[lev][idim], *J_cp[lev][idim],
+                                  0, 0, ncomp, ng);
+                    mf_comm = std::make_unique<MultiFab>
+                        (*current_buf[lev][idim], amrex::make_alias, 0, ncomp);
+                }
+                else
+                {
+                    mf_comm = std::make_unique<MultiFab>
+                        (*J_cp[lev][idim], amrex::make_alias, 0, ncomp);
+                }
+            }
 
-    // For each level
-    // - apply filter to the coarse patch/buffer of `lev+1` and fine patch of `lev` (same resolution)
-    // - add the coarse patch/buffer of `lev+1` into the fine patch of `lev`
-    // - sum guard cells of the coarse patch of `lev+1` and fine patch of `lev`
-    for (int lev=0; lev <= finest_level; ++lev) {
-        AddCurrentFromFineLevelandSumBoundary(J_fp, J_cp, lev);
+            if (use_filter)
+            {
+                ApplyFilterJ(J_fp, lev, idim);
+            }
+            SumBoundaryJ(J_fp, lev, idim, period);
+        }
     }
 }
 
@@ -916,21 +1029,67 @@ WarpX::SyncRho (
     if (!charge_fp[0]) return;
     const int ncomp = charge_fp[0]->nComp();
 
-    // Restrict fine patch onto the coarse patch,
-    // before summing the guard cells of the fine patch
-    for (int lev = 1; lev <= finest_level; ++lev)
+    // See comments in WarpX::SyncCurrent for an explanation of the algorithm.
+
+    std::unique_ptr<MultiFab> mf_comm; // for communication between levels
+    for (int lev = finest_level; lev >= 0; --lev)
     {
-        charge_cp[lev]->setVal(0.0);
-        const IntVect& refinement_ratio = refRatio(lev-1);
-        ablastr::coarsen::average::Coarsen(*charge_cp[lev], *charge_fp[lev], refinement_ratio );
-    }
+        if (lev < finest_level)
+        {
+            auto const& period = Geom(lev).periodicity();
+
+            // On a coarse level, the data in mf_comm comes from the
+            // coarse patch of the fine level. They are unfiltered and uncommunicated.
+            // We need to add it to the fine patch of the current level.
+            MultiFab fine_lev_cp(charge_fp[lev]->boxArray(),
+                                 charge_fp[lev]->DistributionMap(),
+                                 ncomp, 0);
+            fine_lev_cp.setVal(0.0);
+            fine_lev_cp.ParallelAdd(*mf_comm, 0, 0, ncomp, mf_comm->nGrowVect(),
+                                    IntVect(0), period);
+            // We now need to create a mask to fix the double counting.
+            auto owner_mask = amrex::OwnerMask(fine_lev_cp, period);
+            auto const& mma = owner_mask->const_arrays();
+            auto const& sma = fine_lev_cp.const_arrays();
+            auto const& dma = charge_fp[lev]->arrays();
+            amrex::ParallelFor(fine_lev_cp, IntVect(0), ncomp,
+            [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k, int n)
+            {
+                if (mma[bno](i,j,k) && sma[bno](i,j,k,n) != 0.0_rt) {
+                    dma[bno](i,j,k,n) += sma[bno](i,j,k,n);
+                }
+            });
+            // Now it's safe to apply filter and sumboundary on charge_cp
+            ApplyFilterandSumBoundaryRho(lev+1, lev, *charge_cp[lev+1], 0, ncomp);
+        }
+
+        if (lev > 0)
+        {
+            // On a fine level, we need to coarsen the data onto the coarse
+            // level. This needs to be done before filtering because
+            // filtering depends on the level. This is also done before any
+            // same-level communication because it's easier this way to
+            // avoid double counting.
+            charge_cp[lev]->setVal(0.0);
+            ablastr::coarsen::average::Coarsen(*charge_cp[lev],
+                                               *charge_fp[lev],
+                                               refRatio(lev-1));
+            if (charge_buf[lev])
+            {
+                IntVect const& ng = charge_cp[lev]->nGrowVect();
+                AMREX_ASSERT(ng.allLE(charge_buf[lev]->nGrowVect()));
+                MultiFab::Add(*charge_buf[lev], *charge_cp[lev], 0, 0, ncomp, ng);
+                mf_comm = std::make_unique<MultiFab>
+                    (*charge_buf[lev], amrex::make_alias, 0, ncomp);
+            }
+            else
+            {
+                mf_comm = std::make_unique<MultiFab>
+                    (*charge_cp[lev], amrex::make_alias, 0, ncomp);
+            }
+        }
 
-    // For each level
-    // - apply filter to the coarse patch/buffer of `lev+1` and fine patch of `lev` (same resolution)
-    // - add the coarse patch/buffer of `lev+1` into the fine patch of `lev`
-    // - sum guard cells of the coarse patch of `lev+1` and fine patch of `lev`
-    for (int lev=0; lev <= finest_level; ++lev) {
-        AddRhoFromFineLevelandSumBoundary(charge_fp, charge_cp, lev, 0, ncomp);
+        ApplyFilterandSumBoundaryRho(lev, lev, *charge_fp[lev], 0, ncomp);
     }
 }