diff --git a/.github/workflows/bittree.yml b/.github/workflows/bittree.yml
index c12fbedc58f..687bf07c00d 100644
--- a/.github/workflows/bittree.yml
+++ b/.github/workflows/bittree.yml
@@ -52,7 +52,7 @@ jobs:
         mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs_bittree amr.plot_int=1000
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-15 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -104,7 +104,7 @@ jobs:
         mpiexec -n 2 ./main3d.gnu.TEST.MPI.ex inputs_bittree max_step=10
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-15 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml
index ec469bb5de6..a343832b510 100644
--- a/.github/workflows/clang.yml
+++ b/.github/workflows/clang.yml
@@ -59,7 +59,7 @@ jobs:
         make test_install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -117,7 +117,7 @@ jobs:
         make -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -159,7 +159,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/dependencies/dependencies_codeplay.sh b/.github/workflows/dependencies/dependencies_codeplay.sh
index dfd2e5a03f7..bf692d3c2c7 100755
--- a/.github/workflows/dependencies/dependencies_codeplay.sh
+++ b/.github/workflows/dependencies/dependencies_codeplay.sh
@@ -6,10 +6,24 @@
 
 set -eu -o pipefail
 
-curl -o oneapi_nvidia.sh -L "https://developer.codeplay.com/api/v1/products/download?product=oneapi&variant=nvidia&filters[]=linux&aat=$1"
-chmod +x oneapi_nvidia.sh
-sudo ./oneapi_nvidia.sh --yes
+# `man apt.conf`:
+#   Number of retries to perform. If this is non-zero APT will retry
+#   failed files the given number of times.
+echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries
 
-curl -o oneapi_amd.sh -L "https://developer.codeplay.com/api/v1/products/download?product=oneapi&variant=amd&filters[]=linux&aat=$1"
-chmod +x oneapi_amd.sh
-sudo ./oneapi_amd.sh --yes
+# https://developer.codeplay.com/apt/index.html
+sudo wget -qO - https://developer.codeplay.com/apt/public.key | gpg --dearmor | sudo tee /usr/share/keyrings/codeplay-keyring.gpg > /dev/null
+echo "deb [signed-by=/usr/share/keyrings/codeplay-keyring.gpg] https://developer.codeplay.com/apt all main" | sudo tee /etc/apt/sources.list.d/codeplay.list
+
+sudo apt-get update
+
+# Install the package named by "$1" (quoted: one word, no globbing); retry up to five times to ride out transient connection failures.
+status=1
+for itry in {1..5}
+do
+    sudo apt-get install -y --no-install-recommends \
+        "$1" \
+        && { sudo apt-get clean; status=0; break; }  \
+        || { sleep 10; }
+done
+if [[ ${status} -ne 0 ]]; then exit 1; fi
diff --git a/.github/workflows/dependencies/dependencies_hip.sh b/.github/workflows/dependencies/dependencies_hip.sh
index 852342e4ac1..4673a7caed5 100755
--- a/.github/workflows/dependencies/dependencies_hip.sh
+++ b/.github/workflows/dependencies/dependencies_hip.sh
@@ -45,6 +45,9 @@ sudo apt-get install -y --no-install-recommends \
     rocrand-dev     \
     rocprim-dev
 
+# hiprand-dev is a new package that does not exist in old versions
+sudo apt-get install -y --no-install-recommends hiprand-dev || true
+
 # activate
 #
 source /etc/profile.d/rocm.sh
diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml
index afc2044bdd7..aca7a9c8723 100644
--- a/.github/workflows/gcc.yml
+++ b/.github/workflows/gcc.yml
@@ -55,7 +55,7 @@ jobs:
         make test_install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -107,7 +107,7 @@ jobs:
         cmake --build build -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -155,7 +155,7 @@ jobs:
         cmake --build build -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -204,7 +204,7 @@ jobs:
         cmake --build build -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -263,7 +263,7 @@ jobs:
 
         # Let's not use clang-tidy for this test because it wants to use C++20.
         # ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        # make -j2 -f clang-tidy-ccache-misses.mak \
+        # make -j2 -k -f clang-tidy-ccache-misses.mak \
         #     CLANG_TIDY=clang-tidy-12 \
         #     CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -320,7 +320,7 @@ jobs:
         make -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -384,7 +384,7 @@ jobs:
         make -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -426,7 +426,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -466,7 +466,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-15 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -506,7 +506,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -546,7 +546,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -585,7 +585,7 @@ jobs:
             CCACHE=ccache
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -630,7 +630,7 @@ jobs:
         make -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/hypre.yml b/.github/workflows/hypre.yml
index 50423f3942a..871224fc79c 100644
--- a/.github/workflows/hypre.yml
+++ b/.github/workflows/hypre.yml
@@ -100,7 +100,7 @@ jobs:
         mpiexec -n 2 ./main3d.gnu.MPI.ex inputs.hypre
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -148,7 +148,7 @@ jobs:
         mpiexec -n 2 ./main2d.gnu.MPI.ex inputs.2d
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index d86035d916e..aa1036ddf94 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -44,7 +44,8 @@ jobs:
             -DCMAKE_C_COMPILER=$(which icx)                \
             -DCMAKE_CXX_COMPILER=$(which icpx)             \
             -DCMAKE_Fortran_COMPILER=$(which ifx)          \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache           \
+            -DAMReX_PARALLEL_LINK_JOBS=2
         cmake --build build --parallel 2
 
         ccache -s
@@ -86,7 +87,8 @@ jobs:
             -DAMReX_GPU_BACKEND=SYCL                       \
             -DCMAKE_C_COMPILER=$(which icx)                \
             -DCMAKE_CXX_COMPILER=$(which icpx)             \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache           \
+            -DAMReX_PARALLEL_LINK_JOBS=2
         cmake --build build --parallel 2
 
         ccache -s
@@ -95,19 +97,16 @@ jobs:
   tests-oneapi-sycl-eb-nvidia:
     name: oneAPI SYCL for Nvidia GPUs [tests w/ EB]
     runs-on: ubuntu-latest
-    env:
-      CODEPLAYTOKEN: ${{ secrets.CODEPLAYTOKEN }}
     steps:
     - uses: actions/checkout@v3
     - name: Dependencies
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       run: |
-        .github/workflows/dependencies/dependencies_nvcc.sh
+        .github/workflows/dependencies/ubuntu_free_disk_space.sh
         .github/workflows/dependencies/dependencies_dpcpp.sh
-        .github/workflows/dependencies/dependencies_codeplay.sh ${{ env.CODEPLAYTOKEN }}
+        .github/workflows/dependencies/dependencies_nvcc.sh 12.0
+        .github/workflows/dependencies/dependencies_codeplay.sh oneapi-nvidia-12.0
         .github/workflows/dependencies/dependencies_ccache.sh
     - name: Set Up Cache
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       uses: actions/cache@v3
       with:
         path: ~/.cache/ccache
@@ -115,9 +114,7 @@ jobs:
         restore-keys: |
              ccache-${{ github.workflow }}-${{ github.job }}-git-
     - name: Build & Install
-      if: ${{ env.CODEPLAYTOKEN != '' }}
-      # clang currently supports CUDA up to version 11.5 and a warning is issued with newer versions
-      env: {CXXFLAGS: "-fsycl -fsycl-targets=nvptx64-nvidia-cuda -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-unknown-cuda-version"}
+      env: {CXXFLAGS: "-fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"}
       run: |
         export CCACHE_COMPRESS=1
         export CCACHE_COMPRESSLEVEL=10
@@ -136,7 +133,8 @@ jobs:
             -DAMReX_GPU_BACKEND=SYCL                       \
             -DCMAKE_C_COMPILER=$(which icx)                \
             -DCMAKE_CXX_COMPILER=$(which clang++)          \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache           \
+            -DAMReX_PARALLEL_LINK_JOBS=2
         cmake --build build --parallel 2
 
         ccache -s
@@ -145,20 +143,16 @@ jobs:
   no-tests-oneapi-sycl-amd:
     name: oneAPI SYCL for AMD GPUs
     runs-on: ubuntu-20.04
-    env:
-      CODEPLAYTOKEN: ${{ secrets.CODEPLAYTOKEN }}
     steps:
     - uses: actions/checkout@v3
     - name: Dependencies
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       run: |
         .github/workflows/dependencies/ubuntu_free_disk_space.sh
-        .github/workflows/dependencies/dependencies_hip.sh 5.4.6
         .github/workflows/dependencies/dependencies_dpcpp.sh
-        .github/workflows/dependencies/dependencies_codeplay.sh ${{ env.CODEPLAYTOKEN }}
+        .github/workflows/dependencies/dependencies_hip.sh 5.4.3
+        .github/workflows/dependencies/dependencies_codeplay.sh oneapi-amd-5.4.3
         .github/workflows/dependencies/dependencies_ccache.sh
     - name: Set Up Cache
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       uses: actions/cache@v3
       with:
         path: ~/.cache/ccache
@@ -166,7 +160,6 @@ jobs:
         restore-keys: |
              ccache-${{ github.workflow }}-${{ github.job }}-git-
     - name: Build & Install
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       env: {CXXFLAGS: "-fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx90a -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"}
       run: |
         export CCACHE_COMPRESS=1
@@ -186,7 +179,8 @@ jobs:
             -DAMReX_SYCL_SUB_GROUP_SIZE=64                 \
             -DCMAKE_C_COMPILER=$(which icx)                \
             -DCMAKE_CXX_COMPILER=$(which clang++)          \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache           \
+            -DAMReX_PARALLEL_LINK_JOBS=2
         cmake --build build --parallel 2
 
         ccache -s
diff --git a/.github/workflows/petsc.yml b/.github/workflows/petsc.yml
index 6d0b92b1343..eaddf1c2489 100644
--- a/.github/workflows/petsc.yml
+++ b/.github/workflows/petsc.yml
@@ -50,7 +50,7 @@ jobs:
         mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs.rt.2d.petsc
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml
index 080a17fd984..d907b485261 100644
--- a/.github/workflows/smoke.yml
+++ b/.github/workflows/smoke.yml
@@ -47,7 +47,7 @@ jobs:
         make test_install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-15 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/sundials.yml b/.github/workflows/sundials.yml
index 12dfd42c159..a890e10fad3 100644
--- a/.github/workflows/sundials.yml
+++ b/.github/workflows/sundials.yml
@@ -60,7 +60,7 @@ jobs:
         cmake --build build -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/CHANGES b/CHANGES
index 82b28a03e87..cad5363b99e 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,53 @@
+# 24.01
+
+  -- MLMG: Use free functions instead of MF member functions (#3681)
+
+  -- Add a few free functions for MLMG (#3680)
+
+  -- Eliminating Matrix operations in MLMG CG bottom solver if initial vector is zero (#3668)
+
+  -- Add a for loop that is unrolled at compile time (#3674)
+
+  -- Add PTD version of getParticleCell (#3675)
+
+  -- Improve ParIter docs (#3676)
+
+  -- Fix CI for ROCm 6.0 (#3673)
+
+  -- PureSoA IdCpu fixes (#3671)
+
+  -- CMake: AMReX_PARALLEL_LINK_JOBS (#3628)
+
+  -- Clang-Tidy in CI: Keep Going after Errors (#3667)
+
+  -- Delete empty lines below comments on classes and functions (#3669)
+
+  -- Documentation for Profiling: Hot Spots and Load Balance (#3622)
+
+  -- Fix warnings in SortParticlesForDeposition (#3664)
+
+  -- Fix Resize Issue of Fab with the Async Arena (#3663)
+
+  -- Fix SuperParticle `push_back` (#3661)
+
+  -- Pure SoA Particle: Separate Array for IdCPU (#3585)
+
+  -- Limit the scope of gpu_rand_generator (#3659)
+
+  -- Fix a typo in doxygen for NonLocalBC::FillBoundary (#3658)
+
+  -- GNU Make: Fix name collision for aurora (#3656)
+
+  -- two separate fixes -- particle_compare and ref_ratio=1 (#3655)
+
+  -- Clarify documentation on setEBDirichlet() and fix link to AMReX-Hydro (#3652)
+
+  -- Robustify the Cache Cleanup Scripts (#3650)
+
+  -- Disable CodeQL scheduled jobs on forks (#3649)
+
+  -- Work around compiler bug in nvcc 12.2 by using functor instead of lambda (#3653)
+
 # 23.12
 
   -- solve_cg: avoid use of MF `z` (#3637)
diff --git a/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst b/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst
index 8726f51a2ba..cdd774488e4 100644
--- a/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst
+++ b/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst
@@ -93,6 +93,47 @@ it is also recommended to wrap any ``BL_PROFILE_TINY_FLUSH();`` calls in
 informative ``amrex::Print()`` lines to ensure accurate identification of each
 set of timers.
 
+Hot Spots and Load Balance
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The output of TinyProfiler can help us to identify hot spots. For example,
+the following output shows the top three hot spots of a linear solver test
+running on 4 MPI processes.
+
+.. highlight:: console
+
+::
+
+    --------------------------------------------------------------------------------------------
+    Name                                         NCalls  Excl. Min  Excl. Avg  Excl. Max   Max %
+    --------------------------------------------------------------------------------------------
+    MLPoisson::Fsmooth()                            560     0.4775     0.4793     0.4815  34.97%
+    MLPoisson::Fapply()                             114     0.1103      0.113     0.1167   8.48%
+    FabArray::Xpay()                                109        0.1     0.1013     0.1038   7.54%
+
+In this test, there are 16 boxes evenly distributed among 4 MPI processes. The
+output above shows that the load is perfectly balanced. However, if the load
+is not balanced, the results can be very different and sometimes
+misleading. For example, if we put 2, 2, 6 and 6 boxes on processes 0, 1, 2
+and 3, respectively, the top three hot spots now include two MPI
+communication functions, ``FillBoundary`` and ``ParallelCopy``.
+
+.. highlight:: console
+
+::
+
+    --------------------------------------------------------------------------------------------
+    Name                                         NCalls  Excl. Min  Excl. Avg  Excl. Max   Max %
+    --------------------------------------------------------------------------------------------
+    FillBoundary_finish()                           607    0.01568     0.3367     0.6574  41.97%
+    MLPoisson::Fsmooth()                            560     0.2133     0.4047     0.5973  38.13%
+    FabArray::ParallelCopy_finish()                 231   0.002977    0.09748     0.1895  12.10%
+
+The reason that the MPI communication appears slow is that the lightly
+loaded processes have to wait for messages sent by the heavily loaded
+processes. See also :ref:`sec:profopts` for a diagnostic option that may
+provide more insight on the load imbalance.
+
 .. _sec:full:profiling:
 
 Full Profiling
diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst
index aff060e9166..08297cb3e2a 100644
--- a/Docs/sphinx_documentation/source/GPU.rst
+++ b/Docs/sphinx_documentation/source/GPU.rst
@@ -217,7 +217,7 @@ variables to configure the build
    +------------------------------+-------------------------------------------------+-------------+-----------------+
    | SYCL_SUB_GROUP_SIZE          | Specify subgroup size                           | 32          | 64, 32, 16      |
    +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | SYCL_MAX_PARALLEL_LINK_JOBS  | Number of parallel jobs in device link          | 1           | 1, 2, 3, etc.   |
+   | SYCL_PARALLEL_LINK_JOBS      | Number of parallel jobs in device link          | 1           | 1, 2, 3, etc.   |
    +------------------------------+-------------------------------------------------+-------------+-----------------+
 .. raw:: latex
 
@@ -428,22 +428,24 @@ Below is an example configuration for SYCL:
 
 .. table:: AMReX SYCL-specific build options
 
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | Variable Name                | Description                                     | Default     | Possible values |
-   +==============================+=================================================+=============+=================+
-   | AMReX_SYCL_AOT               | Enable SYCL ahead-of-time compilation           | NO          | YES, NO         |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMReX_SYCL_AOT_GRF_MODE      | Specify AOT register file mode                  | Default     | Default, Large, |
-   |                              |                                                 |             | AutoLarge       |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMREX_INTEL_ARCH             | Specify target if AOT is enabled                | None        | pvc, etc.       |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMReX_SYCL_SPLIT_KERNEL      | Enable SYCL kernel splitting                    | YES         | YES, NO         |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMReX_SYCL_ONEDPL            | Enable SYCL's oneDPL algorithms                 | NO          | YES, NO         |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMReX_SYCL_SUB_GROUP_SIZE    | Specify subgroup size                           | 32          | 64, 32, 16      |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | Variable Name                 | Description                                  | Default     | Possible values  |
+   +===============================+==============================================+=============+==================+
+   | AMReX_SYCL_AOT                | Enable SYCL ahead-of-time compilation        | NO          | YES, NO          |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_SYCL_AOT_GRF_MODE       | Specify AOT register file mode               | Default     | Default, Large,  |
+   |                               |                                              |             | AutoLarge        |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMREX_INTEL_ARCH              | Specify target if AOT is enabled             | None        | pvc, etc.        |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_SYCL_SPLIT_KERNEL       | Enable SYCL kernel splitting                 | YES         | YES, NO          |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_SYCL_ONEDPL             | Enable SYCL's oneDPL algorithms              | NO          | YES, NO          |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_SYCL_SUB_GROUP_SIZE     | Specify subgroup size                        | 32          | 64, 32, 16       |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_PARALLEL_LINK_JOBS      | Specify number of parallel link jobs         | 1           | positive integer |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
 .. raw:: latex
 
    \end{center}
diff --git a/Docs/sphinx_documentation/source/Particle.rst b/Docs/sphinx_documentation/source/Particle.rst
index be8292c772b..e3a28591a72 100644
--- a/Docs/sphinx_documentation/source/Particle.rst
+++ b/Docs/sphinx_documentation/source/Particle.rst
@@ -86,7 +86,8 @@ tracked as the particle positions change. To do this, we provide the
 
 ::
 
-      ParticleContainer<3, 2, 4, 4> mypc;
+      using MyParticleContainer = ParticleContainer<3, 2, 4, 4>;
+      MyParticleContainer mypc;
 
 Like the :cpp:`Particle` class itself, the :cpp:`ParticleContainer`
 class is templated. The first two template parameters have the same meaning as
@@ -375,8 +376,8 @@ example, to iterate over all the AoS data:
 ::
 
 
-    using MyParIter = ConstParIter<2*BL_SPACEDIM>;
-    for (MyParIter pti(pc, lev); pti.isValid(); ++pti) {
+    using MyParConstIter = MyParticleContainer::ParConstIterType;
+    for (MyParConstIter pti(pc, lev); pti.isValid(); ++pti) {
         const auto& particles = pti.GetArrayOfStructs();
         for (const auto& p : particles) {
             // do stuff with p...
@@ -392,7 +393,7 @@ skipped. You can also access the SoA data using the :math:`ParIter` as follows:
 ::
 
 
-    using MyParIter = ParIter<0, 0, 2, 2>;
+    using MyParIter = MyParticleContainer::ParIterType;
     for (MyParIter pti(pc, lev); pti.isValid(); ++pti) {
         auto& particle_attributes = pti.GetStructOfArrays();
         RealVector& real_comp0 = particle_attributes.GetRealData(0);
diff --git a/Src/Amr/AMReX_Amr.H b/Src/Amr/AMReX_Amr.H
index a7173fd105a..bb18ec9d160 100644
--- a/Src/Amr/AMReX_Amr.H
+++ b/Src/Amr/AMReX_Amr.H
@@ -30,7 +30,6 @@ class AmrInSituBridge;
 * not belong on a single level, like establishing and updating the hierarchy
 * of levels, global timestepping, and managing the different AmrLevels
 */
-
 class Amr
     : public AmrCore
 {
diff --git a/Src/Amr/AMReX_AmrLevel.H b/Src/Amr/AMReX_AmrLevel.H
index d4ac6c7c70d..8abb00b5475 100644
--- a/Src/Amr/AMReX_AmrLevel.H
+++ b/Src/Amr/AMReX_AmrLevel.H
@@ -34,7 +34,6 @@ class TagBoxArray;
 * AmrLevel functions both as a container for state data on a level
 * and also manages the advancement of data in time.
 */
-
 class AmrLevel
 {
     friend class Amr;
diff --git a/Src/Amr/AMReX_Derive.H b/Src/Amr/AMReX_Derive.H
index e1a7310a7b2..1e0cceb7894 100644
--- a/Src/Amr/AMReX_Derive.H
+++ b/Src/Amr/AMReX_Derive.H
@@ -100,7 +100,6 @@ class DescriptorList;
 * from the state data contained in AmrLevel and its derivatives. Some
 * examples might be kinetic energy, vorticity, concentration gradients ...
 */
-
 class DeriveRec
 {
    friend class DeriveList;
@@ -339,7 +338,6 @@ private:
 *
 * DeriveList manages and provides access to the list of DeriveRecs.
 */
-
 class DeriveList
 {
 public:
diff --git a/Src/Amr/AMReX_LevelBld.H b/Src/Amr/AMReX_LevelBld.H
index 8b421265bfc..bb79184ca5e 100644
--- a/Src/Amr/AMReX_LevelBld.H
+++ b/Src/Amr/AMReX_LevelBld.H
@@ -18,7 +18,6 @@ namespace amrex {
 * Abstract base class specifying an interface for building problem-specific
 * AmrLevels.
 */
-
 class LevelBld
 {
 public:
diff --git a/Src/Amr/AMReX_StateData.H b/Src/Amr/AMReX_StateData.H
index 251e6482a45..e6edb486c41 100644
--- a/Src/Amr/AMReX_StateData.H
+++ b/Src/Amr/AMReX_StateData.H
@@ -29,7 +29,6 @@ class StateDataPhysBCFunct;
 *
 * StateData holds state data on a level for the current and previous time step.
 */
-
 class StateData
 {
     friend class StateDataPhysBCFunct;
diff --git a/Src/Amr/AMReX_StateDescriptor.H b/Src/Amr/AMReX_StateDescriptor.H
index 6cd6c92cdd3..2830b955705 100644
--- a/Src/Amr/AMReX_StateDescriptor.H
+++ b/Src/Amr/AMReX_StateDescriptor.H
@@ -29,7 +29,6 @@ namespace amrex {
 /**
 * \brief Attributes of StateData.
 */
-
 class StateDescriptor
 {
     friend class DescriptorList;
@@ -434,7 +433,6 @@ private:
 *
 * A container class for StateDescriptors.
 */
-
 class DescriptorList
 {
 public:
diff --git a/Src/AmrCore/AMReX_AmrCore.H b/Src/AmrCore/AMReX_AmrCore.H
index 20428b40930..2969b986a75 100644
--- a/Src/AmrCore/AMReX_AmrCore.H
+++ b/Src/AmrCore/AMReX_AmrCore.H
@@ -20,7 +20,6 @@ class AmrParGDB;
  * virtual functions to allocate, initialize and delete data.  It also
  * requires the derived class to tag cells for refinement.
  */
-
 class AmrCore
     : public AmrMesh
 {
diff --git a/Src/AmrCore/AMReX_Cluster.H b/Src/AmrCore/AMReX_Cluster.H
index 5bbf5c796bc..7d60131e6cf 100644
--- a/Src/AmrCore/AMReX_Cluster.H
+++ b/Src/AmrCore/AMReX_Cluster.H
@@ -20,7 +20,6 @@ class ClusterList;
 *
 * Utility class for tagging error cells.
 */
-
 class Cluster
 {
 public:
@@ -138,7 +137,6 @@ private:
 *
 * A container class for Cluster.
 */
-
 class ClusterList
 {
 public:
diff --git a/Src/AmrCore/AMReX_ErrorList.H b/Src/AmrCore/AMReX_ErrorList.H
index 9ab1a978966..ab4395d8ce8 100644
--- a/Src/AmrCore/AMReX_ErrorList.H
+++ b/Src/AmrCore/AMReX_ErrorList.H
@@ -102,7 +102,6 @@ extern "C"
 * actual error tagging will be through derivation, so provision is made
 * for this as well.
 */
-
 class ErrorRec
 {
 public:
@@ -348,7 +347,6 @@ private:
 *
 * Container class for ErrorRecs.
 */
-
 class ErrorList
 {
 public:
diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H
index d36b3529efd..5ff1c9550d3 100644
--- a/Src/AmrCore/AMReX_FillPatcher.H
+++ b/Src/AmrCore/AMReX_FillPatcher.H
@@ -68,7 +68,6 @@ namespace amrex {
  * See AmrLevel::RK for an example of using the RungeKutta functions and
  * FillPatcher together.
  */
-
 template <class MF = MultiFab>
 class FillPatcher
 {
diff --git a/Src/AmrCore/AMReX_FluxRegister.H b/Src/AmrCore/AMReX_FluxRegister.H
index 4178eb289ba..f5983e18872 100644
--- a/Src/AmrCore/AMReX_FluxRegister.H
+++ b/Src/AmrCore/AMReX_FluxRegister.H
@@ -14,7 +14,6 @@ namespace amrex {
 *
 * Stores and manipulates fluxes at coarse-fine interfaces.
 */
-
 class FluxRegister
     :
     public BndryRegister
diff --git a/Src/AmrCore/AMReX_InterpFaceRegister.H b/Src/AmrCore/AMReX_InterpFaceRegister.H
index a63c2c23e4c..c54879bcaf6 100644
--- a/Src/AmrCore/AMReX_InterpFaceRegister.H
+++ b/Src/AmrCore/AMReX_InterpFaceRegister.H
@@ -12,7 +12,6 @@ namespace amrex {
  *  \brief InterpFaceRegister is a coarse/fine boundary register for
  *  interpolation of face data at the coarse/fine boundary.
  */
-
 class InterpFaceRegister
 {
 public:
diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H
index d2fe66b0cbd..e1210a83329 100644
--- a/Src/AmrCore/AMReX_Interpolater.H
+++ b/Src/AmrCore/AMReX_Interpolater.H
@@ -17,7 +17,6 @@ class IArrayBox;
 *
 * Specifies interpolater interface for coarse-to-fine interpolation in space.
 */
-
 class Interpolater
     : public InterpBase
 {
@@ -160,7 +159,6 @@ public:
 *
 * Bilinear interpolation on node centered data.
 */
-
 class NodeBilinear
     :
     public Interpolater
@@ -219,7 +217,6 @@ public:
 *
 * Bilinear interpolation on cell centered data.
 */
-
 class CellBilinear
     :
     public Interpolater
@@ -286,7 +283,6 @@ public:
 * sum_ivar a(ic,jc,ivar)*fab(if,jf,ivar) = 0 is satisfied
 * in all fine cells if,jf covering coarse cell ic,jc.
 */
-
 class CellConservativeLinear
     :
     public Interpolater
@@ -344,7 +340,6 @@ protected:
 * Linear conservative interpolation on cell centered data
 * but with protection against undershoots or overshoots.
 */
-
 class CellConservativeProtected
     :
     public CellConservativeLinear
@@ -393,7 +388,6 @@ public:
 *
 * Quadratic interpolation on cell centered data.
 */
-
 class CellQuadratic
     :
     public Interpolater
@@ -451,7 +445,6 @@ public:
 /**
 * \brief Piecewise Constant interpolation on cell centered data.
 */
-
 class PCInterp
     :
     public Interpolater
@@ -512,7 +505,6 @@ public:
 * in constructing the polynomial, the average of the polynomial inside that
 * cell is equal to the cell averaged value of the original data.
 */
-
 class CellConservativeQuartic
     :
     public Interpolater
@@ -574,7 +566,6 @@ public:
 * a given coarse cell will have the same divergence, even when the coarse
 * grid divergence is spatially varying.
 */
-
 class FaceDivFree
     :
     public Interpolater
@@ -667,7 +658,6 @@ public:
 *
 * Bilinear interpolation on data.
 */
-
 class FaceLinear
     :
     public Interpolater
@@ -789,7 +779,6 @@ public:
 *
 * Quartic interpolation on cell centered data.
 */
-
 class CellQuartic
     :
     public Interpolater
diff --git a/Src/AmrCore/AMReX_TagBox.H b/Src/AmrCore/AMReX_TagBox.H
index 929e181e0e8..3d26f76e9cb 100644
--- a/Src/AmrCore/AMReX_TagBox.H
+++ b/Src/AmrCore/AMReX_TagBox.H
@@ -20,7 +20,6 @@ namespace amrex {
 *
 * This class is used to tag cells in a Box that need addition refinement.
 */
-
 class TagBox final
     :
     public BaseFab<char>
@@ -145,7 +144,6 @@ public:
 *
 * A container class for TagBoxes.
 */
-
 class TagBoxArray
     :
     public FabArray<TagBox>
diff --git a/Src/Base/AMReX_Arena.H b/Src/Base/AMReX_Arena.H
index d328f693a96..b93c476f86b 100644
--- a/Src/Base/AMReX_Arena.H
+++ b/Src/Base/AMReX_Arena.H
@@ -82,7 +82,6 @@ struct ArenaInfo
 * A virtual base class for objects that manage their own dynamic
 * memory allocation.
 */
-
 class Arena
 {
 public:
diff --git a/Src/Base/AMReX_BArena.H b/Src/Base/AMReX_BArena.H
index 9a3b4aa0f1b..d587d100859 100644
--- a/Src/Base/AMReX_BArena.H
+++ b/Src/Base/AMReX_BArena.H
@@ -11,7 +11,6 @@ namespace amrex {
 * This is the simplest dynamic memory management class derived from Arena.
 * Makes calls to std::malloc and std::free.
 */
-
 class BArena
     :
     public Arena
diff --git a/Src/Base/AMReX_BCRec.H b/Src/Base/AMReX_BCRec.H
index 268147a3a04..d23da777eda 100644
--- a/Src/Base/AMReX_BCRec.H
+++ b/Src/Base/AMReX_BCRec.H
@@ -10,10 +10,9 @@ namespace amrex {
 /**
 * \brief Boundary Condition Records.
 * Necessary information and functions for computing boundary conditions.
+*
+* This class has standard layout.  And we should keep it so!
 */
-
-// This class has standard layout.  And we should keep it so!
-
 class BCRec
 {
 public:
diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H
index e0331e7a0ae..eb8e5c59615 100644
--- a/Src/Base/AMReX_BaseFab.H
+++ b/Src/Base/AMReX_BaseFab.H
@@ -3528,7 +3528,6 @@ BaseFab<T>::protected_divide (const BaseFab<T>& src, const Box& srcbox, const Bo
 * and stored in component comp of this FAB.
 * This fab is returned as a reference for chaining.
 */
-
 template <class T>
 template <RunOn run_on>
 BaseFab<T>&
diff --git a/Src/Base/AMReX_BoxDomain.H b/Src/Base/AMReX_BoxDomain.H
index a82e5ddc72e..af92d631607 100644
--- a/Src/Base/AMReX_BoxDomain.H
+++ b/Src/Base/AMReX_BoxDomain.H
@@ -55,14 +55,12 @@ std::ostream& operator<< (std::ostream& os, const BoxDomain& bd);
 
 /**
 * \brief A List of Disjoint Boxes.
+*
 * A BoxDomain is a BoxList with the restriction that Boxes in the list
 * are disjoint.
+* Note that a BoxDomain is NOT a BoxList due to the protected inheritance.
+* This is a concrete class, not a polymorphic one.
 */
-
-//Note that a BoxDomain is NOT a BoxList due to the protected inheritance.
-//This is a concrete class, not a polymorphic one.
-
-
 class BoxDomain
     :
     protected BoxList
diff --git a/Src/Base/AMReX_BoxList.H b/Src/Base/AMReX_BoxList.H
index c0ff30025ff..cab414d36c8 100644
--- a/Src/Base/AMReX_BoxList.H
+++ b/Src/Base/AMReX_BoxList.H
@@ -48,7 +48,6 @@ namespace amrex
 * IndexType.  This class implements operations for sets of Boxes.
 * This is a concrete class, not a polymorphic one.
 */
-
 class BoxList
 {
 public:
diff --git a/Src/Base/AMReX_CArena.H b/Src/Base/AMReX_CArena.H
index 163039df2ef..9547bc92f21 100644
--- a/Src/Base/AMReX_CArena.H
+++ b/Src/Base/AMReX_CArena.H
@@ -24,7 +24,6 @@ struct MemStat;
 * chunks of heap space and apportions it out as requested.  It merges
 * together neighboring chunks on each free().
 */
-
 class CArena
     :
     public Arena
diff --git a/Src/Base/AMReX_CoordSys.H b/Src/Base/AMReX_CoordSys.H
index ab946ffa3d0..24096c6f428 100644
--- a/Src/Base/AMReX_CoordSys.H
+++ b/Src/Base/AMReX_CoordSys.H
@@ -20,7 +20,6 @@ class FArrayBox;
 *
 * Routines for mapping between physical coordinate system and index space.
 */
-
 class CoordSys
 {
 public:
diff --git a/Src/Base/AMReX_DistributionMapping.H b/Src/Base/AMReX_DistributionMapping.H
index 0707532a0fc..e9aa82f16a2 100644
--- a/Src/Base/AMReX_DistributionMapping.H
+++ b/Src/Base/AMReX_DistributionMapping.H
@@ -37,7 +37,6 @@ class FabArrayBase;
 *  BoxArray are as equal across CPUs as is possible.  The SFC distribution is
 *  based on a space filling curve.
 */
-
 class DistributionMapping
 {
   public:
diff --git a/Src/Base/AMReX_FACopyDescriptor.H b/Src/Base/AMReX_FACopyDescriptor.H
index 7e1e383d237..ca7d3f47029 100644
--- a/Src/Base/AMReX_FACopyDescriptor.H
+++ b/Src/Base/AMReX_FACopyDescriptor.H
@@ -103,7 +103,6 @@ FabCopyDescriptor<FAB>::~FabCopyDescriptor ()
 * \brief This class orchestrates filling a destination fab of size destFabBox
 * from fabarray on the local processor (myProc).
 */
-
 template <class FAB>
 class FabArrayCopyDescriptor
 {
diff --git a/Src/Base/AMReX_FPC.H b/Src/Base/AMReX_FPC.H
index 77c4dfa923b..8975ed8e9b9 100644
--- a/Src/Base/AMReX_FPC.H
+++ b/Src/Base/AMReX_FPC.H
@@ -15,7 +15,6 @@ namespace amrex {
 * namespaces, and we don't like global constants, we make them static
 * constant data members of this class.
 */
-
 class FPC
 {
 public:
diff --git a/Src/Base/AMReX_FabArrayBase.H b/Src/Base/AMReX_FabArrayBase.H
index d8bc4411874..e2cf0ed9641 100644
--- a/Src/Base/AMReX_FabArrayBase.H
+++ b/Src/Base/AMReX_FabArrayBase.H
@@ -721,6 +721,11 @@ public:
 
 };
 
+[[nodiscard]] int nComp (FabArrayBase const& fa);
+[[nodiscard]] IntVect nGrowVect (FabArrayBase const& fa);
+[[nodiscard]] BoxArray const& boxArray (FabArrayBase const& fa);
+[[nodiscard]] DistributionMapping const& DistributionMap (FabArrayBase const& fa);
+
 #ifdef BL_USE_MPI
 bool CheckRcvStats (Vector<MPI_Status>& recv_stats, const Vector<std::size_t>& recv_size, int tag);
 #endif
diff --git a/Src/Base/AMReX_FabArrayBase.cpp b/Src/Base/AMReX_FabArrayBase.cpp
index 8dd8275f66a..6997f3489dd 100644
--- a/Src/Base/AMReX_FabArrayBase.cpp
+++ b/Src/Base/AMReX_FabArrayBase.cpp
@@ -2699,4 +2699,24 @@ FabArrayBase::flushParForCache ()
 
 #endif
 
+int nComp (FabArrayBase const& fa)
+{
+    return fa.nComp();
+}
+
+IntVect nGrowVect (FabArrayBase const& fa)
+{
+    return fa.nGrowVect();
+}
+
+BoxArray const& boxArray (FabArrayBase const& fa)
+{
+    return fa.boxArray();
+}
+
+DistributionMapping const& DistributionMap (FabArrayBase const& fa)
+{
+    return fa.DistributionMap();
+}
+
 }
diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H
index ca80a070f45..0897c57ed4f 100644
--- a/Src/Base/AMReX_FabArrayUtility.H
+++ b/Src/Base/AMReX_FabArrayUtility.H
@@ -1602,6 +1602,193 @@ Dot (FabArray<FAB> const& x, int xcomp, FabArray<FAB> const& y, int ycomp, int n
     return sm;
 }
 
+//! dst = val
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void setVal (MF& dst, typename MF::value_type val)
+{
+    dst.setVal(val);
+}
+
+//! dst = val in ghost cells.
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void setBndry (MF& dst, typename MF::value_type val, int scomp, int ncomp)
+{
+    dst.setBndry(val, scomp, ncomp);
+}
+
+//! dst = src
+template <class DMF, class SMF,
+          std::enable_if_t<IsMultiFabLike_v<DMF> &&
+                           IsMultiFabLike_v<SMF>, int> = 0>
+void LocalCopy (DMF& dst, SMF const& src, int scomp, int dcomp,
+                int ncomp, IntVect const& nghost)
+{
+    amrex::Copy(dst, src, scomp, dcomp, ncomp, nghost);
+}
+
+//! dst += src
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void LocalAdd (MF& dst, MF const& src, int scomp, int dcomp,
+                int ncomp, IntVect const& nghost)
+{
+    amrex::Add(dst, src, scomp, dcomp, ncomp, nghost);
+}
+
+//! dst += a * src
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void Saxpy (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
+            int ncomp, IntVect const& nghost)
+{
+    MF::Saxpy(dst, a, src, scomp, dcomp, ncomp, nghost);
+}
+
+//! dst = src + a * dst
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void Xpay (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
+           int ncomp, IntVect const& nghost)
+{
+    MF::Xpay(dst, a, src, scomp, dcomp, ncomp, nghost);
+}
+
+//! dst = src w/ MPI communication
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
+void ParallelCopy (MF& dst, MF const& src, int scomp, int dcomp, int ncomp,
+                   IntVect const& ng_src = IntVect(0),
+                   IntVect const& ng_dst = IntVect(0),
+                   Periodicity const& period = Periodicity::NonPeriodic())
+{
+    dst.ParallelCopy(src, scomp, dcomp, ncomp, ng_src, ng_dst, period);
+}
+
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
+[[nodiscard]] typename MF::value_type
+norminf (MF const& mf, int scomp, int ncomp, IntVect const& nghost,
+         bool local = false)
+{
+    return mf.norminf(scomp, ncomp, nghost, local);
+}
+
+//! dst = val
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void setVal (Array<MF,N>& dst, typename MF::value_type val)
+{
+    for (auto& mf: dst) {
+        mf.setVal(val);
+    }
+}
+
+//! dst = val in ghost cells.
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void setBndry (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp)
+{
+    for (auto& mf : dst) {
+        mf.setBndry(val, scomp, ncomp);
+    }
+}
+
+//! dst = src
+template <class DMF, class SMF, std::size_t N,
+          std::enable_if_t<IsMultiFabLike_v<DMF> &&
+                           IsMultiFabLike_v<SMF>, int> = 0>
+void LocalCopy (Array<DMF,N>& dst, Array<SMF,N> const& src, int scomp, int dcomp,
+                int ncomp, IntVect const& nghost)
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        amrex::Copy(dst[i], src[i], scomp, dcomp, ncomp, nghost);
+    }
+}
+
+//! dst += src
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void LocalAdd (Array<MF,N>& dst, Array<MF,N> const& src, int scomp, int dcomp,
+               int ncomp, IntVect const& nghost)
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        amrex::Add(dst[i], src[i], scomp, dcomp, ncomp, nghost);
+    }
+}
+
+//! dst += a * src
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void Saxpy (Array<MF,N>& dst, typename MF::value_type a,
+            Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
+            IntVect const& nghost)
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        MF::Saxpy(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
+    }
+}
+
+//! dst = src + a * dst
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void Xpay (Array<MF,N>& dst, typename MF::value_type a,
+           Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
+           IntVect const& nghost)
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        MF::Xpay(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
+    }
+}
+
+//! dst = src w/ MPI communication
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
+void ParallelCopy (Array<MF,N>& dst, Array<MF,N> const& src,
+                   int scomp, int dcomp, int ncomp,
+                   IntVect const& ng_src = IntVect(0),
+                   IntVect const& ng_dst = IntVect(0),
+                   Periodicity const& period = Periodicity::NonPeriodic())
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        dst[i].ParallelCopy(src[i], scomp, dcomp, ncomp, ng_src, ng_dst, period);
+    }
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
+[[nodiscard]] typename MF::value_type
+norminf (Array<MF,N> const& mf, int scomp, int ncomp, IntVect const& nghost,
+         bool local = false)
+{
+    auto r = typename MF::value_type(0); // running maximum over the N elements; stays 0 when N == 0
+    for (std::size_t i = 0; i < N; ++i) {
+        auto tmp = mf[i].norminf(scomp, ncomp, nghost, true); // local=true: no reduction per element
+        r = std::max(r,tmp);
+    }
+    if (!local) {
+        ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub()); // single max-reduction for the whole array
+    }
+    return r;
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
+                                                    int> = 0>
+[[nodiscard]] int nComp (Array<MF,N> const& mf)
+{
+    return mf[0].nComp(); // element 0 only (N > 0 required above); presumably all elements agree -- TODO confirm
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
+                                                    int> = 0>
+[[nodiscard]] IntVect nGrowVect (Array<MF,N> const& mf)
+{
+    return mf[0].nGrowVect(); // element 0 only (N > 0 required above); presumably all elements agree -- TODO confirm
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
+                                                    int> = 0>
+[[nodiscard]] BoxArray const&
+boxArray (Array<MF,N> const& mf)
+{
+    return mf[0].boxArray(); // element 0 only (N > 0 required above); other elements may differ -- TODO confirm intent
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
+                                                    int> = 0>
+[[nodiscard]] DistributionMapping const&
+DistributionMap (Array<MF,N> const& mf)
+{
+    return mf[0].DistributionMap(); // element 0 only (N > 0 required above); presumably shared by all -- TODO confirm
+}
+
 }
 
 #endif
diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H
index 4017273151a..550b42f2f6a 100644
--- a/Src/Base/AMReX_Geometry.H
+++ b/Src/Base/AMReX_Geometry.H
@@ -16,14 +16,6 @@
 #include <map>
 
 namespace amrex {
-/**
-* \class Geometry
-* \brief Rectangular problem domain geometry.
-*
-* This class describes problem domain and coordinate system for
-* RECTANGULAR problem domains.  Since the problem domain is RECTANGULAR,
-* periodicity is meaningful.
-*/
 
 class MultiFab;
 class DistributionMapping;
@@ -67,6 +59,14 @@ public:
     int coord;
 };
 
+/**
+ * \class Geometry
+ * \brief Rectangular problem domain geometry.
+ *
+ * This class describes problem domain and coordinate system for
+ * RECTANGULAR problem domains.  Since the problem domain is RECTANGULAR,
+ * periodicity is meaningful.
+ */
 class Geometry
     :
     public CoordSys
diff --git a/Src/Base/AMReX_IArrayBox.H b/Src/Base/AMReX_IArrayBox.H
index b5240395f02..db0f26d5080 100644
--- a/Src/Base/AMReX_IArrayBox.H
+++ b/Src/Base/AMReX_IArrayBox.H
@@ -41,7 +41,6 @@ public:
 
 *  This class does NOT provide a copy constructor or assignment operator.
 */
-
 class IArrayBox
     :
     public BaseFab<int>
diff --git a/Src/Base/AMReX_IndexType.H b/Src/Base/AMReX_IndexType.H
index 02a56aae2a2..0fd613d2a99 100644
--- a/Src/Base/AMReX_IndexType.H
+++ b/Src/Base/AMReX_IndexType.H
@@ -19,7 +19,6 @@ namespace amrex {
 * enumerated type CellIndex to be either CELL or NODE; i.e. each of the
 * AMREX_SPACEDIM dimensions must be either CELL or NODE.
 */
-
 class IndexType
 {
     friend MPI_Datatype ParallelDescriptor::Mpi_typemap<IndexType>::type();
diff --git a/Src/Base/AMReX_IntVect.H b/Src/Base/AMReX_IntVect.H
index fd71c93ae87..b2658a5ec94 100644
--- a/Src/Base/AMReX_IntVect.H
+++ b/Src/Base/AMReX_IntVect.H
@@ -42,7 +42,6 @@ int coarsen (int i, int ratio) noexcept
 * C++ array.  In addition, the basic arithmetic operators have been overloaded
 * to implement scaling and translation operations.
 */
-
 class IntVect
 {
     friend MPI_Datatype ParallelDescriptor::Mpi_typemap<IntVect>::type();
diff --git a/Src/Base/AMReX_Loop.H b/Src/Base/AMReX_Loop.H
index 84b39107e45..19e1c3e5191 100644
--- a/Src/Base/AMReX_Loop.H
+++ b/Src/Base/AMReX_Loop.H
@@ -211,6 +211,30 @@ void LoopConcurrentOnCpu (Box const& bx, int ncomp, F&& f) noexcept
     }}}}
 }
 
+// Implementation of "constexpr for" based on
+// https://artificial-mind.net/blog/2020/10/31/constexpr-for
+//
+// Approximates what one would get from a compile-time
+// unrolling of the loop
+// for (auto i = I; i < N; ++i) {
+//    f(i);
+// }
+//
+// The mechanism is recursive: f is invoked for the current index I and
+// then constexpr_for<I+1, N> handles the rest; nothing is called once
+// I >= N. f (typically a lambda) provides the loop body and receives the
+// index as a std::integral_constant<decltype(I), I>, not as a plain int.
+
+template<auto I, auto N, class F>
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+constexpr void constexpr_for (F&& f)
+{
+    if constexpr (I < N) {
+        f(std::integral_constant<decltype(I), I>());
+        constexpr_for<I+1, N>(f);
+    }
+}
+
 #include <AMReX_Loop.nolint.H>
 
 }
diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H
index 29af89ba88e..ca9b1ab7fff 100644
--- a/Src/Base/AMReX_MultiFabUtil.H
+++ b/Src/Base/AMReX_MultiFabUtil.H
@@ -637,13 +637,13 @@ void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
 
 
 
-   /**
-    * \brief Returns part of a norm based on two MultiFabs
-    * The MultiFabs MUST have the same underlying BoxArray.
-    * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
-    * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)
-    */
-
+/**
+ * \brief Returns part of a norm based on two MultiFabs.
+ *
+ * The MultiFabs MUST have the same underlying BoxArray.
+ * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
+ * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)))
+ */
 template <typename F>
 Real
 NormHelper (const MultiFab& x, int xcomp,
@@ -696,14 +696,14 @@ NormHelper (const MultiFab& x, int xcomp,
     return sm;
 }
 
-   /**
-    * \brief Returns part of a norm based on three MultiFabs
-    * The MultiFabs MUST have the same underlying BoxArray.
-    * The Predicate pf is used to test the mask
-    * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
-    * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)
-    */
-
+/**
+ * \brief Returns part of a norm based on three MultiFabs.
+ *
+ * The MultiFabs MUST have the same underlying BoxArray.
+ * The Predicate pf is used to test the mask.
+ * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
+ * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)))
+ */
 template <typename MMF, typename Pred, typename F>
 Real
 NormHelper (const MMF& mask,
diff --git a/Src/Base/AMReX_NFiles.H b/Src/Base/AMReX_NFiles.H
index 824351b50dc..bd1518dd44c 100644
--- a/Src/Base/AMReX_NFiles.H
+++ b/Src/Base/AMReX_NFiles.H
@@ -23,7 +23,6 @@ namespace amrex {
 *   nfi.Stream().write((const char *) data.dataPtr(), nChars);
 * }
 */
-
 class NFilesIter
 {
   public:
diff --git a/Src/Base/AMReX_Orientation.H b/Src/Base/AMReX_Orientation.H
index 263bb84a95e..61e3622b336 100644
--- a/Src/Base/AMReX_Orientation.H
+++ b/Src/Base/AMReX_Orientation.H
@@ -25,7 +25,6 @@ class OrientationIter;
 * AMREX_SPACEDIM-1 and then the AMREX_SPACEDIM high sides from direction 0 ..
 * AMREX_SPACEDIM-1.
 */
-
 class Orientation
 {
 public:
diff --git a/Src/Base/AMReX_PArena.H b/Src/Base/AMReX_PArena.H
index d2e8c8ebec3..75db747fd9f 100644
--- a/Src/Base/AMReX_PArena.H
+++ b/Src/Base/AMReX_PArena.H
@@ -15,7 +15,6 @@ namespace amrex {
 * \brief This arena uses CUDA stream-ordered memory allocator if available.
 * If not, use The_Arena().
 */
-
 class PArena
     :
     public Arena
diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H
index 01a0098333e..b6f4799f2e8 100644
--- a/Src/Base/AMReX_ParmParse.H
+++ b/Src/Base/AMReX_ParmParse.H
@@ -267,7 +267,6 @@ class IntVect;
 *    #endif
 *
 */
-
 class ParmParse
 {
 public:
diff --git a/Src/Base/AMReX_ParmParse.cpp b/Src/Base/AMReX_ParmParse.cpp
index c2ecfc7b37a..6fe442bfc5b 100644
--- a/Src/Base/AMReX_ParmParse.cpp
+++ b/Src/Base/AMReX_ParmParse.cpp
@@ -450,7 +450,6 @@ ppfound (const std::string& keyword,
 // except if n==-1, return the index of the last occurrence.
 // Return 0 if the specified occurrence does not exist.
 //
-
 const ParmParse::PP_entry*
 ppindex (const ParmParse::Table& table,
          int         n,
diff --git a/Src/Base/AMReX_RealVect.H b/Src/Base/AMReX_RealVect.H
index 635d21927f6..9e1d72700f7 100644
--- a/Src/Base/AMReX_RealVect.H
+++ b/Src/Base/AMReX_RealVect.H
@@ -28,7 +28,6 @@ namespace amrex
   C++ array.  In addition, the basic arithmetic operators have been overloaded
   to implement scaling and translation operations.
 */
-
 class RealVect
 {
 public:
diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H
index cfac0851cab..d68bf00bfb4 100644
--- a/Src/Base/AMReX_RungeKutta.H
+++ b/Src/Base/AMReX_RungeKutta.H
@@ -4,8 +4,6 @@
 
 #include <AMReX_FabArray.H>
 
-namespace amrex::RungeKutta {
-
 /**
  * \brief Functions for Runge-Kutta methods
  *
@@ -48,6 +46,7 @@ namespace amrex::RungeKutta {
  * FillPatcher class can be useful for implementing such a callable.  See
  * AmrLevel::RK for an example.
  */
+namespace amrex::RungeKutta {
 
 struct PostStageNoOp {
     template <typename MF>
diff --git a/Src/Base/AMReX_TypeTraits.H b/Src/Base/AMReX_TypeTraits.H
index 222576f05f5..fbcb7a2c0e3 100644
--- a/Src/Base/AMReX_TypeTraits.H
+++ b/Src/Base/AMReX_TypeTraits.H
@@ -37,6 +37,18 @@ namespace amrex
     template <class A>
     inline constexpr bool IsFabArray_v = IsFabArray<A>::value;
 
+    template <class M, class Enable = void>
+    struct IsMultiFabLike : std::false_type {};
+    // Specialization: true when M is a FabArray whose fab_type is a BaseFab.
+    template <class M>
+    struct IsMultiFabLike<M, std::enable_if_t<IsFabArray_v<M> &&
+                                              IsBaseFab_v<typename M::fab_type> > >
+        : std::true_type {};
+    // Shorthand for IsMultiFabLike<M>::value.
+    template <class M>
+    inline constexpr bool IsMultiFabLike_v = IsMultiFabLike<M>::value;
+
+
     template <bool B, class T = void>
     using EnableIf_t = typename std::enable_if<B,T>::type;
 
diff --git a/Src/Base/AMReX_Vector.H b/Src/Base/AMReX_Vector.H
index c377076fe1b..18e14d5c3c0 100644
--- a/Src/Base/AMReX_Vector.H
+++ b/Src/Base/AMReX_Vector.H
@@ -20,7 +20,6 @@ namespace amrex {
 * Vector::operator[] provides bound checking when compiled with
 * DEBUG=TRUE.
 */
-
 template <class T, class Allocator=std::allocator<T> >
 class Vector
     :
diff --git a/Src/Base/AMReX_VisMF.H b/Src/Base/AMReX_VisMF.H
index f0b146f6a9d..468523e0039 100644
--- a/Src/Base/AMReX_VisMF.H
+++ b/Src/Base/AMReX_VisMF.H
@@ -29,7 +29,6 @@ class IArrayBox;
 * \brief File I/O for FabArray<FArrayBox>.
 *  Wrapper class for reading/writing FabArray<FArrayBox> objects to disk in various "smart" ways.
 */
-
 class VisMF
     : public VisMFBuffer
 {
diff --git a/Src/Boundary/AMReX_BoundCond.H b/Src/Boundary/AMReX_BoundCond.H
index 834f790f6b5..963a2fa7ecc 100644
--- a/Src/Boundary/AMReX_BoundCond.H
+++ b/Src/Boundary/AMReX_BoundCond.H
@@ -16,7 +16,6 @@ namespace amrex {
    boundary conditions are specified via an integer identifier.
    This class maintains that integer.
 */
-
 class BoundCond
 {
 public:
diff --git a/Src/Boundary/AMReX_FabSet.H b/Src/Boundary/AMReX_FabSet.H
index f4ae8b7d247..9841555b336 100644
--- a/Src/Boundary/AMReX_FabSet.H
+++ b/Src/Boundary/AMReX_FabSet.H
@@ -40,7 +40,6 @@ namespace amrex {
         FabSets are used primarily as a data storage mechanism, and are
         manipulated by more sophisticated control classes.
 */
-
 template <typename MF>
 class FabSetT
 {
diff --git a/Src/Boundary/AMReX_Mask.H b/Src/Boundary/AMReX_Mask.H
index 3a41ea81913..02000250f49 100644
--- a/Src/Boundary/AMReX_Mask.H
+++ b/Src/Boundary/AMReX_Mask.H
@@ -22,7 +22,6 @@ namespace amrex {
 
         This class does NOT provide a copy constructor or assignment operator.
 */
-
 class Mask final
     :
     public BaseFab<int>
diff --git a/Src/Boundary/AMReX_YAFluxRegister.H b/Src/Boundary/AMReX_YAFluxRegister.H
index 075a630a2f7..e26426ce15b 100644
--- a/Src/Boundary/AMReX_YAFluxRegister.H
+++ b/Src/Boundary/AMReX_YAFluxRegister.H
@@ -23,7 +23,6 @@ namespace amrex {
   `Reflux` is called to update the coarse cells next to the
   coarse/fine boundary.
 */
-
 template <typename MF>
 class YAFluxRegisterT
 {
diff --git a/Src/EB/AMReX_EBFluxRegister.H b/Src/EB/AMReX_EBFluxRegister.H
index 33ec811dcf6..72fec3b6a7c 100644
--- a/Src/EB/AMReX_EBFluxRegister.H
+++ b/Src/EB/AMReX_EBFluxRegister.H
@@ -53,7 +53,6 @@ namespace amrex {
   to add the part in ghost cells (excluding ghost cells covered by
   valid cells of other grids) to EBFluxRegister's internal data.
 */
-
 class EBFluxRegister
     : public YAFluxRegister
 {
diff --git a/Src/Extern/Bittree/AMReX_Bittree.H b/Src/Extern/Bittree/AMReX_Bittree.H
index 54a046be720..feb05e9f189 100644
--- a/Src/Extern/Bittree/AMReX_Bittree.H
+++ b/Src/Extern/Bittree/AMReX_Bittree.H
@@ -18,7 +18,6 @@ LIBRARIES += -lbittree
 Include in inputs:
 amr.use_bittree = true
 */
-
 class btUnit {
   // Functions used in AmrMesh
   public:
diff --git a/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp b/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp
index 8408f75c41d..34671fac1a8 100644
--- a/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp
+++ b/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp
@@ -24,7 +24,6 @@ namespace amrex::sundials {
 /* ----------------------------------------------------------------------------
  * Function to create a new empty multifab vector
  */
-
 N_Vector N_VNewEmpty_MultiFab(sunindextype length, ::sundials::Context* sunctx)
 {
     /* Create vector */
@@ -76,7 +75,6 @@ N_Vector N_VNewEmpty_MultiFab(sunindextype length, ::sundials::Context* sunctx)
 /* ----------------------------------------------------------------------------
  * Function to create a new MultiFab vector
  */
-
 N_Vector N_VNew_MultiFab(sunindextype length,
                          const amrex::BoxArray &ba,
                          const amrex::DistributionMapping &dm,
@@ -102,7 +100,6 @@ N_Vector N_VNew_MultiFab(sunindextype length,
 /* ----------------------------------------------------------------------------
  * Function to create a MultiFab N_Vector with user-specific MultiFab
  */
-
 N_Vector N_VMake_MultiFab(sunindextype length, amrex::MultiFab *v_mf,
                           ::sundials::Context* sunctx)
 {
diff --git a/Src/Extern/SUNDIALS/AMReX_SUNMemory.H b/Src/Extern/SUNDIALS/AMReX_SUNMemory.H
index f7700ce4210..5fc01c3b6b9 100644
--- a/Src/Extern/SUNDIALS/AMReX_SUNMemory.H
+++ b/Src/Extern/SUNDIALS/AMReX_SUNMemory.H
@@ -13,7 +13,6 @@ namespace amrex::sundials {
  *
  * This class allows SUNDIALS to allocate memory using the amrex::Arena.
  */
-
 class MemoryHelper {
 public:
     MemoryHelper(::sundials::Context* sunctx);
diff --git a/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H b/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H
index bb3695d19ac..090a5f43534 100644
--- a/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H
+++ b/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H
@@ -15,7 +15,6 @@ namespace amrex::sundials {
  * This will create the nthreads SUNDIALS context objects that are needed by
  * the SUNDIALS solver and vector objects. Called by amrex::Initialize.
  */
-
 void Initialize(int nthreads);
 
 /**
@@ -23,7 +22,6 @@ void Initialize(int nthreads);
  *
  * Called by amrex::Finalize.
  */
-
 void Finalize();
 
 /**
@@ -33,7 +31,6 @@ void Finalize();
  *
  * A SUNDIALS context should not be used concurrently from different threads.
  */
-
 ::sundials::Context* The_Sundials_Context(int i = amrex::OpenMP::get_thread_num());
 
 }
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
index 3afa56ee245..3bfab3c9f61 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
@@ -12,8 +12,8 @@ class MLCGSolverT
 {
 public:
 
-    using FAB = typename MF::fab_type;
-    using RT  = typename MF::value_type;
+    using FAB = typename MLLinOpT<MF>::FAB;
+    using RT  = typename MLLinOpT<MF>::RT;
 
     enum struct Type { BiCGStab, CG };
 
@@ -42,6 +42,16 @@ public:
     void setMaxIter (int _maxiter) { maxiter = _maxiter; }
     [[nodiscard]] int getMaxIter () const { return maxiter; }
 
+
+    /**
+    * Declare whether the initial guess passed to the solver is all zero.
+    * The flag is trusted, not checked. When true, solve_bicgstab uses rhs as
+    * the initial residual and skips saving and zeroing the initial guess.
+    * Default is false.
+    */
+    void setInitSolnZeroed (bool _sol_zeroed) { initial_vec_zeroed = _sol_zeroed; }
+    [[nodiscard]] bool getInitSolnZeroed () const { return initial_vec_zeroed; }
+
     void setNGhost(int _nghost) {nghost = IntVect(_nghost);}
     [[nodiscard]] int getNGhost() {return nghost[0];}
 
@@ -62,6 +72,7 @@ private:
     int maxiter   = 100;
     IntVect nghost = IntVect(0);
     int iter = -1;
+    bool initial_vec_zeroed = false;
 };
 
 template <typename MF>
@@ -88,27 +99,34 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
 {
     BL_PROFILE("MLCGSolver::bicgstab");
 
-    const int ncomp = sol.nComp();
+    const int ncomp = nComp(sol);
 
-    MF p = Lp.make(amrlev, mglev, sol.nGrowVect());
-    MF r = Lp.make(amrlev, mglev, sol.nGrowVect());
-    p.setVal(RT(0.0)); // Make sure all entries are initialized to avoid errors
-    r.setVal(RT(0.0));
+    MF p = Lp.make(amrlev, mglev, nGrowVect(sol));
+    MF r = Lp.make(amrlev, mglev, nGrowVect(sol));
+    setVal(p, RT(0.0)); // Make sure all entries are initialized to avoid errors
+    setVal(r, RT(0.0));
 
-    MF sorig = Lp.make(amrlev, mglev, nghost);
     MF rh    = Lp.make(amrlev, mglev, nghost);
     MF v     = Lp.make(amrlev, mglev, nghost);
     MF t     = Lp.make(amrlev, mglev, nghost);
 
-    Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
 
-    // Then normalize
-    Lp.normalize(amrlev, mglev, r);
+    MF sorig;
 
-    sorig.LocalCopy(sol,0,0,ncomp,nghost);
-    rh.LocalCopy   (r  ,0,0,ncomp,nghost);
+    if ( initial_vec_zeroed ) {
+        LocalCopy(r,rhs,0,0,ncomp,nghost);
+    } else {
+        sorig = Lp.make(amrlev, mglev, nghost);
 
-    sol.setVal(RT(0.0));
+        Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
+
+        LocalCopy(sorig,sol,0,0,ncomp,nghost);
+        setVal(sol, RT(0.0));
+    }
+
+    // Then normalize
+    Lp.normalize(amrlev, mglev, r);
+    LocalCopy(rh, r, 0,0,ncomp,nghost);
 
     RT rnorm = norm_inf(r);
     const RT rnorm0 = rnorm;
@@ -141,13 +159,13 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
         }
         if ( iter == 1 )
         {
-            p.LocalCopy(r,0,0,ncomp,nghost);
+            LocalCopy(p,r,0,0,ncomp,nghost);
         }
         else
         {
             const RT beta = (rho/rho_1)*(alpha/omega);
-            MF::Saxpy(p, -omega, v, 0, 0, ncomp, nghost); // p += -omega*v
-            MF::Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta*p
+            Saxpy(p, -omega, v, 0, 0, ncomp, nghost); // p += -omega*v
+            Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta*p
         }
         Lp.apply(amrlev, mglev, v, p, MLLinOpT<MF>::BCMode::Homogeneous, MLLinOpT<MF>::StateMode::Correction);
         Lp.normalize(amrlev, mglev, v);
@@ -161,8 +179,8 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
         {
             ret = 2; break;
         }
-        MF::Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p
-        MF::Saxpy(r,  -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v
+        Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p
+        Saxpy(r,  -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v
 
         rnorm = norm_inf(r);
 
@@ -198,8 +216,8 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
         {
             ret = 3; break;
         }
-        MF::Saxpy(sol, omega, r, 0, 0, ncomp, nghost); // sol += omega * r
-        MF::Saxpy(r,  -omega, t, 0, 0, ncomp, nghost); // r += -omega * t
+        Saxpy(sol, omega, r, 0, 0, ncomp, nghost); // sol += omega * r
+        Saxpy(r,  -omega, t, 0, 0, ncomp, nghost); // r += -omega * t
 
         rnorm = norm_inf(r);
 
@@ -238,12 +256,16 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
 
     if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
     {
-        sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        if ( !initial_vec_zeroed ) {
+            LocalAdd(sol, sorig, 0, 0, ncomp, nghost);
+        }
     }
     else
     {
-        sol.setVal(RT(0.0));
-        sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        setVal(sol, RT(0.0));
+        if ( !initial_vec_zeroed ) {
+            LocalAdd(sol, sorig, 0, 0, ncomp, nghost);
+        }
     }
 
     return ret;
@@ -255,20 +277,26 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
 {
     BL_PROFILE("MLCGSolver::cg");
 
-    const int ncomp = sol.nComp();
+    const int ncomp = nComp(sol);
 
-    MF p = Lp.make(amrlev, mglev, sol.nGrowVect());
-    p.setVal(RT(0.0));
+    MF p = Lp.make(amrlev, mglev, nGrowVect(sol));
+    setVal(p, RT(0.0));
 
-    MF sorig = Lp.make(amrlev, mglev, nghost);
     MF r     = Lp.make(amrlev, mglev, nghost);
     MF q     = Lp.make(amrlev, mglev, nghost);
 
-    sorig.LocalCopy(sol,0,0,ncomp,nghost);
+    MF sorig;
 
-    Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
+    if ( initial_vec_zeroed ) {
+        LocalCopy(r,rhs,0,0,ncomp,nghost);
+    } else {
+        sorig = Lp.make(amrlev, mglev, nghost);
 
-    sol.setVal(RT(0.0));
+        Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
+
+        LocalCopy(sorig,sol,0,0,ncomp,nghost);
+        setVal(sol, RT(0.0));
+    }
 
     RT       rnorm    = norm_inf(r);
     const RT rnorm0   = rnorm;
@@ -302,12 +330,12 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
         }
         if (iter == 1)
         {
-            p.LocalCopy(r,0,0,ncomp,nghost);
+            LocalCopy(p,r,0,0,ncomp,nghost);
         }
         else
         {
             RT beta = rho/rho_1;
-            MF::Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta * p
+            Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta * p
         }
         Lp.apply(amrlev, mglev, q, p, MLLinOpT<MF>::BCMode::Homogeneous, MLLinOpT<MF>::StateMode::Correction);
 
@@ -329,8 +357,8 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
                            << " rho " << rho
                            << " alpha " << alpha << '\n';
         }
-        MF::Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p
-        MF::Saxpy(r, -alpha, q, 0, 0, ncomp, nghost); // r += -alpha * q
+        Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p
+        Saxpy(r, -alpha, q, 0, 0, ncomp, nghost); // r += -alpha * q
         rnorm = norm_inf(r);
 
         if ( verbose > 2 )
@@ -364,12 +392,16 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
 
     if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
     {
-        sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        if ( !initial_vec_zeroed ) {
+            LocalAdd(sol, sorig, 0, 0, ncomp, nghost);
+        }
     }
     else
     {
-        sol.setVal(RT(0.0));
-        sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        setVal(sol, RT(0.0));
+        if ( !initial_vec_zeroed ) {
+            LocalAdd(sol, sorig, 0, 0, ncomp, nghost);
+        }
     }
 
     return ret;
@@ -390,8 +422,8 @@ template <typename MF>
 auto
 MLCGSolverT<MF>::norm_inf (const MF& res, bool local) -> RT
 {
-    int ncomp = res.nComp();
-    RT result = res.norminf(0,ncomp,IntVect(0),true);
+    int ncomp = nComp(res);
+    RT result = norminf(res,0,ncomp,IntVect(0),true);
     if (!local) {
         BL_PROFILE("MLCGSolver::ParallelAllReduce");
         ParallelAllReduce::Max(result, Lp.BottomCommunicator());
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
index b8aa71eebd0..f0dca07f3ab 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
@@ -85,6 +85,15 @@ struct LinOpEnumType
     enum struct Location { FaceCenter, FaceCentroid, CellCenter, CellCentroid };
 };
 
+template <typename T, class Enable = void> struct LinOpData {};
+//
+template <typename T>
+struct LinOpData <T, std::enable_if_t<IsMultiFabLike_v<T> > >
+{
+    using   fab_type = typename T::fab_type;
+    using value_type = typename T::value_type;
+};
+
 template <typename T> class MLMGT;
 template <typename T> class MLCGSolverT;
 template <typename T> class MLPoissonT;
@@ -100,8 +109,8 @@ public:
     template <typename T> friend class MLPoissonT;
     template <typename T> friend class MLABecLaplacianT;
 
-    using FAB = typename MF::fab_type;
-    using RT  = typename MF::value_type;
+    using FAB = typename LinOpData<MF>::fab_type;
+    using RT  = typename LinOpData<MF>::value_type;
 
     using BCType = LinOpBCType;
     using BCMode    = LinOpEnumType::BCMode;
@@ -1375,13 +1384,18 @@ template <typename MF>
 void
 MLLinOpT<MF>::make (Vector<Vector<MF> >& mf, IntVect const& ng) const
 {
-    mf.clear();
-    mf.resize(m_num_amr_levels);
-    for (int alev = 0; alev < m_num_amr_levels; ++alev) {
-        mf[alev].resize(m_num_mg_levels[alev]);
-        for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) {
-            mf[alev][mlev] = make(alev, mlev, ng);
+    if constexpr (IsMultiFabLike_v<MF>) {
+        mf.clear();
+        mf.resize(m_num_amr_levels);
+        for (int alev = 0; alev < m_num_amr_levels; ++alev) {
+            mf[alev].resize(m_num_mg_levels[alev]);
+            for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) {
+                mf[alev][mlev] = make(alev, mlev, ng);
+            }
         }
+    } else {
+        amrex::ignore_unused(mf, ng);
+        amrex::Abort("MLLinOpT::make: how did we get here?");
     }
 }
 
@@ -1389,39 +1403,62 @@ template <typename MF>
 MF
 MLLinOpT<MF>::make (int amrlev, int mglev, IntVect const& ng) const
 {
-    return MF(amrex::convert(m_grids[amrlev][mglev], m_ixtype),
-              m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(),
-              *m_factory[amrlev][mglev]);
+    if constexpr (IsMultiFabLike_v<MF>) {
+        return MF(amrex::convert(m_grids[amrlev][mglev], m_ixtype),
+                  m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(),
+                  *m_factory[amrlev][mglev]);
+    } else {
+        amrex::ignore_unused(amrlev, mglev, ng);
+        amrex::Abort("MLLinOpT::make: how did we get here?");
+        return {};
+    }
 }
 
 template <typename MF>
 MF
 MLLinOpT<MF>::makeAlias (MF const& mf) const
 {
-    return MF(mf, amrex::make_alias, 0, mf.nComp());
+    if constexpr (IsMultiFabLike_v<MF>) {
+        return MF(mf, amrex::make_alias, 0, mf.nComp());
+    } else {
+        amrex::ignore_unused(mf);
+        amrex::Abort("MLLinOpT::makeAlias: how did we get here?");
+        return {};
+    }
 }
 
 template <typename MF>
 MF
 MLLinOpT<MF>::makeCoarseMG (int amrlev, int mglev, IntVect const& ng) const
 {
-    BoxArray cba = m_grids[amrlev][mglev];
-    IntVect ratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[mglev];
-    cba.coarsen(ratio);
-    cba.convert(m_ixtype);
-    return MF(cba, m_dmap[amrlev][mglev], getNComp(), ng);
-
+    if constexpr (IsMultiFabLike_v<MF>) {
+        BoxArray cba = m_grids[amrlev][mglev];
+        IntVect ratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[mglev];
+        cba.coarsen(ratio);
+        cba.convert(m_ixtype);
+        return MF(cba, m_dmap[amrlev][mglev], getNComp(), ng);
+    } else {
+        amrex::ignore_unused(amrlev, mglev, ng);
+        amrex::Abort("MLLinOpT::makeCoarseMG: how did we get here?");
+        return {};
+    }
 }
 
 template <typename MF>
 MF
 MLLinOpT<MF>::makeCoarseAmr (int famrlev, IntVect const& ng) const
 {
-    BoxArray cba = m_grids[famrlev][0];
-    IntVect ratio(AMRRefRatio(famrlev-1));
-    cba.coarsen(ratio);
-    cba.convert(m_ixtype);
-    return MF(cba, m_dmap[famrlev][0], getNComp(), ng);
+    if constexpr (IsMultiFabLike_v<MF>) {
+        BoxArray cba = m_grids[famrlev][0];
+        IntVect ratio(AMRRefRatio(famrlev-1));
+        cba.coarsen(ratio);
+        cba.convert(m_ixtype);
+        return MF(cba, m_dmap[famrlev][0], getNComp(), ng);
+    } else {
+        amrex::ignore_unused(famrlev, ng);
+        amrex::Abort("MLLinOpT::makeCoarseAmr: how did we get here?");
+        return {};
+    }
 }
 
 template <typename MF>
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
index 70e7e121486..9bfc2f0007d 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
@@ -21,8 +21,8 @@ public:
 
     template <typename T> friend class MLCGSolverT;
 
-    using FAB = typename MF::fab_type;
-    using RT  = typename MF::value_type;
+    using FAB = typename MLLinOpT<MF>::FAB;
+    using RT  = typename MLLinOpT<MF>::RT;
 
     using BCMode   = typename MLLinOpT<MF>::BCMode;
     using Location = typename MLLinOpT<MF>::Location;
@@ -507,7 +507,7 @@ MLMGT<MF>::solve (const Vector<AMF*>& a_sol, const Vector<AMF const*>& a_rhs,
     for (int alev = 0; alev < namrlevs; ++alev)
     {
         if (!sol_is_alias[alev]) {
-            a_sol[alev]->LocalCopy(sol[alev], 0, 0, ncomp, ng_back);
+            LocalCopy(*a_sol[alev], sol[alev], 0, 0, ncomp, ng_back);
         }
     }
 
@@ -541,11 +541,11 @@ MLMGT<MF>::getGradSolution (const Vector<Array<AMF*,AMREX_SPACEDIM> >& a_grad_so
             Array<MF,AMREX_SPACEDIM> grad_sol;
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
                 auto const& amf = *(a_grad_sol[alev][idim]);
-                grad_sol[idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+                grad_sol[idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
             }
             linop.compGrad(alev, GetArrOfPtrs(grad_sol), sol[alev], a_loc);
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-                a_grad_sol[alev][idim]->LocalCopy(grad_sol[idim], 0, 0, ncomp, IntVect(0));
+                LocalCopy(*a_grad_sol[alev][idim], grad_sol[idim], 0, 0, ncomp, IntVect(0));
             }
         }
     }
@@ -578,13 +578,13 @@ MLMGT<MF>::getFluxes (const Vector<Array<AMF*,AMREX_SPACEDIM> >& a_flux,
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
                 auto const& amf = *(a_flux[ilev][idim]);
-                fluxes[ilev][idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+                fluxes[ilev][idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
             }
         }
         getFluxes(GetVecOfArrOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc);
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-                a_flux[ilev][idim]->LocalCopy(fluxes[ilev][idim], 0, 0, ncomp, IntVect(0));
+                LocalCopy(*a_flux[ilev][idim], fluxes[ilev][idim], 0, 0, ncomp, IntVect(0));
             }
         }
     }
@@ -618,14 +618,14 @@ MLMGT<MF>::getFluxes (const Vector<Array<AMF*,AMREX_SPACEDIM> >& a_flux,
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
                 auto const& amf = *(a_flux[ilev][idim]);
-                fluxes[ilev][idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+                fluxes[ilev][idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
             }
-            sol[ilev].LocalCopy(*a_sol[ilev], 0, 0, ncomp, sol[ilev].nGrowVect());
+            LocalCopy(sol[ilev], *a_sol[ilev], 0, 0, ncomp, nGrowVect(sol[ilev]));
         }
         linop.getFluxes(GetVecOfArrOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc);
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-                a_flux[ilev][idim]->LocalCopy(fluxes[ilev][idim], 0, 0, ncomp, IntVect(0));
+                LocalCopy(*a_flux[ilev][idim], fluxes[ilev][idim], 0, 0, ncomp, IntVect(0));
             }
         }
     }
@@ -653,11 +653,11 @@ MLMGT<MF>::getFluxes (const Vector<AMF*> & a_flux, Location a_loc)
         Vector<MF> fluxes(namrlevs);
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             auto const& amf = *a_flux[ilev];
-            fluxes[ilev].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+            fluxes[ilev].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
         }
         getFluxes(GetVecOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc);
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
-            a_flux[ilev]->LocalCopy(fluxes[ilev], 0, 0, ncomp, IntVect(0));
+            LocalCopy(*a_flux[ilev], fluxes[ilev], 0, 0, ncomp, IntVect(0));
         }
     }
 }
@@ -676,11 +676,11 @@ void
 MLMGT<MF>::getFluxes (const Vector<AMF*> & a_flux,
                       const Vector<AMF*>& a_sol, Location /*a_loc*/)
 {
-    AMREX_ASSERT(a_flux[0]->nComp() >= AMREX_SPACEDIM);
+    AMREX_ASSERT(nComp(*a_flux[0]) >= AMREX_SPACEDIM);
 
     if constexpr (! std::is_same<AMF,MF>()) {
         for (int alev = 0; alev < namrlevs; ++alev) {
-            sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, sol[alev].nGrowVect());
+            LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, nGrowVect(sol[alev]));
         }
     }
 
@@ -718,11 +718,11 @@ MLMGT<MF>::getFluxes (const Vector<AMF*> & a_flux,
             Vector<MF> fluxes(namrlevs);
             for (int ilev = 0; ilev < namrlevs; ++ilev) {
                 auto const& amf = *a_flux[ilev];
-                fluxes[ilev].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+                fluxes[ilev].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
             }
             linop.getFluxes(GetVecOfPtrs(fluxes), GetVecOfPtrs(sol));
             for (int ilev = 0; ilev < namrlevs; ++ilev) {
-                a_flux[ilev]->LocalCopy(fluxes[ilev], 0, 0, ncomp, IntVect(0));
+                LocalCopy(*a_flux[ilev], fluxes[ilev], 0, 0, ncomp, IntVect(0));
             }
         }
     }
@@ -779,7 +779,7 @@ MLMGT<MF>::compResidual (const Vector<MF*>& a_res, const Vector<MF*>& a_sol,
     sol_is_alias.resize(namrlevs,true);
     for (int alev = 0; alev < namrlevs; ++alev)
     {
-        if (cf_strategy == CFStrategy::ghostnodes || a_sol[alev]->nGrowVect() == ng_sol)
+        if (cf_strategy == CFStrategy::ghostnodes || nGrowVect(*a_sol[alev]) == ng_sol)
         {
             sol[alev] = linop.makeAlias(*a_sol[alev]);
             sol_is_alias[alev] = true;
@@ -790,7 +790,7 @@ MLMGT<MF>::compResidual (const Vector<MF*>& a_res, const Vector<MF*>& a_sol,
             {
                 sol[alev] = linop.make(alev, 0, ng_sol);
             }
-            sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, IntVect(0));
+            LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, IntVect(0));
         }
     }
 
@@ -808,9 +808,9 @@ MLMGT<MF>::compResidual (const Vector<MF*>& a_res, const Vector<MF*>& a_sol,
         const MF* prhs = a_rhs[alev];
 #if (AMREX_SPACEDIM != 3)
         int nghost = (cf_strategy == CFStrategy::ghostnodes) ? linop.getNGrow(alev) : 0;
-        MF rhstmp(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost,
+        MF rhstmp(boxArray(*prhs), DistributionMap(*prhs), ncomp, nghost,
                   MFInfo(), *linop.Factory(alev));
-        rhstmp.LocalCopy(*prhs, 0, 0, ncomp, IntVect(nghost));
+        LocalCopy(rhstmp, *prhs, 0, 0, ncomp, IntVect(nghost));
         linop.applyMetricTerm(alev, 0, rhstmp);
         linop.unimposeNeumannBC(alev, rhstmp);
         linop.applyInhomogNeumannTerm(alev, rhstmp);
@@ -822,9 +822,9 @@ MLMGT<MF>::compResidual (const Vector<MF*>& a_res, const Vector<MF*>& a_sol,
                          *a_res[alev+1], sol[alev+1], *a_rhs[alev+1]);
             if (linop.isCellCentered()) {
 #ifdef AMREX_USE_EB
-                amrex::EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]);
+                EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]);
 #else
-                amrex::average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]);
+                average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]);
 #endif
             }
         }
@@ -858,7 +858,7 @@ MLMGT<MF>::apply (const Vector<MF*>& out, const Vector<MF*>& a_in)
             nghost = linop.getNGrow(alev);
             in[alev] = a_in[alev];
         }
-        else if (a_in[alev]->nGrowVect() == ng_sol)
+        else if (nGrowVect(*a_in[alev]) == ng_sol)
         {
             in[alev] = a_in[alev];
         }
@@ -866,18 +866,18 @@ MLMGT<MF>::apply (const Vector<MF*>& out, const Vector<MF*>& a_in)
         {
             IntVect ng = ng_sol;
             if (cf_strategy == CFStrategy::ghostnodes) { ng = IntVect(nghost); }
-            in_raii[alev].define(a_in[alev]->boxArray(),
-                                 a_in[alev]->DistributionMap(),
-                                 a_in[alev]->nComp(), ng,
+            in_raii[alev].define(boxArray       (*a_in[alev]),
+                                 DistributionMap(*a_in[alev]),
+                                 nComp          (*a_in[alev]), ng,
                                  MFInfo(), *linop.Factory(alev));
-            in_raii[alev].LocalCopy(*a_in[alev], 0, 0, ncomp, IntVect(nghost));
+            LocalCopy(in_raii[alev], *a_in[alev], 0, 0, ncomp, IntVect(nghost));
             in[alev] = &(in_raii[alev]);
         }
-        rh[alev].define(a_in[alev]->boxArray(),
-                        a_in[alev]->DistributionMap(),
-                        a_in[alev]->nComp(), nghost, MFInfo(),
+        rh[alev].define(boxArray       (*a_in[alev]),
+                        DistributionMap(*a_in[alev]),
+                        nComp          (*a_in[alev]), nghost, MFInfo(),
                         *linop.Factory(alev));
-        rh[alev].setVal(RT(0.0));
+        setVal(rh[alev], RT(0.0));
     }
 
     if (!linop_prepared) {
@@ -901,9 +901,9 @@ MLMGT<MF>::apply (const Vector<MF*>& out, const Vector<MF*>& a_in)
                          *out[alev+1], *in[alev+1], rh[alev+1]);
             if (linop.isCellCentered()) {
 #ifdef AMREX_USE_EB
-                amrex::EB_average_down(*out[alev+1], *out[alev], 0, out[alev]->nComp(), amrrr[alev]);
+                EB_average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]);
 #else
-                amrex::average_down(*out[alev+1], *out[alev], 0, out[alev]->nComp(), amrrr[alev]);
+                average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]);
 #endif
             }
         }
@@ -970,10 +970,10 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
         }
         else
         {
-            if (a_sol[alev]->nGrowVect() == ng_sol) {
+            if (nGrowVect(*a_sol[alev]) == ng_sol) {
                 if constexpr (std::is_same<AMF,MF>()) {
                     sol[alev] = linop.makeAlias(*a_sol[alev]);
-                    sol[alev].setBndry(RT(0.0), 0, ncomp);
+                    setBndry(sol[alev], RT(0.0), 0, ncomp);
                     sol_is_alias[alev] = true;
                 }
             }
@@ -981,8 +981,8 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
                 if (!solve_called) {
                     sol[alev] = linop.make(alev, 0, ng_sol);
                 }
-                sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, IntVect(0));
-                sol[alev].setBndry(RT(0.0), 0, ncomp);
+                LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, IntVect(0));
+                setBndry(sol[alev], RT(0.0), 0, ncomp);
             }
         }
     }
@@ -994,7 +994,7 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
         if (!solve_called) {
             rhs[alev] = linop.make(alev, 0, ng_rhs);
         }
-        rhs[alev].LocalCopy(*a_rhs[alev], 0, 0, ncomp, ng_rhs);
+        LocalCopy(rhs[alev], *a_rhs[alev], 0, 0, ncomp, ng_rhs);
         linop.applyMetricTerm(alev, 0, rhs[alev]);
         linop.unimposeNeumannBC(alev, rhs[alev]);
         linop.applyInhomogNeumannTerm(alev, rhs[alev]);
@@ -1036,8 +1036,8 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
         const int nmglevs = linop.NMGLevels(alev);
         for (int mglev = 0; mglev < nmglevs; ++mglev)
         {
-            res   [alev][mglev].setVal(RT(0.0));
-            rescor[alev][mglev].setVal(RT(0.0));
+            setVal(res   [alev][mglev], RT(0.0));
+            setVal(rescor[alev][mglev], RT(0.0));
         }
     }
 
@@ -1054,7 +1054,7 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
                 if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev,mglev)); }
                 cor[alev][mglev] = linop.make(alev, mglev, _ng);
             }
-            cor[alev][mglev].setVal(RT(0.0));
+            setVal(cor[alev][mglev], RT(0.0));
         }
     }
 
@@ -1070,7 +1070,7 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
                 if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev,mglev)); }
                 cor_hold[alev][mglev] = linop.make(alev, mglev, _ng);
             }
-            cor_hold[alev][mglev].setVal(RT(0.0));
+            setVal(cor_hold[alev][mglev], RT(0.0));
         }
     }
     for (int alev = 1; alev < finest_amr_lev; ++alev)
@@ -1081,7 +1081,7 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
             if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev)); }
             cor_hold[alev][0] = linop.make(alev, 0, _ng);
         }
-        cor_hold[alev][0].setVal(RT(0.0));
+        setVal(cor_hold[alev][0], RT(0.0));
     }
 
     if (linop.m_parent // no embedded N-Solve
@@ -1110,30 +1110,32 @@ template <typename MF>
 void
 MLMGT<MF>::prepareForNSolve ()
 {
-    ns_linop = linop.makeNLinOp(nsolve_grid_size);
+    if constexpr (IsMultiFabLike_v<MF>) {
+        ns_linop = linop.makeNLinOp(nsolve_grid_size);
 
-    int nghost = 0;
-    if (cf_strategy == CFStrategy::ghostnodes) { nghost = linop.getNGrow(); }
-
-    const BoxArray& ba = (*ns_linop).m_grids[0][0];
-    const DistributionMapping& dm =(*ns_linop).m_dmap[0][0];
-
-    int ng = 1;
-    if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; }
-    ns_sol = std::make_unique<MF>(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0)));
-    ng = 0;
-    if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; }
-    ns_rhs = std::make_unique<MF>(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0)));
-    ns_sol->setVal(RT(0.0));
-    ns_rhs->setVal(RT(0.0));
-
-    ns_linop->setLevelBC(0, ns_sol.get());
-
-    ns_mlmg = std::make_unique<MLMGT<MF>>(*ns_linop);
-    ns_mlmg->setVerbose(0);
-    ns_mlmg->setFixedIter(1);
-    ns_mlmg->setMaxFmgIter(20);
-    ns_mlmg->setBottomSolver(BottomSolver::smoother);
+        int nghost = 0;
+        if (cf_strategy == CFStrategy::ghostnodes) { nghost = linop.getNGrow(); }
+
+        const BoxArray& ba = (*ns_linop).m_grids[0][0];
+        const DistributionMapping& dm =(*ns_linop).m_dmap[0][0];
+
+        int ng = 1;
+        if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; }
+        ns_sol = std::make_unique<MF>(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0)));
+        ng = 0;
+        if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; }
+        ns_rhs = std::make_unique<MF>(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0)));
+        setVal(*ns_sol, RT(0.0));
+        setVal(*ns_rhs, RT(0.0));
+
+        ns_linop->setLevelBC(0, ns_sol.get());
+
+        ns_mlmg = std::make_unique<MLMGT<MF>>(*ns_linop);
+        ns_mlmg->setVerbose(0);
+        ns_mlmg->setFixedIter(1);
+        ns_mlmg->setMaxFmgIter(20);
+        ns_mlmg->setBottomSolver(BottomSolver::smoother);
+    }
 }
 
 // in  : Residual (res) on the finest AMR level
@@ -1149,7 +1151,7 @@ void MLMGT<MF>::oneIter (int iter)
 
         IntVect nghost(0);
         if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(alev)); }
-        sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost);
+        LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost);
 
         // compute residual for the coarse AMR level
         computeResWithCrseSolFineCor(alev-1,alev);
@@ -1175,7 +1177,7 @@ void MLMGT<MF>::oneIter (int iter)
 
         IntVect nghost(0);
         if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(0)); }
-        sol[0].LocalAdd(cor[0][0], 0, 0, ncomp, nghost);
+        LocalAdd(sol[0], cor[0][0], 0, 0, ncomp, nghost);
     }
 
     for (int alev = 1; alev <= finest_amr_lev; ++alev)
@@ -1185,10 +1187,10 @@ void MLMGT<MF>::oneIter (int iter)
 
         IntVect nghost(0);
         if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(alev)); }
-        sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost);
+        LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost);
 
         if (alev != finest_amr_lev) {
-            cor_hold[alev][0].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost);
+            LocalAdd(cor_hold[alev][0], cor[alev][0], 0, 0, ncomp, nghost);
         }
 
         // Update fine AMR level correction
@@ -1196,10 +1198,10 @@ void MLMGT<MF>::oneIter (int iter)
 
         miniCycle(alev);
 
-        sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost);
+        LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost);
 
         if (alev != finest_amr_lev) {
-            cor[alev][0].LocalAdd(cor_hold[alev][0], 0, 0, ncomp, nghost);
+            LocalAdd(cor[alev][0], cor_hold[alev][0], 0, 0, ncomp, nghost);
         }
     }
 
@@ -1231,12 +1233,12 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
 
         if (verbose >= 4)
         {
-            RT norm = res[amrlev][mglev].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(res[amrlev][mglev],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev
                            << "   DN: Norm before smooth " << norm << "\n";
         }
 
-        cor[amrlev][mglev].setVal(RT(0.0));
+        setVal(cor[amrlev][mglev], RT(0.0));
         bool skip_fillboundary = true;
         for (int i = 0; i < nu1; ++i) {
             linop.smooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev], skip_fillboundary);
@@ -1248,7 +1250,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
 
         if (verbose >= 4)
         {
-            RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev
                            << "   DN: Norm after  smooth " << norm << "\n";
         }
@@ -1262,7 +1264,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
     {
         if (verbose >= 4)
         {
-            RT norm = res[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(res[amrlev][mglev_bottom],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev_bottom
                            << "   DN: Norm before bottom " << norm << "\n";
         }
@@ -1270,7 +1272,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
         if (verbose >= 4)
         {
             computeResOfCorrection(amrlev, mglev_bottom);
-            RT norm = rescor[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev_bottom],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev_bottom
                            << "   UP: Norm after  bottom " << norm << "\n";
         }
@@ -1279,11 +1281,11 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
     {
         if (verbose >= 4)
         {
-            RT norm = res[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(res[amrlev][mglev_bottom],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev_bottom
                            << "       Norm before smooth " << norm << "\n";
         }
-        cor[amrlev][mglev_bottom].setVal(RT(0.0));
+        setVal(cor[amrlev][mglev_bottom], RT(0.0));
         bool skip_fillboundary = true;
         for (int i = 0; i < nu1; ++i) {
             linop.smooth(amrlev, mglev_bottom, cor[amrlev][mglev_bottom],
@@ -1293,7 +1295,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
         if (verbose >= 4)
         {
             computeResOfCorrection(amrlev, mglev_bottom);
-            RT norm = rescor[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev_bottom],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev  << " " << mglev_bottom
                            << "       Norm after  smooth " << norm << "\n";
         }
@@ -1308,7 +1310,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
         if (verbose >= 4)
         {
             computeResOfCorrection(amrlev, mglev);
-            RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev
                            << "   UP: Norm before smooth " << norm << "\n";
         }
@@ -1321,7 +1323,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
         if (verbose >= 4)
         {
             computeResOfCorrection(amrlev, mglev);
-            RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev
                            << "   UP: Norm after  smooth " << norm << "\n";
         }
@@ -1361,12 +1363,12 @@ MLMGT<MF>::mgFcycle ()
         // rescor = res - L(cor)
         computeResOfCorrection(amrlev, mglev);
         // res = rescor; this provides b to the vcycle below
-        res[amrlev][mglev].LocalCopy(rescor[amrlev][mglev], 0, 0, ncomp, nghost);
+        LocalCopy(res[amrlev][mglev], rescor[amrlev][mglev], 0, 0, ncomp, nghost);
 
         // save cor; do v-cycle; add the saved to cor
         std::swap(cor[amrlev][mglev], cor_hold[amrlev][mglev]);
         mgVcycle(amrlev, mglev);
-        cor[amrlev][mglev].LocalAdd(cor_hold[amrlev][mglev], 0, 0, ncomp, nghost);
+        LocalAdd(cor[amrlev][mglev], cor_hold[amrlev][mglev], 0, 0, ncomp, nghost);
     }
 }
 
@@ -1393,16 +1395,16 @@ MLMGT<MF>::NSolve (MLMGT<MF>& a_solver, MF& a_sol, MF& a_rhs)
 {
     BL_PROFILE("MLMG::NSolve()");
 
-    a_sol.setVal(RT(0.0));
+    setVal(a_sol, RT(0.0));
 
     MF const& res_bottom = res[0].back();
-    if (BoxArray::SameRefs(a_rhs.boxArray(),res_bottom.boxArray()) &&
-        DistributionMapping::SameRefs(a_rhs.DistributionMap(),res_bottom.DistributionMap()))
+    if (BoxArray::SameRefs(boxArray(a_rhs),boxArray(res_bottom)) &&
+        DistributionMapping::SameRefs(DistributionMap(a_rhs),DistributionMap(res_bottom)))
     {
-        a_rhs.LocalCopy(res_bottom, 0, 0, ncomp, IntVect(0));
+        LocalCopy(a_rhs, res_bottom, 0, 0, ncomp, IntVect(0));
     } else {
-        a_rhs.setVal(RT(0.0));
-        a_rhs.ParallelCopy(res_bottom);
+        setVal(a_rhs, RT(0.0));
+        ParallelCopy(a_rhs, res_bottom, 0, 0, ncomp);
     }
 
     a_solver.solve(Vector<MF*>{&a_sol}, Vector<MF const*>{&a_rhs},
@@ -1428,7 +1430,7 @@ MLMGT<MF>::actualBottomSolve ()
     auto& x = cor[amrlev][mglev];
     auto& b = res[amrlev][mglev];
 
-    x.setVal(RT(0.0));
+    setVal(x, RT(0.0));
 
     if (bottom_solver == BottomSolver::smoother)
     {
@@ -1444,9 +1446,9 @@ MLMGT<MF>::actualBottomSolve ()
         MF raii_b;
         if (linop.isBottomSingular() && linop.getEnforceSingularSolvable())
         {
-            const IntVect ng = b.nGrowVect();
+            const IntVect ng = nGrowVect(b);
             raii_b = linop.make(amrlev, mglev, ng);
-            raii_b.LocalCopy(b, 0, 0, ncomp, ng);
+            LocalCopy(raii_b, b, 0, 0, ncomp, ng);
             bottom_b = &raii_b;
 
             makeSolvable(amrlev,mglev,*bottom_b);
@@ -1486,7 +1488,7 @@ MLMGT<MF>::actualBottomSolve ()
             int ret = bottomSolveWithCG(x, *bottom_b, cg_type);
             // If the MLMG solve failed then set the correction to zero
             if (ret != 0) {
-                cor[amrlev][mglev].setVal(RT(0.0));
+                setVal(cor[amrlev][mglev], RT(0.0));
                 if (bottom_solver == BottomSolver::cgbicg ||
                     bottom_solver == BottomSolver::bicgcg) {
                     if (bottom_solver == BottomSolver::cgbicg) {
@@ -1496,7 +1498,7 @@ MLMGT<MF>::actualBottomSolve ()
                     }
                     ret = bottomSolveWithCG(x, *bottom_b, cg_type);
                     if (ret != 0) {
-                        cor[amrlev][mglev].setVal(RT(0.0));
+                        setVal(cor[amrlev][mglev], RT(0.0));
                     } else { // switch permanently
                         if (cg_type == MLCGSolverT<MF>::Type::CG) {
                             bottom_solver = BottomSolver::cg;
@@ -1526,6 +1528,7 @@ MLMGT<MF>::bottomSolveWithCG (MF& x, const MF& b, typename MLCGSolverT<MF>::Type
     cg_solver.setSolver(type);
     cg_solver.setVerbose(bottom_verbose);
     cg_solver.setMaxIter(bottom_maxiter);
+    cg_solver.setInitSolnZeroed(true);
     if (cf_strategy == CFStrategy::ghostnodes) { cg_solver.setNGhost(linop.getNGrow()); }
 
     int ret = cg_solver.solve(x, b, bottom_reltol, bottom_abstol);
@@ -1590,7 +1593,7 @@ MLMGT<MF>::computeResWithCrseSolFineCor (int calev, int falev)
     linop.solutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata);
 
     linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous);
-    fine_res.LocalCopy(fine_rescor, 0, 0, ncomp, nghost);
+    LocalCopy(fine_res, fine_rescor, 0, 0, ncomp, nghost);
 
     linop.reflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs);
 
@@ -1618,7 +1621,7 @@ MLMGT<MF>::computeResWithCrseCorFineCor (int falev)
     // fine_rescor = fine_res - L(fine_cor)
     linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res,
                              BCMode::Inhomogeneous, &crse_cor);
-    fine_res.LocalCopy(fine_rescor, 0, 0, ncomp, nghost);
+    LocalCopy(fine_res, fine_rescor, 0, 0, ncomp, nghost);
 }
 
 // Interpolate correction from coarse to fine AMR level.
@@ -1647,9 +1650,9 @@ MLMGT<MF>::interpCorrection (int alev)
     }
 
     MF cfine = linop.makeCoarseAmr(alev, IntVect(ng_dst));
-    cfine.setVal(RT(0.0));
-    cfine.ParallelCopy(crse_cor, 0, 0, ncomp,  IntVect(ng_src), IntVect(ng_dst),
-                       crse_geom.periodicity());
+    setVal(cfine, RT(0.0));
+    ParallelCopy(cfine, crse_cor, 0, 0, ncomp, IntVect(ng_src), IntVect(ng_dst),
+                 crse_geom.periodicity());
 
     linop.interpolationAmr(alev, fine_cor, cfine, nghost); // NOLINT(readability-suspicious-call-argument)
 }
@@ -1688,7 +1691,7 @@ MLMGT<MF>::addInterpCorrection (int alev, int mglev)
     else
     {
         cfine = linop.makeCoarseMG(alev, mglev, IntVect(0));
-        cfine.ParallelCopy(crse_cor,0,0,ncomp,IntVect(0),IntVect(0));
+        ParallelCopy(cfine, crse_cor, 0, 0, ncomp);
         cmf = &cfine;
     }
 
diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H
index 16004d1231e..4ae8b7c4365 100644
--- a/Src/Particle/AMReX_Particle.H
+++ b/Src/Particle/AMReX_Particle.H
@@ -24,15 +24,6 @@ namespace
         constexpr Long NoSplitParticleID = GhostParticleID - 4;
     }
 
-    /** Used for 32bit int particle Ids, as in pure SoA layout */
-    namespace IntParticleIds {
-        constexpr int GhostParticleID = 2147483647; // 2**31-1
-        constexpr int VirtualParticleID = GhostParticleID - 1;
-        constexpr int LastParticleID = GhostParticleID - 2;
-        constexpr int DoSplitParticleID = GhostParticleID - 3;
-        constexpr int NoSplitParticleID = GhostParticleID - 4;
-    }
-
     using namespace LongParticleIds;
 }
 
diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
index a1bdbdd56e2..1048df8724d 100644
--- a/Src/Particle/AMReX_ParticleTile.H
+++ b/Src/Particle/AMReX_ParticleTile.H
@@ -458,7 +458,7 @@ SoAParticle<NArrayReal, NArrayInt>::NextID ()
 #endif
     next = the_next_id++;
 
-    if (next > IntParticleIds::LastParticleID) {
+    if (next > LongParticleIds::LastParticleID) {
         amrex::Abort("SoAParticle<NArrayReal, NArrayInt>::NextID() -- too many particles");
     }
 
@@ -470,7 +470,7 @@ int
 SoAParticle<NArrayReal, NArrayInt>::UnprotectedNextID ()
 {
     int next = the_next_id++;
-    if (next > IntParticleIds::LastParticleID) {
+    if (next > LongParticleIds::LastParticleID) {
         amrex::Abort("SoAParticle<NArrayReal, NArrayInt>::NextID() -- too many particles");
     }
     return next;
@@ -1039,7 +1039,9 @@ struct ParticleTile
 
     void shrink_to_fit ()
     {
-        if constexpr (!ParticleType::is_soa_particle) {
+        if constexpr (ParticleType::is_soa_particle) {
+            GetStructOfArrays().GetIdCPUData().shrink_to_fit();
+        } else {
             m_aos_tile().shrink_to_fit();
         }
         for (int j = 0; j < NumRealComps(); ++j)
@@ -1058,7 +1060,9 @@ struct ParticleTile
     Long capacity () const
     {
         Long nbytes = 0;
-        if constexpr (!ParticleType::is_soa_particle) {
+        if constexpr (ParticleType::is_soa_particle) {
+            nbytes += GetStructOfArrays().GetIdCPUData().capacity() * sizeof(uint64_t);
+        } else {
             nbytes += m_aos_tile().capacity() * sizeof(ParticleType);
         }
         for (int j = 0; j < NumRealComps(); ++j)
@@ -1077,7 +1081,9 @@ struct ParticleTile
 
     void swap (ParticleTile<ParticleType, NArrayReal, NArrayInt, Allocator>& other)
     {
-        if constexpr (!ParticleType::is_soa_particle) {
+        if constexpr (ParticleType::is_soa_particle) {
+            GetStructOfArrays().GetIdCPUData().swap(other.GetStructOfArrays().GetIdCPUData());
+        } else {
             m_aos_tile().swap(other.GetArrayOfStructs()());
         }
         for (int j = 0; j < NumRealComps(); ++j)
diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H
index aa737455ce6..7ca26cef064 100644
--- a/Src/Particle/AMReX_ParticleTransformation.H
+++ b/Src/Particle/AMReX_ParticleTransformation.H
@@ -608,10 +608,8 @@ int filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile&
  * \param p predicate function - particles will be copied if p returns true
  * \param src_start the offset at which to start reading particles from src
  * \param dst_start the offset at which to start writing particles to dst
- * \param n the number of particles to apply the operation to
  *
  */
-
 template <typename DstTile, typename SrcTile, typename Pred, typename F, typename Index,
           std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,Index> nvccfoo = 0>
 Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&& f,
diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
index 5430cd34037..682a82450f0 100644
--- a/Src/Particle/AMReX_ParticleUtil.H
+++ b/Src/Particle/AMReX_ParticleUtil.H
@@ -47,7 +47,6 @@ numParticlesOutOfRange (Iterator const& pti, int nGrow)
  * \param nGrow the number of grow cells allowed.
  *
  */
-
 template <class Iterator, std::enable_if_t<IsParticleIterator<Iterator>::value && !Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0>
 int
 numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)
@@ -372,6 +371,26 @@ IntVect getParticleCell (P const& p,
     return iv;
 }
 
+/** Returns the cell of particle i in ptd; SoA tiles use m_rdata[d][i], others defer to the m_aos[i] overload. */
+template <typename PTD>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+IntVect getParticleCell (PTD const& ptd, int i,
+                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
+                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
+                         const Box& domain) noexcept
+{
+    if constexpr (PTD::ParticleType::is_soa_particle) {
+        IntVect iv(
+                   AMREX_D_DECL(int(amrex::Math::floor((ptd.m_rdata[0][i]-plo[0])*dxi[0])),
+                                int(amrex::Math::floor((ptd.m_rdata[1][i]-plo[1])*dxi[1])),
+                                int(amrex::Math::floor((ptd.m_rdata[2][i]-plo[2])*dxi[2]))));
+        iv += domain.smallEnd();
+        return iv;
+    } else {
+        return getParticleCell(ptd.m_aos[i], plo, dxi, domain);
+    }
+}
+
 struct DefaultAssignor
 {
 
@@ -675,6 +694,7 @@ void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type n
             }
         });
 #else
+    amrex::ignore_unused(pperm, pglobal_idx);
     Abort("Not implemented");
 #endif
 
diff --git a/Src/Particle/AMReX_StructOfArrays.H b/Src/Particle/AMReX_StructOfArrays.H
index 6cd498e20a2..4de35e085ca 100644
--- a/Src/Particle/AMReX_StructOfArrays.H
+++ b/Src/Particle/AMReX_StructOfArrays.H
@@ -195,13 +195,12 @@ struct StructOfArrays {
         for (int i = 0; i < int(m_runtime_idata.size()); ++i) { m_runtime_idata[i].resize(count); }
     }
 
-    [[nodiscard]] IdCPU* idcpuarray () {
+    [[nodiscard]] uint64_t* idcpuarray () {
         if constexpr (use64BitIdCpu == true) {
             return m_idcpu.dataPtr();
         } else {
             return nullptr;
         }
-
     }
 
     [[nodiscard]] GpuArray<ParticleReal*, NReal> realarray ()
diff --git a/Tools/CMake/AMReXFlagsTargets.cmake b/Tools/CMake/AMReXFlagsTargets.cmake
index 9e3073cd53f..a2e86b2fbd3 100644
--- a/Tools/CMake/AMReXFlagsTargets.cmake
+++ b/Tools/CMake/AMReXFlagsTargets.cmake
@@ -89,7 +89,7 @@ target_compile_options( Flags_CXX
    $<${_cxx_appleclang_rwdbg}:>
    $<${_cxx_appleclang_rel}:>
    $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable>
-   $<${_cxx_intelllvm_rwdbg}:-g1>
+   $<${_cxx_intelllvm_rwdbg}:-gline-tables-only -fdebug-info-for-profiling> # recommended by Intel VTune
    $<${_cxx_intelllvm_rel}:>
    )
 
diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake
index 7f24cb46db4..319a6939acc 100644
--- a/Tools/CMake/AMReXOptions.cmake
+++ b/Tools/CMake/AMReXOptions.cmake
@@ -213,6 +213,16 @@ if (AMReX_SYCL)
       endif()
    endif()
 
+   # Maximum number of parallel SYCL link jobs; the environment may seed the default.
+   if (DEFINED ENV{AMREX_PARALLEL_LINK_JOBS})
+      set(AMReX_PARALLEL_LINK_JOBS_DEFAULT "$ENV{AMREX_PARALLEL_LINK_JOBS}")
+   else()
+      set(AMReX_PARALLEL_LINK_JOBS_DEFAULT 1)
+   endif()
+   set(AMReX_PARALLEL_LINK_JOBS "${AMReX_PARALLEL_LINK_JOBS_DEFAULT}" CACHE STRING "SYCL max parallel link jobs")
+   if (NOT AMReX_PARALLEL_LINK_JOBS MATCHES "^[1-9][0-9]*$")
+      message(FATAL_ERROR "AMReX_PARALLEL_LINK_JOBS (${AMReX_PARALLEL_LINK_JOBS}) must be a positive integer")
+   endif()
 endif ()
 
 # --- HIP ----
diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake
index a67571dc412..2b48f1c53fe 100644
--- a/Tools/CMake/AMReXSYCL.cmake
+++ b/Tools/CMake/AMReXSYCL.cmake
@@ -88,4 +88,10 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND "${CMAKE_BUILD_TYPE}" MATCHES "Debug"
       "$<${_cxx_sycl}:-fsycl-link-huge-device-code>" )
 endif ()
 
+# Pass the requested link-job count to the SYCL link step only when it exceeds one.
+if (AMReX_PARALLEL_LINK_JOBS GREATER 1)
+   target_link_options( SYCL INTERFACE
+      "$<${_cxx_sycl}:-fsycl-max-parallel-link-jobs=${AMReX_PARALLEL_LINK_JOBS}>" )
+endif ()
+
 unset(_cxx_sycl)