
Commit
Merge branch 'development' into subch_planar
brady-ryan committed Sep 6, 2024
2 parents 4ac0a4c + 97555d4 commit d9ea765
Showing 139 changed files with 4,277 additions and 984 deletions.
1 change: 1 addition & 0 deletions .github/workflows/good_defines.txt
@@ -1,3 +1,4 @@
ALLOW_GPU_PRINTF
AMREX_DEBUG
AMREX_PARTICLES
AMREX_SPACEDIM
49 changes: 49 additions & 0 deletions CHANGES.md
@@ -1,3 +1,52 @@
# 24.09

* Code clean-ups / clang-tidy (#2942, #2949)

* update the `hse_convergence` readme to reflect current convergence
(#2946)

* update the `bubble_convergence` plotting script (#2947)

* new Frontier scaling numbers (#2948)

* more GPU error printing (#2944)

* science problem updates: `flame_wave` (#2943)

* documentation updates (#2939)

# 24.08

* lazy QueueReduction has been enabled for the timing diagnostics
(#2926)

* The `job_info` file output now correctly labels the compute time
as GPU-hours instead of CPU-hours when running on GPUs (#2930)

* We can now output warnings when running on GPUs if you build
with `USE_GPU_PRINTF=TRUE`(#2923, #2928)

* Code clean-ups / sync with Microphysics (#2900, #2901, #2905,
#2906, #2909, #2912, #2919, #2922, #2932, #2933, #2936, #2938,
#2940)

* The area weighting in the diagnostics in `subch_planar` was fixed
(#2885)

* A script to produce a resolution study for the `circular_det`
problem was added (#2857)

* science problem updates: `xrb_layered` (#2917), `nova` (#2913),
`wdmerger` (#2907, #2918, #2931), `Detonation` (#2902)

* updated scaling results on Frontier (#2904, #2914, #2915)

* more exact Riemann solver clean-up (#2896, #2897, #2898)
and clean-ups to the two shock solvers (#2895)

* fix issues with eigenvectors and clang-tidy in the MHD solver
(#2880)

# 24.07

* Reorganizing of the existing 2-shock and HLL Riemann solvers
2 changes: 0 additions & 2 deletions Diagnostics/DustCollapse/GNUmakefile
@@ -10,8 +10,6 @@ USE_OMP = FALSE

USE_REACT = FALSE

USE_ACC = FALSE

# programs to be compiled
ALL: dustcollapse_$(DIM)d.ex

2 changes: 0 additions & 2 deletions Diagnostics/Radiation/GNUmakefile
@@ -10,8 +10,6 @@ USE_OMP = FALSE

USE_REACT = FALSE

USE_ACC = FALSE

ALL: radhelp

radhelp:
2 changes: 0 additions & 2 deletions Diagnostics/Sedov/GNUmakefile
@@ -10,8 +10,6 @@ USE_OMP = FALSE

USE_REACT = FALSE

USE_ACC = FALSE

# programs to be compiled
ALL: sedov_$(DIM)d.ex

49 changes: 32 additions & 17 deletions Docs/source/faq.rst
@@ -17,31 +17,46 @@ Compiling
There are 2 things you can do to check what’s happening. First, inspect
the directories in ``VPATH_LOCATIONS``. This can be done via:

::
.. prompt:: bash

make print-VPATH_LOCATIONS
make print-VPATH_LOCATIONS

Next, ask make to tell you where it is finding each of the source
files. This is done through a script ``find_files_vpath.py``
that is hooked into Castro’s build system. You can run this as:

::
.. prompt:: bash

make file_locations
make file_locations

At the end of the report, it will list any files it cannot find in
the vpath. Some of these are to be expected (like ``extern.f90``
and ``buildInfo.cpp``—these are written at compile-time. But any
other missing files need to be investigated.
the vpath. Some of these are to be expected (like
``buildInfo.cpp``—these are written at compile-time). But any other
missing files need to be investigated.

#. *I put a copy of one of the header files (e.g. ``problem_tagging.H``)
in my problem setup but it does not seem to be recognized / used by
the build system. Why doesn't my executable use my custom version
of the header?*

This is likely due to compiler caching via ccache. You need to
clear the cache and do a clean build:

.. prompt:: bash

ccache -C
make clean

Then rebuild and it should be recognized.

#. *I’m still having trouble compiling. How can I find out what
all of the make variables are set to?*

Use:

::
.. prompt:: bash

make help
make help

This will tell you the value of all the compilers and their options.

@@ -104,7 +119,7 @@ Debugging

Given a MultiFab ``mf``, you can dump out the state as:

::
.. code:: c++

print_state(mf, IntVect(AMREX_D_DECL(10, 20, 30)));

@@ -119,7 +134,7 @@
You can simply output a FAB to ``std::cout``. Imagine that you
are in an MFIter loop, with a MultiFab ``mf``:

::
.. code:: c++

FArrayBox& S = mf[mfi];
std::cout << S << std::endl;
@@ -143,9 +158,9 @@ Profiling
When you run, a file named ``gmon.out`` will be produced. This can
be processed with gprof by running:

::
.. prompt:: bash

gprof exec-name
gprof exec-name

where *exec-name* is the name of the executable. More detailed
line-by-line information can be obtained by passing the -l
@@ -159,9 +174,9 @@ Managing Runs

Create a file called ``dump_and_continue``, e.g., as:

::
.. prompt:: bash

touch dump_and_continue
touch dump_and_continue

This will force the code to output a checkpoint file that can be used
to restart. Other options are ``plot_and_continue`` to output
@@ -193,9 +208,9 @@ Managing Runs

The build information (including git hashes, modules, EoS, network, etc.) can be displayed by running the executable as

::
.. prompt:: bash

./Castro.exe --describe
./Castro.exe --describe

.. _ch:faq:vis:

31 changes: 31 additions & 0 deletions Docs/source/mpi_plus_x.rst
@@ -130,6 +130,37 @@ To enable this, compile with::
USE_HIP = TRUE


Printing Warnings from GPU Kernels
==================================

.. index:: USE_GPU_PRINTF

Castro will output warnings if several assumptions are violated (often
triggering a retry in the process). On GPUs, printing from a kernel
(using ``printf()``) can increase the number of registers a kernel needs,
causing performance problems. As a result, warnings are disabled by
wrapping them in ``#ifndef AMREX_USE_GPU``.

However, for debugging GPU runs, sometimes we want to see these
warnings. The build option ``USE_GPU_PRINTF=TRUE`` will enable these
(by setting the preprocessor flag ``ALLOW_GPU_PRINTF``).

.. note::

Not every warning has been enabled for GPUs.

.. tip::

On AMD architectures, it seems necessary to use unbuffered I/O. This
can be accomplished in the job submission script (for SLURM) by doing

::

srun -u ./Castro...




Working at Supercomputing Centers
=================================

13 changes: 7 additions & 6 deletions Exec/Make.Castro
@@ -55,6 +55,9 @@ endif
# Require C++17
CXXSTD := c++17

# Use Lazy QueueReduction for the timing outputs
LAZY := TRUE

# default integrator
INTEGRATOR_DIR ?= VODE

@@ -136,6 +139,10 @@ ifeq ($(USE_GPU),TRUE)
endif
endif

ifeq ($(USE_GPU_PRINTF),TRUE)
DEFINES += -DALLOW_GPU_PRINTF
endif

CASTRO_AUTO_SOURCE_DIR := $(TmpBuildDir)/castro_sources/$(optionsSuffix).EXE


@@ -150,12 +157,6 @@ build_status:
# The default is to include the sponge functionality
DEFINES += -DSPONGE

# OpenACC support
ifeq ($(USE_ACC), TRUE)
DEFINES += -DACC
endif



#------------------------------------------------------------------------------
# Castro directories
31 changes: 18 additions & 13 deletions Exec/gravity_tests/hse_convergence/README.md
@@ -7,29 +7,34 @@ in the plotfiles.

To run this problem, use one of the convergence scripts:

* ``convergence_plm.sh`` :
* `convergence_plm.sh` :

this runs CTU + PLM using the default HSE BCs and default
use_pslope, then with reflect BCs, then without use_pslope, and
finally runs with reflect instead of HSE BCs.
this runs CTU + PLM using:
1. the default HSE BCs and `use_pslope`
2. the HSE BCs with reflection and `use_pslope`
3. reflect BCs instead of HSE BCs without `use_pslope`
4. reflect BCs with `use_pslope`

These tests show that the best results come from HSE BCs + reflect vel
These tests show that the best results (by far) come from
`use_pslope=1` and reflecting BCs.

* convergence_ppm.sh :

this runs CTU + PPM in a similar set of configurations as PLM above
(with one additional one: grav_source_type = 4)
1. the default HSE BCs
2. HSE BCs with reflection
3. reflecting BCs
4. reflecting BCs with `use_pslope`

These tests show that the best results come from HSE BCs + reflect vel
These tests show that the best results (by far) come from
reflecting BCs with `use_pslope=1`, just like the PLM case.

* convergence_sdc.sh :

this uses the TRUE_SDC integration, first with SDC-2 + PLM and reflecting BCs,
the SDC-2 + PPM and reflecting BCs, then the same but HSE BCs, and finally
SDC-4 + reflect
this uses the TRUE_SDC integration, first with SDC-2 + PLM and
reflecting BCs, then SDC-2 + PPM and reflecting BCs, then the same
but HSE BCs, and finally SDC-4 + reflect

These tests show that the PLM + reflect (which uses the
well-balanced use_pslope) and the SDC-4 + reflect give the lowest
errors and expected (or better) convergence:


errors and expected (or better) convergence.
41 changes: 6 additions & 35 deletions Exec/gravity_tests/hse_convergence/convergence_plm.sh
@@ -58,43 +58,15 @@ pfile=`ls -t | grep -i hse_512_plt | head -1`
fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}


## plm + hse reflect + no pslope

ofile=plm-hsereflect-nopslope.converge.out

RUNPARAMS="
castro.ppm_type=0
castro.use_pslope=0
castro.hse_interp_temp=1
castro.hse_reflect_vels=1
"""

${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out
pfile=`ls -t | grep -i hse_64_plt | head -1`
fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel > ${ofile}

${EXEC} inputs.ppm.128 ${RUNPARAMS} >& 128.out
pfile=`ls -t | grep -i hse_128_plt | head -1`
fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}

${EXEC} inputs.ppm.256 ${RUNPARAMS} >& 256.out
pfile=`ls -t | grep -i hse_256_plt | head -1`
fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}

${EXEC} inputs.ppm.512 ${RUNPARAMS} >& 512.out
pfile=`ls -t | grep -i hse_512_plt | head -1`
fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}


## plm + reflect
## plm + reflect + nopslope

ofile=plm-reflect.converge.out
ofile=plm-reflect-nopslope.converge.out

RUNPARAMS="
castro.ppm_type=0
castro.use_pslope=1
castro.lo_bc=3
castro.hi_bc=3
castro.use_pslope=0
"""

${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out
@@ -114,16 +86,15 @@ pfile=`ls -t | grep -i hse_512_plt | head -1`
fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}


## plm + reflect + pslope

## plm + reflect + nopslope

ofile=plm-reflect-nopslope.converge.out
ofile=plm-reflect-pslope.converge.out

RUNPARAMS="
castro.ppm_type=0
castro.lo_bc=3
castro.hi_bc=3
castro.use_pslope=0
castro.use_pslope=1
"""

${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out
