diff --git a/CMakeLists.txt b/CMakeLists.txt
index e34c2051..4fcb09ef 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,21 +26,26 @@ include_directories(${YAML_CPP_INCLUDE_DIR})
 
 find_package(KokkosKernels REQUIRED)
 link_libraries(dl ${YAML_CPP_LIBRARIES} Kokkos::kokkoskernels)
-# Kokkos::BLAS Kokkos::LAPACK)
 
-set(exeName1 shawExe)
-add_executable(${exeName1}
+# executables
+add_executable(
+  shawExe
   ${CMAKE_CURRENT_SOURCE_DIR}/src/kokkos/main_fom.cc)
 
-set(exeName2 extractStateFromSnaps)
-add_executable(${exeName2}
+add_executable(
+  extractStateFromSnaps
   ${CMAKE_CURRENT_SOURCE_DIR}/src/tools/main_extract_state_from_snaps.cc)
 
+# tests
 enable_testing()
 add_subdirectory(tests)
 
 
 
+# ---------------------
+# REMOVE at some point
+# ---------------------
+# Kokkos::BLAS Kokkos::LAPACK)
 #include_directories(${EIGEN_INCLUDE_DIR} ${YAMLCPP_INCLUDE_DIR} ${KOKKOS_INCLUDE_DIR})
 #link_directories   (${BLAS_LIB_DIR} ${YAMLCPP_LIB_DIR})
 #link_libraries	   (dl ${OMPLINK} yaml-cpp Kokkos::kokkoskernels ${BLASLIBNAME} gfortran)
diff --git a/demos/fom_rank1/input.yaml b/demos/demo1/input.yaml
similarity index 100%
rename from demos/fom_rank1/input.yaml
rename to demos/demo1/input.yaml
diff --git a/demos/fom_rank1/plotSeismogram.py b/demos/demo1/plotSeismogram.py
similarity index 90%
rename from demos/fom_rank1/plotSeismogram.py
rename to demos/demo1/plotSeismogram.py
index 6b171fb5..e7c8f8bf 100644
--- a/demos/fom_rank1/plotSeismogram.py
+++ b/demos/demo1/plotSeismogram.py
@@ -8,19 +8,19 @@ def doPlot(panelId, t, data, key):
   plt.subplot(panelId)
   plt.grid('on')
 
-  plt.plot(t, data[key], '-o', color='m',
+  plt.plot(t, data[key], '-o', color='r',
              markerfacecolor='none',
-             markersize=3, linewidth=1, 
+             markersize=0, linewidth=2, 
              label="Receiver at " + key+'\u00b0')
 
   lg = plt.legend(loc="upper right",
              ncol=1, fontsize=15, labelspacing=.3,
              handletextpad=0.2,
              frameon=False, markerscale=0.75)
-  plt.setp(lg.get_texts(), color='w')
+  plt.setp(lg.get_texts(), color='gray')
 
   plt.xlim([-50, 2050])
-  plt.xticks(np.linspace(0, 2000, 6), color='w')
+  plt.xticks(np.linspace(0, 2000, 6), color='gray')
   plt.ylim([-1.6e-6, 1.6e-6])
 
   ylab = r'$v_{\phi}(t)$'
@@ -28,7 +28,7 @@ def doPlot(panelId, t, data, key):
   plt.xlabel(r'Time (seconds)', fontsize=15)
 
   ax = plt.gca()
-  mycolor = 'w'
+  mycolor = 'gray'
   ax.xaxis.label.set_color(mycolor);
   ax.tick_params(axis='x', colors=mycolor)
   ax.yaxis.label.set_color(mycolor);
diff --git a/demos/fom_rank1/plotWavefield.py b/demos/demo1/plotWavefield.py
similarity index 100%
rename from demos/fom_rank1/plotWavefield.py
rename to demos/demo1/plotWavefield.py
diff --git a/demos/fom_rank1_sample_depth/input.yaml b/demos/demo2/input.yaml
similarity index 100%
rename from demos/fom_rank1_sample_depth/input.yaml
rename to demos/demo2/input.yaml
diff --git a/demos/fom_rank1_sample_depth/plotSeismogram.py b/demos/demo2/plotSeismogram.py
similarity index 88%
rename from demos/fom_rank1_sample_depth/plotSeismogram.py
rename to demos/demo2/plotSeismogram.py
index eb8b6112..5fb3a3b5 100644
--- a/demos/fom_rank1_sample_depth/plotSeismogram.py
+++ b/demos/demo2/plotSeismogram.py
@@ -24,36 +24,36 @@ def doPlot(panelId, t, data, angle, depths):
   d3 = data['d3'][row, :]
 
   plt.title("Seismogram for receiver at " + angle+'\u00b0',
-            fontsize=15, color='w')
+            fontsize=15, color='gray')
 
   plt.plot(t, d0, '-o', color='m',
            markerfacecolor='none',
-           markersize=1, linewidth=1.8,
+           markersize=0, linewidth=1.8,
            label='With source depth='+depths[0]+' km')
 
   plt.plot(t, d1, '-s', color='c',
            markerfacecolor='none',
-           markersize=1, linewidth=1.8,
+           markersize=0, linewidth=1.8,
            label='With source depth='+depths[1]+' km')
 
   plt.plot(t, d2, '-*', color='r',
            markerfacecolor='none',
-           markersize=1, linewidth=1.8,
+           markersize=0, linewidth=1.8,
            label='With source depth='+depths[2]+' km')
 
   plt.plot(t, d3, '-v', color='y',
            markerfacecolor='none',
-           markersize=1, linewidth=1.8,
+           markersize=0, linewidth=1.8,
            label='With source depth='+depths[3]+' km')
 
   lg = plt.legend(loc="upper right",
              ncol=1, fontsize=12, labelspacing=.3,
              handletextpad=0.2,
              frameon=False, markerscale=0.75)
-  plt.setp(lg.get_texts(), color='w')
+  plt.setp(lg.get_texts(), color='gray')
 
   plt.xlim([-50, 2050])
-  plt.xticks(np.linspace(0, 2000, 6), color='w')
+  plt.xticks(np.linspace(0, 2000, 6), color='gray')
   plt.ylim([-2.5e-6, 2.5e-6])
 
   ylab = r'$v_{\phi}(t)$'
@@ -61,7 +61,7 @@ def doPlot(panelId, t, data, angle, depths):
   plt.xlabel(r'Time (seconds)', fontsize=15)
 
   ax = plt.gca()
-  mycolor = 'w'
+  mycolor = 'gray'
   ax.xaxis.label.set_color(mycolor);
   ax.tick_params(axis='x', colors=mycolor)
   ax.yaxis.label.set_color(mycolor);
diff --git a/demos/fom_rank1_sample_depth/plotWavefield.py b/demos/demo2/plotWavefield.py
similarity index 93%
rename from demos/fom_rank1_sample_depth/plotWavefield.py
rename to demos/demo2/plotWavefield.py
index 85395dc8..ec0bdf57 100644
--- a/demos/fom_rank1_sample_depth/plotWavefield.py
+++ b/demos/demo2/plotWavefield.py
@@ -35,7 +35,7 @@ def doPlot(th, r, z, figID, bd, outName, title, plotSource=False):
   fig1 = plt.figure(figID)
   ax1 = fig1.add_subplot(111, projection='polar')
 
-  h1=ax1.pcolormesh(th, r, z, cmap=cm1, shading = "flat",
+  h1=ax1.pcolormesh(th, r, z, cmap=cm1, shading = "auto",
                     vmin=bd[0], vmax=bd[1], zorder=1)
   ax1.set_ylim([cmbRadius, earthRadius])
   ax1.set_yticks([]) #[3480, 5701, 6371])
@@ -47,7 +47,7 @@ def doPlot(th, r, z, figID, bd, outName, title, plotSource=False):
                        r'$\pi/2$', r'$2\pi/6$', r'$\pi/6$', r'$0$'],
                       fontsize=11)
 
-  ax1.set_title(title, fontsize=15, color='w')
+  ax1.set_title(title, fontsize=15, color='gray')
   ax1.set_rorigin(-1)
   plotEarthSurf(ax1)
   plotCMB(ax1)
@@ -57,9 +57,9 @@ def doPlot(th, r, z, figID, bd, outName, title, plotSource=False):
   if plotSource:
     sourceRadius = earthRadius-640. #[km]
     c = ax1.scatter(np.pi/2.01, sourceRadius, c='r', s=15)  
-    ax1.text(np.pi/2.01, sourceRadius, "Source", horizontalalignment='center', verticalalignment='top', color='w')  
+    ax1.text(np.pi/2.01, sourceRadius, "Source", horizontalalignment='center', verticalalignment='top', color='gray')  
 
-  mycolor = 'w'
+  mycolor = 'gray'
   ax1.xaxis.label.set_color(mycolor);
   ax1.tick_params(axis='x', colors=mycolor)
   ax1.yaxis.label.set_color(mycolor);
diff --git a/demos/fom_rank2_sample_depth/input.yaml b/demos/demo3/input.yaml
similarity index 100%
rename from demos/fom_rank2_sample_depth/input.yaml
rename to demos/demo3/input.yaml
diff --git a/demos/fom_rank2_sample_depth/plotSeismogram.py b/demos/demo3/plotSeismogram.py
similarity index 95%
rename from demos/fom_rank2_sample_depth/plotSeismogram.py
rename to demos/demo3/plotSeismogram.py
index 0533ddda..2d7781be 100644
--- a/demos/fom_rank2_sample_depth/plotSeismogram.py
+++ b/demos/demo3/plotSeismogram.py
@@ -22,7 +22,7 @@ def doPlot(panelId, t, data, angle, depths):
   d3 = data['d3'][row, :]
 
   plt.title("Seismogram for receiver at " + angle+'\u00b0',
-            fontsize=15, color='w')
+            fontsize=15, color='gray')
 
   plt.plot(t, d0, '-o', color='m',
            markerfacecolor='none',
@@ -48,10 +48,10 @@ def doPlot(panelId, t, data, angle, depths):
              ncol=1, fontsize=12, labelspacing=.3,
              handletextpad=0.2,
              frameon=False, markerscale=0.75)
-  plt.setp(lg.get_texts(), color='w')
+  plt.setp(lg.get_texts(), color='gray')
 
   plt.xlim([-50, 2050])
-  plt.xticks(np.linspace(0, 2000, 6), color='w')
+  plt.xticks(np.linspace(0, 2000, 6), color='gray')
   plt.ylim([-2.5e-6, 2.5e-6])
 
   ylab = r'$v_{\phi}(t)$'
@@ -59,7 +59,7 @@ def doPlot(panelId, t, data, angle, depths):
   plt.xlabel(r'Time (seconds)', fontsize=15)
 
   ax = plt.gca()
-  mycolor = 'w'
+  mycolor = 'gray'
   ax.xaxis.label.set_color(mycolor);
   ax.tick_params(axis='x', colors=mycolor)
   ax.yaxis.label.set_color(mycolor);
diff --git a/docs/.doctrees/build_expert.doctree b/docs/.doctrees/build_expert.doctree
index 1d7082d7..00c628cb 100644
Binary files a/docs/.doctrees/build_expert.doctree and b/docs/.doctrees/build_expert.doctree differ
diff --git a/docs/.doctrees/build_stepbystep.doctree b/docs/.doctrees/build_stepbystep.doctree
index 372f4990..b74d0b0d 100644
Binary files a/docs/.doctrees/build_stepbystep.doctree and b/docs/.doctrees/build_stepbystep.doctree differ
diff --git a/docs/.doctrees/demo1.doctree b/docs/.doctrees/demo1.doctree
new file mode 100644
index 00000000..236f18a2
Binary files /dev/null and b/docs/.doctrees/demo1.doctree differ
diff --git a/docs/.doctrees/demo2.doctree b/docs/.doctrees/demo2.doctree
new file mode 100644
index 00000000..7ec46dd0
Binary files /dev/null and b/docs/.doctrees/demo2.doctree differ
diff --git a/docs/.doctrees/demo3.doctree b/docs/.doctrees/demo3.doctree
new file mode 100644
index 00000000..857cedcd
Binary files /dev/null and b/docs/.doctrees/demo3.doctree differ
diff --git a/docs/.doctrees/demo_rank1fom.doctree b/docs/.doctrees/demo_rank1fom.doctree
index dd3a48bb..2653b94f 100644
Binary files a/docs/.doctrees/demo_rank1fom.doctree and b/docs/.doctrees/demo_rank1fom.doctree differ
diff --git a/docs/.doctrees/demo_rank1fommulti.doctree b/docs/.doctrees/demo_rank1fommulti.doctree
index 56990079..2054867b 100644
Binary files a/docs/.doctrees/demo_rank1fommulti.doctree and b/docs/.doctrees/demo_rank1fommulti.doctree differ
diff --git a/docs/.doctrees/demo_rank2fom.doctree b/docs/.doctrees/demo_rank2fom.doctree
index d78ec261..ffa05e37 100644
Binary files a/docs/.doctrees/demo_rank2fom.doctree and b/docs/.doctrees/demo_rank2fom.doctree differ
diff --git a/docs/.doctrees/demos.doctree b/docs/.doctrees/demos.doctree
index c1d1055c..e2aa5885 100644
Binary files a/docs/.doctrees/demos.doctree and b/docs/.doctrees/demos.doctree differ
diff --git a/docs/.doctrees/environment.pickle b/docs/.doctrees/environment.pickle
index a29fc153..4ac7e0a3 100644
Binary files a/docs/.doctrees/environment.pickle and b/docs/.doctrees/environment.pickle differ
diff --git a/docs/.doctrees/goveq.doctree b/docs/.doctrees/goveq.doctree
index 8548c276..4c51f9e8 100644
Binary files a/docs/.doctrees/goveq.doctree and b/docs/.doctrees/goveq.doctree differ
diff --git a/docs/.doctrees/index.doctree b/docs/.doctrees/index.doctree
index 1a24cfe1..3952622e 100644
Binary files a/docs/.doctrees/index.doctree and b/docs/.doctrees/index.doctree differ
diff --git a/docs/.doctrees/performance.doctree b/docs/.doctrees/performance.doctree
new file mode 100644
index 00000000..e33b72ef
Binary files /dev/null and b/docs/.doctrees/performance.doctree differ
diff --git a/docs/_images/demo1_f1.png b/docs/_images/demo1_f1.png
index aff98f30..4f1db28a 100644
Binary files a/docs/_images/demo1_f1.png and b/docs/_images/demo1_f1.png differ
diff --git a/docs/_images/demo1_f2.png b/docs/_images/demo1_f2.png
index 4f97830e..589733df 100644
Binary files a/docs/_images/demo1_f2.png and b/docs/_images/demo1_f2.png differ
diff --git a/docs/_images/demo1_f3.png b/docs/_images/demo1_f3.png
index 89a2c759..f08bd1d8 100644
Binary files a/docs/_images/demo1_f3.png and b/docs/_images/demo1_f3.png differ
diff --git a/docs/_images/demo1_f4.png b/docs/_images/demo1_f4.png
index 99943b4b..efd09468 100644
Binary files a/docs/_images/demo1_f4.png and b/docs/_images/demo1_f4.png differ
diff --git a/docs/_images/demo2_f1.png b/docs/_images/demo2_f1.png
index 6b5a0f75..3712e5b7 100644
Binary files a/docs/_images/demo2_f1.png and b/docs/_images/demo2_f1.png differ
diff --git a/docs/_images/demo2_f2.png b/docs/_images/demo2_f2.png
index 8c4f32b5..d46e710b 100644
Binary files a/docs/_images/demo2_f2.png and b/docs/_images/demo2_f2.png differ
diff --git a/docs/_images/demo2_f3.png b/docs/_images/demo2_f3.png
index a8ec7d43..baef6aa5 100644
Binary files a/docs/_images/demo2_f3.png and b/docs/_images/demo2_f3.png differ
diff --git a/docs/_images/demo3_f1.png b/docs/_images/demo3_f1.png
index eb24f6d1..2a2efe60 100644
Binary files a/docs/_images/demo3_f1.png and b/docs/_images/demo3_f1.png differ
diff --git a/docs/_images/fom_cpu_ave.png b/docs/_images/fom_cpu_ave.png
new file mode 100644
index 00000000..6f5b740b
Binary files /dev/null and b/docs/_images/fom_cpu_ave.png differ
diff --git a/docs/_sources/build_expert.rst.txt b/docs/_sources/build_expert.rst.txt
index 8852ea28..5d37521b 100644
--- a/docs/_sources/build_expert.rst.txt
+++ b/docs/_sources/build_expert.rst.txt
@@ -1,11 +1,12 @@
 Building: "expert" mode
 =======================
 
-You need:
+Prerequisites
+-------------
 
 * This repo: ``git clone https://github.com/Pressio/SHAW``
 
-* C++14 compiler: we have tested this with GCC 8.3.1 and GCC 8.4.0
+* C++14 compiler: we have tested this with GCC 8.3.1, GCC 8.4.0, GCC 10.2.0.
 
 * ``CMake>=3.16.0``
 
@@ -17,20 +18,19 @@ You need:
 * `yaml-cpp <https://github.com/jbeder/yaml-cpp>`_: last tested version ``0.7.0``
 
 
-Then, as usual for a typical CMake project, you can do:
+Build
+-----
 
 .. code-block:: shell
 
-   export CXX=<path-to-your-C++-compiler>
-   export SHAWDIR=<path-to-where-you-cloned-the-shaw-repository>
-
-   mkdir build && cd build
-
    cmake \
-   -DKokkosKernels_DIR=<your-kernels-install-path>/lib/cmake/KokkosKernels/ \
-   -Dyaml-cpp_DIR=<your-yamlcpp-install-path>/share/cmake/ \
-   ${SHAWDIR}
+   -DCMAKE_CXX_COMPILER=<fullpath-to-your-C++-compiler> \
+   -DKokkosKernels_DIR=<fullpath-to-your-kernels-install-path>/lib/cmake/KokkosKernels/ \
+   -Dyaml-cpp_DIR=<fullpath-to-your-yamlcpp-install-path>/share/cmake/ \
+   -B <fullpath-to-where-you-want-to-build-the-code> \
+   -S <fullpath-to-your-shaw-repository>
 
+   # from within your build dir
    make -j4
 
    # running the tests is advised
diff --git a/docs/_sources/build_stepbystep.rst.txt b/docs/_sources/build_stepbystep.rst.txt
index 3e081255..aa66dceb 100644
--- a/docs/_sources/build_stepbystep.rst.txt
+++ b/docs/_sources/build_stepbystep.rst.txt
@@ -5,8 +5,8 @@ If you are reading this page, it likely is because you want
 a simplified (automated) way the get this done,
 so that you can minimize the extra effort
 in building the TPLs needed and the SHAW code.
-This page tries to address this: it provides a step-by-step
-guide that leverages some scripts we have prepared to simplify this.
+This page tries to do so by providing a step-by-step
+guide and some scripts we have prepared.
 
 
 Prerequisites
@@ -28,22 +28,19 @@ Prerequisites
 Step 1: Prepare environment
 ----------------------------
 
-Let's make things easy:
-
 .. code-block:: shell
 
-   export CXX=<path-to-your-C++14-compiler>
-   export SHAWDIR=<path-to-where-you-cloned-the-SHAW-repository>
+   export CXX=<fullpath-to-your-C++14-compiler>
+   export SHAWDIR=<fullpath-to-where-you-cloned-the-SHAW-repository>
 
    export WORKDIR=${HOME}/myFirstShawBuild
    mkdir -p ${WORKDIR}
 
 
 Step 2: Build TPLs
---------------------------------
+------------------
 
-To simplify this part, we have prepared script that
-automates getting the TPLs:
+We have prepared a script that automates this:
 
 .. code-block:: shell
 
@@ -51,12 +48,12 @@ automates getting the TPLs:
    bash build_tpls.sh ${WORKDIR} openmp
 
 This script will fetch, build and install inside ``WORKDIR/tpls``
-all TPLs needed: Kokkos-core, Kokkos-kernelas and yaml-cpp.
+all TPLs needed: Kokkos-core, Kokkos-kernels and yaml-cpp.
 
 .. Attention::
 
-   This will build Kokkos for host-only use with the OpenMP backend
-   but **without** any architecture specifications. This is on purpose,
+   This builds Kokkos with only the OpenMP backend and **without**
+   any architecture specifications. This is on purpose,
    because this step is meant to be as generic and simple as possible to get
    you started quickly. If you want to customize things, read
    more on the `Kokkos github <https://github.com/kokkos>`_.
@@ -90,14 +87,16 @@ Step 3: Build SHAW
 .. code-block:: shell
 
    cd ${WORKDIR}
-   mkdir shaw-build && cd shaw-build
 
+   # note that here there is no need to specify the compiler because
+   # cmake will automatically pick up the env var CXX that we
+   # already set above in step 1
    cmake \
      -DKokkosKernels_DIR=${WORKDIR}/tpls/kokkos-kernels-install/lib/cmake/KokkosKernels/ \
      -Dyaml-cpp_DIR=${WORKDIR}/tpls/yamlcpp-install/share/cmake/ \
-     ${SHAWDIR}
+     -B ${WORKDIR}/shaw-build \
+     -S ${SHAWDIR}
 
+   cd ${WORKDIR}/shaw-build
    make -j4
-
-   # running the SHAW tests is advised
    ctest
diff --git a/docs/_sources/demo1.rst.txt b/docs/_sources/demo1.rst.txt
new file mode 100644
index 00000000..6716106d
--- /dev/null
+++ b/docs/_sources/demo1.rst.txt
@@ -0,0 +1,143 @@
+Demo 1
+======
+
+.. admonition:: Description:
+
+   This demo simulates the wave dynamics
+   for a single forcing using the PREM Earth model.
+
+
+1. Prepare
+----------
+
+.. code-block:: bash
+
+   export SHAWDIR=<fullpath-to-the-source-code-repository>
+   export EXEDIR=<fullpath-to-where-you-built-the-code-executables>
+
+   # create a dir to run the demo
+   export MYRUNDIR=${HOME}/myFirstDemo
+   mkdir -p ${MYRUNDIR}
+
+.. Important::
+   You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`.
+
+|
+
+2. Generate the mesh
+--------------------
+
+We use a grid of ``200`` x ``1000`` velocity points
+along the radial and polar directions, respectively.
+
+To generate the mesh files proceed as follows:
+
+.. code-block:: bash
+
+   cd ${SHAWDIR}/meshing
+   python create_single_mesh.py -nr 200 -nth 1000 -working-dir ${MYRUNDIR}
+
+Note that the grid generator script only needs the velocity points
+because the stress points are defined automatically
+based on the :ref:`staggered scheme <discretization>`.
+
+After generating the grid, you should have a ``${MYRUNDIR}/mesh200x1000`` directory containing:
+
+.. code-block:: bash
+
+   .
+   ├── [4.5M]  coeff_vp.dat
+   ├── [ 28M]  graph_sp.dat
+   ├── [ 16M]  graph_vp.dat
+   └── [ 231]  mesh_info.dat
+
+|
+
+3. Input file
+-------------
+
+We use the following input file (:doc:`learn more about input file <inputfile>`):
+
+.. literalinclude :: ../../demos/demo1/input.yaml
+  :language: yaml
+
+which we have ready for you to copy as:
+
+.. code-block:: bash
+
+   cp ${SHAWDIR}/demos/demo1/input.yaml ${MYRUNDIR}
+
+|
+
+4. Run the simulation
+---------------------
+
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+
+   # soft link the executable
+   ln -s ${EXEDIR}/shawExe .
+
+   # if you use OpenMP build, remember to set
+   # OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread
+   ./shawExe input.yaml
+
+|
+
+5. Post-process data
+--------------------
+
+The demo should generate inside ``${MYRUNDIR}`` the following:
+
+.. code-block:: bash
+
+   coords_sp.txt #: coordinates of the stresses grid points
+   coords_vp.txt #: coordinates of the velocity grid points
+   seismogram_0  #: seismogram at the receiver locations set in input.yaml
+   snaps_vp_0    #: snapshot matrix for the velocity
+   snaps_sp_0    #: snapshot matrix for the stresses
+
+
+We created Python scripts to post-process this data:
+
+.. code-block:: bash
+
+   cp ${SHAWDIR}/demos/demo1/*.py ${MYRUNDIR}
+
+
+First, the seismogram data:
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   python plotSeismogram.py
+
+
+.. image:: ../img/demo1_f1.png
+
+
+Then, contour plots of the velocity field at ``t=250, 1000, 2000`` (seconds):
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   ln -s ${EXEDIR}/extractStateFromSnaps .
+
+   # extract from the velocity snapshots the velocity field at specific timesteps:
+   # since we use ``dt = 0.25`` seconds, our targets ``t=250, 1000, 2000``
+   # correspond to *time steps* 1000, 4000, 8000
+   ./extractStateFromSnaps --snaps=./snaps_vp_0 binary --fsize=1 \
+     --outformat=ascii --timesteps=1000 4000 8000 \
+     --samplingfreq=100 --outfileappend=vp
+
+   python plotWavefield.py
+
+
+.. image:: ../img/demo1_f2.png
+   :width: 30%
+.. image:: ../img/demo1_f3.png
+   :width: 28%
+.. image:: ../img/demo1_f4.png
+   :width: 28%
diff --git a/docs/src/demo_rank1fommulti.rst b/docs/_sources/demo2.rst.txt
similarity index 69%
rename from docs/src/demo_rank1fommulti.rst
rename to docs/_sources/demo2.rst.txt
index b6a9282b..c8ef9681 100644
--- a/docs/src/demo_rank1fommulti.rst
+++ b/docs/_sources/demo2.rst.txt
@@ -1,27 +1,31 @@
-(2): Multi-forcing via rank-1
-=============================
+Demo 2
+======
 
-Before you start
+.. admonition:: Description:
 
-Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.
+   This demo simulates the wave dynamics for multiple
+   forcings using the rank-1 formulation and the PREM Earth model.
 
 
-Prepare environment
--------------------
+1. Prepare
+----------
 
 .. code-block:: bash
 
-   export ESWSRCDIR=<fullpath-to-the-source-code-repository>
-   export SHAWEXEDIR=<fullpath-to-where-you-built-the-code-executables>
+   export SHAWDIR=<fullpath-to-the-source-code-repository>
+   export EXEDIR=<fullpath-to-where-you-built-the-code-executables>
 
    # create a dir to run the demo
    export MYRUNDIR=${HOME}/mySecondDemo
    mkdir -p ${MYRUNDIR}
 
+.. Important::
+   You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`.
+
+|
 
-Generating the mesh
--------------------
+2. Generate the mesh
+--------------------
 
 For this demo, we use a grid of ``256`` x ``1024`` velocity points
 along the radial and polar directions, respectively.
@@ -29,7 +33,7 @@ To generate the mesh files proceed as follows:
 
 .. code-block:: bash
 
-   cd ${ESWSRCDIR}/meshing
+   cd ${SHAWDIR}/meshing
    python create_single_mesh.py -nr 256 -nth 1024 -working-dir ${MYRUNDIR}
 
 
@@ -43,60 +47,31 @@ After generating the grid, you should have a ``${MYRUNDIR}/mesh256x1024`` direct
    ├── [ 21M]  graph_vp.dat
    └── [ 231]  mesh_info.dat
 
+|
 
-Input file
-----------
-
-We use the following input file (`learn more about input file <{filename}/inputfile.rst>`_):
-
-.. code-block:: yaml
-
-  general:
-    meshDir: ./mesh256x1024
-    dt: 0.25
-    finalTime: 2000.0
-    checkNumericalDispersion: true
-    checkCfl: true
+3. Input file
+-------------
 
-  io:
-    snapshotMatrix:
-      binary: true
-      velocity: {freq: 100, fileName: snaps_vp}
-      stress:   {freq: 100, fileName: snaps_sp}
+We use the following input file (:doc:`learn more about input file <inputfile>`):
 
-  seismogram:
-    binary: false
-    freq: 4
-    receivers: [5,30,55,80,105,130,155,175]
+.. literalinclude :: ../../demos/demo2/input.yaml
+  :language: yaml
 
-  source:
-    signal:
-      kind: ricker
-
-      # here we pass a list of depths to use as samples
-      # this will automatically activate sampling
-      depth: [240.,440.,540.,700.]
-
-      period: 65.0
-      delay: 180.0
-
-  material:
-    kind: prem
-
-You can get the input file as:
+which we have ready for you to copy as:
 
 .. code-block:: bash
 
-   cp ${ESWSRCDIR}/demos/fom_rank1_sample_depth/input.yaml ${MYRUNDIR}
+   cp ${SHAWDIR}/demos/demo2/input.yaml ${MYRUNDIR}
 
+|
 
-Run the simulation
-------------------
+4. Run the simulation
+---------------------
 
 .. code-block:: bash
 
    cd ${MYRUNDIR}
-   ln -s ${SHAWEXEDIR}/shawExe .
+   ln -s ${EXEDIR}/shawExe .
 
    # if you use OpenMP build, remember to set
    # OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread
@@ -109,8 +84,8 @@ and using a serial build of the code, each individual realization takes approxim
 of which the IO time for data collection is less than 1 second.
 
 
-Simulation data
----------------
+5. Simulation data
+------------------
 
 After running the demo (have some patience because it takes some a couple minutes
 if you use the serial mode), you should have inside ``${MYRUNDIR}`` the following files:
@@ -136,8 +111,8 @@ if you use the serial mode), you should have inside ``${MYRUNDIR}`` the followin
    snaps_sp_3    #: stresses snapshots for depth = 700
 
 
-Post-process data
------------------
+6. Post-process data
+--------------------
 
 To post-process the data, get the Python scripts created
 for this demo and visualize the seismogram:
@@ -145,7 +120,7 @@ for this demo and visualize the seismogram:
 .. code-block:: bash
 
    cd ${MYRUNDIR}
-   cp ${ESWSRCDIR}/demos/fom_rank1_sample_depth/plotSeismogram.py .
+   cp ${SHAWDIR}/demos/demo2/plotSeismogram.py .
    python plotSeismogram.py
 
 
@@ -158,7 +133,7 @@ for ``depth=240`` and ``depth=700``
 .. code-block:: bash
 
    cd ${MYRUNDIR}
-   ln -s ${SHAWEXEDIR}/extractStateFromSnaps .
+   ln -s ${EXEDIR}/extractStateFromSnaps .
 
    # snaps_vp_0 contains snapshots for depth=240 km
    # extract target state and write to file appending vp_d240 to identify the case
@@ -170,6 +145,7 @@ for ``depth=240`` and ``depth=700``
    ./extractStateFromSnaps --snaps=./snaps_vp_3 binary --fsize=1 \
      --outformat=ascii --timesteps=8000  --samplingfreq=100 --outfileappend=vp_d700
 
+   cp ${SHAWDIR}/demos/demo2/plotWavefield.py .
    python plotWavefield.py
 
 And plot them below, showing as expected the largely different pattern
diff --git a/docs/src/demo_rank2fom.rst b/docs/_sources/demo3.rst.txt
similarity index 58%
rename from docs/src/demo_rank2fom.rst
rename to docs/_sources/demo3.rst.txt
index 0cf06f52..51248069 100644
--- a/docs/src/demo_rank2fom.rst
+++ b/docs/_sources/demo3.rst.txt
@@ -1,75 +1,64 @@
-(3): Multi-forcing via rank-2
-=============================
+Demo 3
+======
 
-Before you start
+.. admonition:: Description:
 
-Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.
+   This demo simulates the wave dynamics for multiple
+   forcings using the rank-2 formulation and the PREM Earth model.
+   For the sake of demonstration, this demo solves *the same problem*
+   described in :doc:`demo2`, except that here we use the *rank-2 formulation*,
+   which allows us to simulate several trajectories simultaneously.
 
-For the sake of demonstration, this demo solves *the same problem*
-described in the rank-1 demo, except that
-here we use the *rank-2 formulation*, which allows us to simulate several
-trajectories simultaneously.
-
-
-Prepare environment
---------------------
+1. Prepare
+----------
 
 .. code-block:: bash
 
-   export ESWSRCDIR=<fullpath-to-the-source-code-repository>
-   export SHAWEXEDIR=<fullpath-to-where-you-built-the-code-executables>
+   export SHAWDIR=<fullpath-to-the-source-code-repository>
+   export EXEDIR=<fullpath-to-where-you-built-the-code-executables>
 
    # create a dir to run the demo
    export MYRUNDIR=${HOME}/myThirdDemo
    mkdir -p ${MYRUNDIR}
 
 
-Generating the mesh
--------------------
+.. Important::
+   You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`.
 
-This is identical to the mesh used in this demo:
+|
+
+2. Generate the mesh
+--------------------
 
 .. code-block:: bash
 
-   cd ${ESWSRCDIR}/meshing
+   cd ${SHAWDIR}/meshing
    python create_single_mesh.py -nr 256 -nth 1024 -working-dir ${MYRUNDIR}
 
+3. Input file
+-------------
 
+We use the following input file (:doc:`learn more about input file <inputfile>`):
 
-Input file
-----------
+.. literalinclude :: ../../demos/demo3/input.yaml
+  :language: yaml
 
-The input file is identical to `the one for the rank-1 demo <{filename}/rank1fommulti.rst>`_,
-except for the addition of one line to the ``source`` section:
 
-.. code-block:: yaml
-
-  #
-  # general, io, material: as in the other demo
-  #
-  source:
-    signal:
-      # kind, depth, period, delay: same as the other one
-      # ...
-
-      # forcingSize defines how many simultaneous trajectories to compute
-      forcingSize: 4
-
-The full input file can be copied:
+which we have ready for you to copy as:
 
 .. code-block:: bash
 
-   cp ${ESWSRCDIR}/demos/fom_rank2_sample_depth/input.yaml ${MYRUNDIR}
+   cp ${SHAWDIR}/demos/demo3/input.yaml ${MYRUNDIR}
 
+|
 
-Run the simulation
-------------------
+4. Run the simulation
+---------------------
 
 .. code-block:: bash
 
    cd ${MYRUNDIR}
-   ln -s ${SHAWEXEDIR}/shawExe .
+   ln -s ${EXEDIR}/shawExe .
    ./shawExe input.yaml
 
 To give an idea of runtime, on a MacPro with 2.4 GHz 8-Core Intel Core i9 and 32 GB 2667 MHz DDR4,
@@ -77,12 +66,12 @@ and using a serial build of the code, the run takes approximately 107 seconds,
 of which the IO time for data collection is less than 1 second.
 Note that this already gives a hint to the advantages of using the rank-2 formulation.
 In fact, while here it takes 107 seconds to simulate the four trajectories simultaneously,
-in the `rank-1 version of this demo <{filename}/rank1fommulti.rst>`_ it took
+in the :doc:`rank-1 version of this demo <demo2>` it took
 about 150 seconds to simulate the same realizations.
 
 
-Simulation data
----------------
+5. Simulation data
+------------------
 
 The demo should generate inside ``${MYRUNDIR}`` the following:
 
@@ -104,8 +93,8 @@ The demo should generate inside ``${MYRUNDIR}`` the following:
    snaps_sp_0    #: snapshot matrix for the stresses for all realizations
 
 
-Post-process data
------------------
+6. Post-process data
+--------------------
 
 To post-process the data, get the Python scripts created
 for this demo and visualize the seismogram:
@@ -113,7 +102,7 @@ for this demo and visualize the seismogram:
 .. code-block:: bash
 
    cd ${MYRUNDIR}
-   cp ${ESWSRCDIR}/demos/fom_rank2_sample_depth/plotSeismogram.py .
+   cp ${SHAWDIR}/demos/demo3/plotSeismogram.py .
    python plotSeismogram.py
 
 Which generates a figure identical to the `seismogram plot obtained with the rank-1 <{filename}/rank1fommulti.rst>`_
diff --git a/docs/_sources/demo_rank1fom.rst.txt b/docs/_sources/demo_rank1fom.rst.txt
index 323b02ca..c661fcfc 100644
--- a/docs/_sources/demo_rank1fom.rst.txt
+++ b/docs/_sources/demo_rank1fom.rst.txt
@@ -1,37 +1,45 @@
-(1): Single Forcing
-===================
+Demo 1
+======
 
-Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.
+.. admonition:: Description:
 
-Prepare environment
--------------------
+   This demo simulates the wave dynamics
+   for a single forcing using the PREM Earth model.
+
+
+1. Prepare
+----------
 
 .. code-block:: bash
 
-   export ESWSRCDIR=<fullpath-to-the-source-code-repository>
-   export SHAWEXEDIR=<fullpath-to-where-you-built-the-code-executables>
+   export SHAWDIR=<fullpath-to-the-source-code-repository>
+   export EXEDIR=<fullpath-to-where-you-built-the-code-executables>
 
    # create a dir to run the demo
    export MYRUNDIR=${HOME}/myFirstDemo
    mkdir -p ${MYRUNDIR}
 
+.. Important::
+   You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`.
 
-Generating the mesh
--------------------
+|
 
-For this demo, we use a grid of ``200`` x ``1000`` velocity points
+2. Generate the mesh
+--------------------
+
+We use a grid of ``200`` x ``1000`` velocity points
 along the radial and polar directions, respectively.
+
 To generate the mesh files proceed as follows:
 
 .. code-block:: bash
 
-   cd ${ESWSRCDIR}/meshing
+   cd ${SHAWDIR}/meshing
    python create_single_mesh.py -nr 200 -nth 1000 -working-dir ${MYRUNDIR}
 
 Note that the grid generator script only needs the velocity points
 because the stress points are defined automatically
-based on the `staggered scheme <{filename}/goveq.rst>`_.
+based on the :ref:`staggered scheme <discretization>`.
 
 After generating the grid, you should have a ``${MYRUNDIR}/mesh200x1000`` directory containing:
 
@@ -43,66 +51,43 @@ After generating the grid, you should have a ``${MYRUNDIR}/mesh200x1000`` direct
    ├── [ 16M]  graph_vp.dat
    └── [ 231]  mesh_info.dat
 
+|
 
-Input file
-----------
+3. Input file
+-------------
 
-We use the following input file (`learn more about input file <{filename}/inputfile.rst>`_):
-
-.. code-block:: yaml
-
-   general:
-     # meshDir should contain the full path to the mesh directory
-     # as generated by the python script `meshing/create_single_mesh.py`
-     # we assume the input file is in the same location as mesh dir
-     meshDir: ./mesh200x1000
-     dt: 0.25
-     finalTime: 2000.0
-     checkNumericalDispersion: true
-     checkCfl: true
-
-   io:
-     snapshotMatrix:
-       binary: true
-       velocity: {freq: 100, fileName: snaps_vp}
-       stress:   {freq: 100, fileName: snaps_sp}
-     seismogram:
-       binary: false
-       freq: 4
-       receivers: [5,30,55,80,105,130,155,175]
-
-   source:
-     signal:
-       kind: ricker
-       depth: 640.0  # km
-       period: 65.0  # seconds
-       delay: 180.0  # seconds
-
-   material:
-     kind: prem
-
-You can get the input file as:
+We use the following input file (:doc:`learn more about input file <inputfile>`):
+
+.. literalinclude :: ../../demos/demo1/input.yaml
+  :language: yaml
+
+which we have ready for you to copy as:
 
 .. code-block:: bash
 
-   cp ${ESWSRCDIR}/demos/fom_rank1/input.yaml ${MYRUNDIR}
+   cp ${SHAWDIR}/demos/demo1/input.yaml ${MYRUNDIR}
+
+|
 
-Run the simulation
-------------------
+4. Run the simulation
+---------------------
 
 
 .. code-block:: bash
 
    cd ${MYRUNDIR}
-   ln -s ${SHAWEXEDIR}/shawExe .
+
+   # soft link the executable
+   ln -s ${EXEDIR}/shawExe .
 
    # if you use OpenMP build, remember to set
    # OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread
    ./shawExe input.yaml
 
+|
 
-Simulation data
----------------
+5. Post-process data
+--------------------
 
 The demo should generate inside ``${MYRUNDIR}`` the following:
 
@@ -115,17 +100,14 @@ The demo should generate inside ``${MYRUNDIR}`` the following:
    snaps_sp_0    #: snapshot matrix for the stresses
 
 
-Post-process data
------------------
-
-To post-process the data, you can use the Python scripts created for this demo:
+We created Python scripts for this:
 
 .. code-block:: bash
 
-   cp ${ESWSRCDIR}/demos/fom_rank1/*.py ${MYRUNDIR}
+   cp ${SHAWDIR}/demos/demo1/*.py ${MYRUNDIR}
 
 
-First, we visualize the seismogram data by doing:
+First, the seismogram data:
 
 .. code-block:: bash
 
@@ -136,18 +118,19 @@ First, we visualize the seismogram data by doing:
 .. image:: ../img/demo1_f1.png
 
 
-Then, we can extract and visualize the full wavefield at ``t=250, 1000, 2000`` (seconds):
+Then, contour plots of the velocity field at ``t=250, 1000, 2000`` (seconds):
 
 .. code-block:: bash
 
    cd ${MYRUNDIR}
-   ln -s ${SHAWEXEDIR}/extractStateFromSnaps .
+   ln -s ${EXEDIR}/extractStateFromSnaps .
 
    # extract from the velocity snapshots the velocity field at specific timesteps:
    # since we use ``dt = 0.25`` seconds, our targets ``t=250, 1000, 2000``,
    # correspond to *time steps* 1000, 4000, 8000
    ./extractStateFromSnaps --snaps=./snaps_vp_0 binary --fsize=1 \
-     --outformat=ascii --timesteps=1000 4000 8000  --samplingfreq=100 --outfileappend=vp
+     --outformat=ascii --timesteps=1000 4000 8000 \
+     --samplingfreq=100 --outfileappend=vp
 
    python plotWavefield.py
 
diff --git a/docs/_sources/demo_rank1fommulti.rst.txt b/docs/_sources/demo_rank1fommulti.rst.txt
index b6a9282b..ae865100 100644
--- a/docs/_sources/demo_rank1fommulti.rst.txt
+++ b/docs/_sources/demo_rank1fommulti.rst.txt
@@ -1,14 +1,14 @@
-(2): Multi-forcing via rank-1
-=============================
+Demo 2
+======
 
-Before you start
+Earth, PREM, Multi-forcing (rank-1)
 
-Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.
+.. Important::
+   You need to have the code built to proceed with this demo, see :doc:`build_expert` or :doc:`build_stepbystep`.
 
 
-Prepare environment
--------------------
+Prepare
+-------
 
 .. code-block:: bash
 
diff --git a/docs/_sources/demo_rank2fom.rst.txt b/docs/_sources/demo_rank2fom.rst.txt
index 0cf06f52..16822c36 100644
--- a/docs/_sources/demo_rank2fom.rst.txt
+++ b/docs/_sources/demo_rank2fom.rst.txt
@@ -1,19 +1,19 @@
-(3): Multi-forcing via rank-2
-=============================
+Demo 3
+======
 
-Before you start
+Earth, Multi-forcing (rank-2)
+
+.. Important::
+   You need to have the code built to proceed with this demo, see :doc:`build_expert` or :doc:`build_stepbystep`.
 
-Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.
 
 For the sake of demonstration, this demo solves *the same problem*
 described in the rank-1 demo, except that
 here we use the *rank-2 formulation*, which allows us to simulate several
 trajectories simultaneously.
 
-
-Prepare environment
---------------------
+Prepare
+-------
 
 .. code-block:: bash
 
diff --git a/docs/_sources/demos.rst.txt b/docs/_sources/demos.rst.txt
index 462b35fb..967e4b84 100644
--- a/docs/_sources/demos.rst.txt
+++ b/docs/_sources/demos.rst.txt
@@ -6,6 +6,6 @@ End-to-end Demos
 .. toctree::
     :maxdepth: 2
 
-    demo_rank1fom
-    demo_rank1fommulti
-    demo_rank2fom
\ No newline at end of file
+    demo1
+    demo2
+    demo3
diff --git a/docs/_sources/goveq.rst.txt b/docs/_sources/goveq.rst.txt
index f1ca6d98..2a47616c 100644
--- a/docs/_sources/goveq.rst.txt
+++ b/docs/_sources/goveq.rst.txt
@@ -1,7 +1,7 @@
 Governing equations
 ===================
 
-This code simulates the evolution of elastic seismic shear waves in an axisymmetric domain.`
+This code simulates the evolution of elastic seismic shear waves in an axisymmetric domain.
 
 Assuming the target body/planet (e.g. Earth) can be approximated as a sphere,
 we adopt a spherical coordinate system as shown in the figure below:
@@ -11,10 +11,10 @@ we adopt a spherical coordinate system as shown in the figure below:
   :align: center
   :alt: Alternative text
 
-
-In the axisymmetric approximation, one assumes that fields/quantities
-do not vary along :math:`\phi`, implying that all the derivatives
-with respect to :math:`\phi` can be dropped.
+.. Important::
+   In the axisymmetric approximation, one assumes that fields/quantities
+   do not vary along :math:`\phi`, implying that all the derivatives
+   with respect to :math:`\phi` can be dropped.
 
 With this assumption, the set of equations governing the time evolution
 of elastic waves in the velocity-stress formulation can be written as:
@@ -44,20 +44,22 @@ where:
 
 - :math:`t` represents time
 
-- :math:`r \in [0, r_{surface}]` is the radial distance from origin to surface of the body
+- :math:`r \in [0, r_{surface}]` is the radial distance from origin to the surface of the body
 
 - :math:`\theta \in [0, \pi]` is the polar angle
 
 - :math:`\rho(r, \theta)` is the density
 
-- :math:`v(r, \theta, t)` is the velocity (for simplicity we drop the subscript, but it is intended to be the :math:`v_{\phi}` velocity component)
+- :math:`v(r, \theta, t)` is the velocity (for simplicity we drop the subscript,
+  but it is intended to be the :math:`v_{\phi}` velocity component)
 
-- :math:`\sigma_{r\phi}(r, \theta, t)` and :math:`\sigma_{\theta\phi}(r, \theta, t)` are the two components of the stress tensor remaining after the axisymmetric approximation
+- :math:`\sigma_{r\phi}(r, \theta, t)` and :math:`\sigma_{\theta\phi}(r, \theta, t)`
+  are the two components of the stress tensor remaining after the axisymmetric approximation
 
 - :math:`f(r, \theta,t)` is the forcing term
 
-- :math:`G(r, \theta) = v_s^2(r, \theta) \rho(r, \theta)` is the shear modulus and :math:`v_s` being the shear wave velocity.
-
+- :math:`G(r, \theta) = v_s^2(r, \theta) \rho(r, \theta)` is the shear modulus,
+  with :math:`v_s` being the shear wave velocity.
 
 
 In practice, the axisymmetric approximation means that one solves the
@@ -66,8 +68,11 @@ Such a formulation is referred to as 2.5-dimensional because it involves
 a 2-dimensional spatial domain (a circular sector of the Earth)
 but models point sources with correct 3-dimensional spreading {cite}.
 
-Note that we assume both the density and shear modulus to only depend on the spatial coordinates.
+.. Note::
+   We assume both the density and shear modulus to only depend on the spatial coordinates.
+
 
+.. _discretization:
 
 Discretization
 ==============
@@ -97,7 +102,7 @@ We remark that, differently than (cite), we do not rely on ghost
 points to impose boundary conditions, but account for the boundary
 conditions directly when assembling the system matrix.
 
-As an example, the figure below shows the grid when modeling the Earth: the computational
+The figure below shows the grid when modeling the Earth: the computational
 domain extends from the surface to the core-mantle boundary, excluding the liquid core.
 
 .. figure:: ../img/mesh.png
diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt
index b21474c6..f892be39 100644
--- a/docs/_sources/index.rst.txt
+++ b/docs/_sources/index.rst.txt
@@ -51,9 +51,9 @@ Highlights and features
 *  The code relies on the `Kokkos programming model <https://github.com/kokkos>`_
    for performance portability
 
-*  We use the :doc:`velocity-stress formulation in an axi-symmetric domain <goveq>`
+*  :doc:`Velocity-stress formulation in an axi-symmetric domain <goveq>`
 
-*  We currently support the following material models:
+*  Support for the following material models:
 
    - :ref:`single layer model <singlelayerdescription>`
 
@@ -66,22 +66,22 @@ Highlights and features
    These are 1D models because they only depend on the radial distance.
    The modularity of the code allows one to easily add new models
 
-*  If you want to simulate the wave dynamics in another
-   planet/axisymmetric body, all you have to do is to create
-   a mesh suitable for that planet, and a suitable material model
+*  Simulating the dynamics in another planet/axisymmetric body is relatively easy:
+   you have to create a mesh suitable for that planet, and a suitable material model
 
 *  The code implements what we refer to as "rank-1" and "rank-2" formulations:
 
    *  *rank-1*:
 
-      * the discrete state and forcing term are stored as 1D arrays
+      * discrete state and forcing are stored as 1D arrays
 
-      * this is used to simulate the wave dynamics due to a *single forcing term*
+      * this is useful to simulate the wave dynamics due to a *single forcing term*
+
+      * :doc:`See the demo! <demo1>`
 
    *  *rank-2*:
 
-      * the discrete state and forcing term are stored
-	using rank-2 tensors (i.e. matrices)
+      * discrete state and forcing are stored using rank-2 tensors (i.e. matrices)
 
       * this is useful to *simultaneously* solve the wave
 	dynamics for *multiple forcing realizations* (e.g. multiple
@@ -90,6 +90,7 @@ Highlights and features
 	it has higher computational intensity, thus benefiting
 	efficient ensemble propagation
 
+      * :doc:`See the demo! <demo3>`
 
 How to cite
 -----------
@@ -122,6 +123,7 @@ Contents
     build_stepbystep
     inputfile
     demos
+    performance
     GitHub Repo <https://github.com/Pressio/SHAW>
     Open an issue/feature req. <https://github.com/Pressio/SHAW/issues>
     license
diff --git a/docs/_sources/performance.rst.txt b/docs/_sources/performance.rst.txt
new file mode 100644
index 00000000..868b3fe0
--- /dev/null
+++ b/docs/_sources/performance.rst.txt
@@ -0,0 +1,23 @@
+
+Performance
+===========
+
+The following plot shows performance results obtained on a workstation
+with two 18-core Intel(R) Xeon(R) Gold 6154 CPU @ 3.00 GHz,
+each with a 24.75MB L3 cache and 125GB total memory.
+We enable hyperthreading, thus supporting a maximum of 36 logical threads per CPU,
+so a total of 72 threads. We use GCC-8.3.1 and rely on kokkos
+and kokkos-kernels version 3.1.01.
+We use Blis-0.7.0 as the kokkos-kernels’ backend for all dense operations.
+We use the OpenMP backend for Kokkos.
+
+|
+
+.. figure:: ../img/fom_cpu_ave.png
+   :align: center
+   :width: 95%
+
+   M represents how many trajectories we are computing simultaneously:
+   when M=1, this is what we refer to as the rank-1 formulation,
+   while M>=2 corresponds to what we refer to as the rank-2 formulation;
+   N is the *total* number of dofs (velocities plus stresses) for the problem.
diff --git a/docs/build_expert.html b/docs/build_expert.html
index ddb2d871..44d34634 100644
--- a/docs/build_expert.html
+++ b/docs/build_expert.html
@@ -134,7 +134,7 @@
           <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
         </button>
       </div>
-      <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
+      <label class="toc-overlay-icon toc-header-icon" for="__toc">
         <div class="visually-hidden">Toggle table of contents sidebar</div>
         <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
       </label>
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -196,7 +197,7 @@
               <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
+          <label class="toc-overlay-icon toc-content-icon" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
             <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
           </label>
@@ -204,33 +205,35 @@
         <article role="main">
           <div class="section" id="building-expert-mode">
 <h1>Building: “expert” mode<a class="headerlink" href="#building-expert-mode" title="Permalink to this headline">¶</a></h1>
-<p>You need:</p>
+<div class="section" id="prerequisites">
+<h2>Prerequisites<a class="headerlink" href="#prerequisites" title="Permalink to this headline">¶</a></h2>
 <ul class="simple">
 <li><p>This repo: <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">clone</span> <span class="pre">https://github.com/Pressio/SHAW</span></code></p></li>
-<li><p>C++14 compiler: we have tested this with GCC 8.3.1 and GCC 8.4.0</p></li>
+<li><p>C++14 compiler: we have tested this with GCC 8.3.1, GCC 8.4.0, GCC 10.2.0.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">CMake&gt;=3.16.0</span></code></p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">BLAS</span></code> and <code class="docutils literal notranslate"><span class="pre">LAPACK</span></code></p></li>
 <li><p><a class="reference external" href="https://github.com/kokkos/kokkos">Kokkos</a> and
 <a class="reference external" href="https://github.com/kokkos/kokkos-kernels">Kokkos-kernels</a>: last tested version <code class="docutils literal notranslate"><span class="pre">3.5.00</span></code></p></li>
 <li><p><a class="reference external" href="https://github.com/jbeder/yaml-cpp">yaml-cpp</a>: last tested version <code class="docutils literal notranslate"><span class="pre">0.7.0</span></code></p></li>
 </ul>
-<p>Then, as usual for a typical CMake project, you can do:</p>
-<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">CXX</span><span class="o">=</span>&lt;path-to-your-C++-compiler&gt;
-<span class="nb">export</span> <span class="nv">SHAWDIR</span><span class="o">=</span>&lt;path-to-where-you-cloned-the-shaw-repository&gt;
-
-mkdir build <span class="o">&amp;&amp;</span> <span class="nb">cd</span> build
-
-cmake <span class="se">\</span>
--DKokkosKernels_DIR<span class="o">=</span>&lt;your-kernels-install-path&gt;/lib/cmake/KokkosKernels/ <span class="se">\</span>
--Dyaml-cpp_DIR<span class="o">=</span>&lt;your-yamlcpp-install-path&gt;/share/cmake/ <span class="se">\</span>
-<span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>
+</div>
+<div class="section" id="build">
+<h2>Build<a class="headerlink" href="#build" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>cmake <span class="se">\</span>
+-DCMAKE_CXX_COMPILER<span class="o">=</span>&lt;fullpath-to-your-C++-compiler&gt; <span class="se">\</span>
+-DKokkosKernels_DIR<span class="o">=</span>&lt;fullpath-to-your-kernels-install-path&gt;/lib/cmake/KokkosKernels/ <span class="se">\</span>
+-Dyaml-cpp_DIR<span class="o">=</span>&lt;fullpath-to-your-yamlcpp-install-path&gt;/share/cmake/ <span class="se">\</span>
+-B &lt;fullpath-to-where-you-want-to-build-the-code&gt; <span class="se">\</span>
+-S &lt;fullpath-to-your-shaw-repository&gt;
 
+<span class="c1"># from within your build dir</span>
 make -j4
 
 <span class="c1"># running the tests is advised</span>
 ctest
 </pre></div>
 </div>
+</div>
 </div>
 
         </article>
@@ -262,7 +265,7 @@ <h1>Building: “expert” mode<a class="headerlink" href="#building-expert-mode
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
@@ -275,9 +278,29 @@ <h1>Building: “expert” mode<a class="headerlink" href="#building-expert-mode
         
       </footer>
     </div>
-    <aside class="toc-drawer no-toc">
+    <aside class="toc-drawer">
       
       
+      <div class="toc-sticky toc-scroll">
+        <div class="toc-title-container">
+          <span class="toc-title">
+            Contents
+          </span>
+        </div>
+        <div class="toc-tree-container">
+          <div class="toc-tree">
+            <ul>
+<li><a class="reference internal" href="#">Building: “expert” mode</a><ul>
+<li><a class="reference internal" href="#prerequisites">Prerequisites</a></li>
+<li><a class="reference internal" href="#build">Build</a></li>
+</ul>
+</li>
+</ul>
+
+          </div>
+        </div>
+      </div>
+      
       
     </aside>
   </div>
diff --git a/docs/build_stepbystep.html b/docs/build_stepbystep.html
index 31621e40..39a1d49d 100644
--- a/docs/build_stepbystep.html
+++ b/docs/build_stepbystep.html
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -208,8 +209,8 @@ <h1>Building: step-by-step<a class="headerlink" href="#building-step-by-step" ti
 a simplified (automated) way the get this done,
 so that you can minimize the extra effort
 in building the TPLs needed and the SHAW code.
-This page tries to address this: it provides a step-by-step
-guide that leverages some scripts we have prepared to simplify this.</p>
+This page tries to do so by providing a step-by-step
+guide and some scripts we have prepared.</p>
 <div class="section" id="prerequisites">
 <h2>Prerequisites<a class="headerlink" href="#prerequisites" title="Permalink to this headline">¶</a></h2>
 <ul class="simple">
@@ -229,9 +230,8 @@ <h2>Prerequisites<a class="headerlink" href="#prerequisites" title="Permalink to
 </div>
 <div class="section" id="step-1-prepare-environment">
 <h2>Step 1: Prepare environment<a class="headerlink" href="#step-1-prepare-environment" title="Permalink to this headline">¶</a></h2>
-<p>Let’s make things easy:</p>
-<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">CXX</span><span class="o">=</span>&lt;path-to-your-C++14-compiler&gt;
-<span class="nb">export</span> <span class="nv">SHAWDIR</span><span class="o">=</span>&lt;path-to-where-you-cloned-the-SHAW-repository&gt;
+<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">CXX</span><span class="o">=</span>&lt;fullpath-to-your-C++14-compiler&gt;
+<span class="nb">export</span> <span class="nv">SHAWDIR</span><span class="o">=</span>&lt;fullpath-to-where-you-cloned-the-SHAW-repository&gt;
 
 <span class="nb">export</span> <span class="nv">WORKDIR</span><span class="o">=</span><span class="si">${</span><span class="nv">HOME</span><span class="si">}</span>/myFirstShawBuild
 mkdir -p <span class="si">${</span><span class="nv">WORKDIR</span><span class="si">}</span>
@@ -240,18 +240,17 @@ <h2>Step 1: Prepare environment<a class="headerlink" href="#step-1-prepare-envir
 </div>
 <div class="section" id="step-2-build-tpls">
 <h2>Step 2: Build TPLs<a class="headerlink" href="#step-2-build-tpls" title="Permalink to this headline">¶</a></h2>
-<p>To simplify this part, we have prepared script that
-automates getting the TPLs:</p>
+<p>We have prepared a script that automates this:</p>
 <div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/bash_scripts
 bash build_tpls.sh <span class="si">${</span><span class="nv">WORKDIR</span><span class="si">}</span> openmp
 </pre></div>
 </div>
 <p>This script will fetch, build and install inside <code class="docutils literal notranslate"><span class="pre">WORKDIR/tpls</span></code>
-all TPLs needed: Kokkos-core, Kokkos-kernelas and yaml-cpp.</p>
+all TPLs needed: Kokkos-core, Kokkos-kernels and yaml-cpp.</p>
 <div class="admonition attention">
 <p class="admonition-title">Attention</p>
-<p>This will build Kokkos for host-only use with the OpenMP backend
-but <strong>without</strong> any architecture specifications. This is on purpose,
+<p>This builds Kokkos with only the OpenMP backend and <strong>without</strong>
+any architecture specifications. This is on purpose,
 because this step is meant to be as generic and simple as possible to get
 you started quickly. If you want to customize things, read
 more on the <a class="reference external" href="https://github.com/kokkos">Kokkos github</a>.</p>
@@ -279,16 +278,18 @@ <h2>Step 2: Build TPLs<a class="headerlink" href="#step-2-build-tpls" title="Per
 <div class="section" id="step-3-build-shaw">
 <h2>Step 3: Build SHAW<a class="headerlink" href="#step-3-build-shaw" title="Permalink to this headline">¶</a></h2>
 <div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">WORKDIR</span><span class="si">}</span>
-mkdir shaw-build <span class="o">&amp;&amp;</span> <span class="nb">cd</span> shaw-build
 
+<span class="c1"># note that here there is no need to specify the compiler because</span>
+<span class="c1"># cmake will automatically pick up the env var CXX that we</span>
+<span class="c1"># already set above in step 1</span>
 cmake <span class="se">\</span>
   -DKokkosKernels_DIR<span class="o">=</span><span class="si">${</span><span class="nv">WORKDIR</span><span class="si">}</span>/tpls/kokkos-kernels-install/lib/cmake/KokkosKernels/ <span class="se">\</span>
   -Dyaml-cpp_DIR<span class="o">=</span><span class="si">${</span><span class="nv">WORKDIR</span><span class="si">}</span>/tpls/yamlcpp-install/share/cmake/ <span class="se">\</span>
-  <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>
+  -B <span class="si">${</span><span class="nv">WORKDIR</span><span class="si">}</span>/shaw-build <span class="se">\</span>
+  -S <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>
 
+<span class="nb">cd</span> <span class="si">${</span><span class="nv">WORKDIR</span><span class="si">}</span>/shaw-build
 make -j4
-
-<span class="c1"># running the SHAW tests is advised</span>
 ctest
 </pre></div>
 </div>
@@ -324,7 +325,7 @@ <h2>Step 3: Build SHAW<a class="headerlink" href="#step-3-build-shaw" title="Per
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/demo_rank1fom.html b/docs/demo1.html
similarity index 74%
rename from docs/demo_rank1fom.html
rename to docs/demo1.html
index 4b647c55..62520e74 100644
--- a/docs/demo_rank1fom.html
+++ b/docs/demo1.html
@@ -2,10 +2,10 @@
 <html class="no-js">
   <head><meta charset="utf-8"/>
     <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="(2): Multi-forcing via rank-1" href="demo_rank1fommulti.html" /><link rel="prev" title="End-to-end Demos" href="demos.html" />
+    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Demo 2" href="demo2.html" /><link rel="prev" title="End-to-end Demos" href="demos.html" />
 
     <meta name="generator" content="sphinx-4.2.0, furo 2021.10.09"/>
-        <title>(1): Single Forcing - SHAW 0.1.0 documentation</title>
+        <title>Demo 1 - SHAW 0.1.0 documentation</title>
       <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
     <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=0254c309f5cadf746f1a613e7677379ac9c8cdcd" />
     <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 current has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul class="current">
-<li class="toctree-l2 current current-page"><a class="current reference internal" href="#">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2 current current-page"><a class="current reference internal" href="#">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -202,33 +203,43 @@
           </label>
         </div>
         <article role="main">
-          <div class="section" id="single-forcing">
-<h1>(1): Single Forcing<a class="headerlink" href="#single-forcing" title="Permalink to this headline">¶</a></h1>
-<p>Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.</p>
-<div class="section" id="prepare-environment">
-<h2>Prepare environment<a class="headerlink" href="#prepare-environment" title="Permalink to this headline">¶</a></h2>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">ESWSRCDIR</span><span class="o">=</span>&lt;fullpath-to-the-source-code-repository&gt;
-<span class="nb">export</span> <span class="nv">SHAWEXEDIR</span><span class="o">=</span>&lt;fullpath-to-where-you-built-the-code-executables&gt;
+          <div class="section" id="demo-1">
+<h1>Demo 1<a class="headerlink" href="#demo-1" title="Permalink to this headline">¶</a></h1>
+<div class="admonition-description admonition">
+<p class="admonition-title">Description:</p>
+<p>This demo simulates the wave dynamics
+for a single forcing using the PREM Earth model.</p>
+</div>
+<div class="section" id="prepare">
+<h2>1. Prepare<a class="headerlink" href="#prepare" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">SHAWDIR</span><span class="o">=</span>&lt;fullpath-to-the-source-code-repository&gt;
+<span class="nb">export</span> <span class="nv">EXEDIR</span><span class="o">=</span>&lt;fullpath-to-where-you-built-the-code-executables&gt;
 
 <span class="c1"># create a dir to run the demo</span>
 <span class="nb">export</span> <span class="nv">MYRUNDIR</span><span class="o">=</span><span class="si">${</span><span class="nv">HOME</span><span class="si">}</span>/myFirstDemo
 mkdir -p <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
+<div class="admonition important">
+<p class="admonition-title">Important</p>
+<p>You need to have the code built to proceed, see <a class="reference internal" href="build_expert.html"><span class="doc">Building: “expert” mode</span></a> or <a class="reference internal" href="build_stepbystep.html"><span class="doc">Building: step-by-step</span></a>.</p>
+</div>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
 </div>
-<div class="section" id="generating-the-mesh">
-<h2>Generating the mesh<a class="headerlink" href="#generating-the-mesh" title="Permalink to this headline">¶</a></h2>
-<p>For this demo, we use a grid of <code class="docutils literal notranslate"><span class="pre">200</span></code> x <code class="docutils literal notranslate"><span class="pre">1000</span></code> velocity points
-along the radial and polar directions, respectively.
-To generate the mesh files proceed as follows:</p>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/meshing
+<div class="section" id="generate-the-mesh">
+<h2>2. Generate the mesh<a class="headerlink" href="#generate-the-mesh" title="Permalink to this headline">¶</a></h2>
+<p>We use a grid of <code class="docutils literal notranslate"><span class="pre">200</span></code> x <code class="docutils literal notranslate"><span class="pre">1000</span></code> velocity points
+along the radial and polar directions, respectively.</p>
+<p>To generate the mesh files proceed as follows:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/meshing
 python create_single_mesh.py -nr <span class="m">200</span> -nth <span class="m">1000</span> -working-dir <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
 <p>Note that the grid generator script only needs the velocity points
 because the stress points are defined automatically
-based on the <a class="reference external" href="{filename}/goveq.rst">staggered scheme</a>.</p>
+based on the <a class="reference internal" href="goveq.html#discretization"><span class="std std-ref">staggered scheme</span></a>.</p>
 <p>After generating the grid, you should have a <code class="docutils literal notranslate"><span class="pre">${MYRUNDIR}/mesh200x1000</span></code> directory containing:</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>.
 ├── <span class="o">[</span><span class="m">4</span>.5M<span class="o">]</span>  coeff_vp.dat
@@ -237,14 +248,14 @@ <h2>Generating the mesh<a class="headerlink" href="#generating-the-mesh" title="
 └── <span class="o">[</span> <span class="m">231</span><span class="o">]</span>  mesh_info.dat
 </pre></div>
 </div>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
 </div>
 <div class="section" id="input-file">
-<h2>Input file<a class="headerlink" href="#input-file" title="Permalink to this headline">¶</a></h2>
-<p>We use the following input file (<a class="reference external" href="{filename}/inputfile.rst">learn more about input file</a>):</p>
+<h2>3. Input file<a class="headerlink" href="#input-file" title="Permalink to this headline">¶</a></h2>
+<p>We use the following input file (<a class="reference internal" href="inputfile.html"><span class="doc">learn more about input file</span></a>):</p>
 <div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">general</span><span class="p">:</span>
-  <span class="c1"># meshDir should contain the full path to the mesh directory</span>
-  <span class="c1"># as generated by the python script `meshing/create_single_mesh.py`</span>
-  <span class="c1"># we assume the input file is in the same location as mesh dir</span>
   <span class="nt">meshDir</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">./mesh200x1000</span>
   <span class="nt">dt</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">0.25</span>
   <span class="nt">finalTime</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">2000.0</span>
@@ -254,42 +265,58 @@ <h2>Input file<a class="headerlink" href="#input-file" title="Permalink to this
 <span class="nt">io</span><span class="p">:</span>
   <span class="nt">snapshotMatrix</span><span class="p">:</span>
     <span class="nt">binary</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">true</span>
-    <span class="nt">velocity</span><span class="p">:</span> <span class="p p-Indicator">{</span><span class="nt">freq</span><span class="p">:</span> <span class="nv">100</span><span class="p p-Indicator">,</span><span class="nt"> fileName</span><span class="p">:</span> <span class="nv">snaps_vp</span><span class="p p-Indicator">}</span>
-    <span class="nt">stress</span><span class="p">:</span>   <span class="p p-Indicator">{</span><span class="nt">freq</span><span class="p">:</span> <span class="nv">100</span><span class="p p-Indicator">,</span><span class="nt"> fileName</span><span class="p">:</span> <span class="nv">snaps_sp</span><span class="p p-Indicator">}</span>
+    <span class="nt">velocity</span><span class="p">:</span>
+      <span class="nt">freq</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">100</span>
+      <span class="nt">fileName</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">snaps_vp</span>
+    <span class="nt">stress</span><span class="p">:</span>
+      <span class="nt">freq</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">100</span>
+      <span class="nt">fileName</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">snaps_sp</span>
+
   <span class="nt">seismogram</span><span class="p">:</span>
     <span class="nt">binary</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">false</span>
     <span class="nt">freq</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">4</span>
-    <span class="nt">receivers</span><span class="p">:</span> <span class="p p-Indicator">[</span><span class="nv">5</span><span class="p p-Indicator">,</span><span class="nv">30</span><span class="p p-Indicator">,</span><span class="nv">55</span><span class="p p-Indicator">,</span><span class="nv">80</span><span class="p p-Indicator">,</span><span class="nv">105</span><span class="p p-Indicator">,</span><span class="nv">130</span><span class="p p-Indicator">,</span><span class="nv">155</span><span class="p p-Indicator">,</span><span class="nv">175</span><span class="p p-Indicator">]</span>
+    <span class="nt">receivers</span><span class="p">:</span>
+    <span class="p p-Indicator">-</span> <span class="l l-Scalar l-Scalar-Plain">5</span>
+    <span class="p p-Indicator">-</span> <span class="l l-Scalar l-Scalar-Plain">30</span>
+    <span class="p p-Indicator">-</span> <span class="l l-Scalar l-Scalar-Plain">80</span>
 
 <span class="nt">source</span><span class="p">:</span>
   <span class="nt">signal</span><span class="p">:</span>
     <span class="nt">kind</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">ricker</span>
-    <span class="nt">depth</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">640.0</span>  <span class="c1"># km</span>
-    <span class="nt">period</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">65.0</span>  <span class="c1"># seconds</span>
-    <span class="nt">delay</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">180.0</span>  <span class="c1"># seconds</span>
+    <span class="nt">depth</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">640.0</span>
+    <span class="nt">period</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">65.0</span>
+    <span class="nt">delay</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">180.0</span>
 
 <span class="nt">material</span><span class="p">:</span>
   <span class="nt">kind</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">prem</span>
 </pre></div>
 </div>
-<p>You can get the input file as:</p>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/demos/fom_rank1/input.yaml <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
+<p>which we have ready for you to copy as:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/demos/demo1/input.yaml <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
 </div>
 <div class="section" id="run-the-simulation">
-<h2>Run the simulation<a class="headerlink" href="#run-the-simulation" title="Permalink to this headline">¶</a></h2>
+<h2>4. Run the simulation<a class="headerlink" href="#run-the-simulation" title="Permalink to this headline">¶</a></h2>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
-ln -s <span class="si">${</span><span class="nv">SHAWEXEDIR</span><span class="si">}</span>/shawExe .
+
+<span class="c1"># soft link the executable</span>
+ln -s <span class="si">${</span><span class="nv">EXEDIR</span><span class="si">}</span>/shawExe .
 
 <span class="c1"># if you use OpenMP build, remember to set</span>
 <span class="c1"># OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread</span>
 ./shawExe input.yaml
 </pre></div>
 </div>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
 </div>
-<div class="section" id="simulation-data">
-<h2>Simulation data<a class="headerlink" href="#simulation-data" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="post-process-data">
+<h2>5. Post-process data<a class="headerlink" href="#post-process-data" title="Permalink to this headline">¶</a></h2>
 <p>The demo should generate inside <code class="docutils literal notranslate"><span class="pre">${MYRUNDIR}</span></code> the following:</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>coords_sp.txt <span class="c1">#: coordinates of the stress grid points</span>
 coords_vp.txt <span class="c1">#: coordinates of the velocity grid points</span>
@@ -298,35 +325,33 @@ <h2>Simulation data<a class="headerlink" href="#simulation-data" title="Permalin
 snaps_sp_0    <span class="c1">#: snapshot matrix for the stresses</span>
 </pre></div>
 </div>
-</div>
-<div class="section" id="post-process-data">
-<h2>Post-process data<a class="headerlink" href="#post-process-data" title="Permalink to this headline">¶</a></h2>
-<p>To post-process the data, you can use the Python scripts created for this demo:</p>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/demos/fom_rank1/*.py <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
+<p>We created Python scripts for this:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/demos/demo1/*.py <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
-<p>First, we visualize the seismogram data by doing:</p>
+<p>First, the seismogram data:</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 python plotSeismogram.py
 </pre></div>
 </div>
 <img alt="_images/demo1_f1.png" src="_images/demo1_f1.png"/>
-<p>Then, we can extract and visualize the full wavefield at <code class="docutils literal notranslate"><span class="pre">t=250,</span> <span class="pre">1000,</span> <span class="pre">2000</span></code> (seconds):</p>
+<p>Then, contour plots of the velocity field at <code class="docutils literal notranslate"><span class="pre">t=250,</span> <span class="pre">1000,</span> <span class="pre">2000</span></code> (seconds):</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
-ln -s <span class="si">${</span><span class="nv">SHAWEXEDIR</span><span class="si">}</span>/extractStateFromSnaps .
+ln -s <span class="si">${</span><span class="nv">EXEDIR</span><span class="si">}</span>/extractStateFromSnaps .
 
 <span class="c1"># extract from the velocity snapshots the velocity field at specific timesteps:</span>
 <span class="c1"># since we use ``dt = 0.25`` seconds, our targets ``t=250, 1000, 2000``</span>
 <span class="c1"># correspond to *time steps* 1000, 4000, 8000</span>
 ./extractStateFromSnaps --snaps<span class="o">=</span>./snaps_vp_0 binary --fsize<span class="o">=</span><span class="m">1</span> <span class="se">\</span>
-  --outformat<span class="o">=</span>ascii --timesteps<span class="o">=</span><span class="m">1000</span> <span class="m">4000</span> <span class="m">8000</span>  --samplingfreq<span class="o">=</span><span class="m">100</span> --outfileappend<span class="o">=</span>vp
+  --outformat<span class="o">=</span>ascii --timesteps<span class="o">=</span><span class="m">1000</span> <span class="m">4000</span> <span class="m">8000</span> <span class="se">\</span>
+  --samplingfreq<span class="o">=</span><span class="m">100</span> --outfileappend<span class="o">=</span>vp
 
 python plotWavefield.py
 </pre></div>
 </div>
 <a class="reference internal image-reference" href="_images/demo1_f2.png"><img alt="_images/demo1_f2.png" src="_images/demo1_f2.png" style="width: 30%;"/></a>
 <a class="reference internal image-reference" href="_images/demo1_f3.png"><img alt="_images/demo1_f3.png" src="_images/demo1_f3.png" style="width: 28%;"/></a>
-<a class="reference internal image-reference" href="_images/demo1_f4.png"><img alt="_images/demo1_f4.png" src="_images/demo1_f4.png" style="width: 30%;"/></a>
+<a class="reference internal image-reference" href="_images/demo1_f4.png"><img alt="_images/demo1_f4.png" src="_images/demo1_f4.png" style="width: 28%;"/></a>
 </div>
 </div>
 
@@ -335,12 +360,12 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
       <footer>
         
         <div class="related-pages">
-          <a class="next-page" href="demo_rank1fommulti.html">
+          <a class="next-page" href="demo2.html">
               <div class="page-info">
                 <div class="context">
                   <span>Next</span>
                 </div>
-                <div class="title">(2): Multi-forcing via rank-1</div>
+                <div class="title">Demo 2</div>
               </div>
               <svg><use href="#svg-arrow-right"></use></svg>
             </a>
@@ -359,12 +384,12 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
               <a href="https://github.com/pradyunsg/furo">Furo theme</a>. |
-            <a class="muted-link" href="_sources/demo_rank1fom.rst.txt"
+            <a class="muted-link" href="_sources/demo1.rst.txt"
                rel="nofollow">
               Show Source
             </a>
@@ -384,13 +409,12 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
         <div class="toc-tree-container">
           <div class="toc-tree">
             <ul>
-<li><a class="reference internal" href="#">(1): Single Forcing</a><ul>
-<li><a class="reference internal" href="#prepare-environment">Prepare environment</a></li>
-<li><a class="reference internal" href="#generating-the-mesh">Generating the mesh</a></li>
-<li><a class="reference internal" href="#input-file">Input file</a></li>
-<li><a class="reference internal" href="#run-the-simulation">Run the simulation</a></li>
-<li><a class="reference internal" href="#simulation-data">Simulation data</a></li>
-<li><a class="reference internal" href="#post-process-data">Post-process data</a></li>
+<li><a class="reference internal" href="#">Demo 1</a><ul>
+<li><a class="reference internal" href="#prepare">1. Prepare</a></li>
+<li><a class="reference internal" href="#generate-the-mesh">2. Generate the mesh</a></li>
+<li><a class="reference internal" href="#input-file">3. Input file</a></li>
+<li><a class="reference internal" href="#run-the-simulation">4. Run the simulation</a></li>
+<li><a class="reference internal" href="#post-process-data">5. Post-process data</a></li>
 </ul>
 </li>
 </ul>
diff --git a/docs/demo_rank1fommulti.html b/docs/demo2.html
similarity index 79%
rename from docs/demo_rank1fommulti.html
rename to docs/demo2.html
index 6d9faed5..3e2432d6 100644
--- a/docs/demo_rank1fommulti.html
+++ b/docs/demo2.html
@@ -2,10 +2,10 @@
 <html class="no-js">
   <head><meta charset="utf-8"/>
     <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="(3): Multi-forcing via rank-2" href="demo_rank2fom.html" /><link rel="prev" title="(1): Single Forcing" href="demo_rank1fom.html" />
+    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Demo 3" href="demo3.html" /><link rel="prev" title="Demo 1" href="demo1.html" />
 
     <meta name="generator" content="sphinx-4.2.0, furo 2021.10.09"/>
-        <title>(2): Multi-forcing via rank-1 - SHAW 0.1.0 documentation</title>
+        <title>Demo 2 - SHAW 0.1.0 documentation</title>
       <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
     <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=0254c309f5cadf746f1a613e7677379ac9c8cdcd" />
     <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 current has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul class="current">
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2 current current-page"><a class="current reference internal" href="#">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2 current current-page"><a class="current reference internal" href="#">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -202,28 +203,37 @@
           </label>
         </div>
         <article role="main">
-          <div class="section" id="multi-forcing-via-rank-1">
-<h1>(2): Multi-forcing via rank-1<a class="headerlink" href="#multi-forcing-via-rank-1" title="Permalink to this headline">¶</a></h1>
-<p>Before you start</p>
-<p>Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.</p>
-<div class="section" id="prepare-environment">
-<h2>Prepare environment<a class="headerlink" href="#prepare-environment" title="Permalink to this headline">¶</a></h2>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">ESWSRCDIR</span><span class="o">=</span>&lt;fullpath-to-the-source-code-repository&gt;
-<span class="nb">export</span> <span class="nv">SHAWEXEDIR</span><span class="o">=</span>&lt;fullpath-to-where-you-built-the-code-executables&gt;
+          <div class="section" id="demo-2">
+<h1>Demo 2<a class="headerlink" href="#demo-2" title="Permalink to this headline">¶</a></h1>
+<div class="admonition-description admonition">
+<p class="admonition-title">Description:</p>
+<p>This demo simulates the wave dynamics for multiple
+forcings using the rank-1 formulation and the PREM Earth model.</p>
+</div>
+<div class="section" id="prepare">
+<h2>1. Prepare<a class="headerlink" href="#prepare" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">SHAWDIR</span><span class="o">=</span>&lt;fullpath-to-the-source-code-repository&gt;
+<span class="nb">export</span> <span class="nv">EXEDIR</span><span class="o">=</span>&lt;fullpath-to-where-you-built-the-code-executables&gt;
 
 <span class="c1"># create a dir to run the demo</span>
 <span class="nb">export</span> <span class="nv">MYRUNDIR</span><span class="o">=</span><span class="si">${</span><span class="nv">HOME</span><span class="si">}</span>/mySecondDemo
 mkdir -p <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
+<div class="admonition important">
+<p class="admonition-title">Important</p>
+<p>You need to have the code built to proceed, see <a class="reference internal" href="build_expert.html"><span class="doc">Building: “expert” mode</span></a> or <a class="reference internal" href="build_stepbystep.html"><span class="doc">Building: step-by-step</span></a>.</p>
+</div>
+<div class="line-block">
+<div class="line"><br/></div>
 </div>
-<div class="section" id="generating-the-mesh">
-<h2>Generating the mesh<a class="headerlink" href="#generating-the-mesh" title="Permalink to this headline">¶</a></h2>
+</div>
+<div class="section" id="generate-the-mesh">
+<h2>2. Generate the mesh<a class="headerlink" href="#generate-the-mesh" title="Permalink to this headline">¶</a></h2>
 <p>For this demo, we use a grid of <code class="docutils literal notranslate"><span class="pre">256</span></code> x <code class="docutils literal notranslate"><span class="pre">1024</span></code> velocity points
 along the radial and polar directions, respectively.
 To generate the mesh files proceed as follows:</p>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/meshing
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/meshing
 python create_single_mesh.py -nr <span class="m">256</span> -nth <span class="m">1024</span> -working-dir <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
@@ -235,11 +245,18 @@ <h2>Generating the mesh<a class="headerlink" href="#generating-the-mesh" title="
 └── <span class="o">[</span> <span class="m">231</span><span class="o">]</span>  mesh_info.dat
 </pre></div>
 </div>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
 </div>
 <div class="section" id="input-file">
-<h2>Input file<a class="headerlink" href="#input-file" title="Permalink to this headline">¶</a></h2>
-<p>We use the following input file (<a class="reference external" href="{filename}/inputfile.rst">learn more about input file</a>):</p>
+<h2>3. Input file<a class="headerlink" href="#input-file" title="Permalink to this headline">¶</a></h2>
+<p>We use the following input file (<a class="reference internal" href="inputfile.html"><span class="doc">learn more about input file</span></a>):</p>
 <div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">general</span><span class="p">:</span>
+  <span class="c1"># meshDir should contain the full path to the mesh directory</span>
+  <span class="c1"># as generated by the python script `meshing/create_single_mesh.py`</span>
+  <span class="c1"># but here we use this for simplicity since this input file</span>
+  <span class="c1"># is used in the doc showing how to run a case</span>
   <span class="nt">meshDir</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">./mesh256x1024</span>
   <span class="nt">dt</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">0.25</span>
   <span class="nt">finalTime</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">2000.0</span>
@@ -252,19 +269,15 @@ <h2>Input file<a class="headerlink" href="#input-file" title="Permalink to this
     <span class="nt">velocity</span><span class="p">:</span> <span class="p p-Indicator">{</span><span class="nt">freq</span><span class="p">:</span> <span class="nv">100</span><span class="p p-Indicator">,</span><span class="nt"> fileName</span><span class="p">:</span> <span class="nv">snaps_vp</span><span class="p p-Indicator">}</span>
     <span class="nt">stress</span><span class="p">:</span>   <span class="p p-Indicator">{</span><span class="nt">freq</span><span class="p">:</span> <span class="nv">100</span><span class="p p-Indicator">,</span><span class="nt"> fileName</span><span class="p">:</span> <span class="nv">snaps_sp</span><span class="p p-Indicator">}</span>
 
-<span class="nt">seismogram</span><span class="p">:</span>
-  <span class="nt">binary</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">false</span>
-  <span class="nt">freq</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">4</span>
-  <span class="nt">receivers</span><span class="p">:</span> <span class="p p-Indicator">[</span><span class="nv">5</span><span class="p p-Indicator">,</span><span class="nv">30</span><span class="p p-Indicator">,</span><span class="nv">55</span><span class="p p-Indicator">,</span><span class="nv">80</span><span class="p p-Indicator">,</span><span class="nv">105</span><span class="p p-Indicator">,</span><span class="nv">130</span><span class="p p-Indicator">,</span><span class="nv">155</span><span class="p p-Indicator">,</span><span class="nv">175</span><span class="p p-Indicator">]</span>
+  <span class="nt">seismogram</span><span class="p">:</span>
+    <span class="nt">binary</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">false</span>
+    <span class="nt">freq</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">4</span>
+    <span class="nt">receivers</span><span class="p">:</span> <span class="p p-Indicator">[</span><span class="nv">5</span><span class="p p-Indicator">,</span><span class="nv">30</span><span class="p p-Indicator">,</span><span class="nv">55</span><span class="p p-Indicator">,</span><span class="nv">80</span><span class="p p-Indicator">,</span><span class="nv">105</span><span class="p p-Indicator">,</span><span class="nv">130</span><span class="p p-Indicator">,</span><span class="nv">155</span><span class="p p-Indicator">,</span><span class="nv">175</span><span class="p p-Indicator">]</span>
 
 <span class="nt">source</span><span class="p">:</span>
   <span class="nt">signal</span><span class="p">:</span>
     <span class="nt">kind</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">ricker</span>
-
-    <span class="c1"># here we pass a list of depths to use as samples</span>
-    <span class="c1"># this will automatically activate sampling</span>
     <span class="nt">depth</span><span class="p">:</span> <span class="p p-Indicator">[</span><span class="nv">240.</span><span class="p p-Indicator">,</span><span class="nv">440.</span><span class="p p-Indicator">,</span><span class="nv">540.</span><span class="p p-Indicator">,</span><span class="nv">700.</span><span class="p p-Indicator">]</span>
-
     <span class="nt">period</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">65.0</span>
     <span class="nt">delay</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">180.0</span>
 
@@ -272,15 +285,18 @@ <h2>Input file<a class="headerlink" href="#input-file" title="Permalink to this
   <span class="nt">kind</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">prem</span>
 </pre></div>
 </div>
-<p>You can get the input file as:</p>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/demos/fom_rank1_sample_depth/input.yaml <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
+<p>which we have ready for you to copy as:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/demos/demo2/input.yaml <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
 </div>
 <div class="section" id="run-the-simulation">
-<h2>Run the simulation<a class="headerlink" href="#run-the-simulation" title="Permalink to this headline">¶</a></h2>
+<h2>4. Run the simulation<a class="headerlink" href="#run-the-simulation" title="Permalink to this headline">¶</a></h2>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
-ln -s <span class="si">${</span><span class="nv">SHAWEXEDIR</span><span class="si">}</span>/shawExe .
+ln -s <span class="si">${</span><span class="nv">EXEDIR</span><span class="si">}</span>/shawExe .
 
 <span class="c1"># if you use OpenMP build, remember to set</span>
 <span class="c1"># OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread</span>
@@ -294,7 +310,7 @@ <h2>Run the simulation<a class="headerlink" href="#run-the-simulation" title="Pe
 of which the IO time for data collection is less than 1 second.</p>
 </div>
 <div class="section" id="simulation-data">
-<h2>Simulation data<a class="headerlink" href="#simulation-data" title="Permalink to this headline">¶</a></h2>
+<h2>5. Simulation data<a class="headerlink" href="#simulation-data" title="Permalink to this headline">¶</a></h2>
 <p>After running the demo (have some patience because it takes a couple of minutes
 if you use the serial mode), you should have inside <code class="docutils literal notranslate"><span class="pre">${MYRUNDIR}</span></code> the following files:</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>coords_sp.txt <span class="c1">#: coordinates of the velocity grid points</span>
@@ -318,11 +334,11 @@ <h2>Simulation data<a class="headerlink" href="#simulation-data" title="Permalin
 </div>
 </div>
 <div class="section" id="post-process-data">
-<h2>Post-process data<a class="headerlink" href="#post-process-data" title="Permalink to this headline">¶</a></h2>
+<h2>6. Post-process data<a class="headerlink" href="#post-process-data" title="Permalink to this headline">¶</a></h2>
 <p>To post-process the data, get the Python scripts created
 for this demo and visualize the seismogram:</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
-cp <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/demos/fom_rank1_sample_depth/plotSeismogram.py .
+cp <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/demos/demo2/plotSeismogram.py .
 python plotSeismogram.py
 </pre></div>
 </div>
@@ -330,7 +346,7 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
 <p>We now extract and compare the velocity wavefield at <code class="docutils literal notranslate"><span class="pre">t=2000</span></code> (seconds)
 for <code class="docutils literal notranslate"><span class="pre">depth=240</span></code> and <code class="docutils literal notranslate"><span class="pre">depth=700</span></code></p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
-ln -s <span class="si">${</span><span class="nv">SHAWEXEDIR</span><span class="si">}</span>/extractStateFromSnaps .
+ln -s <span class="si">${</span><span class="nv">EXEDIR</span><span class="si">}</span>/extractStateFromSnaps .
 
 <span class="c1"># snaps_vp_0 contains snapshots for depth=240 km</span>
 <span class="c1"># extract target state and write to file appending vp_d240 to identify the case</span>
@@ -342,6 +358,7 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
 ./extractStateFromSnaps --snaps<span class="o">=</span>./snaps_vp_3 binary --fsize<span class="o">=</span><span class="m">1</span> <span class="se">\</span>
   --outformat<span class="o">=</span>ascii --timesteps<span class="o">=</span><span class="m">8000</span>  --samplingfreq<span class="o">=</span><span class="m">100</span> --outfileappend<span class="o">=</span>vp_d700
 
+cp <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/demos/demo2/plotWavefield.py .
 python plotWavefield.py
 </pre></div>
 </div>
@@ -358,23 +375,23 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
       <footer>
         
         <div class="related-pages">
-          <a class="next-page" href="demo_rank2fom.html">
+          <a class="next-page" href="demo3.html">
               <div class="page-info">
                 <div class="context">
                   <span>Next</span>
                 </div>
-                <div class="title">(3): Multi-forcing via rank-2</div>
+                <div class="title">Demo 3</div>
               </div>
               <svg><use href="#svg-arrow-right"></use></svg>
             </a>
-          <a class="prev-page" href="demo_rank1fom.html">
+          <a class="prev-page" href="demo1.html">
               <svg><use href="#svg-arrow-right"></use></svg>
               <div class="page-info">
                 <div class="context">
                   <span>Previous</span>
                 </div>
                 
-                <div class="title">(1): Single Forcing</div>
+                <div class="title">Demo 1</div>
                 
               </div>
             </a>
@@ -382,12 +399,12 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
               <a href="https://github.com/pradyunsg/furo">Furo theme</a>. |
-            <a class="muted-link" href="_sources/demo_rank1fommulti.rst.txt"
+            <a class="muted-link" href="_sources/demo2.rst.txt"
                rel="nofollow">
               Show Source
             </a>
@@ -407,13 +424,13 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
         <div class="toc-tree-container">
           <div class="toc-tree">
             <ul>
-<li><a class="reference internal" href="#">(2): Multi-forcing via rank-1</a><ul>
-<li><a class="reference internal" href="#prepare-environment">Prepare environment</a></li>
-<li><a class="reference internal" href="#generating-the-mesh">Generating the mesh</a></li>
-<li><a class="reference internal" href="#input-file">Input file</a></li>
-<li><a class="reference internal" href="#run-the-simulation">Run the simulation</a></li>
-<li><a class="reference internal" href="#simulation-data">Simulation data</a></li>
-<li><a class="reference internal" href="#post-process-data">Post-process data</a></li>
+<li><a class="reference internal" href="#">Demo 2</a><ul>
+<li><a class="reference internal" href="#prepare">1. Prepare</a></li>
+<li><a class="reference internal" href="#generate-the-mesh">2. Generate the mesh</a></li>
+<li><a class="reference internal" href="#input-file">3. Input file</a></li>
+<li><a class="reference internal" href="#run-the-simulation">4. Run the simulation</a></li>
+<li><a class="reference internal" href="#simulation-data">5. Simulation data</a></li>
+<li><a class="reference internal" href="#post-process-data">6. Post-process data</a></li>
 </ul>
 </li>
 </ul>
diff --git a/docs/demo_rank2fom.html b/docs/demo3.html
similarity index 66%
rename from docs/demo_rank2fom.html
rename to docs/demo3.html
index 6b6e4015..f6a33b2e 100644
--- a/docs/demo_rank2fom.html
+++ b/docs/demo3.html
@@ -2,10 +2,10 @@
 <html class="no-js">
   <head><meta charset="utf-8"/>
     <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="License" href="license.html" /><link rel="prev" title="(2): Multi-forcing via rank-1" href="demo_rank1fommulti.html" />
+    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Performance" href="performance.html" /><link rel="prev" title="Demo 2" href="demo2.html" />
 
     <meta name="generator" content="sphinx-4.2.0, furo 2021.10.09"/>
-        <title>(3): Multi-forcing via rank-2 - SHAW 0.1.0 documentation</title>
+        <title>Demo 3 - SHAW 0.1.0 documentation</title>
       <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
     <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=0254c309f5cadf746f1a613e7677379ac9c8cdcd" />
     <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 current has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul class="current">
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2 current current-page"><a class="current reference internal" href="#">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2 current current-page"><a class="current reference internal" href="#">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -202,59 +203,90 @@
           </label>
         </div>
         <article role="main">
-          <div class="section" id="multi-forcing-via-rank-2">
-<h1>(3): Multi-forcing via rank-2<a class="headerlink" href="#multi-forcing-via-rank-2" title="Permalink to this headline">¶</a></h1>
-<p>Before you start</p>
-<p>Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.</p>
-<p>For the sake of demonstration, this demo solves <em>the same problem</em>
-described in the rank-1 demo, except that
-here we use the <em>rank-2 formulation</em>, which allows us to simulate several
-trajectories simultaneously.</p>
-<div class="section" id="prepare-environment">
-<h2>Prepare environment<a class="headerlink" href="#prepare-environment" title="Permalink to this headline">¶</a></h2>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">ESWSRCDIR</span><span class="o">=</span>&lt;fullpath-to-the-source-code-repository&gt;
-<span class="nb">export</span> <span class="nv">SHAWEXEDIR</span><span class="o">=</span>&lt;fullpath-to-where-you-built-the-code-executables&gt;
+          <div class="section" id="demo-3">
+<h1>Demo 3<a class="headerlink" href="#demo-3" title="Permalink to this headline">¶</a></h1>
+<div class="admonition-description admonition">
+<p class="admonition-title">Description:</p>
+<p>This demo simulates the wave dynamics for multiple
+forcings using the rank-2 formulation and the PREM Earth model.
+For the sake of demonstration, this demo solves <em>the same problem</em>
+described in <a class="reference internal" href="demo2.html"><span class="doc">Demo 2</span></a>, except that here we use the <em>rank-2 formulation</em>,
+which allows us to simulate several trajectories simultaneously.</p>
+</div>
+<div class="section" id="prepare">
+<h2>1. Prepare<a class="headerlink" href="#prepare" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">SHAWDIR</span><span class="o">=</span>&lt;fullpath-to-the-source-code-repository&gt;
+<span class="nb">export</span> <span class="nv">EXEDIR</span><span class="o">=</span>&lt;fullpath-to-where-you-built-the-code-executables&gt;
 
 <span class="c1"># create a dir to run the demo</span>
 <span class="nb">export</span> <span class="nv">MYRUNDIR</span><span class="o">=</span><span class="si">${</span><span class="nv">HOME</span><span class="si">}</span>/myThirdDemo
 mkdir -p <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
+<div class="admonition important">
+<p class="admonition-title">Important</p>
+<p>You need to have the code built to proceed, see <a class="reference internal" href="build_expert.html"><span class="doc">Building: “expert” mode</span></a> or <a class="reference internal" href="build_stepbystep.html"><span class="doc">Building: step-by-step</span></a>.</p>
+</div>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
 </div>
-<div class="section" id="generating-the-mesh">
-<h2>Generating the mesh<a class="headerlink" href="#generating-the-mesh" title="Permalink to this headline">¶</a></h2>
-<p>This is identical to the mesh used in this demo:</p>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/meshing
+<div class="section" id="generate-the-mesh">
+<h2>2. Generate the mesh<a class="headerlink" href="#generate-the-mesh" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/meshing
 python create_single_mesh.py -nr <span class="m">256</span> -nth <span class="m">1024</span> -working-dir <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
 </div>
 <div class="section" id="input-file">
-<h2>Input file<a class="headerlink" href="#input-file" title="Permalink to this headline">¶</a></h2>
-<p>The input file is identical to <a class="reference external" href="{filename}/rank1fommulti.rst">the one for the rank-1 demo</a>,
-except for the addition of one line to the <code class="docutils literal notranslate"><span class="pre">source</span></code> section:</p>
-<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="c1">#</span>
-<span class="c1"># general, io, material: as in the other demo</span>
-<span class="c1">#</span>
+<h2>3. Input file<a class="headerlink" href="#input-file" title="Permalink to this headline">¶</a></h2>
+<p>We use the following input file (<a class="reference internal" href="inputfile.html"><span class="doc">learn more about input file</span></a>):</p>
+<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">general</span><span class="p">:</span>
+  <span class="c1"># meshDir should contain the full path to the mesh directory</span>
+  <span class="c1"># as generated by the python script `meshing/create_single_mesh.py`</span>
+  <span class="c1"># but here we use this for simplicity since this input file</span>
+  <span class="c1"># is used in the doc showing how to run a case</span>
+  <span class="nt">meshDir</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">./mesh256x1024</span>
+  <span class="nt">dt</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">0.25</span>
+  <span class="nt">finalTime</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">2000.0</span>
+  <span class="nt">checkNumericalDispersion</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">true</span>
+  <span class="nt">checkCfl</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">true</span>
+
+<span class="nt">io</span><span class="p">:</span>
+  <span class="nt">snapshotMatrix</span><span class="p">:</span>
+    <span class="nt">binary</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">true</span>
+    <span class="nt">velocity</span><span class="p">:</span> <span class="p p-Indicator">{</span><span class="nt">freq</span><span class="p">:</span> <span class="nv">100</span><span class="p p-Indicator">,</span><span class="nt"> fileName</span><span class="p">:</span> <span class="nv">snaps_vp</span><span class="p p-Indicator">}</span>
+    <span class="nt">stress</span><span class="p">:</span>   <span class="p p-Indicator">{</span><span class="nt">freq</span><span class="p">:</span> <span class="nv">100</span><span class="p p-Indicator">,</span><span class="nt"> fileName</span><span class="p">:</span> <span class="nv">snaps_sp</span><span class="p p-Indicator">}</span>
+
+  <span class="nt">seismogram</span><span class="p">:</span>
+    <span class="nt">binary</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">false</span>
+    <span class="nt">freq</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">4</span>
+    <span class="nt">receivers</span><span class="p">:</span> <span class="p p-Indicator">[</span><span class="nv">5</span><span class="p p-Indicator">,</span><span class="nv">30</span><span class="p p-Indicator">,</span><span class="nv">55</span><span class="p p-Indicator">,</span><span class="nv">80</span><span class="p p-Indicator">,</span><span class="nv">105</span><span class="p p-Indicator">,</span><span class="nv">130</span><span class="p p-Indicator">,</span><span class="nv">155</span><span class="p p-Indicator">,</span><span class="nv">175</span><span class="p p-Indicator">]</span>
+
 <span class="nt">source</span><span class="p">:</span>
   <span class="nt">signal</span><span class="p">:</span>
-    <span class="c1"># kind, depth, period, delay: same as the other one</span>
-    <span class="c1"># ...</span>
-
-    <span class="c1"># forcingSize defines how many simultaneous trajectories to compute</span>
+    <span class="nt">kind</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">ricker</span>
+    <span class="nt">depth</span><span class="p">:</span> <span class="p p-Indicator">[</span><span class="nv">240.</span><span class="p p-Indicator">,</span><span class="nv">440.</span><span class="p p-Indicator">,</span><span class="nv">540.</span><span class="p p-Indicator">,</span><span class="nv">700.</span><span class="p p-Indicator">]</span>
+    <span class="nt">period</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">65.0</span>
+    <span class="nt">delay</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">180.0</span>
     <span class="nt">forcingSize</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">4</span>
+
+<span class="nt">material</span><span class="p">:</span>
+  <span class="nt">kind</span><span class="p">:</span> <span class="l l-Scalar l-Scalar-Plain">prem</span>
 </pre></div>
 </div>
-<p>The full input file can be copied:</p>
-<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/demos/fom_rank2_sample_depth/input.yaml <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
+<p>which we have ready for you to copy as:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/demos/demo3/input.yaml <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
 </pre></div>
 </div>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
 </div>
 <div class="section" id="run-the-simulation">
-<h2>Run the simulation<a class="headerlink" href="#run-the-simulation" title="Permalink to this headline">¶</a></h2>
+<h2>4. Run the simulation<a class="headerlink" href="#run-the-simulation" title="Permalink to this headline">¶</a></h2>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
-ln -s <span class="si">${</span><span class="nv">SHAWEXEDIR</span><span class="si">}</span>/shawExe .
+ln -s <span class="si">${</span><span class="nv">EXEDIR</span><span class="si">}</span>/shawExe .
 ./shawExe input.yaml
 </pre></div>
 </div>
@@ -263,11 +295,11 @@ <h2>Run the simulation<a class="headerlink" href="#run-the-simulation" title="Pe
 of which the IO time for data collection is less than 1 second.
 Note that this already gives a hint to the advantages of using the rank-2 formulation.
 In fact, while here it takes 107 seconds to simulate the four trajectories simultaneously,
-in the <a class="reference external" href="{filename}/rank1fommulti.rst">rank-1 version of this demo</a> it took
+in the <a class="reference internal" href="demo2.html"><span class="doc">rank-1 version of this demo</span></a> it took
 about 150 seconds to simulate the same realizations.</p>
 </div>
 <div class="section" id="simulation-data">
-<h2>Simulation data<a class="headerlink" href="#simulation-data" title="Permalink to this headline">¶</a></h2>
+<h2>5. Simulation data<a class="headerlink" href="#simulation-data" title="Permalink to this headline">¶</a></h2>
 <p>The demo should generate inside <code class="docutils literal notranslate"><span class="pre">${MYRUNDIR}</span></code> the following:</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>coords_sp.txt <span class="c1">#: coordinates of the stress grid points</span>
 coords_vp.txt <span class="c1">#: coordinates of the velocity grid points</span>
@@ -287,11 +319,11 @@ <h2>Simulation data<a class="headerlink" href="#simulation-data" title="Permalin
 </div>
 </div>
 <div class="section" id="post-process-data">
-<h2>Post-process data<a class="headerlink" href="#post-process-data" title="Permalink to this headline">¶</a></h2>
+<h2>6. Post-process data<a class="headerlink" href="#post-process-data" title="Permalink to this headline">¶</a></h2>
 <p>To post-process the data, get the Python scripts created
 for this demo and visualize the seismogram:</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> <span class="si">${</span><span class="nv">MYRUNDIR</span><span class="si">}</span>
-cp <span class="si">${</span><span class="nv">ESWSRCDIR</span><span class="si">}</span>/demos/fom_rank2_sample_depth/plotSeismogram.py .
+cp <span class="si">${</span><span class="nv">SHAWDIR</span><span class="si">}</span>/demos/demo3/plotSeismogram.py .
 python plotSeismogram.py
 </pre></div>
 </div>
@@ -306,23 +338,23 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
       <footer>
         
         <div class="related-pages">
-          <a class="next-page" href="license.html">
+          <a class="next-page" href="performance.html">
               <div class="page-info">
                 <div class="context">
                   <span>Next</span>
                 </div>
-                <div class="title">License</div>
+                <div class="title">Performance</div>
               </div>
               <svg><use href="#svg-arrow-right"></use></svg>
             </a>
-          <a class="prev-page" href="demo_rank1fommulti.html">
+          <a class="prev-page" href="demo2.html">
               <svg><use href="#svg-arrow-right"></use></svg>
               <div class="page-info">
                 <div class="context">
                   <span>Previous</span>
                 </div>
                 
-                <div class="title">(2): Multi-forcing via rank-1</div>
+                <div class="title">Demo 2</div>
                 
               </div>
             </a>
@@ -330,12 +362,12 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
               <a href="https://github.com/pradyunsg/furo">Furo theme</a>. |
-            <a class="muted-link" href="_sources/demo_rank2fom.rst.txt"
+            <a class="muted-link" href="_sources/demo3.rst.txt"
                rel="nofollow">
               Show Source
             </a>
@@ -355,13 +387,13 @@ <h2>Post-process data<a class="headerlink" href="#post-process-data" title="Perm
         <div class="toc-tree-container">
           <div class="toc-tree">
             <ul>
-<li><a class="reference internal" href="#">(3): Multi-forcing via rank-2</a><ul>
-<li><a class="reference internal" href="#prepare-environment">Prepare environment</a></li>
-<li><a class="reference internal" href="#generating-the-mesh">Generating the mesh</a></li>
-<li><a class="reference internal" href="#input-file">Input file</a></li>
-<li><a class="reference internal" href="#run-the-simulation">Run the simulation</a></li>
-<li><a class="reference internal" href="#simulation-data">Simulation data</a></li>
-<li><a class="reference internal" href="#post-process-data">Post-process data</a></li>
+<li><a class="reference internal" href="#">Demo 3</a><ul>
+<li><a class="reference internal" href="#prepare">1. Prepare</a></li>
+<li><a class="reference internal" href="#generate-the-mesh">2. Generate the mesh</a></li>
+<li><a class="reference internal" href="#input-file">3. Input file</a></li>
+<li><a class="reference internal" href="#run-the-simulation">4. Run the simulation</a></li>
+<li><a class="reference internal" href="#simulation-data">5. Simulation data</a></li>
+<li><a class="reference internal" href="#post-process-data">6. Post-process data</a></li>
 </ul>
 </li>
 </ul>
diff --git a/docs/demos.html b/docs/demos.html
index c6468e0b..dc138ed3 100644
--- a/docs/demos.html
+++ b/docs/demos.html
@@ -2,7 +2,7 @@
 <html class="no-js">
   <head><meta charset="utf-8"/>
     <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="(1): Single Forcing" href="demo_rank1fom.html" /><link rel="prev" title="Template" href="inputfile_template.html" />
+    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Demo 1" href="demo1.html" /><link rel="prev" title="Template" href="inputfile_template.html" />
 
     <meta name="generator" content="sphinx-4.2.0, furo 2021.10.09"/>
         <title>End-to-end Demos - SHAW 0.1.0 documentation</title>
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 current has-children current-page"><a class="current reference internal" href="#">End-to-end Demos</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -206,31 +207,30 @@
 <h1>End-to-end Demos<a class="headerlink" href="#end-to-end-demos" title="Permalink to this headline">¶</a></h1>
 <div class="toctree-wrapper compound">
 <ul>
-<li class="toctree-l1"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html#prepare-environment">Prepare environment</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html#generating-the-mesh">Generating the mesh</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html#input-file">Input file</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html#run-the-simulation">Run the simulation</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html#simulation-data">Simulation data</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html#post-process-data">Post-process data</a></li>
+<li class="toctree-l1"><a class="reference internal" href="demo1.html">Demo 1</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html#prepare">1. Prepare</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html#generate-the-mesh">2. Generate the mesh</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html#input-file">3. Input file</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html#run-the-simulation">4. Run the simulation</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html#post-process-data">5. Post-process data</a></li>
 </ul>
 </li>
-<li class="toctree-l1"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html#prepare-environment">Prepare environment</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html#generating-the-mesh">Generating the mesh</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html#input-file">Input file</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html#run-the-simulation">Run the simulation</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html#simulation-data">Simulation data</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html#post-process-data">Post-process data</a></li>
+<li class="toctree-l1"><a class="reference internal" href="demo2.html">Demo 2</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html#prepare">1. Prepare</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html#generate-the-mesh">2. Generate the mesh</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html#input-file">3. Input file</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html#run-the-simulation">4. Run the simulation</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html#simulation-data">5. Simulation data</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html#post-process-data">6. Post-process data</a></li>
 </ul>
 </li>
-<li class="toctree-l1"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html#prepare-environment">Prepare environment</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html#generating-the-mesh">Generating the mesh</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html#input-file">Input file</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html#run-the-simulation">Run the simulation</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html#simulation-data">Simulation data</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html#post-process-data">Post-process data</a></li>
+<li class="toctree-l1"><a class="reference internal" href="demo3.html">Demo 3</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html#prepare">1. Prepare</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html#generate-the-mesh">2. Generate the mesh</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html#input-file">3. Input file</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html#run-the-simulation">4. Run the simulation</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html#simulation-data">5. Simulation data</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html#post-process-data">6. Post-process data</a></li>
 </ul>
 </li>
 </ul>
@@ -242,12 +242,12 @@ <h1>End-to-end Demos<a class="headerlink" href="#end-to-end-demos" title="Permal
       <footer>
         
         <div class="related-pages">
-          <a class="next-page" href="demo_rank1fom.html">
+          <a class="next-page" href="demo1.html">
               <div class="page-info">
                 <div class="context">
                   <span>Next</span>
                 </div>
-                <div class="title">(1): Single Forcing</div>
+                <div class="title">Demo 1</div>
               </div>
               <svg><use href="#svg-arrow-right"></use></svg>
             </a>
@@ -266,7 +266,7 @@ <h1>End-to-end Demos<a class="headerlink" href="#end-to-end-demos" title="Permal
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/genindex.html b/docs/genindex.html
index ff2e1955..5c02f140 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -167,11 +167,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -218,7 +219,7 @@ <h1 id="index">Index</h1>
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/goveq.html b/docs/goveq.html
index 4f9f83d5..b3993c97 100644
--- a/docs/goveq.html
+++ b/docs/goveq.html
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -204,13 +205,16 @@
         <article role="main">
           <div class="section" id="governing-equations">
 <h1>Governing equations<a class="headerlink" href="#governing-equations" title="Permalink to this headline">¶</a></h1>
-<p>This code simulates the evolution of elastic seismic shear waves in an axisymmetric domain.`</p>
+<p>This code simulates the evolution of elastic seismic shear waves in an axisymmetric domain.</p>
 <p>Assuming the target body/planet (e.g. Earth) can be approximated as a sphere,
 we adopt a spherical coordinate system as shown in the figure below:</p>
 <a class="reference internal image-reference" href="_images/sc.svg"><img alt="Alternative text" class="align-center" src="_images/sc.svg" width="35%"/></a>
+<div class="admonition important">
+<p class="admonition-title">Important</p>
 <p>In the axisymmetric approximation, one assumes that fields/quantities
 do not vary along <span class="math notranslate nohighlight">\(\phi\)</span>, implying that all the derivatives
 with respect to <span class="math notranslate nohighlight">\(\phi\)</span> can be dropped.</p>
+</div>
 <p>With this assumption, the set of equations governing the time evolution
 of elastic waves in the velocity-stress formulation can be written as:</p>
 <div class="math-wrapper"><div class="math notranslate nohighlight">
@@ -229,23 +233,29 @@ <h1>Governing equations<a class="headerlink" href="#governing-equations" title="
 <p>where:</p>
 <ul class="simple">
 <li><p><span class="math notranslate nohighlight">\(t\)</span> represents time</p></li>
-<li><p><span class="math notranslate nohighlight">\(r \in [0, r_{surface}]\)</span> is the radial distance from origin to surface of the body</p></li>
+<li><p><span class="math notranslate nohighlight">\(r \in [0, r_{surface}]\)</span> is the radial distance from origin to the surface of the body</p></li>
 <li><p><span class="math notranslate nohighlight">\(\theta \in [0, \pi]\)</span> is the polar angle</p></li>
 <li><p><span class="math notranslate nohighlight">\(\rho(r, \theta)\)</span> is the density</p></li>
-<li><p><span class="math notranslate nohighlight">\(v(r, \theta, t)\)</span> is the velocity (for simplicity we drop the subscript, but it is intended to be the <span class="math notranslate nohighlight">\(v_{\phi}\)</span> velocity component)</p></li>
-<li><p><span class="math notranslate nohighlight">\(\sigma_{r\phi}(r, \theta, t)\)</span> and <span class="math notranslate nohighlight">\(\sigma_{\theta\phi}(r, \theta, t)\)</span> are the two components of the stress tensor remaining after the axisymmetric approximation</p></li>
+<li><p><span class="math notranslate nohighlight">\(v(r, \theta, t)\)</span> is the velocity (for simplicity we drop the subscript,
+but it is intended to be the <span class="math notranslate nohighlight">\(v_{\phi}\)</span> velocity component)</p></li>
+<li><p><span class="math notranslate nohighlight">\(\sigma_{r\phi}(r, \theta, t)\)</span> and <span class="math notranslate nohighlight">\(\sigma_{\theta\phi}(r, \theta, t)\)</span>
+are the two components of the stress tensor remaining after the axisymmetric approximation</p></li>
 <li><p><span class="math notranslate nohighlight">\(f(r, \theta,t)\)</span> is the forcing term</p></li>
-<li><p><span class="math notranslate nohighlight">\(G(r, \theta) = v_s^2(r, \theta) \rho(r, \theta)\)</span> is the shear modulus and <span class="math notranslate nohighlight">\(v_s\)</span> being the shear wave velocity.</p></li>
+<li><p><span class="math notranslate nohighlight">\(G(r, \theta) = v_s^2(r, \theta) \rho(r, \theta)\)</span> is the shear modulus
+and <span class="math notranslate nohighlight">\(v_s\)</span> is the shear wave velocity.</p></li>
 </ul>
 <p>In practice, the axisymmetric approximation means that one solves the
 above governing equations over a <em>circular sector/block arc</em>.
 Such a formulation is referred to as 2.5-dimensional because it involves
 a 2-dimensional spatial domain (a circular sector of the Earth)
 but models point sources with correct 3-dimensional spreading {cite}.</p>
-<p>Note that we assume both the density and shear modulus to only depend on the spatial coordinates.</p>
+<div class="admonition note">
+<p class="admonition-title">Note</p>
+<p>We assume that both the density and the shear modulus depend only on the spatial coordinates.</p>
+</div>
 </div>
 <div class="section" id="discretization">
-<h1>Discretization<a class="headerlink" href="#discretization" title="Permalink to this headline">¶</a></h1>
+<span id="id1"></span><h1>Discretization<a class="headerlink" href="#discretization" title="Permalink to this headline">¶</a></h1>
 <p>Shear waves cannot propagate in liquids.
 Therefore, when modeling the Earth, the system of equations above is not
 applicable to the core region of the Earth, and is solved in the region
@@ -269,11 +279,11 @@ <h1>Discretization<a class="headerlink" href="#discretization" title="Permalink
 We remark that, differently than (cite), we do not rely on ghost
 points to impose boundary conditions, but account for the boundary
 conditions directly when assembling the system matrix.</p>
-<p>As an example, the figure below shows the grid when modeling the Earth: the computational
+<p>The figure below shows the grid when modeling the Earth: the computational
 domain extends from the surface to the core-mantle boundary, excluding the liquid core.</p>
-<div class="figure align-center" id="id1">
+<div class="figure align-center" id="id2">
 <a class="reference internal image-reference" href="_images/mesh.png"><img alt="_images/mesh.png" src="_images/mesh.png" style="width: 450px;"/></a>
-<p class="caption"><span class="caption-text">Schematic of the axi-symmetric domain for the Earth and staggered grid used for its discretization.</span><a class="headerlink" href="#id1" title="Permalink to this image">¶</a></p>
+<p class="caption"><span class="caption-text">Schematic of the axi-symmetric domain for the Earth and staggered grid used for its discretization.</span><a class="headerlink" href="#id2" title="Permalink to this image">¶</a></p>
 </div>
 </div>
 
@@ -306,7 +316,7 @@ <h1>Discretization<a class="headerlink" href="#discretization" title="Permalink
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/img/demo1_f1.png b/docs/img/demo1_f1.png
index aff98f30..4f1db28a 100644
Binary files a/docs/img/demo1_f1.png and b/docs/img/demo1_f1.png differ
diff --git a/docs/img/demo1_f2.png b/docs/img/demo1_f2.png
index 4f97830e..589733df 100644
Binary files a/docs/img/demo1_f2.png and b/docs/img/demo1_f2.png differ
diff --git a/docs/img/demo1_f3.png b/docs/img/demo1_f3.png
index 89a2c759..f08bd1d8 100644
Binary files a/docs/img/demo1_f3.png and b/docs/img/demo1_f3.png differ
diff --git a/docs/img/demo1_f4.png b/docs/img/demo1_f4.png
index 99943b4b..efd09468 100644
Binary files a/docs/img/demo1_f4.png and b/docs/img/demo1_f4.png differ
diff --git a/docs/img/demo2_f1.png b/docs/img/demo2_f1.png
index 6b5a0f75..3712e5b7 100644
Binary files a/docs/img/demo2_f1.png and b/docs/img/demo2_f1.png differ
diff --git a/docs/img/demo2_f2.png b/docs/img/demo2_f2.png
index 8c4f32b5..d46e710b 100644
Binary files a/docs/img/demo2_f2.png and b/docs/img/demo2_f2.png differ
diff --git a/docs/img/demo2_f3.png b/docs/img/demo2_f3.png
index a8ec7d43..baef6aa5 100644
Binary files a/docs/img/demo2_f3.png and b/docs/img/demo2_f3.png differ
diff --git a/docs/img/demo3_f1.png b/docs/img/demo3_f1.png
index eb24f6d1..2a2efe60 100644
Binary files a/docs/img/demo3_f1.png and b/docs/img/demo3_f1.png differ
diff --git a/docs/img/fom_cpu_ave.png b/docs/img/fom_cpu_ave.png
new file mode 100644
index 00000000..6f5b740b
Binary files /dev/null and b/docs/img/fom_cpu_ave.png differ
diff --git a/docs/img/top1.jpg b/docs/img/top1.jpg
deleted file mode 100644
index ab1be6f5..00000000
Binary files a/docs/img/top1.jpg and /dev/null differ
diff --git a/docs/img/top2.png b/docs/img/top2.png
deleted file mode 100644
index 37b6bcf7..00000000
Binary files a/docs/img/top2.png and /dev/null differ
diff --git a/docs/img/top3.jpg b/docs/img/top3.jpg
deleted file mode 100644
index 2f57dd84..00000000
Binary files a/docs/img/top3.jpg and /dev/null differ
diff --git a/docs/img/top4.jpg b/docs/img/top4.jpg
deleted file mode 100644
index f06aee42..00000000
Binary files a/docs/img/top4.jpg and /dev/null differ
diff --git a/docs/img/top5.jpg b/docs/img/top5.jpg
deleted file mode 100644
index c1b8c9dc..00000000
Binary files a/docs/img/top5.jpg and /dev/null differ
diff --git a/docs/img/top6.jpg b/docs/img/top6.jpg
deleted file mode 100644
index 7ad94542..00000000
Binary files a/docs/img/top6.jpg and /dev/null differ
diff --git a/docs/img/top7.jpg b/docs/img/top7.jpg
deleted file mode 100644
index 049b3d95..00000000
Binary files a/docs/img/top7.jpg and /dev/null differ
diff --git a/docs/img/top8.jpg b/docs/img/top8.jpg
deleted file mode 100644
index 1c63ee52..00000000
Binary files a/docs/img/top8.jpg and /dev/null differ
diff --git a/docs/index.html b/docs/index.html
index 9cec8c34..e5c91e14 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -238,8 +239,8 @@ <h2>Highlights and features<a class="headerlink" href="#highlights-and-features"
 <ul>
 <li><p>The code relies on the <a class="reference external" href="https://github.com/kokkos">Kokkos programming model</a>
 for performance portability</p></li>
-<li><p>We use the <a class="reference internal" href="goveq.html"><span class="doc">velocity-stress formulation in an axi-symmetric domain</span></a></p></li>
-<li><p>We currently support the following material models:</p>
+<li><p><a class="reference internal" href="goveq.html"><span class="doc">Velocity-stress formulation in an axi-symmetric domain</span></a></p></li>
+<li><p>Support for the following material models:</p>
 <ul class="simple">
 <li><p><a class="reference internal" href="inputfile_description.html#singlelayerdescription"><span class="std std-ref">single layer model</span></a></p></li>
 <li><p><a class="reference internal" href="inputfile_description.html#twolayerdescription"><span class="std std-ref">bilayer model</span></a></p></li>
@@ -249,27 +250,27 @@ <h2>Highlights and features<a class="headerlink" href="#highlights-and-features"
 <p>These are 1D models because they only depend on the radial distance.
 The modularity of the code allows one to easily add new models</p>
 </li>
-<li><p>If you want to simulate the wave dynamics in another
-planet/axisymmetric body, all you have to do is to create
-a mesh suitable for that planet, and a suitable material model</p></li>
+<li><p>Simulating the wave dynamics in another planet/axisymmetric body is relatively easy:
+you only need to create a mesh and a material model suitable for that body</p></li>
 <li><p>The code implements what we refer to as “rank-1” and “rank-2” formulations:</p>
 <ul class="simple">
 <li><p><em>rank-1</em>:</p>
 <ul>
-<li><p>the discrete state and forcing term are stored as 1D arrays</p></li>
-<li><p>this is used to simulate the wave dynamics due to a <em>single forcing term</em></p></li>
+<li><p>discrete state and forcing are stored as 1D arrays</p></li>
+<li><p>this is useful to simulate the wave dynamics due to a <em>single forcing term</em></p></li>
+<li><p><a class="reference internal" href="demo1.html"><span class="doc">See the demo!</span></a></p></li>
 </ul>
 </li>
 <li><p><em>rank-2</em>:</p>
 <ul>
-<li><p>the discrete state and forcing term are stored
-using rank-2 tensors (i.e. matrices)</p></li>
+<li><p>discrete state and forcing are stored using rank-2 tensors (i.e. matrices)</p></li>
 <li><p>this is useful to <em>simultaneously</em> solve the wave
 dynamics for <em>multiple forcing realizations</em> (e.g. multiple
 source locations and/or periods). This rank-2 formulation
 has an advantage from a computational standpoint because
 it has higher computational intensity, thus benefiting
 efficient ensemble propagation</p></li>
+<li><p><a class="reference internal" href="demo3.html"><span class="doc">See the demo!</span></a></p></li>
 </ul>
 </li>
 </ul>
@@ -300,7 +301,11 @@ <h1>Contents<a class="headerlink" href="#contents" title="Permalink to this head
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="goveq.html">Governing equations</a></li>
 <li class="toctree-l1"><a class="reference internal" href="goveq.html#discretization">Discretization</a></li>
-<li class="toctree-l1"><a class="reference internal" href="build_expert.html">Building: “expert” mode</a></li>
+<li class="toctree-l1"><a class="reference internal" href="build_expert.html">Building: “expert” mode</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="build_expert.html#prerequisites">Prerequisites</a></li>
+<li class="toctree-l2"><a class="reference internal" href="build_expert.html#build">Build</a></li>
+</ul>
+</li>
 <li class="toctree-l1"><a class="reference internal" href="build_stepbystep.html">Building: step-by-step</a><ul>
 <li class="toctree-l2"><a class="reference internal" href="build_stepbystep.html#prerequisites">Prerequisites</a></li>
 <li class="toctree-l2"><a class="reference internal" href="build_stepbystep.html#step-1-prepare-environment">Step 1: Prepare environment</a></li>
@@ -317,11 +322,12 @@ <h1>Contents<a class="headerlink" href="#contents" title="Permalink to this head
 </ul>
 </li>
 <li class="toctree-l1"><a class="reference internal" href="demos.html">End-to-end Demos</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -348,7 +354,7 @@ <h1>Contents<a class="headerlink" href="#contents" title="Permalink to this head
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/inputfile.html b/docs/inputfile.html
index 217332a7..7d36434c 100644
--- a/docs/inputfile.html
+++ b/docs/inputfile.html
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -282,7 +283,7 @@ <h1>Input File<a class="headerlink" href="#input-file" title="Permalink to this
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/inputfile_description.html b/docs/inputfile_description.html
index 4ca535db..16a1aa96 100644
--- a/docs/inputfile_description.html
+++ b/docs/inputfile_description.html
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -488,7 +489,7 @@ <h1>Material Model Section<a class="headerlink" href="#material-model-section" t
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/inputfile_template.html b/docs/inputfile_template.html
index 405df41d..bac7e6bf 100644
--- a/docs/inputfile_template.html
+++ b/docs/inputfile_template.html
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -327,7 +328,7 @@ <h1>Template<a class="headerlink" href="#template" title="Permalink to this head
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/license.html b/docs/license.html
index cfb04929..8cd04051 100644
--- a/docs/license.html
+++ b/docs/license.html
@@ -2,7 +2,7 @@
 <html class="no-js">
   <head><meta charset="utf-8"/>
     <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="prev" title="(3): Multi-forcing via rank-2" href="demo_rank2fom.html" />
+    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="prev" title="Performance" href="performance.html" />
 
     <meta name="generator" content="sphinx-4.2.0, furo 2021.10.09"/>
         <title>License - SHAW 0.1.0 documentation</title>
@@ -168,11 +168,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1 current current-page"><a class="current reference internal" href="#">License</a></li>
@@ -258,14 +259,14 @@ <h1>License<a class="headerlink" href="#license" title="Permalink to this headli
         
         <div class="related-pages">
           
-          <a class="prev-page" href="demo_rank2fom.html">
+          <a class="prev-page" href="performance.html">
               <svg><use href="#svg-arrow-right"></use></svg>
               <div class="page-info">
                 <div class="context">
                   <span>Previous</span>
                 </div>
                 
-                <div class="title">(3): Multi-forcing via rank-2</div>
+                <div class="title">Performance</div>
                 
               </div>
             </a>
@@ -273,7 +274,7 @@ <h1>License<a class="headerlink" href="#license" title="Permalink to this headli
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/objects.inv b/docs/objects.inv
index 76a68654..89bf27e0 100644
Binary files a/docs/objects.inv and b/docs/objects.inv differ
diff --git a/docs/performance.html b/docs/performance.html
new file mode 100644
index 00000000..ef79c961
--- /dev/null
+++ b/docs/performance.html
@@ -0,0 +1,284 @@
+<!doctype html>
+<html class="no-js">
+  <head><meta charset="utf-8"/>
+    <meta name="viewport" content="width=device-width,initial-scale=1"/>
+    <meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="License" href="license.html" /><link rel="prev" title="Demo 3" href="demo3.html" />
+
+    <meta name="generator" content="sphinx-4.2.0, furo 2021.10.09"/>
+        <title>Performance - SHAW 0.1.0 documentation</title>
+      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=0254c309f5cadf746f1a613e7677379ac9c8cdcd" />
+    <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=16fb25fabf47304eee183a5e9af80b1ba98259b1" />
+    
+    
+
+
+<style>
+  body {
+    --color-code-background: #eeffcc;
+  --color-code-foreground: black;
+  
+  }
+  body[data-theme="dark"] {
+    --color-code-background: #202020;
+  --color-code-foreground: #d0d0d0;
+  
+  }
+  @media (prefers-color-scheme: dark) {
+    body:not([data-theme="light"]) {
+      --color-code-background: #202020;
+  --color-code-foreground: #d0d0d0;
+  
+    }
+  }
+</style></head>
+  <body>
+    <script>
+      document.body.dataset.theme = localStorage.getItem("theme") || "auto";
+    </script>
+    
+<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
+  <symbol id="svg-toc" viewBox="0 0 24 24">
+    <title>Contents</title>
+    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor"
+      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
+      <path stroke="none" d="M0 0h24v24H0z" />
+      <line x1="4" y1="6" x2="20" y2="6" />
+      <line x1="10" y1="12" x2="20" y2="12" />
+      <line x1="6" y1="18" x2="20" y2="18" />
+    </svg>
+  </symbol>
+  <symbol id="svg-menu" viewBox="0 0 24 24">
+    <title>Menu</title>
+    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor"
+      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
+      <line x1="3" y1="12" x2="21" y2="12"></line>
+      <line x1="3" y1="6" x2="21" y2="6"></line>
+      <line x1="3" y1="18" x2="21" y2="18"></line>
+    </svg>
+  </symbol>
+  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+    <title>Expand</title>
+    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor"
+      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
+      <polyline points="9 18 15 12 9 6"></polyline>
+    </svg>
+  </symbol>
+  <symbol id="svg-sun" viewBox="0 0 24 24">
+    <title>Light mode</title>
+    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor"
+      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
+      <circle cx="12" cy="12" r="5"></circle>
+      <line x1="12" y1="1" x2="12" y2="3"></line>
+      <line x1="12" y1="21" x2="12" y2="23"></line>
+      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+      <line x1="1" y1="12" x2="3" y2="12"></line>
+      <line x1="21" y1="12" x2="23" y2="12"></line>
+      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+    </svg>
+  </symbol>
+  <symbol id="svg-moon" viewBox="0 0 24 24">
+    <title>Dark mode</title>
+    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor"
+      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
+      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
+    </svg>
+  </symbol>
+  <symbol id="svg-sun-half" viewBox="0 0 24 24">
+    <title>Auto light/dark mode</title>
+    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor"
+      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
+      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
+      <circle cx="12" cy="12" r="9" />
+      <path d="M13 12h5" />
+      <path d="M13 15h4" />
+      <path d="M13 18h1" />
+      <path d="M13 9h4" />
+      <path d="M13 6h1" />
+    </svg>
+  </symbol>
+</svg>
+
+<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
+<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
+<label class="overlay sidebar-overlay" for="__navigation">
+  <div class="visually-hidden">Hide navigation sidebar</div>
+</label>
+<label class="overlay toc-overlay" for="__toc">
+  <div class="visually-hidden">Hide table of contents sidebar</div>
+</label>
+
+
+
+<div class="page">
+  <header class="mobile-header">
+    <div class="header-left">
+      <label class="nav-overlay-icon" for="__navigation">
+        <div class="visually-hidden">Toggle site navigation sidebar</div>
+        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
+      </label>
+    </div>
+    <div class="header-center">
+      <a href="index.html"><div class="brand">SHAW 0.1.0 documentation</div></a>
+    </div>
+    <div class="header-right">
+      <div class="theme-toggle-container theme-toggle-header">
+        <button class="theme-toggle">
+          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
+          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
+          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
+          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+        </button>
+      </div>
+      <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
+        <div class="visually-hidden">Toggle table of contents sidebar</div>
+        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+      </label>
+    </div>
+  </header>
+  <aside class="sidebar-drawer">
+    <div class="sidebar-container">
+      
+      <div class="sidebar-sticky"><a class="sidebar-brand" href="index.html">
+  
+  
+  <span class="sidebar-brand-text">SHAW 0.1.0 documentation</span>
+  
+</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
+  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
+  <input type="hidden" name="check_keywords" value="yes">
+  <input type="hidden" name="area" value="default">
+</form>
+<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
+  <ul class="current">
+<li class="toctree-l1"><a class="reference internal" href="goveq.html">Governing equations</a></li>
+<li class="toctree-l1"><a class="reference internal" href="goveq.html#discretization">Discretization</a></li>
+<li class="toctree-l1"><a class="reference internal" href="build_expert.html">Building: “expert” mode</a></li>
+<li class="toctree-l1"><a class="reference internal" href="build_stepbystep.html">Building: step-by-step</a></li>
+<li class="toctree-l1 has-children"><a class="reference internal" href="inputfile.html">Input File</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
+<li class="toctree-l2"><a class="reference internal" href="inputfile_description.html">General Section</a></li>
+<li class="toctree-l2"><a class="reference internal" href="inputfile_description.html#io-section">IO Section</a></li>
+<li class="toctree-l2"><a class="reference internal" href="inputfile_description.html#source-forcing-section">Source/forcing Section</a></li>
+<li class="toctree-l2"><a class="reference internal" href="inputfile_description.html#material-model-section">Material Model Section</a></li>
+<li class="toctree-l2"><a class="reference internal" href="inputfile_template.html">Template</a></li>
+</ul>
+</li>
+<li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
+</ul>
+</li>
+<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Performance</a></li>
+<li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
+<li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
+<li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
+</ul>
+
+</div>
+</div>
+      </div>
+      
+    </div>
+  </aside>
+  <div class="main">
+    <div class="content">
+      <div class="article-container">
+        <div class="content-icon-container">
+          <div class="theme-toggle-container theme-toggle-content">
+            <button class="theme-toggle">
+              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
+              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
+              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
+              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+            </button>
+          </div>
+          <label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
+            <div class="visually-hidden">Toggle table of contents sidebar</div>
+            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+          </label>
+        </div>
+        <article role="main">
+          <div class="section" id="performance">
+<h1>Performance<a class="headerlink" href="#performance" title="Permalink to this headline">¶</a></h1>
+<p>The following plot shows performance results obtained on a workstation
+with two 18-core Intel(R) Xeon(R) Gold 6154 CPU @ 3.00 GHz,
+each with a 24.75MB L3 cache and 125GB total memory.
+We enable hyperthreading, thus supporting a maximum of 36 logical threads per CPU,
+so a total of 72 threads. We use GCC-8.3.1 and rely on kokkos
+and kokkos-kernels version 3.1.01.
+We use Blis-0.7.0 as the kokkos-kernels’ backend for all dense operations.
+We use the OpenMP backend for Kokkos.</p>
+<div class="line-block">
+<div class="line"><br/></div>
+</div>
+<div class="figure align-center" id="id1">
+<a class="reference internal image-reference" href="_images/fom_cpu_ave.png"><img alt="_images/fom_cpu_ave.png" src="_images/fom_cpu_ave.png" style="width: 95%;"/></a>
+<p class="caption"><span class="caption-text">M represents how many trajectories we are computing simultaneously:
+when M=1, this is what we refer to as the rank-1 formulation,
+while M&gt;=2 corresponds to what we refer to as the rank-2 formulation;
+N is the <em>total</em> number of dofs (velocities plus stresses) for the problem.</span><a class="headerlink" href="#id1" title="Permalink to this image">¶</a></p>
+</div>
+</div>
+
+        </article>
+      </div>
+      <footer>
+        
+        <div class="related-pages">
+          <a class="next-page" href="license.html">
+              <div class="page-info">
+                <div class="context">
+                  <span>Next</span>
+                </div>
+                <div class="title">License</div>
+              </div>
+              <svg><use href="#svg-arrow-right"></use></svg>
+            </a>
+          <a class="prev-page" href="demo3.html">
+              <svg><use href="#svg-arrow-right"></use></svg>
+              <div class="page-info">
+                <div class="context">
+                  <span>Previous</span>
+                </div>
+                
+                <div class="title">Demo 3</div>
+                
+              </div>
+            </a>
+        </div>
+
+        <div class="related-information">
+              Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
+            Last updated on Dec 04, 2021. |
+            Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
+              and
+              <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
+              <a href="https://github.com/pradyunsg/furo">Furo theme</a>. |
+            <a class="muted-link" href="_sources/performance.rst.txt"
+               rel="nofollow">
+              Show Source
+            </a>
+        </div>
+        
+      </footer>
+    </div>
+    <aside class="toc-drawer no-toc">
+      
+      
+      
+    </aside>
+  </div>
+</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    <script src="_static/jquery.js"></script>
+    <script src="_static/underscore.js"></script>
+    <script src="_static/doctools.js"></script>
+    <script src="_static/scripts/main.js"></script>
+    <script src="_static/clipboard.min.js"></script>
+    <script src="_static/copybutton.js"></script>
+    </body>
+</html>
\ No newline at end of file
diff --git a/docs/search.html b/docs/search.html
index 45717d98..9cd6c9fa 100644
--- a/docs/search.html
+++ b/docs/search.html
@@ -166,11 +166,12 @@
 </ul>
 </li>
 <li class="toctree-l1 has-children"><a class="reference internal" href="demos.html">End-to-end Demos</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fom.html">(1): Single Forcing</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank1fommulti.html">(2): Multi-forcing via rank-1</a></li>
-<li class="toctree-l2"><a class="reference internal" href="demo_rank2fom.html">(3): Multi-forcing via rank-2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo1.html">Demo 1</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo2.html">Demo 2</a></li>
+<li class="toctree-l2"><a class="reference internal" href="demo3.html">Demo 3</a></li>
 </ul>
 </li>
+<li class="toctree-l1"><a class="reference internal" href="performance.html">Performance</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW">GitHub Repo</a></li>
 <li class="toctree-l1"><a class="reference external" href="https://github.com/Pressio/SHAW/issues">Open an issue/feature req.</a></li>
 <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
@@ -223,7 +224,7 @@
 
         <div class="related-information">
               Copyright &#169; 2021, National Technology &amp; Engineering Solutions of Sandia, LLC (NTESS) |
-            Last updated on Dec 01, 2021. |
+            Last updated on Dec 04, 2021. |
             Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
               and
               <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
diff --git a/docs/searchindex.js b/docs/searchindex.js
index e47b0a77..47ebc021 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["build_expert","build_stepbystep","demo_rank1fom","demo_rank1fommulti","demo_rank2fom","demos","goveq","index","inputfile","inputfile_description","inputfile_template","license"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["build_expert.rst","build_stepbystep.rst","demo_rank1fom.rst","demo_rank1fommulti.rst","demo_rank2fom.rst","demos.rst","goveq.rst","index.rst","inputfile.rst","inputfile_description.rst","inputfile_template.rst","license.rst"],objects:{},objnames:{},objtypes:{},terms:{"0":[0,1,2,3,6,9,10],"00":[0,1],"004":[],"0045":7,"01":[],"03":[],"05":[],"09":[],"1":[0,4,5,6,7,8,10,11],"10":[1,7,9,10],"100":[2,3,10],"1000":2,"1016":7,"1024":[3,4],"105":[2,3],"107":4,"11":[],"1100":[9,10],"113973":7,"11742":[],"12":[],"120":10,"13":[],"130":[2,3],"14":[0,1],"15":[],"150":[4,9,10],"155":[2,3],"16":[0,1,4],"160":10,"16m":2,"17":4,"175":[2,3],"18":[],"180":[2,3],"19":[],"1981":9,"1d":[7,9],"2":[5,6,7,8,10,11],"20":9,"200":[2,9],"2000":[2,3,10],"2009":[],"2019":11,"2021":7,"21":[],"21m":3,"22":[],"23":10,"231":[2,3],"24":4,"240":[3,4],"25":[2,3,4,9,10],"250":2,"256":[3,4],"26":[],"2667":[3,4],"28":[],"28m":2,"297":9,"2d":[6,9],"3":[0,5,6,7,9,10,11],"30":[2,3],"30b":[],"31":[],"32":[3,4],"34b":[],"356":9,"36":3,"371":[6,9],"37m":3,"384":7,"4":[0,1,2,3,4,9,10],"40":[9,10],"4000":2,"44":10,"440":[3,4],"45":10,"480":[6,9],"5":[0,1,2,3,6,9,10],"50":10,"5000":10,"540":[3,4],"55":[2,3],"550":[9,10],"556":10,"5m":2,"6":[6,9],"640":2,"65":[2,3],"650":9,"68":[],"7":[0,1],"700":[3,4],"78":[],"7825":7,"8":[0,1,3,4,10],"80":[2,3],"8000":[2,3],"86":[],"9":4,"94":[],"95":[],"96b":[],"9m":3,"case":[3,9],"class":9,"default":9,"do":[0,2,6,7,9],"enum":9,"export":[0,1,2,
3,4],"final":[9,10],"import":7,"int":9,"new":[7,9],"true":[2,3,9,10],"try":9,"var":[],"while":[4,7],A:[7,8,9,11],AND:11,AS:11,And:3,As:6,At:[6,8],BE:11,BUT:11,BY:11,FOR:11,For:[2,3,4,9],IF:11,IN:11,IS:11,If:[1,7,9],In:[4,6],NO:11,NOT:[6,11],OF:11,ON:11,OR:11,One:[],SUCH:11,Such:6,THE:11,TO:11,The:[2,4,6,7,8],Then:[0,2],These:[7,9],To:[1,2,3,4,9],With:6,_:[],a0:9,a1:9,a2:9,a_0:9,a_1:9,a_2:9,ab:[],about:[2,3,4],abov:[6,9,11],accept:10,accord:9,account:6,accuraci:7,activ:[3,7],ad:9,add:[7,9],addit:4,address:1,adopt:[6,9],adoptedo:[],advanc:10,advantag:[4,7],advis:[0,1,11],affect:7,after:[2,3,6,7],aka:7,all:[1,3,4,6,7,9,10],allow:[4,7,9,10],along:[2,3,6],alreadi:[1,2,3,4],also:[6,7,9],alwai:6,an:[3,4,6,7,9],anderson:9,angl:[6,9],ani:[1,11],anisotrop:7,anoth:[7,9],append:3,appli:7,applic:6,approxim:[3,4,6],ar:[1,2,6,7,9,10,11],arc:6,arch:[],architectur:1,aris:11,arrai:[7,9],articl:7,artifici:7,arxiv:[],ascii:[2,3,4,9,10],assembl:6,assum:[2,3,4,6,9],assumpt:6,aug:[],author:7,autom:1,automat:[2,3],avail:8,averag:9,axi:[6,7],axisymmetr:[6,7],b0:9,b1:9,b2:9,b:7,b_0:9,b_1:9,b_2:9,backend:1,base:2,bash:1,bash_script:1,batch:10,becaus:[1,2,3,6,7],befor:[3,4],being:6,below:[2,3,4,6,9,10],benefit:7,between:[6,7,9],big:6,bilay:[7,10],binari:[2,3,9,10,11],bla:[0,1],blas_root:[],blaslibnam:[],block:6,blonigan:7,bodi:[6,7],both:[6,7,9],bound:[6,7,9],boundari:[6,9],broadli:7,bsd:11,build:[2,3,4,7],build_kokkos_and_kernel:[],build_openbla:[],build_tpl:1,built:[2,3,4],busi:11,c0:9,c1:9,c2:9,c:[0,1,7],call:[7,9],can:[0,1,2,3,4,6,7,8,9,10],cannot:6,caus:11,cc:10,cd:[0,1,2,3,4],center:6,certain:11,cfa:9,cfl:[9,10],challeng:7,chang:9,check:[9,10],checkcfl:[2,3,9,10],checknumericaldispers:[2,3,9,10],choic:[9,10],choos:9,chose:[],circular:6,cite:6,claus:11,clone:[0,1],cma:7,cmake:[0,1],cmb:[6,9,10],code:[1,2,3,4,6,7,9,10,11],coeff_vp:[2,3],coeffici:[9,10],collect:[3,4,9,10],column:10,com:[0,1,7],come:7,comma:9,commonli:9,compar:3,compil:[0,1],complet:[2,3,4,10],complex:3,compon:6,composit:7,c
omput:[4,6,7,9],computeat:9,condit:[6,11],consequenti:11,consid:9,constant:9,contact:11,contain:[2,3,7,8,9],contract:11,contribut:7,contributor:11,coord:[],coordin:[2,3,4,6],coords_sp:[2,3,4],coords_vp:[2,3,4],copi:4,copyright:11,core:[1,3,4,6,9],correct:6,correspond:[2,9],cost:7,cot:6,cotang:6,countour:7,coupl:3,cover:10,cp:[2,3,4],cpp:[0,1],cpp_dir:[0,1],creat:[2,3,4,7,9],create_single_mesh:[2,3,4],critic:7,ctest:[0,1],cuda:[],current:[7,9,10],custom:[1,7,8,10],cxx:[0,1],d0:9,d1:9,d2:9,d:9,damag:11,dat:[2,3],data:[5,9,10,11],ddr4:[3,4],de:11,deep:9,defin:[2,4,6,7,9],degre:[9,10],delai:[2,3,4,9,10],demo:[2,3,4,7,9],demonstr:4,densiti:[6,7,9,10],depend:[6,7,9],depth:[2,3,4,9,10],deriv:[6,9,11],describ:[2,3,4],descript:[],desir:9,detail:[8,9],differ:[3,4,6,7,9,10],dimension:6,dir:[2,3,4],direct:[2,3,7,11],directli:[6,9],directori:[2,3,9,10],disabl:[9,10],disclaim:11,discontinu:[3,9,10],discret:[7,9],discuss:9,dispers:[9,10],displai:[],distanc:[6,7],distinguish:7,distribut:11,divisor:9,dkokkoskernels_dir:[0,1],do_build:[],doc:[],document:11,doe:[],doi:7,domain:[6,7,9],don:[],done:[1,9],down:10,drop:6,drwxr:[],ds:9,dt:[2,3,9,10],due:[3,6,7],dure:9,dyaml:[0,1],dylib:[],dynam:[7,9],dziewonski:9,e:[6,7,9],each:[3,4,8,9],earth:[6,7,9,10],earthquak:7,easi:1,easili:[1,7,9],edu:9,effect:[3,7],effici:[4,7],effort:1,elast:[6,7],emc:9,enabl:[9,10],end:[7,9],endors:11,engin:[7,11],ensembl:7,env:[],environ:[5,7],envis:[],equat:7,eric:7,eswsrcdir:[2,3,4],etc:7,even:11,event:[7,11],everi:[9,10],evolut:[6,7],exampl:[6,9,10],except:4,exclud:6,execut:[2,3,4],exemplari:11,expect:[3,4],expert:7,explanatori:9,explor:9,explos:7,express:11,extend:[6,9],extens:[],extra:1,extract:[2,3],extractstatefromsnap:[2,3],f:6,fact:4,fail:[],fairli:[],fals:[2,3,9,10],fc:[],fetch:1,field:[2,6,7,9,10],figur:[4,6,9],file:[5,7,9],filenam:[2,3,9,10],fill:10,finaltim:[2,3,9,10],finit:6,first:[2,9],fit:11,fix:9,flag:[],fnrizzi:11,follow:[1,2,3,4,7,8,9,11],fom_rank1:2,fom_rank1_sample_depth:3,fom_rank2_sample_d
epth:4,fominnerdomain:[],fomnearcmb:[],fomnearsurfac:[],fomsymmetryaxisthetapi:[],fomsymmetryaxisthetazero:[],forc:[5,6,7,8,10],forcing_rank1:[],forcings:[4,9,10],forget:9,form:[7,9,11],format:[4,8],formul:[3,4,6,7,9],fortran:[],four:[3,4],frac:6,francesco:[7,11],free:6,freq:[2,3,9,10],frequenc:9,from:[2,6,7,9,11],fsize:[2,3],full:[2,4,8,9,10],fulli:6,fullpath:[2,3,4],g:[6,7,9],galerkin:7,gaussderiv:9,gb:[3,4],gcc:[0,1],gener:[1,5,6,7,8,10],get:[1,2,3,4],ghost:6,ghz:[3,4],git:[0,1],github:[0,1,7],give:[3,4],given:9,global:7,good:11,gov:11,goveq:[],govern:[7,11],graph:[],graph_sp:[2,3],graph_vp:[2,3],grid:[2,3,4,6,9],guid:[1,2,3,4],gz:1,ha:7,handl:9,harvard:9,have:[0,1,2,3,4,7,9,10],header:11,here:[3,4,6,8],high:[7,8],higher:7,hint:4,holder:11,home:[1,2,3,4],homogenouo:9,host:1,how:[2,3,4,9,10],howev:[6,11],http:[0,1,7,9],i9:[3,4],i:[6,7,9],idea:[3,4],ident:4,identifi:[3,9],implement:7,impli:[6,11],impos:6,incident:11,includ:[6,11],indirect:11,individu:3,input:[5,7,9],inputfil:[],inputfile_descript:[],insid:[1,2,3,4,9],instal:[0,1],instanc:8,instead:9,integ:9,intel:[3,4],intend:[6,9],intens:7,interest:9,interior:7,intern:9,interrupt:11,invari:7,involv:[6,9],io:[2,3,4,7,8,10],iri:9,issn:7,issu:7,its:[6,7,11],itself:6,j4:[0,1],j:7,jacobian_sp:[],jacobian_vp:[],john:7,journal:7,just:[4,9],ker:[],kernel:[0,1],kernela:1,keyword:[],kg:9,kilomet:9,kind:[2,3,4,9,10],km:[2,3,4,6,9,10],kokko:[0,1,7],kokkos_instal:[],kokkos_kernels_instal:[],kokkoskernel:[0,1],kokkoskerpfx:[],kokkospfx:[],l:[1,9],lapack:[0,1],lapack_root:[],lapacklibnam:[],larg:[3,7],last:[0,9],latest:[],layer1:[9,10],layer2:[9,10],layer:[7,8,10],lead:[],learn:[2,3],least:9,left:6,less:[3,4],let:1,level:8,leverag:1,liabil:11,liabl:11,lib:[0,1],libopenbla:[],licens:7,like:[1,8],limit:11,line:4,linear:7,link:8,liquid:6,list:[3,9,11],llc:11,ln:[2,3,4],local:9,locat:[2,4,6,7,9,10],longitudin:7,look:[2,3,4,8],loss:11,lrwxr:[],lzeng:9,m:9,machin:1,macpro:[3,4],mai:11,main:[7,9],main_fom:10,make:[0,1,9,10],manag:1,man
datori:[9,10],mani:[2,3,4,9,10],mantl:[6,9],materi:[2,3,4,7,8,10,11],matric:7,matrix:[2,4,6,9],mean:[6,10],meant:1,mechan:7,medium:[7,9],merchant:11,mesh200x1000:2,mesh256x1024:3,mesh:[5,7,9,10],mesh_info:[2,3],meshdir:[2,3,9,10],meshinfo:[],met:11,method:[6,7,9],mhz:[3,4],mimic:9,minim:1,minut:3,miss:10,mkdir:[0,1,2,3,4],mode:[3,7],model:[3,6,7,8,10],modif:11,modifi:9,modular:[7,9],modulu:6,moon:7,moonquak:7,more:[1,2,3,4,9],most:9,move:[],multi:[5,7,8,10],multidepthsandperiodsforcingrank1:[],multidepthsandperiodsforcingrank2:[],multidepthsforcingrank1:[],multidepthsforcingrank2:[],multiperiodsforcingrank1:[],multiperiodsforcingrank2:[],multipl:[7,9,10],must:[6,8,9,11],mycustommaterialmodel:9,myfirstdemo:2,myfirstshawbuild:1,myrundir:[2,3,4],myseconddemo:3,mythirddemo:4,mywavetest:[],myworkdir:[],na0003525:11,name:[7,9,11],nation:11,need:[0,1,2,6,9],neglig:11,neither:11,node:9,nor:11,note:[2,4,6,9],notic:[3,11],now:3,nr:[2,3,4],ntess:11,nth:[2,3,4],number:[7,9],numer:[9,10],obtain:[4,7],omit:9,omp:[],omp_num_thread:[2,3],omp_plac:[2,3],omp_proc_bind:[2,3],onc:[1,9],one:[2,3,4,6,7,9,10],ones:[],onli:[1,2,6,7,9,10],oordin:4,open:7,openbla:[],openmp:[1,2,3],oper:6,optim:[],option:[9,10],order:6,org:7,organ:8,origin:6,oscil:7,other:[4,7,9,11],otherwis:11,our:2,out:11,outfileappend:[2,3],outformat:[2,3],output:[9,10],over:[6,9],own:9,p:[1,2,3,4,7],packag:1,page:[1,7],paper:[7,9],paramet:7,parametr:[7,9,10],parish:7,parser:9,parser_test_1:[],parser_test_2:[],parser_test_3:[],parser_test_4:[],parser_test_5:[],parser_test_6:[],part:1,partial:6,particl:7,particular:11,pass:3,path:[0,1,2,9,10],patienc:3,patrick:7,pattern:3,pdf:9,perform:7,period:[2,3,4,7,9,10],permiss:11,permit:11,perpendicularli:7,pfx:[],phi:6,phy:9,physic:7,pi:6,pii:7,pkgconfig:[],plan:9,planet:[6,7],pleas:7,plot:[3,4,7],plotseismogram:[2,3,4],plotwavefield:[2,3],point:[2,3,4,6,8,9],polar:[2,3,6],portabl:7,possibl:[1,11],post:5,potenti:9,practic:6,preliminari:[7,9,10],prem:[2,3,7,8,10],prepar:[5,7],prerequ
isit:7,present:1,pressio:[0,1,11],pressur:7,pretti:[],primari:7,print:[9,10],prior:11,problem:[4,6],proce:[2,3],process:5,procur:11,product:[9,11],profil:[9,10],profit:11,program:7,project:[0,7],promot:11,propag:[3,6,7],proper:6,properti:[7,9],provid:[1,9,11],purpos:[1,11],py:[2,3,4],python:[2,3,4],quadrat:9,quantif:[],quantiti:6,question:11,quick:[],quickli:1,r:6,r_:[6,9],radial:[2,3,6,7,9],rank:[5,7,8,10],read:1,readi:[],real:9,realiz:[3,4,7,9],receiv:[2,3,4,9,10],recogn:9,record:9,redistribut:11,reduct:7,ref:[],refer:[6,7,9,10],reflect:[3,6],refract:3,region:[6,9],reli:[6,7],reliz:9,remain:6,remark:6,rememb:[2,3],repo:[0,1,7],repositori:[0,1,2,3,4],repres:[6,9],reproduc:11,req:7,requir:7,rerun:[],research:7,respect:[2,3,6],retain:11,rho:[6,9],ricker:[2,3,9],right:[6,10,11],rizzi2021113973:7,rizzi:[7,11],role:9,row:4,run:[0,1,5,8,10],runtim:[3,4],s0045782521003042:7,s:[1,2,3,4,7,8,9,11],safe:[],sake:4,same:[2,4,7,9],sampl:[3,4,7,9,10],samplingfreq:[2,3],sandia:11,save:[9,10],scale:7,scenario:[9,10],schemat:6,scheme:2,scienc:7,sciencedirect:7,script:[1,2,3,4,9],seamlessli:[],sec:10,second:[2,3,4,6,9,10],secondari:7,section:[4,7,8,10],sector:6,see:[1,8,9],seismic:[6,7],seismogram:[2,3,4,9,10],seismogram_0:[2,3,4],seismogram_1:3,seismogram_2:3,seismogram_3:3,seismogram_test:[],select:9,self:9,sens:[9,10],separ:[9,10],sequenti:[3,9,10],serial:[2,3,4],servic:11,set:[2,3,4,6,9,10],setup:10,sever:4,sh:1,shall:11,share:[0,1],shaw:[0,8],shaw_build:[],shawdir:[0,1],shawex:[2,3,4],shawexedir:[2,3,4],shear:[6,9,10],should:[1,2,3,4,9],show:[3,6],shown:[6,9],sigma_:6,signal:[2,3,4,7,9,10],simil:[],simpl:1,simpli:9,simplic:[2,3,4,6],simplif:6,simplifi:1,simul:[5,6,8,10],simultan:[4,7,9,10],sinc:[2,3,4,6],singl:[5,7,8,10],singlelayerdescript:[],singular:6,sinusoid:[9,10],size:[9,10],snap:[2,3],snaps_sp:[2,3,9,10],snaps_sp_0:[2,3,4],snaps_sp_1:3,snaps_sp_2:3,snaps_sp_3:3,snaps_vp:[2,3,9,10],snaps_vp_0:[2,3,4],snaps_vp_1:3,snaps_vp_2:3,snaps_vp_3:3,snapshot:[2,3,4,9,10],snapshotmat
rix:[2,3,9,10],snaptshot:9,so:[1,9],softwar:11,solut:[9,10,11],solv:[3,4,6,7,9,10],some:[1,3,7],someth:[8,9],soon:[],sourc:[2,3,4,6,7,8,10,11],spatial:6,special:11,specif:[1,2,9,11],specifi:9,sphere:6,spheric:6,spread:[2,3,6],staff:[],stagger:[2,6],standard:9,standpoint:7,start:[1,3,4,8,10],startion:9,state:[3,7,9,10],station:4,step:[2,3,4,7,9,10],stiff:7,store:[7,9],stress:[2,3,4,6,7,9,10],stress_label:[],strict:11,structur:[1,9],strucutur:[],subnod:10,subscript:6,substanti:6,substitut:11,succe:[],suitabl:7,support:[1,7,9,10],suppos:9,surfac:[6,9,10],symmetr:[6,7],symmetri:6,system:[6,7],t:[2,3,6],tab:[2,3,4],take:[3,4],tar:1,target:[3,6,9,10],tartget:2,technolog:11,templat:[7,8],tencer:7,tensor:[6,7,9],term:[3,6,7,9,10,11],termin:1,test:[0,1],than:[3,4,6],thei:7,them:3,theori:11,therefor:[6,9],theta:6,thi:[0,1,2,3,4,6,7,9,10,11],thing:[1,9],thread:[2,3],three:10,through:[3,7],thu:[7,9],time:[2,3,4,6,7,9,10],timestep:[2,3],titl:7,took:4,toolkit:[],top:10,tort:11,total:9,tpl:7,trail:3,trajectori:[4,9],transvers:7,travel:7,tree:1,tri:1,two:[6,7,8,10],txt:[2,3,4],type:7,typic:[0,6],u:11,uncertainti:[],undefin:6,under:11,understand:7,unilay:[9,10],unit:[9,10],unset:[],until:10,up:[9,10],url:7,us:[1,2,3,4,6,7,8,10,11],user:7,userguid:[],usual:0,v:6,v_:[6,9],valid:8,vari:6,veloc:[2,3,4,6,7,9,10],version:[0,4],via:[1,5,7],visual:[2,3,4],volum:7,vp:2,vp_d240:3,vp_d700:3,wa:[],wai:[1,4,11],want:[1,2,3,7,9,10],warranti:11,wave:[3,6,9],wavefield:[2,3],we:[0,1,2,3,4,6,7,9,10],well:7,what:7,when:[4,6,9],where:[0,1,2,3,4,6,9,10],whether:11,which:[3,4,6,10],within:9,without:[1,9,11],work:[2,3,4],workdir:1,write:[3,9],written:[6,11],www:[7,9],x:[2,3,9],xr:[],yaml:[0,1,2,3,4,8,9],yamlcpp:[0,1],year:7,yield:6,you:[0,1,2,3,4,7,8,9,10],your:[0,1,9,10],yourself:[],zero:[6,9]},titles:["Building: \u201cexpert\u201d mode","Building: step-by-step","(1): Single Forcing","(2): Multi-forcing via rank-1","(3): Multi-forcing via rank-2","End-to-end Demos","Governing equations","SHeAr Waves 
(SHAW) Simulator","Input File","General Section","Template","License"],titleterms:{"1":[1,2,3,9],"2":[1,3,4,9],"3":[1,4],"4":[],A:[],The:9,av:[],bilay:[],bla:[],build:[0,1],citat:[],cite:7,cmake:[],code:[],content:7,cuda:[],custom:9,data:[2,3,4],demo:5,descript:[],discret:6,e:[],ear:[],earth:[],end:5,environ:[1,2,3,4],equat:6,expert:0,featur:7,figur:[],file:[2,3,4,8],forc:[2,3,4,9],from:[],gener:[2,3,4,9],govern:6,have:[],highlight:7,host:[],how:7,input:[2,3,4,8],instal:[],io:9,kernel:[],kokko:[],lapack:[],layer:9,let:[],licens:11,main:[],materi:9,mesh:[2,3,4],mode:0,model:9,motiv:7,multi:[3,4,9],onli:[],openmp:[],out:[],post:[2,3,4],preliminari:[],prem:9,prepar:[1,2,3,4],prerequisit:1,process:[2,3,4],r:[],rank:[3,4,9],red:[],refer:[],run:[2,3,4,9],section:9,serial:[],sh:[],shaw:[1,7],shear:7,simul:[2,3,4,7,9],singl:[2,9],sourc:9,specif:[],step:1,templat:10,test:[],them:[],tpl:1,two:9,us:9,via:[3,4],w:[],want:[],wave:7,you:[]}})
\ No newline at end of file
+Search.setIndex({docnames:["build_expert","build_stepbystep","demo1","demo2","demo3","demos","goveq","index","inputfile","inputfile_description","inputfile_template","license","performance"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["build_expert.rst","build_stepbystep.rst","demo1.rst","demo2.rst","demo3.rst","demos.rst","goveq.rst","index.rst","inputfile.rst","inputfile_description.rst","inputfile_template.rst","license.rst","performance.rst"],objects:{},objnames:{},objtypes:{},terms:{"0":[0,1,2,3,4,6,9,10,12],"00":[0,1,12],"004":[],"0045":7,"01":12,"03":[],"05":[],"09":[],"1":[0,5,6,7,8,10,11,12],"10":[0,1,7,9,10],"100":[2,3,4,10],"1000":2,"1016":7,"1024":[3,4],"105":[3,4],"107":4,"11":[],"1100":[9,10],"113973":7,"11742":[],"12":[],"120":10,"125gb":12,"13":[],"130":[3,4],"14":[0,1],"15":[],"150":[4,9,10],"155":[3,4],"16":[0,1,4],"160":10,"16m":2,"17":4,"175":[3,4],"18":12,"180":[2,3,4],"19":[],"1981":9,"1d":[7,9],"2":[0,5,6,7,8,10,11,12],"20":9,"200":[2,9],"2000":[2,3,4,10],"2009":[],"2019":11,"2021":7,"21":[],"21m":3,"22":[],"23":10,"231":[2,3],"24":[4,12],"240":[3,4],"25":[2,3,4,9,10],"250":2,"256":[3,4],"26":[],"2667":[3,4],"28":[],"28m":2,"297":9,"2d":[6,9],"3":[0,5,6,7,9,10,11,12],"30":[2,3,4],"30b":[],"31":[],"32":[3,4],"34b":[],"356":9,"36":[3,12],"371":[6,9],"37m":3,"384":7,"4":[0,1,5,9,10],"40":[9,10],"4000":2,"44":10,"440":[3,4],"45":10,"480":[6,9],"5":[0,1,5,6,9,10],"50":10,"5000":10,"540":[3,4],"55":[3,4],"550":[9,10],"556":10,"5m":2,"6":[5,6,9],"6154":12,"640":2,"65":[2,3,4],"650":9,"68":[],"7":[0,1,12],"700":[3,4],"72":12,"75mb":12,"78":[],"7825":7,"8":[0,1,3,4,10,12],"80":[2,3,4],"8000":[2,3],"86":[],"9":4,"94":[],"95":[],"96b":[],"9m":3,"case":[3,4,9],"class
":9,"default":9,"do":[1,6,9],"enum":9,"export":[1,2,3,4],"final":[9,10],"import":7,"int":9,"new":[7,9],"true":[2,3,4,9,10],"try":9,"var":1,"while":[4,7,12],A:[7,8,9,11],AND:11,AS:11,And:3,As:[],At:[6,8],BE:11,BUT:11,BY:11,FOR:11,For:[3,4,9],IF:11,IN:11,IS:11,If:[1,7,9],In:[4,6],NO:11,NOT:[6,11],OF:11,ON:11,OR:11,One:[],SUCH:11,Such:6,THE:11,TO:11,The:[2,4,6,7,8,12],Then:2,These:[7,9],To:[2,3,4,9],With:6,_:[],a0:9,a1:9,a2:9,a_0:9,a_1:9,a_2:9,ab:[],about:[2,3,4],abov:[1,6,9,11],accept:10,accord:9,account:6,accuraci:7,activ:7,ad:9,add:[7,9],addit:[],address:[],adopt:[6,9],adoptedo:[],advanc:10,advantag:[4,7],advis:[0,11],affect:7,after:[2,3,6,7],aka:7,all:[1,3,4,6,9,10,12],allow:[4,7,9,10],along:[2,3,6],alreadi:[1,4],also:[6,7,9],alwai:6,an:[3,4,6,7,9],anderson:9,angl:[6,9],ani:[1,11],anisotrop:7,anoth:[7,9],append:3,appli:7,applic:6,approxim:[3,4,6],ar:[1,2,6,7,9,10,11,12],arc:6,arch:[],architectur:1,aris:11,arrai:[7,9],articl:7,artifici:7,arxiv:[],ascii:[2,3,4,9,10],assembl:6,assum:[6,9],assumpt:6,aug:[],author:7,autom:1,automat:[1,2],avail:8,averag:9,axi:[6,7],axisymmetr:[6,7],b0:9,b1:9,b2:9,b:[0,1,7],b_0:9,b_1:9,b_2:9,backend:[1,12],base:2,bash:1,bash_script:1,batch:10,becaus:[1,2,3,6,7],befor:[],being:6,below:[3,6,9,10],benefit:7,between:[6,7,9],big:6,bilay:[7,10],binari:[2,3,4,9,10,11],bla:[0,1],blas_root:[],blaslibnam:[],bli:12,block:6,blonigan:7,bodi:[6,7],both:[6,7,9],bound:[6,7,9],boundari:[6,9],broadli:7,bsd:11,build:[2,3,4,7],build_kokkos_and_kernel:[],build_openbla:[],build_tpl:1,built:[2,3,4],busi:11,c0:9,c1:9,c2:9,c:[0,1,7],cach:12,call:[7,9],can:[1,6,7,8,9,10],cannot:6,caus:11,cc:10,cd:[1,2,3,4],center:6,certain:11,cfa:9,cfl:[9,10],challeng:7,chang:9,check:[9,10],checkcfl:[2,3,4,9,10],checknumericaldispers:[2,3,4,9,10],choic:[9,10],choos:9,chose:[],circular:6,cite:6,claus:11,clone:[0,1],cma:7,cmake:[0,1],cmb:[6,9,10],code:[0,1,2,3,4,6,7,9,10,11],coeff_vp:[2,3],coeffici:[9,10],collect:[3,4,9,10],column:10,com:[0,1,7],come:7,comma:9,commonli:9,compar:3,co
mpil:[0,1],complet:10,complex:3,compon:6,composit:7,comput:[6,7,9,12],computeat:9,condit:[6,11],consequenti:11,consid:9,constant:9,contact:11,contain:[2,3,4,7,8,9],contour:2,contract:11,contribut:7,contributor:11,coord:[],coordin:[2,3,4,6],coords_sp:[2,3,4],coords_vp:[2,3,4],copi:[2,3,4],copyright:11,core:[1,3,4,6,9,12],correct:6,correspond:[2,9,12],cost:7,cot:6,cotang:6,countour:7,coupl:3,cover:10,cp:[2,3,4],cpp:[0,1],cpp_dir:[0,1],cpu:12,creat:[2,3,4,7,9],create_single_mesh:[2,3,4],critic:7,ctest:[0,1],cuda:[],current:[9,10],custom:[1,7,8,10],cxx:1,d0:9,d1:9,d2:9,d3:[],d:9,damag:11,dat:[2,3],data:[5,9,10,11],dcmake_cxx_compil:0,ddr4:[3,4],de:11,deep:9,defin:[2,6,7,9],degre:[9,10],delai:[2,3,4,9,10],demo1:2,demo2:3,demo3:4,demo:[7,9],demonstr:4,dens:12,densiti:[6,7,9,10],depend:[6,7,9],depth:[2,3,4,9,10],deriv:[6,9,11],describ:4,descript:[],desir:9,detail:[8,9],differ:[3,4,6,7,9,10],dimension:6,dir:[0,2,3,4],direct:[2,3,7,11],directli:[6,9],directori:[2,3,4,9,10],disabl:[9,10],disclaim:11,discontinu:[3,9,10],discret:[7,9],discuss:9,dispers:[9,10],displai:[],distanc:[6,7],distinguish:7,distribut:11,divisor:9,dkokkoskernels_dir:[0,1],do_build:[],doc:[3,4],document:11,doe:[],dof:12,doi:7,domain:[6,7,9],don:[],done:[1,9],down:10,drop:6,drwxr:[],ds:9,dt:[2,3,4,9,10],due:[3,6,7],dure:9,dyaml:[0,1],dylib:[],dynam:[2,3,4,7,9],dziewonski:9,e:[6,7,9],each:[3,4,8,9,12],earth:[2,3,4,6,7,9,10],earthquak:7,easi:7,easili:[1,7,9],edu:9,effect:[3,7],effici:[4,7],effort:1,elast:[6,7],emc:9,enabl:[9,10,12],end:[7,9],endors:11,engin:[7,11],ensembl:7,enter:[],env:1,environ:7,envis:[],equat:7,eric:7,eswsrcdir:[],etc:7,even:11,event:[7,11],everi:[9,10],evolut:[6,7],exampl:[9,10],except:4,exclud:6,execut:[2,3,4],exedir:[2,3,4],exemplari:11,expect:[3,4],expert:[2,3,4,7],explanatori:9,explor:9,explos:7,express:11,extend:[6,9],extens:[],extra:1,extract:[2,3],extractstatefromsnap:[2,3],f:6,fact:4,fail:[],fairli:[],fals:[2,3,4,9,10],fc:[],fetch:1,field:[2,6,7,9,10],figur:[4,6,9],file:[5,7,9],f
ilenam:[2,3,4,9,10],fill:10,finaltim:[2,3,4,9,10],finit:6,first:[2,9],fit:11,fix:9,flag:[],fnrizzi:11,follow:[1,2,3,4,7,8,9,11,12],fom_rank1:[],fom_rank1_sample_depth:[],fom_rank2_sample_depth:[],fominnerdomain:[],fomnearcmb:[],fomnearsurfac:[],fomsymmetryaxisthetapi:[],fomsymmetryaxisthetazero:[],forc:[2,3,4,6,7,8,10],forcing_rank1:[],forcings:[4,9,10],forget:9,form:[7,9,11],format:[4,8],formul:[3,4,6,7,9,12],fortran:[],four:[3,4],frac:6,francesco:[7,11],free:6,freq:[2,3,4,9,10],frequenc:9,from:[0,2,6,7,9,11],fsize:[2,3],full:[3,4,8,9,10],fulli:6,fullpath:[0,1,2,3,4],g:[6,7,9],galerkin:7,gaussderiv:9,gb:[3,4],gcc:[0,1,12],gener:[1,5,6,7,8,10],get:[1,3,4],ghost:6,ghz:[3,4,12],git:[0,1],github:[0,1,7],give:[3,4],given:9,global:7,gold:12,good:11,gov:11,goveq:[],govern:[7,11],graph:[],graph_sp:[2,3],graph_vp:[2,3],grid:[2,3,4,6,9],guid:1,gz:1,ha:7,handl:9,harvard:9,have:[0,1,2,3,4,7,9,10],header:11,here:[1,3,4,6,8],high:[7,8],higher:7,hint:4,holder:11,home:[1,2,3,4],homogenouo:9,host:[],how:[2,3,4,9,10,12],howev:[6,11],http:[0,1,7,9],hyperthread:12,i9:[3,4],i:[6,7,9],idea:[3,4],ident:4,identifi:[3,9],implement:7,impli:[6,11],impos:6,incident:11,includ:[6,11],indirect:11,individu:3,input:[5,7,9],inputfil:[],inputfile_descript:[],insid:[1,2,3,4,9],instal:[0,1],instanc:8,instead:9,integ:9,intel:[3,4,12],intend:[6,9],intens:7,interest:9,interior:7,intern:9,interrupt:11,invari:7,involv:[6,9],io:[2,3,4,7,8,10],iri:9,issn:7,issu:7,its:[6,7,11],itself:6,j4:[0,1],j:7,jacobian_sp:[],jacobian_vp:[],john:7,journal:7,just:[4,9],ker:[],kernel:[0,1,12],kernela:[],keyword:[],kg:9,kilomet:9,kind:[2,3,4,9,10],km:[3,4,6,9,10],kokko:[0,1,7,12],kokkos_instal:[],kokkos_kernels_instal:[],kokkoskernel:[0,1],kokkoskerpfx:[],kokkospfx:[],l3:12,l:[1,9],lapack:[0,1],lapack_root:[],lapacklibnam:[],larg:[3,7],last:[0,9],latest:[],layer1:[9,10],layer2:[9,10],layer:[7,8,10],lead:[],learn:[2,3,4],least:9,left:6,less:[3,4],let:[],level:8,leverag:[],liabil:11,liabl:11,lib:[0,1],libopenbla:[],licens:7,li
ke:[1,8],limit:11,line:[],linear:7,link:[2,8],liquid:6,list:[9,11],liter:[],llc:11,ln:[2,3,4],local:9,locat:[2,4,6,7,9,10],logic:12,longitudin:7,look:8,loss:11,lrwxr:[],lzeng:9,m:[9,12],machin:1,macpro:[3,4],mai:11,main:[7,9],main_fom:10,make:[0,1,9,10],manag:1,mandatori:[9,10],mani:[2,3,9,10,12],mantl:[6,9],materi:[2,3,4,7,8,10,11],matric:7,matrix:[2,4,6,9],maximum:12,mean:[6,10],meant:1,mechan:7,medium:[7,9],memori:12,merchant:11,mesh200x1000:2,mesh256x1024:[3,4],mesh:[5,7,9,10],mesh_info:[2,3],meshdir:[2,3,4,9,10],meshinfo:[],met:11,method:[6,7,9],mhz:[3,4],mimic:9,minim:1,minut:3,miss:10,mkdir:[1,2,3,4],mode:[2,3,4,7],model:[2,3,4,6,7,8,10],modif:11,modifi:9,modular:[7,9],modulu:6,moon:7,moonquak:7,more:[1,2,3,4,9],most:9,move:[],multi:[8,10],multidepthsandperiodsforcingrank1:[],multidepthsandperiodsforcingrank2:[],multidepthsforcingrank1:[],multidepthsforcingrank2:[],multiperiodsforcingrank1:[],multiperiodsforcingrank2:[],multipl:[3,4,7,9,10],must:[6,8,9,11],mycustommaterialmodel:9,myfirstdemo:2,myfirstshawbuild:1,myrundir:[2,3,4],myseconddemo:3,mythirddemo:4,mywavetest:[],myworkdir:[],n:12,na0003525:11,name:[7,9,11],nation:11,need:[1,2,3,4,6,9],neglig:11,neither:11,node:9,nor:11,note:[1,2,4,6,9],notic:[3,11],now:3,nr:[2,3,4],ntess:11,nth:[2,3,4],number:[7,9,12],numer:[9,10],obtain:[4,7,12],omit:9,omp:[],omp_num_thread:[2,3],omp_plac:[2,3],omp_proc_bind:[2,3],onc:[1,9],one:[6,7,9,10],ones:[],onli:[1,2,6,7,9,10],oordin:4,open:7,openbla:[],openmp:[1,2,3,12],oper:[6,12],optim:[],option:[9,10],order:6,org:7,organ:8,origin:6,oscil:7,other:[7,9,11],otherwis:11,our:2,out:11,outfileappend:[2,3],outformat:[2,3],output:[9,10],over:[6,9],own:9,p:[1,2,3,4,7],packag:1,page:[1,7],paper:[7,9],paramet:7,parametr:[7,9,10],parish:7,parser:9,parser_test_1:[],parser_test_2:[],parser_test_3:[],parser_test_4:[],parser_test_5:[],parser_test_6:[],part:[],partial:6,particl:7,particular:11,pass:[],path:[0,3,4,9,10],patienc:3,patrick:7,pattern:3,pdf:9,per:12,perform:7,period:[2,3,4,7,9,1
0],permiss:11,permit:11,perpendicularli:7,pfx:[],phi:6,phy:9,physic:7,pi:6,pick:1,pii:7,pkgconfig:[],plan:9,planet:[6,7],pleas:7,plot:[2,3,4,7,12],plotseismogram:[2,3,4],plotwavefield:[2,3],plu:12,point:[2,3,4,6,8,9],polar:[2,3,6],portabl:7,possibl:[1,11],post:5,potenti:9,practic:6,preliminari:[7,9,10],prem:[2,3,4,7,8,10],prepar:[5,7],prerequisit:7,present:1,pressio:[0,1,11],pressur:7,pretti:[],primari:7,print:[9,10],prior:11,problem:[4,6,12],proce:[2,3,4],process:5,procur:11,product:[9,11],profil:[9,10],profit:11,program:7,project:7,promot:11,propag:[3,6,7],proper:6,properti:[7,9],provid:[1,9,11],purpos:[1,11],py:[2,3,4],python:[2,3,4],quadrat:9,quantif:[],quantiti:6,question:11,quick:[],quickli:1,r:[6,12],r_:[6,9],radial:[2,3,6,7,9],rank:[3,4,7,8,10,12],read:1,readi:[2,3,4],real:9,realiz:[3,4,7,9],receiv:[2,3,4,9,10],recogn:9,record:9,redistribut:11,reduct:7,ref:[],refer:[6,7,9,10,12],reflect:[3,6],refract:3,region:[6,9],rel:7,reli:[6,7,12],reliz:9,remain:6,remark:6,rememb:[2,3],repo:[0,1,7],repositori:[0,1,2,3,4],repres:[6,9,12],reproduc:11,req:7,requir:7,rerun:[],research:7,respect:[2,3,6],result:12,retain:11,rho:[6,9],ricker:[2,3,4,9],right:[6,10,11],rizzi2021113973:7,rizzi:[7,11],role:9,row:4,run:[0,5,8,10],runtim:[3,4],s0045782521003042:7,s:[0,1,2,3,4,7,8,9,11],safe:[],sake:4,same:[4,7,9],sampl:[4,7,9,10],samplingfreq:[2,3],sandia:11,save:[9,10],scale:7,scenario:[9,10],schemat:6,scheme:2,scienc:7,sciencedirect:7,script:[1,2,3,4,9],seamlessli:[],sec:10,second:[2,3,4,6,9,10],secondari:7,section:[7,8,10],sector:6,see:[1,2,3,4,7,8,9],seismic:[6,7],seismogram:[2,3,4,9,10],seismogram_0:[2,3,4],seismogram_1:3,seismogram_2:3,seismogram_3:3,seismogram_test:[],select:9,self:9,sens:[9,10],separ:[9,10],sequenti:[3,9,10],serial:[3,4],servic:11,set:[1,2,3,4,6,9,10],setup:10,sever:4,sh:1,shall:11,share:[0,1],shaw:[0,8],shaw_build:[],shawdir:[1,2,3,4],shawex:[2,3,4],shawexedir:[],shear:[6,9,10],should:[1,2,3,4,9],show:[3,4,6,12],shown:[6,9],sigma_:6,signal:[2,3,4,7,9,10],sim
il:[],simpl:1,simpli:9,simplic:[3,4,6],simplif:6,simplifi:1,simul:[5,6,8,10],simultan:[4,7,9,10,12],sinc:[2,3,4,6],singl:[2,7,8,10],singlelayerdescript:[],singular:6,sinusoid:[9,10],size:[9,10],snap:[2,3],snaps_sp:[2,3,4,9,10],snaps_sp_0:[2,3,4],snaps_sp_1:3,snaps_sp_2:3,snaps_sp_3:3,snaps_vp:[2,3,4,9,10],snaps_vp_0:[2,3,4],snaps_vp_1:3,snaps_vp_2:3,snaps_vp_3:3,snapshot:[2,3,4,9,10],snapshotmatrix:[2,3,4,9,10],snaptshot:9,so:[1,9,12],soft:2,softwar:11,solut:[9,10,11],solv:[3,4,6,7,9,10],some:[1,3,7],someth:[8,9],soon:[],sourc:[2,3,4,6,7,8,10,11],spatial:6,special:11,specif:[1,2,9,11],specifi:[1,9],sphere:6,spheric:6,spread:[2,3,6],staff:[],stagger:[2,6],standard:9,standpoint:7,start:[1,8,10],startion:9,state:[3,7,9,10],station:4,step:[2,3,4,7,9,10],stiff:7,store:[7,9],stress:[2,3,4,6,7,9,10,12],stress_label:[],strict:11,structur:[1,9],strucutur:[],subnod:10,subscript:6,substanti:6,substitut:11,succe:[],suitabl:7,support:[1,7,9,10,12],suppos:9,surfac:[6,9,10],symmetr:[6,7],symmetri:6,system:[6,7],t:[2,3,6],tab:[],take:[3,4],tar:1,target:[3,6,9,10],tartget:2,technolog:11,templat:[7,8],tencer:7,tensor:[6,7,9],term:[3,6,7,9,10,11],termin:1,test:[0,1],tha:1,than:[3,4,6],thei:7,them:3,theori:11,therefor:[6,9],theta:6,thi:[0,1,2,3,4,6,7,9,10,11,12],thing:[1,9],thread:[2,3,12],three:10,through:[3,7],thu:[7,9,12],time:[2,3,4,6,7,9,10],timestep:[2,3],titl:7,took:4,toolkit:[],top:10,tort:11,total:[9,12],tpl:7,trail:3,trajectori:[4,9,12],transvers:7,travel:7,tree:1,tri:1,two:[6,7,8,10,12],txt:[2,3,4],type:7,typic:6,u:11,uncertainti:[],undefin:6,under:11,understand:7,unilay:[9,10],unit:[9,10],unset:[],until:10,up:[1,9,10],url:7,us:[1,2,3,4,6,7,8,10,11,12],user:7,userguid:[],usual:[],v:6,v_:[6,9],valid:8,vari:6,veloc:[2,3,4,6,7,9,10,12],version:[0,4,12],via:1,visual:[3,4],volum:7,vp:2,vp_d240:3,vp_d700:3,wa:[],wai:[1,4,11],want:[0,1,2,3,9,10],warranti:11,wave:[2,3,4,6,9],wavefield:3,we:[0,1,2,3,4,6,7,9,10,12],well:7,what:[7,12],when:[4,6,9,12],where:[0,1,2,3,4,6,9,10],whether:11
,which:[2,3,4,6,10],wit:[],within:[0,9],without:[1,9,11],work:[2,3,4],workdir:1,workstat:12,write:[3,9],written:[6,11],www:[7,9],x:[2,3,9],xeon:12,xr:[],yaml:[0,1,2,3,4,8,9],yamlcpp:[0,1],year:7,yield:6,you:[0,1,2,3,4,7,8,9,10],your:[0,1,9,10],yourself:[],zero:[6,9]},titles:["Building: \u201cexpert\u201d mode","Building: step-by-step","Demo 1","Demo 2","Demo 3","End-to-end Demos","Governing equations","SHeAr Waves (SHAW) Simulator","Input File","General Section","Template","License","Performance"],titleterms:{"1":[1,2,3,4,9],"2":[1,2,3,4,9],"3":[1,2,3,4],"4":[2,3,4],"5":[2,3,4],"6":[3,4],A:[],The:9,av:[],bilay:[],bla:[],build:[0,1],citat:[],cite:7,cmake:[],code:[],content:7,cuda:[],custom:9,d1:[],d2:[],d3:[],data:[2,3,4],demo1:[],demo2:[],demo3:[],demo:[2,3,4,5],descript:[2,3,4],discret:6,e:[],ear:[],earth:[],end:5,environ:1,equat:6,expert:0,featur:7,figur:[],file:[2,3,4,8],forc:9,from:[],gener:[2,3,4,9],govern:6,have:[],highlight:7,host:[],how:7,input:[2,3,4,8],instal:[],io:9,kernel:[],kokko:[],lapack:[],layer:9,let:[],licens:11,main:[],materi:9,mesh:[2,3,4],mode:0,model:9,motiv:7,multi:9,onli:[],openmp:[],out:[],perform:12,post:[2,3,4],preliminari:[],prem:9,prepar:[1,2,3,4],prerequisit:[0,1],process:[2,3,4],r:[],rank:9,red:[],refer:[],run:[2,3,4,9],section:9,serial:[],sh:[],shaw:[1,7],shear:7,simul:[2,3,4,7,9],singl:9,sourc:9,specif:[],step:1,templat:10,test:[],them:[],tpl:1,two:9,us:9,via:[],w:[],want:[],wave:7,you:[]}})
\ No newline at end of file
diff --git a/docs/src/build_expert.rst b/docs/src/build_expert.rst
index 8852ea28..5d37521b 100644
--- a/docs/src/build_expert.rst
+++ b/docs/src/build_expert.rst
@@ -1,11 +1,12 @@
 Building: "expert" mode
 =======================
 
-You need:
+Prerequisites
+-------------
 
 * This repo: ``git clone https://github.com/Pressio/SHAW``
 
-* C++14 compiler: we have tested this with GCC 8.3.1 and GCC 8.4.0
+* C++14 compiler: we have tested this with GCC 8.3.1, GCC 8.4.0, and GCC 10.2.0.
 
 * ``CMake>=3.16.0``
 
@@ -17,20 +18,19 @@ You need:
 * `yaml-cpp <https://github.com/jbeder/yaml-cpp>`_: last tested version ``0.7.0``
 
 
-Then, as usual for a typical CMake project, you can do:
+Build
+-----
 
 .. code-block:: shell
 
-   export CXX=<path-to-your-C++-compiler>
-   export SHAWDIR=<path-to-where-you-cloned-the-shaw-repository>
-
-   mkdir build && cd build
-
    cmake \
-   -DKokkosKernels_DIR=<your-kernels-install-path>/lib/cmake/KokkosKernels/ \
-   -Dyaml-cpp_DIR=<your-yamlcpp-install-path>/share/cmake/ \
-   ${SHAWDIR}
+   -DCMAKE_CXX_COMPILER=<fullpath-to-your-C++-compiler> \
+   -DKokkosKernels_DIR=<fullpath-to-your-kernels-install-path>/lib/cmake/KokkosKernels/ \
+   -Dyaml-cpp_DIR=<fullpath-to-your-yamlcpp-install-path>/share/cmake/ \
+   -B <fullpath-to-where-you-want-to-build-the-code> \
+   -S <fullpath-to-your-shaw-repository>
 
+   # from within your build dir
    make -j4
 
    # running the tests is advised
diff --git a/docs/src/build_stepbystep.rst b/docs/src/build_stepbystep.rst
index 3e081255..aa66dceb 100644
--- a/docs/src/build_stepbystep.rst
+++ b/docs/src/build_stepbystep.rst
@@ -5,8 +5,8 @@ If you are reading this page, it likely is because you want
 a simplified (automated) way the get this done,
 so that you can minimize the extra effort
 in building the TPLs needed and the SHAW code.
-This page tries to address this: it provides a step-by-step
-guide that leverages some scripts we have prepared to simplify this.
+This page tries to do so by providing a step-by-step
+guide and some scripts we have prepared.
 
 
 Prerequisites
@@ -28,22 +28,19 @@ Prerequisites
 Step 1: Prepare environment
 ----------------------------
 
-Let's make things easy:
-
 .. code-block:: shell
 
-   export CXX=<path-to-your-C++14-compiler>
-   export SHAWDIR=<path-to-where-you-cloned-the-SHAW-repository>
+   export CXX=<fullpath-to-your-C++14-compiler>
+   export SHAWDIR=<fullpath-to-where-you-cloned-the-SHAW-repository>
 
    export WORKDIR=${HOME}/myFirstShawBuild
    mkdir -p ${WORKDIR}
 
 
 Step 2: Build TPLs
---------------------------------
+------------------
 
-To simplify this part, we have prepared script that
-automates getting the TPLs:
+We have prepared a script that automates this:
 
 .. code-block:: shell
 
@@ -51,12 +48,12 @@ automates getting the TPLs:
    bash build_tpls.sh ${WORKDIR} openmp
 
 This script will fetch, build and install inside ``WORKDIR/tpls``
-all TPLs needed: Kokkos-core, Kokkos-kernelas and yaml-cpp.
+all TPLs needed: Kokkos-core, Kokkos-kernels and yaml-cpp.
 
 .. Attention::
 
-   This will build Kokkos for host-only use with the OpenMP backend
-   but **without** any architecture specifications. This is on purpose,
+   This builds Kokkos with only the OpenMP backend and **without**
+   any architecture specifications. This is on purpose,
    because this step is meant to be as generic and simple as possible to get
    you started quickly. If you want to customize things, read
    more on the `Kokkos github <https://github.com/kokkos>`_.
@@ -90,14 +87,16 @@ Step 3: Build SHAW
 .. code-block:: shell
 
    cd ${WORKDIR}
-   mkdir shaw-build && cd shaw-build
 
+   # note that here there is no need to specify the compiler because
+   # cmake will automatically pick up the env var CXX that we
+   # already set above in step 1
    cmake \
      -DKokkosKernels_DIR=${WORKDIR}/tpls/kokkos-kernels-install/lib/cmake/KokkosKernels/ \
      -Dyaml-cpp_DIR=${WORKDIR}/tpls/yamlcpp-install/share/cmake/ \
-     ${SHAWDIR}
+     -B ${WORKDIR}/shaw-build \
+     -S ${SHAWDIR}
 
+   cd ${WORKDIR}/shaw-build
    make -j4
-
-   # running the SHAW tests is advised
    ctest
diff --git a/docs/src/demo1.rst b/docs/src/demo1.rst
new file mode 100644
index 00000000..6716106d
--- /dev/null
+++ b/docs/src/demo1.rst
@@ -0,0 +1,143 @@
+Demo 1
+======
+
+.. admonition:: Description:
+
+   This demo simulates the wave dynamics
+   for a single forcing using the PREM Earth model.
+
+
+1. Prepare
+----------
+
+.. code-block:: bash
+
+   export SHAWDIR=<fullpath-to-the-source-code-repository>
+   export EXEDIR=<fullpath-to-where-you-built-the-code-executables>
+
+   # create a dir to run the demo
+   export MYRUNDIR=${HOME}/myFirstDemo
+   mkdir -p ${MYRUNDIR}
+
+.. Important::
+   You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`.
+
+|
+
+2. Generate the mesh
+--------------------
+
+We use a grid of ``200`` x ``1000`` velocity points
+along the radial and polar directions, respectively.
+
+To generate the mesh files proceed as follows:
+
+.. code-block:: bash
+
+   cd ${SHAWDIR}/meshing
+   python create_single_mesh.py -nr 200 -nth 1000 -working-dir ${MYRUNDIR}
+
+Note that the grid generator script only needs the velocity points
+because the stress points are defined automatically
+based on the :ref:`staggered scheme <discretization>`.
+
+After generating the grid, you should have a ``${MYRUNDIR}/mesh200x1000`` directory containing:
+
+.. code-block:: bash
+
+   .
+   ├── [4.5M]  coeff_vp.dat
+   ├── [ 28M]  graph_sp.dat
+   ├── [ 16M]  graph_vp.dat
+   └── [ 231]  mesh_info.dat
+
+|
+
+3. Input file
+-------------
+
+We use the following input file (:doc:`learn more about input file <inputfile>`):
+
+.. literalinclude :: ../../demos/demo1/input.yaml
+  :language: yaml
+
+which we have ready for you to copy as:
+
+.. code-block:: bash
+
+   cp ${SHAWDIR}/demos/demo1/input.yaml ${MYRUNDIR}
+
+|
+
+4. Run the simulation
+---------------------
+
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+
+   # soft link the executable
+   ln -s ${EXEDIR}/shawExe .
+
+   # if you use OpenMP build, remember to set
+   # OMP_NUM_THREADS=how-many-you-want-to-use OMP_PLACES=threads OMP_PROC_BIND=spread
+   ./shawExe input.yaml
+
+|
+
+5. Post-process data
+--------------------
+
+The demo should generate inside ``${MYRUNDIR}`` the following:
+
+.. code-block:: bash
+
+   coords_sp.txt #: coordinates of the stresses grid points
+   coords_vp.txt #: coordinates of the velocity grid points
+   seismogram_0  #: seismogram at the receiver locations set in input.yaml
+   snaps_vp_0    #: snapshot matrix for the velocity
+   snaps_sp_0    #: snapshot matrix for the stresses
+
+
+We created Python scripts to post-process these data:
+
+.. code-block:: bash
+
+   cp ${SHAWDIR}/demos/demo1/*.py ${MYRUNDIR}
+
+
+First, the seismogram data:
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   python plotSeismogram.py
+
+
+.. image:: ../img/demo1_f1.png
+
+
+Then, contour plots of the velocity field at ``t=250, 1000, 2000`` (seconds):
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   ln -s ${EXEDIR}/extractStateFromSnaps .
+
+   # extract from the velocity snapshots the velocity field at specific timesteps:
+   # since we use ``dt = 0.25`` seconds, our targets ``t=250, 1000, 2000``,
+   # correspond to *time steps* 1000, 4000, 8000
+   ./extractStateFromSnaps --snaps=./snaps_vp_0 binary --fsize=1 \
+     --outformat=ascii --timesteps=1000 4000 8000 \
+     --samplingfreq=100 --outfileappend=vp
+
+   python plotWavefield.py
+
+
+.. image:: ../img/demo1_f2.png
+   :width: 30%
+.. image:: ../img/demo1_f3.png
+   :width: 28%
+.. image:: ../img/demo1_f4.png
+   :width: 28%
diff --git a/docs/src/demo2.rst b/docs/src/demo2.rst
new file mode 100644
index 00000000..c8ef9681
--- /dev/null
+++ b/docs/src/demo2.rst
@@ -0,0 +1,158 @@
+Demo 2
+======
+
+.. admonition:: Description:
+
+   This demo simulates the wave dynamics for multiple
+   forcings using the rank-1 formulation and the PREM Earth model.
+
+
+1. Prepare
+----------
+
+.. code-block:: bash
+
+   export SHAWDIR=<fullpath-to-the-source-code-repository>
+   export EXEDIR=<fullpath-to-where-you-built-the-code-executables>
+
+   # create a dir to run the demo
+   export MYRUNDIR=${HOME}/mySecondDemo
+   mkdir -p ${MYRUNDIR}
+
+.. Important::
+   You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`.
+
+|
+
+2. Generate the mesh
+--------------------
+
+For this demo, we use a grid of ``256`` x ``1024`` velocity points
+along the radial and polar directions, respectively.
+To generate the mesh files proceed as follows:
+
+.. code-block:: bash
+
+   cd ${SHAWDIR}/meshing
+   python create_single_mesh.py -nr 256 -nth 1024 -working-dir ${MYRUNDIR}
+
+
+After generating the grid, you should have a ``${MYRUNDIR}/mesh256x1024`` directory containing:
+
+.. code-block:: bash
+
+   .
+   ├── [5.9M]  coeff_vp.dat
+   ├── [ 37M]  graph_sp.dat
+   ├── [ 21M]  graph_vp.dat
+   └── [ 231]  mesh_info.dat
+
+|
+
+3. Input file
+-------------
+
+We use the following input file (:doc:`learn more about input file <inputfile>`):
+
+.. literalinclude :: ../../demos/demo2/input.yaml
+  :language: yaml
+
+which we have ready for you to copy as:
+
+.. code-block:: bash
+
+   cp ${SHAWDIR}/demos/demo2/input.yaml ${MYRUNDIR}
+
+|
+
+4. Run the simulation
+---------------------
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   ln -s ${EXEDIR}/shawExe .
+
+   # if you use OpenMP build, remember to set
+   # OMP_NUM_THREADS=how-many-you-want-to-use OMP_PLACES=threads OMP_PROC_BIND=spread
+   ./shawExe input.yaml
+
+You will notice that since we use the rank-1 formulation, the code will solve
+sequentially all four realizations of the forcing term.
+To give an idea of runtime, on a MacPro with 2.4 GHz 8-Core Intel Core i9 and 32 GB 2667 MHz DDR4,
+and using a serial build of the code, each individual realization takes approximately 36 seconds,
+of which the IO time for data collection is less than 1 second.
+
+
+5. Simulation data
+------------------
+
+After running the demo (have some patience because it takes a couple of minutes
+if you use the serial mode), you should have inside ``${MYRUNDIR}`` the following files:
+
+.. code-block:: bash
+
+   coords_sp.txt #: coordinates of the stresses grid points
+   coords_vp.txt #: coordinates of the velocity grid points
+
+   seismogram_0  #: seismogram for depth = 240
+   seismogram_1  #: seismogram for depth = 440
+   seismogram_2  #: seismogram for depth = 540
+   seismogram_3  #: seismogram for depth = 700
+
+   snaps_vp_0    #: velocity snapshots for depth = 240
+   snaps_vp_1    #: velocity snapshots for depth = 440
+   snaps_vp_2    #: velocity snapshots for depth = 540
+   snaps_vp_3    #: velocity snapshots for depth = 700
+
+   snaps_sp_0    #: stresses snapshots for depth = 240
+   snaps_sp_1    #: stresses snapshots for depth = 440
+   snaps_sp_2    #: stresses snapshots for depth = 540
+   snaps_sp_3    #: stresses snapshots for depth = 700
+
+
+6. Post-process data
+--------------------
+
+To post-process the data, get the Python scripts created
+for this demo and visualize the seismogram:
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   cp ${SHAWDIR}/demos/demo2/plotSeismogram.py .
+   python plotSeismogram.py
+
+
+.. image:: ../img/demo2_f1.png
+
+
+We now extract and compare the velocity wavefield at ``t=2000`` (seconds)
+for ``depth=240`` and ``depth=700``:
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   ln -s ${EXEDIR}/extractStateFromSnaps .
+
+   # snaps_vp_0 contains snapshots for depth=240 km
+   # extract target state and write to file appending vp_d240 to identify the case
+   ./extractStateFromSnaps --snaps=./snaps_vp_0 binary --fsize=1 \
+     --outformat=ascii --timesteps=8000  --samplingfreq=100 --outfileappend=vp_d240
+
+   # snaps_vp_3 contains snapshots for depth=700 km
+   # extract target state and write to file appending vp_d700 to identify the case
+   ./extractStateFromSnaps --snaps=./snaps_vp_3 binary --fsize=1 \
+     --outformat=ascii --timesteps=8000  --samplingfreq=100 --outfileappend=vp_d700
+
+   cp ${SHAWDIR}/demos/demo2/plotWavefield.py .
+   python plotWavefield.py
+
+The resulting plots are shown below: as expected, they exhibit largely different
+patterns and trailing waves due to the complex reflection/refraction effects
+of the waves propagating through the discontinuous PREM material model.
+
+.. image:: ../img/demo2_f2.png
+  :width: 45%
+.. image:: ../img/demo2_f3.png
+  :width: 45%
diff --git a/docs/src/demo3.rst b/docs/src/demo3.rst
new file mode 100644
index 00000000..51248069
--- /dev/null
+++ b/docs/src/demo3.rst
@@ -0,0 +1,111 @@
+Demo 3
+======
+
+.. admonition:: Description:
+
+   This demo simulates the wave dynamics for multiple
+   forcings using the rank-2 formulation and the PREM Earth model.
+   For the sake of demonstration, this demo solves *the same problem*
+   described in :doc:`demo2`, except that here we use the *rank-2 formulation*,
+   which allows us to simulate several trajectories simultaneously.
+
+1. Prepare
+----------
+
+.. code-block:: bash
+
+   export SHAWDIR=<fullpath-to-the-source-code-repository>
+   export EXEDIR=<fullpath-to-where-you-built-the-code-executables>
+
+   # create a dir to run the demo
+   export MYRUNDIR=${HOME}/myThirdDemo
+   mkdir -p ${MYRUNDIR}
+
+
+.. Important::
+   You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`.
+
+|
+
+2. Generate the mesh
+--------------------
+
+.. code-block:: bash
+
+   cd ${SHAWDIR}/meshing
+   python create_single_mesh.py -nr 256 -nth 1024 -working-dir ${MYRUNDIR}
+
+3. Input file
+-------------
+
+We use the following input file (:doc:`learn more about input file <inputfile>`):
+
+.. literalinclude :: ../../demos/demo3/input.yaml
+  :language: yaml
+
+
+which we have ready for you to copy as:
+
+.. code-block:: bash
+
+   cp ${SHAWDIR}/demos/demo3/input.yaml ${MYRUNDIR}
+
+|
+
+4. Run the simulation
+---------------------
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   ln -s ${EXEDIR}/shawExe .
+   ./shawExe input.yaml
+
+To give an idea of runtime, on a MacPro with 2.4 GHz 8-Core Intel Core i9 and 32 GB 2667 MHz DDR4,
+and using a serial build of the code, the run takes approximately 107 seconds,
+of which the IO time for data collection is less than 1 second.
+Note that this already hints at the advantages of using the rank-2 formulation.
+In fact, while here it takes 107 seconds to simulate the four trajectories simultaneously,
+in the :doc:`rank-1 version of this demo <demo2>` it took
+about 150 seconds to simulate the same realizations.
+
+
+5. Simulation data
+------------------
+
+The demo should generate inside ``${MYRUNDIR}`` the following:
+
+.. code-block:: bash
+
+   coords_sp.txt #: coordinates of the stresses grid points
+   coords_vp.txt #: coordinates of the velocity grid points
+
+   # seismogram for all forcing realizations at the receiver locations
+   # the input file sets the format to be ascii
+   # since we have 8 receivers and 4 sample depths, the file generated is as follows:
+   # rows 1-8  : seismogram for each station when source depth=240 Km
+   # rows 9-16 : seismogram for each station when source depth=440 Km
+   # rows 17-24: seismogram for each station when source depth=540 Km
+   # rows 25-32: seismogram for each station when source depth=700 Km
+   seismogram_0
+
+   snaps_vp_0    #: snapshot matrix for the velocity for all realizations
+   snaps_sp_0    #: snapshot matrix for the stresses for all realizations
+
+
+6. Post-process data
+--------------------
+
+To post-process the data, get the Python scripts created
+for this demo and visualize the seismogram:
+
+.. code-block:: bash
+
+   cd ${MYRUNDIR}
+   cp ${SHAWDIR}/demos/demo3/plotSeismogram.py .
+   python plotSeismogram.py
+
+This generates a figure identical to the :doc:`seismogram plot obtained with the rank-1 formulation <demo2>`
+(as expected) since here we solve the same problem just in a different, more efficient way.
+
+.. image:: ../img/demo3_f1.png
diff --git a/docs/src/demo_rank1fom.rst b/docs/src/demo_rank1fom.rst
deleted file mode 100644
index 323b02ca..00000000
--- a/docs/src/demo_rank1fom.rst
+++ /dev/null
@@ -1,160 +0,0 @@
-(1): Single Forcing
-===================
-
-Below we assume you already completed one of the builds described in the build tab.
-For simplicity, look at the serial step-by-step build guide.
-
-Prepare environment
--------------------
-
-.. code-block:: bash
-
-   export ESWSRCDIR=<fullpath-to-the-source-code-repository>
-   export SHAWEXEDIR=<fullpath-to-where-you-built-the-code-executables>
-
-   # create a dir to run the demo
-   export MYRUNDIR=${HOME}/myFirstDemo
-   mkdir -p ${MYRUNDIR}
-
-
-Generating the mesh
--------------------
-
-For this demo, we use a grid of ``200`` x ``1000`` velocity points
-along the radial and polar directions, respectively.
-To generate the mesh files proceed as follows:
-
-.. code-block:: bash
-
-   cd ${ESWSRCDIR}/meshing
-   python create_single_mesh.py -nr 200 -nth 1000 -working-dir ${MYRUNDIR}
-
-Note that the grid generator script only needs the velocity points
-because the stress points are defined automatically
-based on the `staggered scheme <{filename}/goveq.rst>`_.
-
-After generating the grid, you should have a ``${MYRUNDIR}/mesh200x1000`` directory containing:
-
-.. code-block:: bash
-
-   .
-   ├── [4.5M]  coeff_vp.dat
-   ├── [ 28M]  graph_sp.dat
-   ├── [ 16M]  graph_vp.dat
-   └── [ 231]  mesh_info.dat
-
-
-Input file
-----------
-
-We use the following input file (`learn more about input file <{filename}/inputfile.rst>`_):
-
-.. code-block:: yaml
-
-   general:
-     # meshDir should contain the full path to the mesh directory
-     # as generated by the python script `meshing/create_single_mesh.py`
-     # we assume the input file is in the same location as mesh dir
-     meshDir: ./mesh200x1000
-     dt: 0.25
-     finalTime: 2000.0
-     checkNumericalDispersion: true
-     checkCfl: true
-
-   io:
-     snapshotMatrix:
-       binary: true
-       velocity: {freq: 100, fileName: snaps_vp}
-       stress:   {freq: 100, fileName: snaps_sp}
-     seismogram:
-       binary: false
-       freq: 4
-       receivers: [5,30,55,80,105,130,155,175]
-
-   source:
-     signal:
-       kind: ricker
-       depth: 640.0  # km
-       period: 65.0  # seconds
-       delay: 180.0  # seconds
-
-   material:
-     kind: prem
-
-You can get the input file as:
-
-.. code-block:: bash
-
-   cp ${ESWSRCDIR}/demos/fom_rank1/input.yaml ${MYRUNDIR}
-
-Run the simulation
-------------------
-
-
-.. code-block:: bash
-
-   cd ${MYRUNDIR}
-   ln -s ${SHAWEXEDIR}/shawExe .
-
-   # if you use OpenMP build, remember to set
-   # OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread
-   ./shawExe input.yaml
-
-
-Simulation data
----------------
-
-The demo should generate inside ``${MYRUNDIR}`` the following:
-
-.. code-block:: bash
-
-   coords_sp.txt #: coordinates of the velocity grid points
-   coords_vp.txt #: coordinates of the stresses grid points
-   seismogram_0  #: seismogram at the receiver locations set in input.yaml
-   snaps_vp_0    #: snapshot matrix for the velocity
-   snaps_sp_0    #: snapshot matrix for the stresses
-
-
-Post-process data
------------------
-
-To post-process the data, you can use the Python scripts created for this demo:
-
-.. code-block:: bash
-
-   cp ${ESWSRCDIR}/demos/fom_rank1/*.py ${MYRUNDIR}
-
-
-First, we visualize the seismogram data by doing:
-
-.. code-block:: bash
-
-   cd ${MYRUNDIR}
-   python plotSeismogram.py
-
-
-.. image:: ../img/demo1_f1.png
-
-
-Then, we can extract and visualize the full wavefield at ``t=250, 1000, 2000`` (seconds):
-
-.. code-block:: bash
-
-   cd ${MYRUNDIR}
-   ln -s ${SHAWEXEDIR}/extractStateFromSnaps .
-
-   # extract from the velocity snapshots the velocity field at specific timesteps:
-   # since we use ``dt = 0.25`` seconds, our tartgets ``t=250, 1000, 2000``,
-   # correspond to *time steps* 1000, 4000, 8000
-   ./extractStateFromSnaps --snaps=./snaps_vp_0 binary --fsize=1 \
-     --outformat=ascii --timesteps=1000 4000 8000  --samplingfreq=100 --outfileappend=vp
-
-   python plotWavefield.py
-
-
-.. image:: ../img/demo1_f2.png
-   :width: 30%
-.. image:: ../img/demo1_f3.png
-   :width: 28%
-.. image:: ../img/demo1_f4.png
-   :width: 30%
diff --git a/docs/src/demos.rst b/docs/src/demos.rst
index 462b35fb..967e4b84 100644
--- a/docs/src/demos.rst
+++ b/docs/src/demos.rst
@@ -6,6 +6,6 @@ End-to-end Demos
 .. toctree::
     :maxdepth: 2
 
-    demo_rank1fom
-    demo_rank1fommulti
-    demo_rank2fom
\ No newline at end of file
+    demo1
+    demo2
+    demo3
diff --git a/docs/src/goveq.rst b/docs/src/goveq.rst
index f1ca6d98..2a47616c 100644
--- a/docs/src/goveq.rst
+++ b/docs/src/goveq.rst
@@ -1,7 +1,7 @@
 Governing equations
 ===================
 
-This code simulates the evolution of elastic seismic shear waves in an axisymmetric domain.`
+This code simulates the evolution of elastic seismic shear waves in an axisymmetric domain.
 
 Assuming the target body/planet (e.g. Earth) can be approximated as a sphere,
 we adopt a spherical coordinate system as shown in the figure below:
@@ -11,10 +11,10 @@ we adopt a spherical coordinate system as shown in the figure below:
   :align: center
   :alt: Alternative text
 
-
-In the axisymmetric approximation, one assumes that fields/quantities
-do not vary along :math:`\phi`, implying that all the derivatives
-with respect to :math:`\phi` can be dropped.
+.. Important::
+   In the axisymmetric approximation, one assumes that fields/quantities
+   do not vary along :math:`\phi`, implying that all the derivatives
+   with respect to :math:`\phi` can be dropped.
 
 With this assumption, the set of equations governing the time evolution
 of elastic waves in the velocity-stress formulation can be written as:
@@ -44,20 +44,22 @@ where:
 
 - :math:`t` represents time
 
-- :math:`r \in [0, r_{surface}]` is the radial distance from origin to surface of the body
+- :math:`r \in [0, r_{surface}]` is the radial distance from origin to the surface of the body
 
 - :math:`\theta \in [0, \pi]` is the polar angle
 
 - :math:`\rho(r, \theta)` is the density
 
-- :math:`v(r, \theta, t)` is the velocity (for simplicity we drop the subscript, but it is intended to be the :math:`v_{\phi}` velocity component)
+- :math:`v(r, \theta, t)` is the velocity (for simplicity we drop the subscript,
+  but it is intended to be the :math:`v_{\phi}` velocity component)
 
-- :math:`\sigma_{r\phi}(r, \theta, t)` and :math:`\sigma_{\theta\phi}(r, \theta, t)` are the two components of the stress tensor remaining after the axisymmetric approximation
+- :math:`\sigma_{r\phi}(r, \theta, t)` and :math:`\sigma_{\theta\phi}(r, \theta, t)`
+  are the two components of the stress tensor remaining after the axisymmetric approximation
 
 - :math:`f(r, \theta,t)` is the forcing term
 
-- :math:`G(r, \theta) = v_s^2(r, \theta) \rho(r, \theta)` is the shear modulus and :math:`v_s` being the shear wave velocity.
-
+- :math:`G(r, \theta) = v_s^2(r, \theta) \rho(r, \theta)` is the shear modulus,
+  with :math:`v_s` being the shear wave velocity.
 
 
 In practice, the axisymmetric approximation means that one solves the
@@ -66,8 +68,11 @@ Such a formulation is referred to as 2.5-dimensional because it involves
 a 2-dimensional spatial domain (a circular sector of the Earth)
 but models point sources with correct 3-dimensional spreading {cite}.
 
-Note that we assume both the density and shear modulus to only depend on the spatial coordinates.
+.. Note::
+   We assume both the density and shear modulus to only depend on the spatial coordinates.
+
 
+.. _discretization:
 
 Discretization
 ==============
@@ -97,7 +102,7 @@ We remark that, differently than (cite), we do not rely on ghost
 points to impose boundary conditions, but account for the boundary
 conditions directly when assembling the system matrix.
 
-As an example, the figure below shows the grid when modeling the Earth: the computational
+The figure below shows the grid when modeling the Earth: the computational
 domain extends from the surface to the core-mantle boundary, excluding the liquid core.
 
 .. figure:: ../img/mesh.png
diff --git a/docs/src/index.rst b/docs/src/index.rst
index b21474c6..f892be39 100644
--- a/docs/src/index.rst
+++ b/docs/src/index.rst
@@ -51,9 +51,9 @@ Highlights and features
 *  The code relies on the `Kokkos programming model <https://github.com/kokkos>`_
    for performance portability
 
-*  We use the :doc:`velocity-stress formulation in an axi-symmetric domain <goveq>`
+*  :doc:`Velocity-stress formulation in an axi-symmetric domain <goveq>`
 
-*  We currently support the following material models:
+*  Support for the following material models:
 
    - :ref:`single layer model <singlelayerdescription>`
 
@@ -66,22 +66,22 @@ Highlights and features
    These are 1D models because they only depend on the radial distance.
    The modularity of the code allows one to easily add new models
 
-*  If you want to simulate the wave dynamics in another
-   planet/axisymmetric body, all you have to do is to create
-   a mesh suitable for that planet, and a suitable material model
+*  Simulating the dynamics for another planet/axisymmetric body is relatively easy:
+   you only have to create a mesh suitable for that body, and a suitable material model
 
 *  The code implements what we refer to as "rank-1" and "rank-2" formulations:
 
    *  *rank-1*:
 
-      * the discrete state and forcing term are stored as 1D arrays
+      * discrete state and forcing are stored as 1D arrays
 
-      * this is used to simulate the wave dynamics due to a *single forcing term*
+      * this is useful to simulate the wave dynamics due to a *single forcing term*
+
+      * :doc:`See the demo! <demo1>`
 
    *  *rank-2*:
 
-      * the discrete state and forcing term are stored
-	using rank-2 tensors (i.e. matrices)
+      * discrete state and forcing are stored using rank-2 tensors (i.e. matrices)
 
       * this is useful to *simultaneously* solve the wave
 	dynamics for *multiple forcing realizations* (e.g. multiple
@@ -90,6 +90,7 @@ Highlights and features
 	it has higher computational intensity, thus benefiting
 	efficient ensemble propagation
 
+      * :doc:`See the demo! <demo3>`
 
 How to cite
 -----------
@@ -122,6 +123,7 @@ Contents
     build_stepbystep
     inputfile
     demos
+    performance
     GitHub Repo <https://github.com/Pressio/SHAW>
     Open an issue/feature req. <https://github.com/Pressio/SHAW/issues>
     license
diff --git a/docs/src/performance.rst b/docs/src/performance.rst
new file mode 100644
index 00000000..53929573
--- /dev/null
+++ b/docs/src/performance.rst
@@ -0,0 +1,26 @@
+
+Performance
+===========
+
+The following plot shows performance results obtained on a workstation
+with two 18-core Intel(R) Xeon(R) Gold 6154 CPU @ 3.00 GHz,
+each with a 24.75MB L3 cache and 125GB total memory.
+We enable hyperthreading, thus supporting a maximum of 36 logical threads per CPU,
+so a total of 72 threads. We use GCC-8.3.1 and rely on kokkos
+and kokkos-kernels version 3.1.01.
+We use Blis-0.7.0 as the kokkos-kernels’ backend for all dense operations.
+We use the OpenMP backend for Kokkos.
+
+|
+
+.. figure:: ../img/fom_cpu_ave.png
+   :align: center
+   :width: 95%
+
+   M represents how many trajectories we are computing simultaneously:
+   when M=1, this is what we refer to as the rank-1 formulation,
+   while M>=2 corresponds to what we refer to as the rank-2 formulation;
+   N is the *total* number of dofs (velocities plus stresses) for the problem.
+
+
+todo: put link to script to run performance test
diff --git a/python_scripts/constants.py b/python_scripts_revise/constants.py
similarity index 100%
rename from python_scripts/constants.py
rename to python_scripts_revise/constants.py
diff --git a/python_scripts/create_run_directory.py b/python_scripts_revise/create_run_directory.py
similarity index 100%
rename from python_scripts/create_run_directory.py
rename to python_scripts_revise/create_run_directory.py
diff --git a/python_scripts/log_file_extractor.py b/python_scripts_revise/log_file_extractor.py
similarity index 100%
rename from python_scripts/log_file_extractor.py
rename to python_scripts_revise/log_file_extractor.py
diff --git a/python_scripts_revise/plot_fom_scaling.py b/python_scripts_revise/plot_fom_scaling.py
new file mode 100644
index 00000000..96e3316f
--- /dev/null
+++ b/python_scripts_revise/plot_fom_scaling.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+
+import sys, os, time, yaml
+import pprint as pp
+import subprocess, math
+import numpy as np
+import os.path, re
+from scipy import stats
+from argparse import ArgumentParser
+import matplotlib.pyplot as plt
+
+np.set_printoptions(edgeitems=10, linewidth=100000)
+
+# peak BW for mrstem (GB) = 21333.33 * 4 at time of doing these runs
+peakMemBW = 85.  # only referenced by the commented-out peak line in plotBar
+
+myAlpha = .9  # opacity (alpha) applied to every bar
+
+# num of dofs for each case for plotting
+#785152, 3143168, 12577792, 50321408
+meshLabelsPlot = [r'$0.78~\cdot~10^6$', 
+                  r'$3~\cdot~10^6$', 
+                  r'$12~\cdot~10^6$', 
+                  r'$50~\cdot~10^6$']
+
+nThreads = [2, 8, 36]  # thread counts selected from the data file and plotted
+colors = {2:'#009286', 8:'#ff9e11', 36:'#cd4d84'}  # bar color per thread count
+
+fSizes = [1,4,16,48]  # values of the second data column (M on the x axis) that are plotted
+
+#=====================================================================
+def computeMetricValue(lineData, currValueF, metric, stat):
+  """Return the entry of one data row that holds `metric` for `stat`.
+
+  Columns: mem -> 4..6, cpu -> 7..9, itertime -> 10..12, each ordered as
+  (ave, min, max); looptime -> 13 regardless of `stat`.
+  `currValueF` is unused and only kept so that existing calls still work.
+  Raises ValueError on an unknown metric or stat, so that a typo fails
+  here rather than as a None bar height later.
+  """
+  if metric == "looptime":
+    return lineData[13]
+
+  firstCol = {"mem": 4, "cpu": 7, "itertime": 10}
+  statOffset = {"ave": 0, "min": 1, "max": 2}
+  if metric not in firstCol or stat not in statOffset:
+    raise ValueError("unsupported metric/stat: %s/%s" % (metric, stat))
+  # the three statistics of a metric sit in consecutive columns
+  return lineData[firstCol[metric] + statOffset[stat]]
+
+#=====================================================================
+def createDataDic(data, metric, stat):
+  """Group the selected metric by thread count and by second-column value.
+
+  Returns {thread count: {second-column value: [metric values, in row order]}},
+  keeping only rows whose first column is in nThreads and whose second is in fSizes.
+  """
+  byThreads = {}
+  for numThr in nThreads:
+    perSize = {}
+    for row in data:
+      rowThr, rowSize = int(row[0]), int(row[1])
+      if rowThr != numThr or rowSize not in fSizes:
+        continue
+      perSize.setdefault(rowSize, []).append(computeMetricValue(row, rowSize, metric, stat))
+    byThreads[numThr] = perSize
+  return byThreads
+
+#=====================================================================
+def plotBarSet(ax, xLoc, width, f, dic, myColor):
+  # one bar per entry of xLoc, heights taken from dic[f], drawn with zorder=5
+  ax.bar(xLoc, dic[f], width, alpha=myAlpha, color=myColor, edgecolor='none', zorder=5)
+
+#=====================================================================
+def plotBar(dataDic, meshLabels, nThreads, metric, stat):
+  """Draw the grouped bar chart for `metric`, save it as a PNG and show it.
+
+  dataDic: {thread count: {M: [one value per mesh]}}; meshLabels: labels of the
+  mesh groups (at most four, one per offset in `gigi`); nThreads: legend entries.
+  """
+  numMeshes = len(meshLabels)
+  pos = list(range(numMeshes))
+  width = 0.45
+  plt.rc('axes', axisbelow=True)
+
+  fig, ax = plt.subplots(figsize=(9,6))
+  plt.grid()
+  ax2 = ax.twiny()
+  fig.subplots_adjust(bottom=0.25)
+
+  # horizontal offset of each mesh group
+  gigi = [0.25, 6.5, 12.75, 19.]
+
+  xTicksBars, xTlabels = [], []
+  count=0
+  for k,v in dataDic.items():
+    for i,f in enumerate(fSizes):
+      # x locations of the bars; the mesh index is named m so it cannot shadow k
+      shift = width*i*3.5
+      xLoc = [p+shift+0.455*count+gigi[m] for m,p in enumerate(pos)]
+      plotBarSet(ax, xLoc, width, f, v, colors[k])
+      xTicksBars += [p+shift+0.475+gigi[m] for m,p in enumerate(pos)]
+      xTlabels += [str(f)]*numMeshes
+    count+=1
+
+  # bars at x=100, outside the x-limits set below: they only feed the legend
+  for nt in nThreads:
+    ax.bar(100, 1, width, alpha=myAlpha, color=colors[nt],
+           edgecolor='none', zorder=-1, label='threads='+str(nt))
+
+  l = ax.legend(loc="upper center", ncol=5, fontsize=13, frameon=False)
+  for text in l.get_texts():
+    text.set_color("gray")
+
+  # remove the vertical lines of the grid
+  ax.xaxis.grid(which="major", color='None', linestyle='-.', linewidth=0, zorder=0)
+
+  ax.xaxis.set_ticks_position('bottom')
+  ax.xaxis.set_label_position('bottom')
+  ax.set_xticks(xTicksBars)
+  ax.set_xticklabels(xTlabels, fontsize=15, color='gray')
+  ax.xaxis.set_tick_params(rotation=0)
+
+  ax.set_xlabel('Number of simultaneous trajectories (M)', fontsize=16, color='gray')
+  ax.set_xlim(min(pos)-0.2, max(pos)+width*56)
+
+  if metric =="mem":
+    ax.set_yscale('log')
+    ax.set_ylabel("Memory Bandwidth (GB/s)", fontsize=18)
+    ax.set_ylim([1e-1, 1000])
+    ax.set_yticks([1e-1, 1, 10, 100, 1000])
+    ax.tick_params(axis='y', which='major', labelsize=15, color='gray')
+    ax.tick_params(axis='y', which='minor', labelsize=13, color='gray')
+
+    # # plot peak theoretical mem BW
+    # ax.plot([min(pos)-0.2, max(pos)+width*70],
+    #         [peakMemBW, peakMemBW], '--k', linewidth=1.2, zorder=7)
+    # ax.text((min(pos)+width+max(pos)+width*75)*0.45,
+    #         peakMemBW+12, 'Machine\'s theoretical peak', fontsize=15)
+
+  elif metric=='cpu':
+    ax.set_yscale('log')
+    ax.set_ylabel("GFlops", fontsize=18, color='gray')
+    ax.set_ylim([1e-1, 1e4])
+    ax.set_yticks([1e-1, 1, 10, 1e2, 1e3, 1e4])
+    ax.tick_params(axis='y', which='major', labelsize=15, color='gray')
+    ax.tick_params(axis='y', which='minor', labelsize=13, color='gray')
+
+  elif metric =="itertime":
+    prefixes = {'ave': 'Average', 'min': 'Min', 'max': 'Max'}
+    if stat not in prefixes:
+      raise ValueError("stat must be ave, min or max, got: " + str(stat))
+    ax.set_yscale('log')
+    ax.set_ylim([1e-1, 1e4])
+    ax.tick_params(axis='y', which='major', labelsize=15, color='gray')
+    ax.tick_params(axis='y', which='minor', labelsize=13, color='gray')
+    ax.set_ylabel(prefixes[stat]+" time (ms)/timestep", fontsize=18, color='gray')
+
+  # ticks for the meshes
+  meshTicks = [3.5, 11., 18.75, 26.35]
+  ax2.set_xticks(meshTicks)
+  ax2.xaxis.set_ticks_position('bottom')
+  ax2.xaxis.set_label_position('bottom')
+  ax2.spines['bottom'].set_position(('outward', 65))
+  ax2.set_xlabel('Total degrees of freedom (N)', fontsize=16, color='gray')
+  ax2.set_xticklabels(meshLabels, fontsize=16, color='gray')
+  ax2.set_xlim(min(pos), max(pos)+width*60)
+  ax2.set_axisbelow(True)
+
+  ax.tick_params(axis='y', colors='gray')
+  ax.xaxis.label.set_color('gray')
+  ax.yaxis.label.set_color('gray')
+  ax2.xaxis.label.set_color('gray')
+  ax2.yaxis.label.set_color('gray')
+
+  plt.tight_layout()
+  os.makedirs('./plots', exist_ok=True)  # savefig does not create it
+  fileName = "fom_"+metric+"_"+stat+".png"
+  fig.savefig('./plots/'+fileName, format="png", bbox_inches='tight',
+    dpi=300, transparent=True)
+  plt.show()
+
+#=====================================================================
+def main(dataFile, metric, stat):
+  """Load the table in dataFile, print the grouped data and plot it."""
+  table = np.loadtxt(dataFile)
+  grouped = createDataDic(table, metric, stat)
+  pp.pprint(grouped)
+
+  plotBar(grouped, meshLabelsPlot, nThreads, metric, stat)
+  plt.show()
+
+
+#////////////////////////////////////////////
+if __name__== "__main__":
+#////////////////////////////////////////////
+  parser = ArgumentParser()
+  # parser.add_argument("-file", "--file",
+  #                     dest="dataFile",
+  #                     help="where to get data from\n")
+
+  # `choices` makes argparse reject a bad value with a usage message, which
+  # also holds under `python -O` where assert statements are skipped
+  parser.add_argument("-metric", "--metric",
+                      dest="metric", default="mem",
+                      choices=['mem', 'cpu', 'itertime'],
+                      help="Choices: mem, cpu, itertime \n")
+  parser.add_argument("-stat", "--stat",
+                      dest="stat", default="ave",
+                      choices=['ave', 'min', 'max'],
+                      help="ave, min or max\n")
+  args = parser.parse_args()
+  main('./data/fom_scaling_final.txt', args.metric, args.stat)
+
+#////////////////////////////////////////////
diff --git a/python_scripts/unused/memory_constraints.py b/python_scripts_revise/unused/memory_constraints.py
similarity index 100%
rename from python_scripts/unused/memory_constraints.py
rename to python_scripts_revise/unused/memory_constraints.py
diff --git a/python_scripts/utils.py b/python_scripts_revise/utils.py
similarity index 100%
rename from python_scripts/utils.py
rename to python_scripts_revise/utils.py
diff --git a/python_scripts/wf_scaling.py b/python_scripts_revise/wf_fom_scaling.py
similarity index 99%
rename from python_scripts/wf_scaling.py
rename to python_scripts_revise/wf_fom_scaling.py
index bcf6fbae..64f3b1a8 100755
--- a/python_scripts/wf_scaling.py
+++ b/python_scripts_revise/wf_fom_scaling.py
@@ -15,7 +15,9 @@
 def createBaseDic():
   baseDic = {
     'general' :
-    {'meshDir': "empty", 'dt': 0.05, 'finalTime': 50.,
+    {'meshDir': "empty", 
+     'dt': 0.05, 
+     'finalTime': 50.,
      'checkNumericalDispersion': True,
      'checkCfl': True,
      'includeMatPropInJacobian': True,