diff --git a/CMakeLists.txt b/CMakeLists.txt index e34c2051..4fcb09ef 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,21 +26,26 @@ include_directories(${YAML_CPP_INCLUDE_DIR}) find_package(KokkosKernels REQUIRED) link_libraries(dl ${YAML_CPP_LIBRARIES} Kokkos::kokkoskernels) -# Kokkos::BLAS Kokkos::LAPACK) -set(exeName1 shawExe) -add_executable(${exeName1} +# executables +add_executable( + shawExe ${CMAKE_CURRENT_SOURCE_DIR}/src/kokkos/main_fom.cc) -set(exeName2 extractStateFromSnaps) -add_executable(${exeName2} +add_executable( + extractStateFromSnaps ${CMAKE_CURRENT_SOURCE_DIR}/src/tools/main_extract_state_from_snaps.cc) +# tests enable_testing() add_subdirectory(tests) +# --------------------- +# REMOVE at some point +# --------------------- +# Kokkos::BLAS Kokkos::LAPACK) #include_directories(${EIGEN_INCLUDE_DIR} ${YAMLCPP_INCLUDE_DIR} ${KOKKOS_INCLUDE_DIR}) #link_directories (${BLAS_LIB_DIR} ${YAMLCPP_LIB_DIR}) #link_libraries (dl ${OMPLINK} yaml-cpp Kokkos::kokkoskernels ${BLASLIBNAME} gfortran) diff --git a/demos/fom_rank1/input.yaml b/demos/demo1/input.yaml similarity index 100% rename from demos/fom_rank1/input.yaml rename to demos/demo1/input.yaml diff --git a/demos/fom_rank1/plotSeismogram.py b/demos/demo1/plotSeismogram.py similarity index 90% rename from demos/fom_rank1/plotSeismogram.py rename to demos/demo1/plotSeismogram.py index 6b171fb5..e7c8f8bf 100644 --- a/demos/fom_rank1/plotSeismogram.py +++ b/demos/demo1/plotSeismogram.py @@ -8,19 +8,19 @@ def doPlot(panelId, t, data, key): plt.subplot(panelId) plt.grid('on') - plt.plot(t, data[key], '-o', color='m', + plt.plot(t, data[key], '-o', color='r', markerfacecolor='none', - markersize=3, linewidth=1, + markersize=0, linewidth=2, label="Receiver at " + key+'\u00b0') lg = plt.legend(loc="upper right", ncol=1, fontsize=15, labelspacing=.3, handletextpad=0.2, frameon=False, markerscale=0.75) - plt.setp(lg.get_texts(), color='w') + plt.setp(lg.get_texts(), color='gray') plt.xlim([-50, 2050]) - 
plt.xticks(np.linspace(0, 2000, 6), color='w') + plt.xticks(np.linspace(0, 2000, 6), color='gray') plt.ylim([-1.6e-6, 1.6e-6]) ylab = r'$v_{\phi}(t)$' @@ -28,7 +28,7 @@ def doPlot(panelId, t, data, key): plt.xlabel(r'Time (seconds)', fontsize=15) ax = plt.gca() - mycolor = 'w' + mycolor = 'gray' ax.xaxis.label.set_color(mycolor); ax.tick_params(axis='x', colors=mycolor) ax.yaxis.label.set_color(mycolor); diff --git a/demos/fom_rank1/plotWavefield.py b/demos/demo1/plotWavefield.py similarity index 100% rename from demos/fom_rank1/plotWavefield.py rename to demos/demo1/plotWavefield.py diff --git a/demos/fom_rank1_sample_depth/input.yaml b/demos/demo2/input.yaml similarity index 100% rename from demos/fom_rank1_sample_depth/input.yaml rename to demos/demo2/input.yaml diff --git a/demos/fom_rank1_sample_depth/plotSeismogram.py b/demos/demo2/plotSeismogram.py similarity index 88% rename from demos/fom_rank1_sample_depth/plotSeismogram.py rename to demos/demo2/plotSeismogram.py index eb8b6112..5fb3a3b5 100644 --- a/demos/fom_rank1_sample_depth/plotSeismogram.py +++ b/demos/demo2/plotSeismogram.py @@ -24,36 +24,36 @@ def doPlot(panelId, t, data, angle, depths): d3 = data['d3'][row, :] plt.title("Seismogram for receiver at " + angle+'\u00b0', - fontsize=15, color='w') + fontsize=15, color='gray') plt.plot(t, d0, '-o', color='m', markerfacecolor='none', - markersize=1, linewidth=1.8, + markersize=0, linewidth=1.8, label='With source depth='+depths[0]+' km') plt.plot(t, d1, '-s', color='c', markerfacecolor='none', - markersize=1, linewidth=1.8, + markersize=0, linewidth=1.8, label='With source depth='+depths[1]+' km') plt.plot(t, d2, '-*', color='r', markerfacecolor='none', - markersize=1, linewidth=1.8, + markersize=0, linewidth=1.8, label='With source depth='+depths[2]+' km') plt.plot(t, d3, '-v', color='y', markerfacecolor='none', - markersize=1, linewidth=1.8, + markersize=0, linewidth=1.8, label='With source depth='+depths[3]+' km') lg = plt.legend(loc="upper right", 
ncol=1, fontsize=12, labelspacing=.3, handletextpad=0.2, frameon=False, markerscale=0.75) - plt.setp(lg.get_texts(), color='w') + plt.setp(lg.get_texts(), color='gray') plt.xlim([-50, 2050]) - plt.xticks(np.linspace(0, 2000, 6), color='w') + plt.xticks(np.linspace(0, 2000, 6), color='gray') plt.ylim([-2.5e-6, 2.5e-6]) ylab = r'$v_{\phi}(t)$' @@ -61,7 +61,7 @@ def doPlot(panelId, t, data, angle, depths): plt.xlabel(r'Time (seconds)', fontsize=15) ax = plt.gca() - mycolor = 'w' + mycolor = 'gray' ax.xaxis.label.set_color(mycolor); ax.tick_params(axis='x', colors=mycolor) ax.yaxis.label.set_color(mycolor); diff --git a/demos/fom_rank1_sample_depth/plotWavefield.py b/demos/demo2/plotWavefield.py similarity index 93% rename from demos/fom_rank1_sample_depth/plotWavefield.py rename to demos/demo2/plotWavefield.py index 85395dc8..ec0bdf57 100644 --- a/demos/fom_rank1_sample_depth/plotWavefield.py +++ b/demos/demo2/plotWavefield.py @@ -35,7 +35,7 @@ def doPlot(th, r, z, figID, bd, outName, title, plotSource=False): fig1 = plt.figure(figID) ax1 = fig1.add_subplot(111, projection='polar') - h1=ax1.pcolormesh(th, r, z, cmap=cm1, shading = "flat", + h1=ax1.pcolormesh(th, r, z, cmap=cm1, shading = "auto", vmin=bd[0], vmax=bd[1], zorder=1) ax1.set_ylim([cmbRadius, earthRadius]) ax1.set_yticks([]) #[3480, 5701, 6371]) @@ -47,7 +47,7 @@ def doPlot(th, r, z, figID, bd, outName, title, plotSource=False): r'$\pi/2$', r'$2\pi/6$', r'$\pi/6$', r'$0$'], fontsize=11) - ax1.set_title(title, fontsize=15, color='w') + ax1.set_title(title, fontsize=15, color='gray') ax1.set_rorigin(-1) plotEarthSurf(ax1) plotCMB(ax1) @@ -57,9 +57,9 @@ def doPlot(th, r, z, figID, bd, outName, title, plotSource=False): if plotSource: sourceRadius = earthRadius-640. 
#[km] c = ax1.scatter(np.pi/2.01, sourceRadius, c='r', s=15) - ax1.text(np.pi/2.01, sourceRadius, "Source", horizontalalignment='center', verticalalignment='top', color='w') + ax1.text(np.pi/2.01, sourceRadius, "Source", horizontalalignment='center', verticalalignment='top', color='gray') - mycolor = 'w' + mycolor = 'gray' ax1.xaxis.label.set_color(mycolor); ax1.tick_params(axis='x', colors=mycolor) ax1.yaxis.label.set_color(mycolor); diff --git a/demos/fom_rank2_sample_depth/input.yaml b/demos/demo3/input.yaml similarity index 100% rename from demos/fom_rank2_sample_depth/input.yaml rename to demos/demo3/input.yaml diff --git a/demos/fom_rank2_sample_depth/plotSeismogram.py b/demos/demo3/plotSeismogram.py similarity index 95% rename from demos/fom_rank2_sample_depth/plotSeismogram.py rename to demos/demo3/plotSeismogram.py index 0533ddda..2d7781be 100644 --- a/demos/fom_rank2_sample_depth/plotSeismogram.py +++ b/demos/demo3/plotSeismogram.py @@ -22,7 +22,7 @@ def doPlot(panelId, t, data, angle, depths): d3 = data['d3'][row, :] plt.title("Seismogram for receiver at " + angle+'\u00b0', - fontsize=15, color='w') + fontsize=15, color='gray') plt.plot(t, d0, '-o', color='m', markerfacecolor='none', @@ -48,10 +48,10 @@ def doPlot(panelId, t, data, angle, depths): ncol=1, fontsize=12, labelspacing=.3, handletextpad=0.2, frameon=False, markerscale=0.75) - plt.setp(lg.get_texts(), color='w') + plt.setp(lg.get_texts(), color='gray') plt.xlim([-50, 2050]) - plt.xticks(np.linspace(0, 2000, 6), color='w') + plt.xticks(np.linspace(0, 2000, 6), color='gray') plt.ylim([-2.5e-6, 2.5e-6]) ylab = r'$v_{\phi}(t)$' @@ -59,7 +59,7 @@ def doPlot(panelId, t, data, angle, depths): plt.xlabel(r'Time (seconds)', fontsize=15) ax = plt.gca() - mycolor = 'w' + mycolor = 'gray' ax.xaxis.label.set_color(mycolor); ax.tick_params(axis='x', colors=mycolor) ax.yaxis.label.set_color(mycolor); diff --git a/docs/.doctrees/build_expert.doctree b/docs/.doctrees/build_expert.doctree index 
1d7082d7..00c628cb 100644 Binary files a/docs/.doctrees/build_expert.doctree and b/docs/.doctrees/build_expert.doctree differ diff --git a/docs/.doctrees/build_stepbystep.doctree b/docs/.doctrees/build_stepbystep.doctree index 372f4990..b74d0b0d 100644 Binary files a/docs/.doctrees/build_stepbystep.doctree and b/docs/.doctrees/build_stepbystep.doctree differ diff --git a/docs/.doctrees/demo1.doctree b/docs/.doctrees/demo1.doctree new file mode 100644 index 00000000..236f18a2 Binary files /dev/null and b/docs/.doctrees/demo1.doctree differ diff --git a/docs/.doctrees/demo2.doctree b/docs/.doctrees/demo2.doctree new file mode 100644 index 00000000..7ec46dd0 Binary files /dev/null and b/docs/.doctrees/demo2.doctree differ diff --git a/docs/.doctrees/demo3.doctree b/docs/.doctrees/demo3.doctree new file mode 100644 index 00000000..857cedcd Binary files /dev/null and b/docs/.doctrees/demo3.doctree differ diff --git a/docs/.doctrees/demo_rank1fom.doctree b/docs/.doctrees/demo_rank1fom.doctree index dd3a48bb..2653b94f 100644 Binary files a/docs/.doctrees/demo_rank1fom.doctree and b/docs/.doctrees/demo_rank1fom.doctree differ diff --git a/docs/.doctrees/demo_rank1fommulti.doctree b/docs/.doctrees/demo_rank1fommulti.doctree index 56990079..2054867b 100644 Binary files a/docs/.doctrees/demo_rank1fommulti.doctree and b/docs/.doctrees/demo_rank1fommulti.doctree differ diff --git a/docs/.doctrees/demo_rank2fom.doctree b/docs/.doctrees/demo_rank2fom.doctree index d78ec261..ffa05e37 100644 Binary files a/docs/.doctrees/demo_rank2fom.doctree and b/docs/.doctrees/demo_rank2fom.doctree differ diff --git a/docs/.doctrees/demos.doctree b/docs/.doctrees/demos.doctree index c1d1055c..e2aa5885 100644 Binary files a/docs/.doctrees/demos.doctree and b/docs/.doctrees/demos.doctree differ diff --git a/docs/.doctrees/environment.pickle b/docs/.doctrees/environment.pickle index a29fc153..4ac7e0a3 100644 Binary files a/docs/.doctrees/environment.pickle and b/docs/.doctrees/environment.pickle 
differ diff --git a/docs/.doctrees/goveq.doctree b/docs/.doctrees/goveq.doctree index 8548c276..4c51f9e8 100644 Binary files a/docs/.doctrees/goveq.doctree and b/docs/.doctrees/goveq.doctree differ diff --git a/docs/.doctrees/index.doctree b/docs/.doctrees/index.doctree index 1a24cfe1..3952622e 100644 Binary files a/docs/.doctrees/index.doctree and b/docs/.doctrees/index.doctree differ diff --git a/docs/.doctrees/performance.doctree b/docs/.doctrees/performance.doctree new file mode 100644 index 00000000..e33b72ef Binary files /dev/null and b/docs/.doctrees/performance.doctree differ diff --git a/docs/_images/demo1_f1.png b/docs/_images/demo1_f1.png index aff98f30..4f1db28a 100644 Binary files a/docs/_images/demo1_f1.png and b/docs/_images/demo1_f1.png differ diff --git a/docs/_images/demo1_f2.png b/docs/_images/demo1_f2.png index 4f97830e..589733df 100644 Binary files a/docs/_images/demo1_f2.png and b/docs/_images/demo1_f2.png differ diff --git a/docs/_images/demo1_f3.png b/docs/_images/demo1_f3.png index 89a2c759..f08bd1d8 100644 Binary files a/docs/_images/demo1_f3.png and b/docs/_images/demo1_f3.png differ diff --git a/docs/_images/demo1_f4.png b/docs/_images/demo1_f4.png index 99943b4b..efd09468 100644 Binary files a/docs/_images/demo1_f4.png and b/docs/_images/demo1_f4.png differ diff --git a/docs/_images/demo2_f1.png b/docs/_images/demo2_f1.png index 6b5a0f75..3712e5b7 100644 Binary files a/docs/_images/demo2_f1.png and b/docs/_images/demo2_f1.png differ diff --git a/docs/_images/demo2_f2.png b/docs/_images/demo2_f2.png index 8c4f32b5..d46e710b 100644 Binary files a/docs/_images/demo2_f2.png and b/docs/_images/demo2_f2.png differ diff --git a/docs/_images/demo2_f3.png b/docs/_images/demo2_f3.png index a8ec7d43..baef6aa5 100644 Binary files a/docs/_images/demo2_f3.png and b/docs/_images/demo2_f3.png differ diff --git a/docs/_images/demo3_f1.png b/docs/_images/demo3_f1.png index eb24f6d1..2a2efe60 100644 Binary files a/docs/_images/demo3_f1.png and 
b/docs/_images/demo3_f1.png differ diff --git a/docs/_images/fom_cpu_ave.png b/docs/_images/fom_cpu_ave.png new file mode 100644 index 00000000..6f5b740b Binary files /dev/null and b/docs/_images/fom_cpu_ave.png differ diff --git a/docs/_sources/build_expert.rst.txt b/docs/_sources/build_expert.rst.txt index 8852ea28..5d37521b 100644 --- a/docs/_sources/build_expert.rst.txt +++ b/docs/_sources/build_expert.rst.txt @@ -1,11 +1,12 @@ Building: "expert" mode ======================= -You need: +Prerequisites +------------- * This repo: ``git clone https://github.com/Pressio/SHAW`` -* C++14 compiler: we have tested this with GCC 8.3.1 and GCC 8.4.0 +* C++14 compiler: we have tested this with GCC 8.3.1, GCC 8.4.0, GCC 10.2.0. * ``CMake>=3.16.0`` @@ -17,20 +18,19 @@ You need: * `yaml-cpp `_: last tested version ``0.7.0`` -Then, as usual for a typical CMake project, you can do: +Build +----- .. code-block:: shell - export CXX= - export SHAWDIR= - - mkdir build && cd build - cmake \ - -DKokkosKernels_DIR=/lib/cmake/KokkosKernels/ \ - -Dyaml-cpp_DIR=/share/cmake/ \ - ${SHAWDIR} + -DCMAKE_CXX_COMPILER= \ + -DKokkosKernels_DIR=/lib/cmake/KokkosKernels/ \ + -Dyaml-cpp_DIR=/share/cmake/ \ + -B \ + -S + # from within your build dir make -j4 # running the tests is advised diff --git a/docs/_sources/build_stepbystep.rst.txt b/docs/_sources/build_stepbystep.rst.txt index 3e081255..aa66dceb 100644 --- a/docs/_sources/build_stepbystep.rst.txt +++ b/docs/_sources/build_stepbystep.rst.txt @@ -5,8 +5,8 @@ If you are reading this page, it likely is because you want a simplified (automated) way the get this done, so that you can minimize the extra effort in building the TPLs needed and the SHAW code. -This page tries to address this: it provides a step-by-step -guide that leverages some scripts we have prepared to simplify this. +This page tries to do so providing a step-by-step +guide and some scripts we have prepared. 
Prerequisites @@ -28,22 +28,19 @@ Prerequisites Step 1: Prepare environment ---------------------------- -Let's make things easy: - .. code-block:: shell - export CXX= - export SHAWDIR= + export CXX= + export SHAWDIR= export WORKDIR=${HOME}/myFirstShawBuild mkdir -p ${WORKDIR} Step 2: Build TPLs --------------------------------- +------------------ -To simplify this part, we have prepared script that -automates getting the TPLs: +We have prepared a script that automates this: .. code-block:: shell @@ -51,12 +48,12 @@ automates getting the TPLs: bash build_tpls.sh ${WORKDIR} openmp This script will fetch, build and install inside ``WORKDIR/tpls`` -all TPLs needed: Kokkos-core, Kokkos-kernelas and yaml-cpp. +all TPLs needed: Kokkos-core, Kokkos-kernels and yaml-cpp. .. Attention:: - This will build Kokkos for host-only use with the OpenMP backend - but **without** any architecture specifications. This is on purpose, + This builds Kokkos with only the OpenMP backend and **without** + any architecture specifications. This is on purpose, because this step is meant to be as generic and simple as possible to get you started quickly. If you want to customize things, read more on the `Kokkos github `_. @@ -90,14 +87,16 @@ Step 3: Build SHAW .. code-block:: shell cd ${WORKDIR} - mkdir shaw-build && cd shaw-build + # note that there is no need to specify the compiler here because + # cmake will automatically pick up the env var CXX that we + # already set above in step 1 cmake \ -DKokkosKernels_DIR=${WORKDIR}/tpls/kokkos-kernels-install/lib/cmake/KokkosKernels/ \ -Dyaml-cpp_DIR=${WORKDIR}/tpls/yamlcpp-install/share/cmake/ \ - ${SHAWDIR} + -B ${WORKDIR}/shaw-build \ + -S ${SHAWDIR} + cd ${WORKDIR}/shaw-build make -j4 - - # running the SHAW tests is advised ctest diff --git a/docs/_sources/demo1.rst.txt b/docs/_sources/demo1.rst.txt new file mode 100644 index 00000000..6716106d --- /dev/null +++ b/docs/_sources/demo1.rst.txt @@ -0,0 +1,143 @@ +Demo 1 +====== + +.. 
admonition:: Description: + + This demo simulates the wave dynamic + for a single forcing using the PREM Earth's model. + + +1. Prepare +---------- + +.. code-block:: bash + + export SHAWDIR= + export EXEDIR= + + # create a dir to run the demo + export MYRUNDIR=${HOME}/myFirstDemo + mkdir -p ${MYRUNDIR} + +.. Important:: + You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`. + +| + +2. Generate the mesh +-------------------- + +We use a grid of ``200`` x ``1000`` velocity points +along the radial and polar directions, respectively. + +To generate the mesh files proceed as follows: + +.. code-block:: bash + + cd ${SHAWDIR}/meshing + python create_single_mesh.py -nr 200 -nth 1000 -working-dir ${MYRUNDIR} + +Note that the grid generator script only needs the velocity points +because the stress points are defined automatically +based on the :ref:`staggered scheme `. + +After generating the grid, you should have a ``${MYRUNDIR}/mesh200x1000`` directory containing: + +.. code-block:: bash + + . + ├── [4.5M] coeff_vp.dat + ├── [ 28M] graph_sp.dat + ├── [ 16M] graph_vp.dat + └── [ 231] mesh_info.dat + +| + +3. Input file +------------- + +We use the following input file (:doc:`learn more about input file `): + +.. literalinclude :: ../../demos/demo1/input.yaml + :language: yaml + +which we have ready for you to copy as: + +.. code-block:: bash + + cp ${SHAWDIR}/demos/demo1/input.yaml ${MYRUNDIR} + +| + +4. Run the simulation +--------------------- + + +.. code-block:: bash + + cd ${MYRUNDIR} + + # soft link the executable + ln -s ${EXEDIR}/shawExe . + + # if you use OpenMP build, remember to set + # OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread + ./shawExe input.yaml + +| + +5. Post-process data +-------------------- + +The demo should generate inside ``${MYRUNDIR}`` the following: + +.. 
code-block:: bash + + coords_sp.txt #: coordinates of the stresses grid points + coords_vp.txt #: coordinates of the velocity grid points + seismogram_0 #: seismogram at the receiver locations set in input.yaml + snaps_vp_0 #: snapshot matrix for the velocity + snaps_sp_0 #: snapshot matrix for the stresses + + +We created Python scripts for this: + +.. code-block:: bash + + cp ${SHAWDIR}/demos/demo1/*.py ${MYRUNDIR} + + +First, the seismogram data: + +.. code-block:: bash + + cd ${MYRUNDIR} + python plotSeismogram.py + + +.. image:: ../img/demo1_f1.png + + +Then, contour plots of the velocity field at ``t=250, 1000, 2000`` (seconds): + +.. code-block:: bash + + cd ${MYRUNDIR} + ln -s ${EXEDIR}/extractStateFromSnaps . + + # extract from the velocity snapshots the velocity field at specific timesteps: + # since we use ``dt = 0.25`` seconds, our targets ``t=250, 1000, 2000``, + # correspond to *time steps* 1000, 4000, 8000 + ./extractStateFromSnaps --snaps=./snaps_vp_0 binary --fsize=1 \ + --outformat=ascii --timesteps=1000 4000 8000 \ + --samplingfreq=100 --outfileappend=vp + + python plotWavefield.py + + +.. image:: ../img/demo1_f2.png + :width: 30% +.. image:: ../img/demo1_f3.png + :width: 28% +.. image:: ../img/demo1_f4.png + :width: 28% diff --git a/docs/src/demo_rank1fommulti.rst b/docs/_sources/demo2.rst.txt similarity index 69% rename from docs/src/demo_rank1fommulti.rst rename to docs/_sources/demo2.rst.txt index b6a9282b..c8ef9681 100644 --- a/docs/src/demo_rank1fommulti.rst +++ b/docs/_sources/demo2.rst.txt @@ -1,27 +1,31 @@ -(2): Multi-forcing via rank-1 -============================= +Demo 2 +====== -Before you start +.. admonition:: Description: -Below we assume you already completed one of the builds described in the build tab. -For simplicity, look at the serial step-by-step build guide. + This demo simulates the wave dynamic for multiple + forcings using the rank-1 formulation and the PREM Earth's model. -Prepare environment -------------------- +1. 
Prepare +---------- .. code-block:: bash - export ESWSRCDIR= - export SHAWEXEDIR= + export SHAWDIR= + export EXEDIR= # create a dir to run the demo export MYRUNDIR=${HOME}/mySecondDemo mkdir -p ${MYRUNDIR} +.. Important:: + You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`. + +| -Generating the mesh -------------------- +2. Generate the mesh +-------------------- For this demo, we use a grid of ``256`` x ``1024`` velocity points along the radial and polar directions, respectively. @@ -29,7 +33,7 @@ To generate the mesh files proceed as follows: .. code-block:: bash - cd ${ESWSRCDIR}/meshing + cd ${SHAWDIR}/meshing python create_single_mesh.py -nr 256 -nth 1024 -working-dir ${MYRUNDIR} @@ -43,60 +47,31 @@ After generating the grid, you should have a ``${MYRUNDIR}/mesh256x1024`` direct ├── [ 21M] graph_vp.dat └── [ 231] mesh_info.dat +| -Input file ----------- - -We use the following input file (`learn more about input file <{filename}/inputfile.rst>`_): - -.. code-block:: yaml - - general: - meshDir: ./mesh256x1024 - dt: 0.25 - finalTime: 2000.0 - checkNumericalDispersion: true - checkCfl: true +3. Input file +------------- - io: - snapshotMatrix: - binary: true - velocity: {freq: 100, fileName: snaps_vp} - stress: {freq: 100, fileName: snaps_sp} +We use the following input file (:doc:`learn more about input file `): - seismogram: - binary: false - freq: 4 - receivers: [5,30,55,80,105,130,155,175] +.. literalinclude :: ../../demos/demo2/input.yaml + :language: yaml - source: - signal: - kind: ricker - - # here we pass a list of depths to use as samples - # this will automatically activate sampling - depth: [240.,440.,540.,700.] - - period: 65.0 - delay: 180.0 - - material: - kind: prem - -You can get the input file as: +which we have ready for you to copy as: .. 
code-block:: bash - cp ${ESWSRCDIR}/demos/fom_rank1_sample_depth/input.yaml ${MYRUNDIR} + cp ${SHAWDIR}/demos/demo2/input.yaml ${MYRUNDIR} +| -Run the simulation ------------------- +4. Run the simulation +--------------------- .. code-block:: bash cd ${MYRUNDIR} - ln -s ${SHAWEXEDIR}/shawExe . + ln -s ${EXEDIR}/shawExe . # if you use OpenMP build, remember to set # OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread @@ -109,8 +84,8 @@ and using a serial build of the code, each individual realization takes approxim of which the IO time for data collection is less than 1 second. -Simulation data ---------------- +5. Simulation data +------------------ After running the demo (have some patience because it takes some a couple minutes if you use the serial mode), you should have inside ``${MYRUNDIR}`` the following files: @@ -136,8 +111,8 @@ if you use the serial mode), you should have inside ``${MYRUNDIR}`` the followin snaps_sp_3 #: stresses snapshots for depth = 700 -Post-process data ------------------ +6. Post-process data +-------------------- To post-process the data, get the Python scripts created for this demo and visualize the seismogram: @@ -145,7 +120,7 @@ for this demo and visualize the seismogram: .. code-block:: bash cd ${MYRUNDIR} - cp ${ESWSRCDIR}/demos/fom_rank1_sample_depth/plotSeismogram.py . + cp ${SHAWDIR}/demos/demo2/plotSeismogram.py . python plotSeismogram.py @@ -158,7 +133,7 @@ for ``depth=240`` and ``depth=700`` .. code-block:: bash cd ${MYRUNDIR} - ln -s ${SHAWEXEDIR}/extractStateFromSnaps . + ln -s ${EXEDIR}/extractStateFromSnaps . # snaps_vp_0 contains snapshots for depth=240 km # extract target state and write to file appending vp_d240 to identify the case @@ -170,6 +145,7 @@ for ``depth=240`` and ``depth=700`` ./extractStateFromSnaps --snaps=./snaps_vp_3 binary --fsize=1 \ --outformat=ascii --timesteps=8000 --samplingfreq=100 --outfileappend=vp_d700 + cp ${SHAWDIR}/demos/demo2/plotWavefield.py . 
python plotWavefield.py And plot them below, showing as expected the largely different pattern diff --git a/docs/src/demo_rank2fom.rst b/docs/_sources/demo3.rst.txt similarity index 58% rename from docs/src/demo_rank2fom.rst rename to docs/_sources/demo3.rst.txt index 0cf06f52..51248069 100644 --- a/docs/src/demo_rank2fom.rst +++ b/docs/_sources/demo3.rst.txt @@ -1,75 +1,64 @@ -(3): Multi-forcing via rank-2 -============================= +Demo 3 +====== -Before you start +.. admonition:: Description: -Below we assume you already completed one of the builds described in the build tab. -For simplicity, look at the serial step-by-step build guide. + This demo simulates the wave dynamic for multiple + forcings using the rank-2 formulation and the PREM Earth's model. + For the sake of demonstration, this demo solves *the same problem* + described in :doc:`demo2`, except that here we use the *rank-2 formulation*, + which allows us to simulate several trajectories simultaneously. -For the sake of demonstration, this demo solves *the same problem* -described in the rank-1 demo, except that -here we use the *rank-2 formulation*, which allows us to simulate several -trajectories simultaneously. - - -Prepare environment --------------------- +1. Prepare +---------- .. code-block:: bash - export ESWSRCDIR= - export SHAWEXEDIR= + export SHAWDIR= + export EXEDIR= # create a dir to run the demo export MYRUNDIR=${HOME}/myThirdDemo mkdir -p ${MYRUNDIR} -Generating the mesh -------------------- +.. Important:: + You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`. -This is identical to the mesh used in this demo: +| + +2. Generate the mesh +-------------------- .. code-block:: bash - cd ${ESWSRCDIR}/meshing + cd ${SHAWDIR}/meshing python create_single_mesh.py -nr 256 -nth 1024 -working-dir ${MYRUNDIR} +3. Input file +------------- +We use the following input file (:doc:`learn more about input file `): -Input file ----------- +.. 
literalinclude :: ../../demos/demo3/input.yaml + :language: yaml -The input file is identical to `the one for the rank-1 demo <{filename}/rank1fommulti.rst>`_, -except for the addition of one line to the ``source`` section: -.. code-block:: yaml - - # - # general, io, material: as in the other demo - # - source: - signal: - # kind, depth, period, delay: same as the other one - # ... - - # forcingSize defines how many simultaneous trajectories to compute - forcingSize: 4 - -The full input file can be copied: +which we have ready for you to copy as: .. code-block:: bash - cp ${ESWSRCDIR}/demos/fom_rank2_sample_depth/input.yaml ${MYRUNDIR} + cp ${SHAWDIR}/demos/demo3/input.yaml ${MYRUNDIR} +| -Run the simulation ------------------- +4. Run the simulation +--------------------- .. code-block:: bash cd ${MYRUNDIR} - ln -s ${SHAWEXEDIR}/shawExe . + ln -s ${EXEDIR}/shawExe . ./shawExe input.yaml To give an idea of runtime, on a MacPro with 2.4 GHz 8-Core Intel Core i9 and 32 GB 2667 MHz DDR4, @@ -77,12 +66,12 @@ and using a serial build of the code, the run takes approximately 107 seconds, of which the IO time for data collection is less than 1 second. Note that this already gives a hint to the advantages of using the rank-2 formulation. In fact, while here it takes 107 seconds to simulate the four trajectories simultaneously, -in the `rank-1 version of this demo <{filename}/rank1fommulti.rst>`_ it took +in :doc:`rank-1 version of this demo ` it took about 150 seconds to simulate the same realizations. -Simulation data ---------------- +5. Simulation data +------------------ The demo should generate inside ``${MYRUNDIR}`` the following: @@ -104,8 +93,8 @@ The demo should generate inside ``${MYRUNDIR}`` the following: snaps_sp_0 #: snapshot matrix for the stresses for all realizations -Post-process data ------------------ +6. 
Post-process data +-------------------- To post-process the data, get the Python scripts created for this demo and visualize the seismogram: @@ -113,7 +102,7 @@ for this demo and visualize the seismogram: .. code-block:: bash cd ${MYRUNDIR} - cp ${ESWSRCDIR}/demos/fom_rank2_sample_depth/plotSeismogram.py . + cp ${SHAWDIR}/demos/demo3/plotSeismogram.py . python plotSeismogram.py Which generates a figure identical to the `seismogram plot obtained with the rank-1 <{filename}/rank1fommulti.rst>`_ diff --git a/docs/_sources/demo_rank1fom.rst.txt b/docs/_sources/demo_rank1fom.rst.txt index 323b02ca..c661fcfc 100644 --- a/docs/_sources/demo_rank1fom.rst.txt +++ b/docs/_sources/demo_rank1fom.rst.txt @@ -1,37 +1,45 @@ -(1): Single Forcing -=================== +Demo 1 +====== -Below we assume you already completed one of the builds described in the build tab. -For simplicity, look at the serial step-by-step build guide. +.. admonition:: Description: -Prepare environment -------------------- + This demo simulates the wave dynamic + for a single forcing using the PREM Earth's model. + + +1. Prepare +---------- .. code-block:: bash - export ESWSRCDIR= - export SHAWEXEDIR= + export SHAWDIR= + export EXEDIR= # create a dir to run the demo export MYRUNDIR=${HOME}/myFirstDemo mkdir -p ${MYRUNDIR} +.. Important:: + You need to have the code built to proceed, see :doc:`build_expert` or :doc:`build_stepbystep`. -Generating the mesh -------------------- +| -For this demo, we use a grid of ``200`` x ``1000`` velocity points +2. Generate the mesh +-------------------- + +We use a grid of ``200`` x ``1000`` velocity points along the radial and polar directions, respectively. + To generate the mesh files proceed as follows: .. 
code-block:: bash - cd ${ESWSRCDIR}/meshing + cd ${SHAWDIR}/meshing python create_single_mesh.py -nr 200 -nth 1000 -working-dir ${MYRUNDIR} Note that the grid generator script only needs the velocity points because the stress points are defined automatically -based on the `staggered scheme <{filename}/goveq.rst>`_. +based on the :ref:`staggered scheme `. After generating the grid, you should have a ``${MYRUNDIR}/mesh200x1000`` directory containing: @@ -43,66 +51,43 @@ After generating the grid, you should have a ``${MYRUNDIR}/mesh200x1000`` direct ├── [ 16M] graph_vp.dat └── [ 231] mesh_info.dat +| -Input file ----------- +3. Input file +------------- -We use the following input file (`learn more about input file <{filename}/inputfile.rst>`_): - -.. code-block:: yaml - - general: - # meshDir should contain the full path to the mesh directory - # as generated by the python script `meshing/create_single_mesh.py` - # we assume the input file is in the same location as mesh dir - meshDir: ./mesh200x1000 - dt: 0.25 - finalTime: 2000.0 - checkNumericalDispersion: true - checkCfl: true - - io: - snapshotMatrix: - binary: true - velocity: {freq: 100, fileName: snaps_vp} - stress: {freq: 100, fileName: snaps_sp} - seismogram: - binary: false - freq: 4 - receivers: [5,30,55,80,105,130,155,175] - - source: - signal: - kind: ricker - depth: 640.0 # km - period: 65.0 # seconds - delay: 180.0 # seconds - - material: - kind: prem - -You can get the input file as: +We use the following input file (:doc:`learn more about input file `): + +.. literalinclude :: ../../demos/fom_rank1/input.yaml + :language: yaml + +which we have ready for you to copy as: .. code-block:: bash - cp ${ESWSRCDIR}/demos/fom_rank1/input.yaml ${MYRUNDIR} + cp ${SHAWDIR}/demos/fom_rank1/input.yaml ${MYRUNDIR} + +| -Run the simulation ------------------- +4. Run the simulation +--------------------- .. code-block:: bash cd ${MYRUNDIR} - ln -s ${SHAWEXEDIR}/shawExe . 
+ + # soft link the executable + ln -s ${EXEDIR}/shawExe . # if you use OpenMP build, remember to set # OMP_NUM_THREADS=how-many-you-want-use OMP_PLACES=threads OMP_PROC_BIND=spread ./shawExe input.yaml +| -Simulation data ---------------- +5. Post-process data +-------------------- The demo should generate inside ``${MYRUNDIR}`` the following: @@ -115,17 +100,14 @@ The demo should generate inside ``${MYRUNDIR}`` the following: snaps_sp_0 #: snapshot matrix for the stresses -Post-process data ------------------ - -To post-process the data, you can use the Python scripts created for this demo: +We created Python scripts for this: .. code-block:: bash - cp ${ESWSRCDIR}/demos/fom_rank1/*.py ${MYRUNDIR} + cp ${SHAWDIR}/demos/fom_rank1/*.py ${MYRUNDIR} -First, we visualize the seismogram data by doing: +First, the seismogram data: .. code-block:: bash @@ -136,18 +118,19 @@ First, we visualize the seismogram data by doing: .. image:: ../img/demo1_f1.png -Then, we can extract and visualize the full wavefield at ``t=250, 1000, 2000`` (seconds): +Then, contour plots of the velocity field at ``t=250, 1000, 2000`` (seconds): .. code-block:: bash cd ${MYRUNDIR} - ln -s ${SHAWEXEDIR}/extractStateFromSnaps . + ln -s ${EXEDIR}/extractStateFromSnaps . 
# extract from the velocity snapshots the velocity field at specific timesteps: # since we use ``dt = 0.25`` seconds, our targets ``t=250, 1000, 2000``, # correspond to *time steps* 1000, 4000, 8000 ./extractStateFromSnaps --snaps=./snaps_vp_0 binary --fsize=1 \ - --outformat=ascii --timesteps=1000 4000 8000 --samplingfreq=100 --outfileappend=vp + --outformat=ascii --timesteps=1000 4000 8000 \ + --samplingfreq=100 --outfileappend=vp python plotWavefield.py diff --git a/docs/_sources/demo_rank1fommulti.rst.txt b/docs/_sources/demo_rank1fommulti.rst.txt index b6a9282b..ae865100 100644 --- a/docs/_sources/demo_rank1fommulti.rst.txt +++ b/docs/_sources/demo_rank1fommulti.rst.txt @@ -1,14 +1,14 @@ -(2): Multi-forcing via rank-1 -============================= +Demo 2 +====== -Before you start +Earth, PREM, Multi-forcing (rank-1) -Below we assume you already completed one of the builds described in the build tab. -For simplicity, look at the serial step-by-step build guide. +.. Important:: + You need to have the code built to proceed with this demo, see :doc:`build_expert` or :doc:`build_stepbystep`. -Prepare environment -------------------- +Prepare +------- .. code-block:: bash diff --git a/docs/_sources/demo_rank2fom.rst.txt b/docs/_sources/demo_rank2fom.rst.txt index 0cf06f52..16822c36 100644 --- a/docs/_sources/demo_rank2fom.rst.txt +++ b/docs/_sources/demo_rank2fom.rst.txt @@ -1,19 +1,19 @@ -(3): Multi-forcing via rank-2 -============================= +Demo 3 +====== -Before you start +Earth, Multi-forcing (rank-2) + +.. Important:: + You need to have the code built to proceed with this demo, see :doc:`build_expert` or :doc:`build_stepbystep`. -Below we assume you already completed one of the builds described in the build tab. -For simplicity, look at the serial step-by-step build guide.
For the sake of demonstration, this demo solves *the same problem* described in the rank-1 demo, except that here we use the *rank-2 formulation*, which allows us to simulate several trajectories simultaneously. - -Prepare environment --------------------- +Prepare +------- .. code-block:: bash diff --git a/docs/_sources/demos.rst.txt b/docs/_sources/demos.rst.txt index 462b35fb..967e4b84 100644 --- a/docs/_sources/demos.rst.txt +++ b/docs/_sources/demos.rst.txt @@ -6,6 +6,6 @@ End-to-end Demos .. toctree:: :maxdepth: 2 - demo_rank1fom - demo_rank1fommulti - demo_rank2fom \ No newline at end of file + demo1 + demo2 + demo3 diff --git a/docs/_sources/goveq.rst.txt b/docs/_sources/goveq.rst.txt index f1ca6d98..2a47616c 100644 --- a/docs/_sources/goveq.rst.txt +++ b/docs/_sources/goveq.rst.txt @@ -1,7 +1,7 @@ Governing equations =================== -This code simulates the evolution of elastic seismic shear waves in an axisymmetric domain.` +This code simulates the evolution of elastic seismic shear waves in an axisymmetric domain. Assuming the target body/planet (e.g. Earth) can be approximated as a sphere, we adopt a spherical coordinate system as shown in the figure below: @@ -11,10 +11,10 @@ we adopt a spherical coordinate system as shown in the figure below: :align: center :alt: Alternative text - -In the axisymmetric approximation, one assumes that fields/quantities -do not vary along :math:`\phi`, implying that all the derivatives -with respect to :math:`\phi` can be dropped. +.. Important:: + In the axisymmetric approximation, one assumes that fields/quantities + do not vary along :math:`\phi`, implying that all the derivatives + with respect to :math:`\phi` can be dropped. 
With this assumption, the set of equations governing the time evolution of elastic waves in the velocity-stress formulation can be written as: @@ -44,20 +44,22 @@ where: - :math:`t` represents time -- :math:`r \in [0, r_{surface}]` is the radial distance from origin to surface of the body +- :math:`r \in [0, r_{surface}]` is the radial distance from the origin to the surface of the body - :math:`\theta \in [0, \pi]` is the polar angle - :math:`\rho(r, \theta)` is the density -- :math:`v(r, \theta, t)` is the velocity (for simplicity we drop the subscript, but it is intended to be the :math:`v_{\phi}` velocity component) +- :math:`v(r, \theta, t)` is the velocity (for simplicity we drop the subscript, + but it is intended to be the :math:`v_{\phi}` velocity component) -- :math:`\sigma_{r\phi}(r, \theta, t)` and :math:`\sigma_{\theta\phi}(r, \theta, t)` are the two components of the stress tensor remaining after the axisymmetric approximation +- :math:`\sigma_{r\phi}(r, \theta, t)` and :math:`\sigma_{\theta\phi}(r, \theta, t)` + are the two components of the stress tensor remaining after the axisymmetric approximation - :math:`f(r, \theta,t)` is the forcing term -- :math:`G(r, \theta) = v_s^2(r, \theta) \rho(r, \theta)` is the shear modulus and :math:`v_s` being the shear wave velocity. - +- :math:`G(r, \theta) = v_s^2(r, \theta) \rho(r, \theta)` is the shear modulus + with :math:`v_s` being the shear wave velocity. In practice, the axisymmetric approximation means that one solves the @@ -66,8 +68,11 @@ Such a formulation is referred to as 2.5-dimensional because it involves a 2-dimensional spatial domain (a circular sector of the Earth) but models point sources with correct 3-dimensional spreading {cite}. -Note that we assume both the density and shear modulus to only depend on the spatial coordinates. +.. Note:: + We assume both the density and shear modulus to only depend on the spatial coordinates. + +..
_discretization: Discretization ============== @@ -97,7 +102,7 @@ We remark that, differently than (cite), we do not rely on ghost points to impose boundary conditions, but account for the boundary conditions directly when assembling the system matrix. -As an example, the figure below shows the grid when modeling the Earth: the computational +The figure below shows the grid when modeling the Earth: the computational domain extends from the surface to the core-mantle boundary, excluding the liquid core. .. figure:: ../img/mesh.png diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt index b21474c6..f892be39 100644 --- a/docs/_sources/index.rst.txt +++ b/docs/_sources/index.rst.txt @@ -51,9 +51,9 @@ Highlights and features * The code relies on the `Kokkos programming model `_ for performance portability -* We use the :doc:`velocity-stress formulation in an axi-symmetric domain ` +* :doc:`Velocity-stress formulation in an axi-symmetric domain ` -* We currently support the following material models: +* Support for the following material models: - :ref:`single layer model ` @@ -66,22 +66,22 @@ Highlights and features These are 1D models because they only depend on the radial distance. 
The modularity of the code allows one to easily add new models -* If you want to simulate the wave dynamics in another - planet/axisymmetric body, all you have to do is to create - a mesh suitable for that planet, and a suitable material model +* Simulating the dynamics in another planet/axisymmetric body is relatively easy: + you have to create a mesh suitable for that planet, and a suitable material model * The code implements what we refer to as "rank-1" and "rank-2" formulations: * *rank-1*: - * the discrete state and forcing term are stored as 1D arrays + * discrete state and forcing are stored as 1D arrays - * this is used to simulate the wave dynamics due to a *single forcing term* + * this is useful to simulate the wave dynamics due to a *single forcing term* + + * :doc:`See the demo! ` * *rank-2*: - * the discrete state and forcing term are stored - using rank-2 tensors (i.e. matrices) + * discrete state and forcing are stored using rank-2 tensors (i.e. matrices) * this is useful to *simultaneously* solve the wave dynamics for *multiple forcing realizations* (e.g. multiple @@ -90,6 +90,7 @@ Highlights and features it has higher computational intensity, thus benefiting efficient ensemble propagation + * :doc:`See the demo! ` How to cite ----------- @@ -122,6 +123,7 @@ Contents build_stepbystep inputfile demos + performance GitHub Repo Open an issue/feature req. license diff --git a/docs/_sources/performance.rst.txt b/docs/_sources/performance.rst.txt new file mode 100644 index 00000000..868b3fe0 --- /dev/null +++ b/docs/_sources/performance.rst.txt @@ -0,0 +1,23 @@ + +Performance +=========== + +The following plot shows performance results obtained on a workstation +with two 18-core Intel(R) Xeon(R) Gold 6154 CPU @ 3.00 GHz, +each with a 24.75MB L3 cache and 125GB total memory. +We enable hyperthreading, thus supporting a maximum of 36 logical threads per CPU, +so a total of 72 threads. We use GCC-8.3.1 and rely on kokkos +and kokkos-kernels version 3.1.01. 
+We use Blis-0.7.0 as the kokkos-kernels’ backend for all dense operations. +We use the OpenMP backend for Kokkos. + +| + +.. figure:: ../img/fom_cpu_ave.png + :align: center + :width: 95% + + M represents how many trajectories we are computing simultaneously: + when M=1, this is what we refer to as the rank-1 formulation, + while M>=2 corresponds to what we refer to as the rank-2 formulation; + N is the *total* number of dofs (velocities plus stresses) for the problem. diff --git a/docs/build_expert.html b/docs/build_expert.html index ddb2d871..44d34634 100644 --- a/docs/build_expert.html +++ b/docs/build_expert.html @@ -134,7 +134,7 @@ -