From e9f7013f430c0d2428e2a95bf20096c7c19a6831 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Sun, 24 Mar 2024 00:03:30 -0400
Subject: [PATCH] retry modeling calls for safety, bump CI actions to v4

---
 .github/workflows/ci-examples.yml                |  4 ++--
 .github/workflows/ci-judi.yml                    |  4 ++--
 .github/workflows/ci-op.yml                      |  4 ++--
 .github/workflows/deploy_doc.yaml                |  2 +-
 .github/workflows/docker-publish.yml             |  2 +-
 .github/workflows/flake8.yml                     |  2 +-
 src/TimeModeling/Modeling/misfit_fg.jl           |  6 ++++--
 .../Modeling/time_modeling_serial.jl             |  9 ++++++---
 src/TimeModeling/Modeling/twri_objective.jl      | 16 +++-------------
 test/test_gradients.jl                           |  8 +++-----
 10 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/ci-examples.yml b/.github/workflows/ci-examples.yml
index 42e72a950..dbd3ce2b8 100644
--- a/.github/workflows/ci-examples.yml
+++ b/.github/workflows/ci-examples.yml
@@ -26,7 +26,7 @@ jobs:
 
     steps:
       - name: Checkout JUDI
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - id: set-matrix
         run: echo "matrix=$(ls examples/scripts/*.jl examples/machine-learning/*.jl | xargs -n 1 | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
@@ -52,7 +52,7 @@ jobs:
   
     steps:
       - name: Checkout JUDI
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Setup julia
         uses: julia-actions/setup-julia@v1
diff --git a/.github/workflows/ci-judi.yml b/.github/workflows/ci-judi.yml
index 1e97c04f9..65716a9fa 100644
--- a/.github/workflows/ci-judi.yml
+++ b/.github/workflows/ci-judi.yml
@@ -33,10 +33,10 @@ jobs:
 
     steps:
       - name: Checkout JUDI
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Cache julia install
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         env:
           cache-name: cache-julia-pkgs
         with:
diff --git a/.github/workflows/ci-op.yml b/.github/workflows/ci-op.yml
index 2e1ea552f..35797d931 100644
--- a/.github/workflows/ci-op.yml
+++ b/.github/workflows/ci-op.yml
@@ -62,10 +62,10 @@ jobs:
 
     steps:
       - name: Checkout JUDI
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Cache julia install
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         env:
           cache-name: cache-julia-pkgs
         with:
diff --git a/.github/workflows/deploy_doc.yaml b/.github/workflows/deploy_doc.yaml
index 79e92c616..6d82cb509 100644
--- a/.github/workflows/deploy_doc.yaml
+++ b/.github/workflows/deploy_doc.yaml
@@ -19,7 +19,7 @@ jobs:
 
     steps:
       - name: Checkout master
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - uses: julia-actions/setup-julia@latest
       
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index 063965c13..029998087 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -25,7 +25,7 @@ jobs:
         run: echo ${{ github.event_name }}
 
       - name: Checkout JUDI
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v1.0.2
diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml
index f5005cee3..fa3fdc7ae 100644
--- a/.github/workflows/flake8.yml
+++ b/.github/workflows/flake8.yml
@@ -20,7 +20,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v4
 
     - name: Set up Python 3.9
       uses: actions/setup-python@v4
diff --git a/src/TimeModeling/Modeling/misfit_fg.jl b/src/TimeModeling/Modeling/misfit_fg.jl
index 2c502a8b7..3d9906a95 100644
--- a/src/TimeModeling/Modeling/misfit_fg.jl
+++ b/src/TimeModeling/Modeling/misfit_fg.jl
@@ -6,8 +6,7 @@ Dtypes = Union{<:judiVector, NTuple{N, <:judiVector} where N, Vector{<:judiVecto
 MTypes = Union{<:AbstractModel, NTuple{N, <:AbstractModel} where N, Vector{<:AbstractModel}}
 dmTypes = Union{dmType, NTuple{N, dmType} where N, Vector{dmType}}
 
-
-function multi_src_fg(model_full::AbstractModel, source::Dtypes, dObs::Dtypes, dm, options::JUDIOptions;
+function _multi_src_fg(model_full::AbstractModel, source::Dtypes, dObs::Dtypes, dm, options::JUDIOptions;
                       nlind::Bool=false, lin::Bool=false, misfit::Function=mse, illum::Bool=false,
                       data_precon=nothing, model_precon=LinearAlgebra.I)
     GC.gc(true)
@@ -92,6 +91,9 @@ function multi_src_fg(model_full::AbstractModel, source::Dtypes, dObs::Dtypes, d
     return fval, grad
 end
 
+const multi_src_fg = retry(_multi_src_fg)  # Base.retry wrapper: re-calls on exception; const keeps the binding typed
+
+
 # Find number of experiments
 """
     get_nexp(x)
diff --git a/src/TimeModeling/Modeling/time_modeling_serial.jl b/src/TimeModeling/Modeling/time_modeling_serial.jl
index 109275555..6bf0b73f2 100644
--- a/src/TimeModeling/Modeling/time_modeling_serial.jl
+++ b/src/TimeModeling/Modeling/time_modeling_serial.jl
@@ -5,10 +5,11 @@ GeomOrNot = Union{Geometry, Array, Nothing}
 ArrayOrNot = Union{Array, PyArray, PyObject, Nothing}
 PhysOrNot = Union{PhysicalParameter, Array, Nothing}
 
+
 # Setup time-domain linear or nonlinear foward and adjoint modeling and interface to devito
-function time_modeling(model_full::AbstractModel, srcGeometry::GeomOrNot, srcData::ArrayOrNot,
-                       recGeometry::GeomOrNot, recData::ArrayOrNot, dm::PhysOrNot,
-                       op::Symbol, options::JUDIOptions, fw::Bool, illum::Bool)
+function _time_modeling(model_full::AbstractModel, srcGeometry::GeomOrNot, srcData::ArrayOrNot,
+                        recGeometry::GeomOrNot, recData::ArrayOrNot, dm::PhysOrNot,
+                        op::Symbol, options::JUDIOptions, fw::Bool, illum::Bool)
     GC.gc(true)
     devito.clear_cache()
 
@@ -91,3 +92,5 @@ function save_to_disk(shot::judiVector{T}, srcGeometry::GeometryIC{T}, srcData::
     end
     return dout
 end
+
+const time_modeling = retry(_time_modeling)  # Base.retry wrapper: re-calls on exception; const keeps the binding typed
\ No newline at end of file
diff --git a/src/TimeModeling/Modeling/twri_objective.jl b/src/TimeModeling/Modeling/twri_objective.jl
index 1d0f339be..d92598659 100644
--- a/src/TimeModeling/Modeling/twri_objective.jl
+++ b/src/TimeModeling/Modeling/twri_objective.jl
@@ -53,18 +53,7 @@ end
 
 subsample(opt::TWRIOptions, srcnum::Int) = getindex(opt, srcnum)
 
-"""
-    twri_objective(model, source, dobs; options=Options(), optionswri=TWRIOptions())
-
-Evaluate the time domain Wavefield reconstruction inversion objective function. Returns a tuple with function value and
-gradient(s) w.r.t to m and/or y. `model` is a `Model` structure with the current velocity model and `source` and `dobs` are the wavelets and 
-observed data of type `judiVector`.
-
-Example
-=======
-    function_value, gradient_m, gradient_y = twri_objective(model, source, dobs; options=Options(), optionswri=TWRIOptions())
-"""
-function twri_objective(model_full::AbstractModel, source::judiVector, dObs::judiVector, y::Union{judiVector, Nothing},
+function _twri_objective(model_full::AbstractModel, source::judiVector, dObs::judiVector, y::Union{judiVector, Nothing},
                         options::JUDIOptions, optionswri::TWRIOptions)
     # Load full geometry for out-of-core geometry containers
     dObs.geometry = Geometry(dObs.geometry)
@@ -133,6 +122,7 @@ filter_out(obj, m, ::Nothing) = obj, m
 filter_out(obj, ::Nothing, y) = obj, y
 filter_out(obj, m, y) = obj, m, y
 
+const twri = retry(_twri_objective)  # Base.retry wrapper used by the parallel twri_objective; const keeps it typed
 
 # Parallel
 """
@@ -152,7 +142,7 @@ function twri_objective(model::AbstractModel, source::judiVector, dObs::judiVect
     else
         arg_func = j -> (model, source[j], dObs[j], y[j], options[j], optionswri[j])
     end
-    results = run_and_reduce(twri_objective, pool, source.nsrc, arg_func)
+    results = run_and_reduce(twri, pool, source.nsrc, arg_func)
     # Collect and reduce gradients
     out = as_vec(results, Val(options.return_array))
     return out
diff --git a/test/test_gradients.jl b/test/test_gradients.jl
index 9fa2bf490..90b1e6590 100644
--- a/test/test_gradients.jl
+++ b/test/test_gradients.jl
@@ -101,15 +101,13 @@ end
 # Test if lsrtm_objective produces the same value/gradient as is done by the correct algebra
 @testset "LSRTM gradient linear algebra test with $(nlayer) layers, tti $(tti), viscoacoustic $(viscoacoustic), freesurface $(fs)" begin
 	# Draw a random case to avoid long CI.
-	dft, optchk = rand([true, false], 2)
 	ic = rand(["isic", "fwi", "as"])
-	optchk = optchk && !dft
-    @timeit TIMEROUTPUT "LSRTM validity (IC=$(ic), checkpointing=$(optchk), dft=$(dft))" begin
+	printstyled("LSRTM validity with dft (IC=$(ic))\n", color=:blue)  # optchk is no longer drawn in this testset
+    @timeit TIMEROUTPUT "LSRTM validity with dft (IC=$(ic))" begin
 		ftol = fs ? 1f-3 : 5f-4
-		freq = dft ? [[2.5, 4.5],[3.5, 5.5],[10.0, 15.0], [30.0, 32.0]] : []
+		freq = [[2.5, 4.5],[3.5, 5.5],[10.0, 15.0], [30.0, 32.0]]
 		J.options.free_surface = fs
 		J.options.IC = ic
-		J.options.optimal_checkpointing = optchk
 		J.options.frequencies = freq
 
 		d_res = dobs0 + J*dm1 - dobs