From 5ff23506769b787f2d642d20b75b1b9964469672 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dieter=20Werthm=C3=BCller?= <mail@werthmuller.org>
Date: Fri, 21 Jun 2024 15:04:07 +0200
Subject: [PATCH] pyGIMLi(emg3d) working

---
 docs/api/index.rst                            |   2 +-
 docs/api/inversion/index.rst                  |   8 +
 .../{inversion.rst => inversion/pygimli.rst}  |   4 +-
 docs/api/utils.rst                            |   1 +
 emg3d/_multiprocessing.py                     |   5 +
 emg3d/inversion/pygimli.py                    | 316 ++++++++----------
 tests/test_time.py                            |   2 +-
 7 files changed, 161 insertions(+), 177 deletions(-)
 create mode 100644 docs/api/inversion/index.rst
 rename docs/api/{inversion.rst => inversion/pygimli.rst} (73%)

diff --git a/docs/api/index.rst b/docs/api/index.rst
index 5fbd6284..adb52fe1 100644
--- a/docs/api/index.rst
+++ b/docs/api/index.rst
@@ -27,7 +27,7 @@ API reference
    surveys
    time
    utils
-   inversion
+   inversion/index
 
 
 .. grid:: 1
diff --git a/docs/api/inversion/index.rst b/docs/api/inversion/index.rst
new file mode 100644
index 00000000..2d34ce4f
--- /dev/null
+++ b/docs/api/inversion/index.rst
@@ -0,0 +1,8 @@
+Inversion
+#########
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+
+   pygimli
diff --git a/docs/api/inversion.rst b/docs/api/inversion/pygimli.rst
similarity index 73%
rename from docs/api/inversion.rst
rename to docs/api/inversion/pygimli.rst
index 7b6271e5..937341ff 100644
--- a/docs/api/inversion.rst
+++ b/docs/api/inversion/pygimli.rst
@@ -1,5 +1,5 @@
-Inversion
-=========
+pyGIMLi(emg3d)
+==============
 
 .. automodapi:: emg3d.inversion.pygimli
    :no-inheritance-diagram:
diff --git a/docs/api/utils.rst b/docs/api/utils.rst
index a83546ef..994864e2 100644
--- a/docs/api/utils.rst
+++ b/docs/api/utils.rst
@@ -4,3 +4,4 @@ Utils
 .. automodapi:: emg3d.utils
    :no-inheritance-diagram:
    :no-heading:
+   :skip: EMArray
diff --git a/emg3d/_multiprocessing.py b/emg3d/_multiprocessing.py
index 1fb095e3..f6eb5aa8 100644
--- a/emg3d/_multiprocessing.py
+++ b/emg3d/_multiprocessing.py
@@ -43,6 +43,7 @@ def process_map(fn, *iterables, max_workers, **kwargs):
     execution.
 
     """
+    process_map.count += 1
 
     # Parallel
     if max_workers > 1 and tqdm is None:
@@ -64,6 +65,10 @@ def process_map(fn, *iterables, max_workers, **kwargs):
             iterable=map(fn, *iterables), total=len(iterables[0]), **kwargs))
 
 
+# Counter for processing map (used, e.g., for inversions).
+process_map.count = 0
+
+
 def solve(inp):
     """Thin wrapper of `solve` or `solve_source` for a `process_map`.
 
diff --git a/emg3d/inversion/pygimli.py b/emg3d/inversion/pygimli.py
index c77b2479..d845184c 100644
--- a/emg3d/inversion/pygimli.py
+++ b/emg3d/inversion/pygimli.py
@@ -20,9 +20,9 @@
 except ImportError:
     pygimli = None
 
-from emg3d import models, utils
+from emg3d import utils, _multiprocessing
 
-__all__ = ['Kernel', 'Inversion', 'Jacobian']
+__all__ = ['Kernel', 'Inversion']
 
 # Add pygimli and pgcore to the emg3d.Report().
 utils.OPTIONAL.extend(['pygimli', 'pgcore'])
@@ -31,22 +31,10 @@
 def __dir__():
     return __all__
 
-# TODO: Create functions
-# - convert_model(from/to)
-# - convert_data(from/to)
-
 
 @utils._requires('pygimli')
-class Jacobian(pygimli.Matrix):
-    """Create a Jacobian operator for emg3d which is understood by pyGIMLi.
-
-    This never builds the actual Jacobian, but provides functions to compute
-    the
-
-    - Jacobian times a model vector ``Jm``
-      (``jvec`` in emg3d, ``mult`` in pyGIMLi), and the
-    - Jacobian transposed times a data vector ``Jᵀd``
-      (``jtvec`` in emg3d, ``transMult`` in pyGIMLi).
+class Kernel(pygimli.Modelling):
+    """Create a forward operator of emg3d to use within a pyGIMLi inversion.
 
 
     Parameters
@@ -54,114 +42,37 @@ class Jacobian(pygimli.Matrix):
     simulation : Simulation
         The simulation; a :class:`emg3d.simulations.Simulation` instance.
 
-    mesh : Mesh
-        The mesh; :func:`pygimli.meshtools.grid.createGrid` instance.
-
-    """
-
-    def __init__(self, simulation, mesh):
-        """Initiate a new Jacobian instance."""
-        super().__init__()
-
-        # Store pointers to the emg3d-simulation and the pyGIMLi-mesh.
-        self.simulation = simulation
-        self.mesh = mesh
-
-        # Store n-cols and n-rows.
-        self._cols = simulation.model.size
-        self._rows = simulation.survey.count * 2
-
-    def cols(self):
-        """The number of columns corresponds to the model size."""
-        return self._cols
-
-    def rows(self):
-        """The number of rows corresponds to 2x(data size), for [Re; Im]."""
-        return self._rows
-
-    def mult(self, x):
-        """Multiply the Jacobian with a vector, Jm."""
-
-        self.simulation._count_jvec += 1
-
-        # Resort x to represent the model.
-        model = x[self.mesh().cellMarkers()]
-
-        # Compute jvec.
-        jvec = self.simulation.jvec(
-            vector=np.reshape(model, self.simulation.model.shape, order='F')
-        )
-
-        # Get non-NaN data.
-        data = jvec[self.simulation.survey.isfinite]
-
-        # Return real and imaginary parts stacked.
-        return np.hstack((data.real, data.imag))
-
-    def transMult(self, x):
-        """Multiply  Jacobian transposed with a vector, Jᵀd = (dJᵀ)ᵀ."""
-        self.simulation._count_jtvec += 1
-
-        # Cast finite [Re, Im] data from pyGIMLi into the emg3d format.
-        data = np.ones(
-                self.simulation.survey.shape,
-                dtype=self.simulation.data.observed.dtype
-        )*np.nan
-        x = np.asarray(x)
-        xl = x.size//2
-        data[self.simulation.survey.isfinite] = x[:xl] + 1j*x[xl:]
-
-        # Compute jtvec.
-        jtvec = self.simulation.jtvec(data).ravel('F')
-
-        # Resort jtvec according to regions.
-        out = np.empty(jtvec.size)
-        out[self.mesh().cellMarkers()] = jtvec
-        return out
-
-    def save(self, *args):
-        """There is no save for this pseudo-Jacobian."""
-        pass
+    markers : ndarray of ints, default: None
+        An ndarray of ints of the same shape as the model. All cells with the
+        same number belong to the same region with this number, which can
+        subsequently be defined through
+        :func:`pygimli.frameworks.modelling.Modelling.setRegionProperties`.
 
-
-@utils._requires('pygimli')
-class Kernel(pygimli.Modelling):
-    """Create a forward operator to use emg3d within a pyGIMLi inversion.
-
-
-    Parameters
-    ----------
-    simulation : Simulation
-        The emg3d simulation a :class:`emg3d.simulations.Simulation` instance.
+    pgthreads : int, default: 2
+        Number of threads for pyGIMLi (sets ``OPENBLAS_NUM_THREADS``). This is
+        by default a small number, as the important parallelization in
+        pyGIMLi(emg3d) happens over sources and frequencies in emg3d. This is
+        controlled in the parameter ``max_workers`` when creating the
+        simulation.
 
     """
 
     def __init__(self, simulation, markers=None, pgthreads=2):
-        """Initialize the pyGIMLi(emg3d)-wrapper."""
+        """Initialize a pyGIMLi(emg3d)-wrapper."""
+        super().__init__()
 
+        # Set pyGIMLi threads.
         pygimli.setThreadCount(pgthreads)
 
-        # TODO move this to the Simulation class!
-        simulation._count_forward = 0
-        simulation._count_jvec = 0
-        simulation._count_jtvec = 0
-
-        # Check current limitation 1: isotropic.
-        mcase = simulation.model.case
-        if mcase != 'isotropic':
-            raise NotImplementedError(
-                f"pyGIMLi(emg3d) not implemented for {mcase} case."
-            )
-
-        # Check current limitation 2: conductivity.
-        mname = simulation.model.map.name
-        if mname != 'Conductivity':
-            raise NotImplementedError(
-                f"pyGIMLi(emg3d) not implemented for {mname} mapping."
-            )
-
-        # Initiate first pygimli.Modelling, which will do its magic.
-        super().__init__()
+        # Check current limitations.
+        checks = {
+            'case': (simulation.model.case, 'isotropic'),
+            'mapping': (simulation.model.map.name, 'Conductivity'),
+        }
+        for k, v in checks.items():
+            if v[0] != v[1]:
+                msg = f"pyGIMLi(emg3d) is not implemented for {v[0]} {k}."
+                raise NotImplementedError(msg)
 
         # Store the simulation.
         self.simulation = simulation
@@ -173,13 +84,19 @@ def __init__(self, simulation, markers=None, pgthreads=2):
             z=simulation.model.grid.nodes_z,
         )
 
+        # Set mesh and, if provided, markers.
         if markers is not None:
-            mesh.setCellMarkers(markers)
-
+            mesh.setCellMarkers(markers.ravel('F'))
         self.setMesh(mesh)
 
-        # Store J and set it
-        self.J = Jacobian(self.simulation, self.mesh)
+        # Create J, store and set it.
+        self.J = self.Jacobian(
+            simulation=self.simulation,
+            data2pygimli=self.data2pygimli,
+            data2emg3d=self.data2emg3d,
+            model2pygimli=self.model2pygimli,
+            model2emg3d=self.model2emg3d,
+        )
         self.setJacobian(self.J)
 
     def response(self, model):
@@ -189,93 +106,121 @@ def response(self, model):
         self.simulation.clean('computed')
 
         # Replace model
-        self.simulation.model = models.Model(
-            grid=self.simulation.model.grid,
-            # Resort inversion model array to represent the actual model.
-            property_x=model[self.mesh().cellMarkers()],
-            mapping='Conductivity'
-        )
+        self.simulation.model.property_x = self.model2emg3d(model)
 
         # Compute forward model and set initial residuals.
-        self.simulation._count_forward += 1
         _ = self.simulation.misfit
 
-        # Return the responses
-        data = self.simulation.data.synthetic.data[
-                self.simulation.survey.isfinite
-        ]
-        return np.hstack((data.real, data.imag))
+        # Return the responses as a pyGIMLi array.
+        return self.data2pygimli(self.simulation.data.synthetic.data)
 
     def createStartModel(self, dataVals=None):
         """Returns the model from the provided simulation."""
-        return self.simulation.model.property_x.ravel('F')
+        return self.model2pygimli(self.simulation.model.property_x)
 
     def createJacobian(self, model):
-        """Dummy to prevent pygimli.Modelling from doing it the hard way."""
+        """Dummy to prevent pyGIMLi from doing it the hard way."""
         pass  # do nothing
 
+    def data2pygimli(self, data):
+        """Convert an emg3d data-xarray to a pyGIMLi data array."""
+        out = data[self.simulation.survey.isfinite]
+        if np.iscomplexobj(out):
+            return np.hstack((out.real, out.imag))
+        else:  # For standard deviation
+            return np.hstack((out, out))
+
+    def data2emg3d(self, data):
+        """Convert a pyGIMLi data array to an emg3d data-xarray."""
+        out = np.ones(
+                self.simulation.survey.shape,
+                dtype=self.simulation.data.observed.dtype
+        )*np.nan
+        data = np.asarray(data)
+        ind = data.size//2
+        out[self.simulation.survey.isfinite] = data[:ind] + 1j*data[ind:]
+        return out
 
-def _post_step(n, inv):
-    """TODO"""
+    def model2pygimli(self, model):
+        """Convert an emg3d Model property to a pyGIMLi model array.
 
-    # TODO: save data, model, and everything to re-start inversion.
+        This function deals with the regions defined in pyGIMLi.
+        """
+        out = np.empty(model.size)
+        out[self.mesh().cellMarkers()] = model.ravel('F')
+        return out
 
-    # inv.chi2History
-    # inv.modelHistory
+    def model2emg3d(self, model):
+        """Convert a pyGIMLi model array to an emg3d Model property.
 
-    sim = inv.fop.simulation
+        This function deals with the regions defined in pyGIMLi.
+        """
+        out = np.asarray(model[self.mesh().cellMarkers()])
+        return out.reshape(self.simulation.model.shape, order='F')
 
-    kc = sim._count_forward + sim._count_jvec + sim._count_jtvec
-    sim.survey.data[f"it{n}"] = sim.survey.data.synthetic
-    cglsit = max(0, sim._count_jvec-1)
-    phi = inv.inv.getPhi()
-    if not hasattr(inv, 'lastphi'):
-        lastphi = ""
-    else:
-        lastphi = f"; Δϕ = {(1-phi/inv.lastphi)*100:.2f}%"
-    inv.lastphi = phi
-    pygimli.info(
-        f"{n}: "
-        f"χ² = {inv.inv.chi2():7.2f}; "
-        f"λ = {inv.inv.getLambda()}; "
-        f"#CGLS {cglsit:2d} ({kc:2d} solves); "
-        f"ϕ = {inv.inv.getPhiD():.2f} + {inv.inv.getPhiM():.2f}·λ = "
-        f"{phi:.2f}{lastphi}"
-    )
+    class Jacobian(pygimli.Matrix):
+        """Return Jacobian operator for pyGIMLi(emg3d)."""
+
+        def __init__(self, simulation,
+                     data2pygimli, data2emg3d, model2pygimli, model2emg3d):
+            """Initiate a new Jacobian instance."""
+            super().__init__()
+            self.simulation = simulation
+            self.data2pygimli = data2pygimli
+            self.data2emg3d = data2emg3d
+            self.model2pygimli = model2pygimli
+            self.model2emg3d = model2emg3d
 
-    # Reset counters
-    sim._count_forward = 0
-    sim._count_jvec = 0
-    sim._count_jtvec = 0
+        def cols(self):
+            """The number of columns corresponds to the model size."""
+            return self.simulation.model.size
+
+        def rows(self):
+            """The number of rows corresponds to 2x data-size (Re; Im)."""
+            return self.simulation.survey.count * 2
+
+        def mult(self, x):
+            """Multiply the Jacobian with a vector, Jm."""
+            jvec = self.simulation.jvec(vector=self.model2emg3d(x))
+            return self.data2pygimli(jvec)
+
+        def transMult(self, x):
+            """Multiply the Jacobian transposed with a vector, Jᵀd = (dJᵀ)ᵀ."""
+            jtvec = self.simulation.jtvec(self.data2emg3d(x))
+            return self.model2pygimli(jtvec)
+
+        def save(self, *args):
+            """There is no save for this pseudo-Jacobian."""
+            pass
 
 
 @utils._requires('pygimli')
 class Inversion(pygimli.Inversion):
-    """TODO"""
+    """Thin wrapper, adding verbosity and taking care of data format."""
+
     def __init__(self, fop=None, inv=None, **kwargs):
+        """Initialize an Inversion instance."""
         super().__init__(fop=fop, inv=inv, **kwargs)
         self._postStep = _post_step
 
     def run(self, dataVals=None, errorVals=None, **kwargs):
+        """Run the inversion."""
+
+        # Reset counter, start timer, print message.
+        _multiprocessing.process_map.count = 0
         timer = utils.Timer()
         pygimli.info(":: pyGIMLi(emg3d) START ::")
 
         # Take data from the survey if not provided.
         if dataVals is None:
-            finite_data = self.fop.simulation.survey.finite_data()
-            dataVals = np.hstack([finite_data.real, finite_data.imag])
+            dataVals = self.fop.data2pygimli(
+                    self.fop.simulation.data.observed.data)
 
         # Take the error from the survey if not provided.
         if errorVals is None:
-            # TODO - IS THIS CORRECT?
-            std_dev_full = self.fop.simulation.survey.standard_deviation
-            std_dev = std_dev_full.data[self.fop.simulation.survey.isfinite]
-            errorVals = np.hstack([std_dev, std_dev]) / abs(dataVals)
-
-            # TODO does it make any difference, is it needed?
-            # To completely ignore big errors
-            # => Test if it is actually necessary or not
-            errorVals[errorVals > 0.5] = 1e8
+            std_dev = self.fop.data2pygimli(
+                    self.fop.simulation.survey.standard_deviation.data)
+            errorVals = std_dev / abs(dataVals)
 
         # Run the inversion
         out = super().run(dataVals=dataVals, errorVals=errorVals, **kwargs)
@@ -284,3 +229,28 @@ def run(self, dataVals=None, errorVals=None, **kwargs):
         pygimli.info(f":: pyGIMLi(emg3d) END   :: runtime = {timer.runtime}")
 
         return out
+
+
+def _post_step(n, inv):
+    """Print some values for each iteration."""
+
+    # Store the synthetic data of this iteration, then print info.
+    sim = inv.fop.simulation
+    sim.survey.data[f"it{n}"] = sim.survey.data.synthetic
+    phi = inv.inv.getPhi()
+    if not hasattr(inv, 'lastphi'):
+        lastphi = ""
+    else:
+        lastphi = f"; Δϕ = {(1-phi/inv.lastphi)*100:.2f}%"
+    inv.lastphi = phi
+    pygimli.info(
+        f"{n}: "
+        f"χ² = {inv.inv.chi2():7.2f}; "
+        f"λ = {inv.inv.getLambda()}; "
+        f"{_multiprocessing.process_map.count:2d} kernel calls; "
+        f"ϕ = {inv.inv.getPhiD():.2f} + {inv.inv.getPhiM():.2f}·λ = "
+        f"{phi:.2f}{lastphi}"
+    )
+
+    # Reset counter
+    _multiprocessing.process_map.count = 0
diff --git a/tests/test_time.py b/tests/test_time.py
index c5e382e0..36a50302 100644
--- a/tests/test_time.py
+++ b/tests/test_time.py
@@ -28,7 +28,7 @@ def test_defaults(self, capsys):
         assert Fourier.signal == 0        # Impulse respons
         assert_allclose(times, Fourier.time, 0, 0)
         assert Fourier.verb == 3          # Verbose by default
-        assert 'key' in out
+        assert 'ey 201' in out
         assert 'Req. freq' in out
         assert 'Calc. freq' in out
         assert Fourier.freq_compute.min() >= fmin