From 868edad13188fd9fd91e3d5aa11fa0ae2d2ef310 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dieter=20Werthm=C3=BCller?= <mail@werthmuller.org>
Date: Tue, 23 Jan 2024 21:33:15 +0100
Subject: [PATCH] pyGIMLi(emg3d)

---
 docs/api/index.rst             |   1 +
 docs/api/inversion/index.rst   |   8 +
 docs/api/inversion/pygimli.rst |   6 +
 docs/conf.py                   |   1 +
 docs/manual/installation.rst   |   3 +-
 emg3d/_multiprocessing.py      |   5 +
 emg3d/inversion/__init__.py    |  15 ++
 emg3d/inversion/pygimli.py     | 315 +++++++++++++++++++++++++++++++++
 emg3d/meshes.py                |  23 +++
 emg3d/surveys.py               |  11 ++
 emg3d/utils.py                 |  13 +-
 requirements-dev.txt           |   1 +
 setup.py                       |   3 +-
 tests/test_meshes.py           |   8 +
 tests/test_time.py             |   2 +-
 15 files changed, 410 insertions(+), 5 deletions(-)
 create mode 100644 docs/api/inversion/index.rst
 create mode 100644 docs/api/inversion/pygimli.rst
 create mode 100644 emg3d/inversion/__init__.py
 create mode 100644 emg3d/inversion/pygimli.py

diff --git a/docs/api/index.rst b/docs/api/index.rst
index 4c03f995..adb52fe1 100644
--- a/docs/api/index.rst
+++ b/docs/api/index.rst
@@ -27,6 +27,7 @@ API reference
    surveys
    time
    utils
+   inversion/index
 
 
 .. grid:: 1
diff --git a/docs/api/inversion/index.rst b/docs/api/inversion/index.rst
new file mode 100644
index 00000000..2d34ce4f
--- /dev/null
+++ b/docs/api/inversion/index.rst
@@ -0,0 +1,8 @@
+Inversion
+#########
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+
+   pygimli
diff --git a/docs/api/inversion/pygimli.rst b/docs/api/inversion/pygimli.rst
new file mode 100644
index 00000000..937341ff
--- /dev/null
+++ b/docs/api/inversion/pygimli.rst
@@ -0,0 +1,6 @@
+pyGIMLi(emg3d)
+==============
+
+.. automodapi:: emg3d.inversion.pygimli
+   :no-inheritance-diagram:
+   :no-heading:
diff --git a/docs/conf.py b/docs/conf.py
index 8c4c3a74..8252e5dd 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -39,6 +39,7 @@
     "empymod": ("https://empymod.emsig.xyz/en/stable", None),
     "xarray": ("https://docs.xarray.dev/en/stable", None),
     "numba": ("https://numba.readthedocs.io/en/stable", None),
+    "pygimli": ("https://www.pygimli.org", None),
 }
 
 # ==== 2. General Settings ====
diff --git a/docs/manual/installation.rst b/docs/manual/installation.rst
index b620bf82..1cd18794 100644
--- a/docs/manual/installation.rst
+++ b/docs/manual/installation.rst
@@ -25,13 +25,14 @@ namely:
 - ``matplotlib``: To use the plotting utilities within ``discretize``.
 - ``h5py``: Save and load data in the HDF5 format.
 - ``tqdm``: For nice progress bars when computing many sources and frequencies.
+- ``pygimli``: To run inversions with pyGIMLi via the pyGIMLi(emg3d) wrapper.
 
 All soft dependencies are also available both on ``conda-forge`` and ``pip``.
 To get therefore the complete experience use one of the following options:
 
 .. code-block:: console
 
-   conda install -c conda-forge emg3d discretize xarray matplotlib h5py tqdm
+   conda install -c conda-forge emg3d discretize xarray matplotlib h5py tqdm pygimli
 
 or via ``pip``:
 
diff --git a/emg3d/_multiprocessing.py b/emg3d/_multiprocessing.py
index 1fb095e3..f6eb5aa8 100644
--- a/emg3d/_multiprocessing.py
+++ b/emg3d/_multiprocessing.py
@@ -43,6 +43,7 @@ def process_map(fn, *iterables, max_workers, **kwargs):
     execution.
 
     """
+    process_map.count += 1
 
     # Parallel
     if max_workers > 1 and tqdm is None:
@@ -64,6 +65,10 @@ def process_map(fn, *iterables, max_workers, **kwargs):
             iterable=map(fn, *iterables), total=len(iterables[0]), **kwargs))
 
 
+# Counter for processing map (used, e.g., for inversions).
+process_map.count = 0
+
+
 def solve(inp):
     """Thin wrapper of `solve` or `solve_source` for a `process_map`.
 
diff --git a/emg3d/inversion/__init__.py b/emg3d/inversion/__init__.py
new file mode 100644
index 00000000..064daa57
--- /dev/null
+++ b/emg3d/inversion/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2024 The emsig community.
+#
+# This file is part of emg3d.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License.  You may obtain a copy
+# of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+# License for the specific language governing permissions and limitations under
+# the License.
diff --git a/emg3d/inversion/pygimli.py b/emg3d/inversion/pygimli.py
new file mode 100644
index 00000000..c4f481fb
--- /dev/null
+++ b/emg3d/inversion/pygimli.py
@@ -0,0 +1,315 @@
+# Copyright 2024 The emsig community.
+#
+# This file is part of emg3d.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License.  You may obtain a copy
+# of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+# License for the specific language governing permissions and limitations under
+# the License.
+import numpy as np
+
+try:
+    import pygimli
+except ImportError:
+    pygimli = None
+
+from emg3d import utils, _multiprocessing
+
+__all__ = ['Kernel', 'Inversion']
+
+# Add pygimli and pgcore to the emg3d.Report().
+utils.OPTIONAL.extend(['pygimli', 'pgcore'])
+
+
+def __dir__():
+    return __all__
+
+
+class Kernel(pygimli.Modelling if pygimli else object):
+    """Create a forward operator of emg3d to use within a pyGIMLi inversion.
+
+
+    Parameters
+    ----------
+    simulation : Simulation
+        The simulation; a :class:`emg3d.simulations.Simulation` instance.
+
+    markers : ndarray of ints, default: None
+        An ndarray of ints of the same shape as the model. All cells with the
+        same number belong to the same region with this number, which can
+        subsequently be defined through
+        :func:`pygimli.frameworks.modelling.Modelling.setRegionProperties`.
+
+    pgthreads : int, default: 2
+        Number of threads for pyGIMLi (sets ``OPENBLAS_NUM_THREADS``). This is
+        by default a small number, as the important parallelization in
+        pyGIMLi(emg3d) happens over sources and frequencies in emg3d. This is
+        controlled in the parameter ``max_workers`` when creating the
+        simulation.
+
+    """
+
+    @utils._requires('pygimli')
+    def __init__(self, simulation, markers=None, pgthreads=2):
+        """Initialize a pyGIMLi(emg3d)-wrapper."""
+        super().__init__()
+
+        # Set pyGIMLi threads.
+        pygimli.setThreadCount(pgthreads)
+
+        # Check current limitations.
+        checks = {
+            'case': (simulation.model.case, 'isotropic'),
+            'mapping': (simulation.model.map.name, 'Conductivity'),
+        }
+        for k, v in checks.items():
+            if v[0] != v[1]:
+                msg = f"pyGIMLi(emg3d) is not implemented for {v[0]} {k}."
+                raise NotImplementedError(msg)
+
+        # Store the simulation.
+        self.simulation = simulation
+
+        # Translate discretize TensorMesh to pygimli-Grid.
+        mesh = pygimli.createGrid(
+            x=simulation.model.grid.nodes_x,
+            y=simulation.model.grid.nodes_y,
+            z=simulation.model.grid.nodes_z,
+        )
+
+        # Set markers; without markers, all cells get the region number 0.
+        if markers is not None:
+            mesh.setCellMarkers(markers.ravel('F'))
+            self.markers = markers
+        else:
+            self.markers = np.zeros(simulation.model.shape, dtype=int)
+        # Store original props; required if a region is set to ``background``.
+        self._model = simulation.model.property_x.copy()
+        # Store volumes; required if a region is set to ``single``.
+        self._volumes = simulation.model.grid.cell_volumes.reshape(
+                self._model.shape, order='F')
+        # Set mesh.
+        self.setMesh(mesh)
+
+        # Create J, store and set it.
+        self.J = self.Jacobian(
+            simulation=self.simulation,
+            data2gimli=self.data2gimli,
+            data2emg3d=self.data2emg3d,
+            model2gimli=self.model2gimli,
+            model2emg3d=self.model2emg3d,
+        )
+        self.setJacobian(self.J)
+
+    def response(self, model):
+        """Create synthetic data for provided model."""
+
+        # Clean emg3d-simulation, so things are recomputed
+        self.simulation.clean('computed')
+
+        # Replace model
+        self.simulation.model.property_x = self.model2emg3d(model)
+
+        # Compute forward model and set initial residuals.
+        _ = self.simulation.misfit
+
+        # Return the responses as pyGIMLi array
+        return self.data2gimli(self.simulation.data.synthetic.data)
+
+    def createStartModel(self, dataVals=None):
+        """Returns the model from the provided simulation."""
+        return self.model2gimli(self.simulation.model.property_x)
+
+    def createJacobian(self, model):
+        """Dummy to prevent pyGIMLi from doing it the hard way."""
+
+    def data2gimli(self, data):
+        """Convert an emg3d data-xarray to a pyGIMLi data array."""
+        out = data[self.simulation.survey.isfinite]
+        if np.iscomplexobj(out):
+            return np.hstack((out.real, out.imag))
+        else:  # For standard deviation
+            return np.hstack((out, out))
+
+    def data2emg3d(self, data):
+        """Convert a pyGIMLi data array to an emg3d data-xarray."""
+        out = np.ones(
+                self.simulation.survey.shape,
+                dtype=self.simulation.data.observed.dtype
+        )*np.nan
+        data = np.asarray(data)
+        ind = data.size//2
+        out[self.simulation.survey.isfinite] = data[:ind] + 1j*data[ind:]
+        return out
+
+    def model2gimli(self, model):
+        """Convert an emg3d Model property to a pyGIMLi model array.
+
+        This function deals with the regions defined in pyGIMLi.
+        """
+
+        # Handle regions if the inversion model is smaller than the model.
+        # NOTE(review): len() of an n-D array is its first axis - confirm.
+        if len(model) != self.simulation.model.size:
+
+            out = np.empty(self.simulation.model.size)
+            i = 0
+
+            for n, v in self.regionProperties().items():
+                ni = self.markers == n
+                if v['background'] or v['fix']:
+                    ii = 0
+                elif v['single']:
+                    ii = 1
+                    out[i] = np.average(model[ni], weights=self._volumes[ni])
+                else:
+                    ii = np.sum(ni)
+                    out[i:i+ii] = model[ni]
+                i += ii
+
+            out = out[:i]
+
+        else:
+            out = np.empty(model.size)
+            out[self.mesh().cellMarkers()] = model.ravel('F')
+
+        return out
+
+    def model2emg3d(self, model):
+        """Convert a pyGIMLi model array to an emg3d Model property.
+
+        This function deals with the regions defined in pyGIMLi.
+        """
+
+        # If the inversion model is smaller than the model, we have to
+        # take care of the regions.
+        if len(model) != self.simulation.model.size:
+
+            out = np.empty(self.simulation.model.shape)
+            i = 0
+
+            for n, v in self.regionProperties().items():
+                ni = self.markers == n
+                if v['background']:
+                    ii = 0
+                    out[ni] = self._model[ni]
+                elif v['fix']:
+                    ii = 0
+                    out[ni] = v['startModel']
+                elif v['single']:
+                    ii = 1
+                    out[ni] = model[i]
+                else:
+                    ii = np.sum(ni)
+                    out[ni] = model[i:ii+i]
+                i += ii
+
+        else:
+            out = np.asarray(model[self.mesh().cellMarkers()]).reshape(
+                    self.simulation.model.shape, order='F')
+
+        return out
+
+    class Jacobian(pygimli.Matrix if pygimli else object):
+        """Return Jacobian operator for pyGIMLi(emg3d)."""
+
+        def __init__(self, simulation,
+                     data2gimli, data2emg3d, model2gimli, model2emg3d):
+            """Initiate a new Jacobian instance."""
+            super().__init__()
+            self.simulation = simulation
+            self.data2gimli = data2gimli
+            self.data2emg3d = data2emg3d
+            self.model2gimli = model2gimli
+            self.model2emg3d = model2emg3d
+
+        def cols(self):
+            """The number of columns corresponds to the model size."""
+            return self.simulation.model.size
+
+        def rows(self):
+            """The number of rows corresponds to 2x data-size (Re; Im)."""
+            return self.simulation.survey.count * 2
+
+        def mult(self, x):
+            """Multiply the Jacobian with a vector, Jm."""
+            jvec = self.simulation.jvec(vector=self.model2emg3d(x))
+            return self.data2gimli(jvec)
+
+        def transMult(self, x):
+            """Multiply the transposed Jacobian with a vector, Jᵀd = (dJᵀ)ᵀ."""
+            jtvec = self.simulation.jtvec(self.data2emg3d(x))
+            return self.model2gimli(jtvec)
+
+        def save(self, *args):
+            """There is no save for this pseudo-Jacobian."""
+
+
+@utils._requires('pygimli')
+class Inversion(pygimli.Inversion if pygimli else object):
+    """Thin wrapper, adding verbosity and taking care of data format."""
+
+    @utils._requires('pygimli')
+    def __init__(self, fop=None, inv=None, **kwargs):
+        """Initialize the parent class; set ``_post_step`` as ``_postStep``."""
+        super().__init__(fop=fop, inv=inv, **kwargs)
+        self._postStep = _post_step
+
+    def run(self, dataVals=None, errorVals=None, **kwargs):
+        """Run the inversion; data and errors default to simulation values."""
+
+        # Reset counter, start timer, print message.
+        _multiprocessing.process_map.count = 0
+        timer = utils.Timer()
+        pygimli.info(":: pyGIMLi(emg3d) START ::")
+
+        # Take the observed data from the simulation if not provided.
+        if dataVals is None:
+            dataVals = self.fop.data2gimli(
+                    self.fop.simulation.data.observed.data)
+
+        # Take the relative error from the survey if not provided.
+        if errorVals is None:
+            std_dev = self.fop.data2gimli(
+                    self.fop.simulation.survey.standard_deviation.data)
+            errorVals = std_dev / abs(dataVals)
+
+        # Run the inversion of the parent class with the prepared values.
+        out = super().run(dataVals=dataVals, errorVals=errorVals, **kwargs)
+
+        # Log the total runtime; the result of the parent run is returned.
+        pygimli.info(f":: pyGIMLi(emg3d) END   :: runtime = {timer.runtime}")
+
+        return out
+
+
+def _post_step(n, inv):
+    """Store the synthetic data of iteration ``n`` and log its misfit info."""
+
+    # Keep the current synthetic data in the survey as ``it{n}``.
+    sim = inv.fop.simulation
+    sim.survey.data[f"it{n}"] = sim.survey.data.synthetic
+    phi = inv.inv.getPhi()
+    if not hasattr(inv, 'lastphi'):
+        lastphi = ""
+    else:
+        lastphi = f"; Δϕ = {(1-phi/inv.lastphi)*100:.2f}%"
+    inv.lastphi = phi
+    pygimli.info(
+        f"{n}: "
+        f"χ² = {inv.inv.chi2():7.2f}; "
+        f"λ = {inv.inv.getLambda()}; "
+        f"{_multiprocessing.process_map.count:2d} kernel calls; "
+        f"ϕ = {inv.inv.getPhiD():.2f} + {inv.inv.getPhiM():.2f}·λ = "
+        f"{phi:.2f}{lastphi}"
+    )
+
+    # Reset the kernel-call counter for the next iteration.
+    _multiprocessing.process_map.count = 0
diff --git a/emg3d/meshes.py b/emg3d/meshes.py
index 66ed12e0..3ac7e0e6 100644
--- a/emg3d/meshes.py
+++ b/emg3d/meshes.py
@@ -180,6 +180,29 @@ def __eq__(self, mesh):
 
         return bool(equal)
 
+    def contains(self, mesh):
+        """Check if input mesh is a subset of this mesh.
+
+        The provided ``mesh`` can be either an emg3d or a discretize
+        TensorMesh instance.
+
+        """
+
+        # Check if both meshes are of the same class (compared by class name).
+        equal = mesh.__class__.__name__ == self.__class__.__name__
+
+        # Check dimensions.
+        if equal:
+            equal *= len(mesh.shape_cells) == len(self.shape_cells)
+
+        # Check that all nodes of ``mesh`` are also nodes of this mesh.
+        if equal:
+            equal *= np.isin(mesh.nodes_x, self.nodes_x).all()
+            equal *= np.isin(mesh.nodes_y, self.nodes_y).all()
+            equal *= np.isin(mesh.nodes_z, self.nodes_z).all()
+
+        return bool(equal)
+
     def copy(self):
         """Return a copy of the TensorMesh."""
         return self.from_dict(self.to_dict(True))
diff --git a/emg3d/surveys.py b/emg3d/surveys.py
index 1ffcf437..d8a5b59d 100644
--- a/emg3d/surveys.py
+++ b/emg3d/surveys.py
@@ -713,6 +713,17 @@ def _rec_types_coord(self, source):
         indices = self._irec_types
         return [tuple(self._rec_coord[source][ind].T) for ind in indices]
 
+    @property
+    def isfinite(self):
+        """Boolean mask of finite observed data; computed once, then cached."""
+        if not hasattr(self, '_isfinite'):
+            self._isfinite = np.isfinite(self.data.observed.data)
+        return self._isfinite
+
+    def finite_data(self, data='observed'):
+        """Return the values of ``data`` where the observed data are finite."""
+        return self.data[data].data[self.isfinite]
+
 
 def random_noise(standard_deviation, mean_noise=0.0, ntype='white_noise'):
     r"""Return random noise for given inputs.
diff --git a/emg3d/utils.py b/emg3d/utils.py
index 356707dc..59b734eb 100644
--- a/emg3d/utils.py
+++ b/emg3d/utils.py
@@ -42,6 +42,8 @@
 
 __all__ = ['Report', 'EMArray', 'Timer']
 
+OPTIONAL = ['xarray', 'discretize', 'h5py', 'matplotlib', 'tqdm', 'IPython']
+
 
 def __dir__():
     return __all__
@@ -158,8 +160,7 @@ def __init__(self, add_pckg=None, ncol=3, text_width=80, sort=False):
         core = ['numpy', 'scipy', 'numba', 'emg3d', 'empymod']
 
         # Optional packages.
-        optional = ['xarray', 'discretize', 'h5py', 'matplotlib',
-                    'tqdm', 'IPython']
+        optional = OPTIONAL
 
         super().__init__(additional=add_pckg, core=core, optional=optional,
                          ncol=ncol, text_width=text_width, sort=sort)
@@ -171,6 +172,7 @@ class Timer:
     def __init__(self):
         """Initiate timer with a performance counter."""
         self._t0 = perf_counter()
+        self._previous = 0
 
     def __repr__(self):
         """Simple representation."""
@@ -191,6 +193,13 @@ def runtime(self):
         """Return elapsed time as hh:mm:ss string."""
         return str(timedelta(seconds=np.round(self.elapsed)))
 
+    @property
+    def laptime(self):
+        """Return time since the last call (or start) as hh:mm:ss string."""
+        previous = self._previous
+        self._previous = self.elapsed
+        return str(timedelta(seconds=np.round(self._previous-previous)))
+
     @property
     def elapsed(self):
         """Return elapsed time in seconds."""
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 43002fea..f0bd1181 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -7,6 +7,7 @@ h5py
 xarray
 discretize
 matplotlib
+pygimli
 
 # SETUP RELATED
 setuptools_scm
diff --git a/setup.py b/setup.py
index d7a0d3ca..5cba52c5 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
     author_email="info@emsig.xyz",
     url="https://emsig.xyz",
     license="Apache-2.0",
-    packages=["emg3d", "emg3d.cli"],
+    packages=["emg3d", "emg3d.inversion", "emg3d.cli"],
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "License :: OSI Approved :: Apache Software License",
@@ -40,6 +40,7 @@
             "xarray",
             "discretize",
             "matplotlib",
+            "pygimli",
         ],
     },
     use_scm_version={
diff --git a/tests/test_meshes.py b/tests/test_meshes.py
index bbaa2fe1..701d7c8e 100644
--- a/tests/test_meshes.py
+++ b/tests/test_meshes.py
@@ -101,6 +101,14 @@ def test_TensorMesh(self):
 
         assert mesh.cell_volumes.sum() > 69046392
 
+        # Check contains.
+        tgrid = meshes.TensorMesh(
+            [cgrid.h[0][:3], cgrid.h[1][:5], cgrid.h[2]], origin=cgrid.origin)
+        assert emg3dgrid.contains(tgrid)
+        newgrid = meshes.TensorMesh(
+                [np.ones(3), np.ones(3), np.ones(3)], np.zeros(3))
+        assert not emg3dgrid.contains(newgrid)
+
     def test_TensorMesh_repr(self):
         # Create some dummy data
         grid = meshes.TensorMesh(
diff --git a/tests/test_time.py b/tests/test_time.py
index c5e382e0..a84f5900 100644
--- a/tests/test_time.py
+++ b/tests/test_time.py
@@ -28,7 +28,7 @@ def test_defaults(self, capsys):
         assert Fourier.signal == 0        # Impulse respons
         assert_allclose(times, Fourier.time, 0, 0)
         assert Fourier.verb == 3          # Verbose by default
-        assert 'key' in out
+        assert 'ey' in out
         assert 'Req. freq' in out
         assert 'Calc. freq' in out
         assert Fourier.freq_compute.min() >= fmin