From 868edad13188fd9fd91e3d5aa11fa0ae2d2ef310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dieter=20Werthm=C3=BCller?= Date: Tue, 23 Jan 2024 21:33:15 +0100 Subject: [PATCH] pyGIMLi(emg3d) --- docs/api/index.rst | 1 + docs/api/inversion/index.rst | 8 + docs/api/inversion/pygimli.rst | 6 + docs/conf.py | 1 + docs/manual/installation.rst | 3 +- emg3d/_multiprocessing.py | 5 + emg3d/inversion/__init__.py | 15 ++ emg3d/inversion/pygimli.py | 315 +++++++++++++++++++++++++++++++++ emg3d/meshes.py | 23 +++ emg3d/surveys.py | 11 ++ emg3d/utils.py | 13 +- requirements-dev.txt | 1 + setup.py | 3 +- tests/test_meshes.py | 8 + tests/test_time.py | 2 +- 15 files changed, 410 insertions(+), 5 deletions(-) create mode 100644 docs/api/inversion/index.rst create mode 100644 docs/api/inversion/pygimli.rst create mode 100644 emg3d/inversion/__init__.py create mode 100644 emg3d/inversion/pygimli.py diff --git a/docs/api/index.rst b/docs/api/index.rst index 4c03f995..adb52fe1 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -27,6 +27,7 @@ API reference surveys time utils + inversion/index .. grid:: 1 diff --git a/docs/api/inversion/index.rst b/docs/api/inversion/index.rst new file mode 100644 index 00000000..2d34ce4f --- /dev/null +++ b/docs/api/inversion/index.rst @@ -0,0 +1,8 @@ +Inversion +######### + +.. toctree:: + :maxdepth: 2 + :hidden: + + pygimli diff --git a/docs/api/inversion/pygimli.rst b/docs/api/inversion/pygimli.rst new file mode 100644 index 00000000..937341ff --- /dev/null +++ b/docs/api/inversion/pygimli.rst @@ -0,0 +1,6 @@ +pyGIMLi(emg3d) +============== + +.. 
automodapi:: emg3d.inversion.pygimli + :no-inheritance-diagram: + :no-heading: diff --git a/docs/conf.py b/docs/conf.py index 8c4c3a74..8252e5dd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -39,6 +39,7 @@ "empymod": ("https://empymod.emsig.xyz/en/stable", None), "xarray": ("https://docs.xarray.dev/en/stable", None), "numba": ("https://numba.readthedocs.io/en/stable", None), + "pygimli": ("https://www.pygimli.org", None), } # ==== 2. General Settings ==== diff --git a/docs/manual/installation.rst b/docs/manual/installation.rst index b620bf82..1cd18794 100644 --- a/docs/manual/installation.rst +++ b/docs/manual/installation.rst @@ -25,13 +25,14 @@ namely: - ``matplotlib``: To use the plotting utilities within ``discretize``. - ``h5py``: Save and load data in the HDF5 format. - ``tqdm``: For nice progress bars when computing many sources and frequencies. +- ``pygimli``: To run inversions using ``pygimli``, pyGIMLi(emg3d). All soft dependencies are also available both on ``conda-forge`` and ``pip``. To get therefore the complete experience use one of the following options: .. code-block:: console - conda install -c conda-forge emg3d discretize xarray matplotlib h5py tqdm + conda install -c conda-forge emg3d discretize xarray matplotlib h5py tqdm pygimli or via ``pip``: diff --git a/emg3d/_multiprocessing.py b/emg3d/_multiprocessing.py index 1fb095e3..f6eb5aa8 100644 --- a/emg3d/_multiprocessing.py +++ b/emg3d/_multiprocessing.py @@ -43,6 +43,7 @@ def process_map(fn, *iterables, max_workers, **kwargs): execution. """ + process_map.count += 1 # Parallel if max_workers > 1 and tqdm is None: @@ -64,6 +65,10 @@ def process_map(fn, *iterables, max_workers, **kwargs): iterable=map(fn, *iterables), total=len(iterables[0]), **kwargs)) +# Counter for processing map (used, e.g., for inversions). +process_map.count = 0 + + def solve(inp): """Thin wrapper of `solve` or `solve_source` for a `process_map`. 
diff --git a/emg3d/inversion/__init__.py b/emg3d/inversion/__init__.py new file mode 100644 index 00000000..064daa57 --- /dev/null +++ b/emg3d/inversion/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 The emsig community. +# +# This file is part of emg3d. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy +# of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. diff --git a/emg3d/inversion/pygimli.py b/emg3d/inversion/pygimli.py new file mode 100644 index 00000000..c4f481fb --- /dev/null +++ b/emg3d/inversion/pygimli.py @@ -0,0 +1,315 @@ +# Copyright 2024 The emsig community. +# +# This file is part of emg3d. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy +# of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +import numpy as np + +try: + import pygimli +except ImportError: + pygimli = None + +from emg3d import utils, _multiprocessing + +__all__ = ['Kernel', 'Inversion'] + +# Add pygimli and pgcore to the emg3d.Report(). 
+utils.OPTIONAL.extend(['pygimli', 'pgcore']) + + +def __dir__(): + return __all__ + + +class Kernel(pygimli.Modelling if pygimli else object): + """Create a forward operator of emg3d to use within a pyGIMLi inversion. + + + Parameters + ---------- + simulation : Simulation + The simulation; a :class:`emg3d.simulations.Simulation` instance. + + markers : ndarray of ints, default: None + An ndarray of ints of the same shape as the model. All cells with the + same number belong to the same region with this number, which can + subsequently be defined through + :func:`pygimli.frameworks.modelling.Modelling.setRegionProperties`. + + pgthreads : int, default: 2 + Number of threads for pyGIMLi (sets ``OPENBLAS_NUM_THREADS``). This is + by default a small number, as the important parallelization in + pyGIMLi(emg3d) happens over sources and frequencies in emg3d. This is + controlled in the parameter ``max_workers`` when creating the + simulation. + + """ + + @utils._requires('pygimli') + def __init__(self, simulation, markers=None, pgthreads=2): + """Initialize a pyGIMLi(emg3d)-wrapper.""" + super().__init__() + + # Set pyGIMLi threads. + pygimli.setThreadCount(pgthreads) + + # Check current limitations. + checks = { + 'case': (simulation.model.case, 'isotropic'), + 'mapping': (simulation.model.map.name, 'Conductivity'), + } + for k, v in checks.items(): + if v[0] != v[1]: + msg = f"pyGIMLi(emg3d) is not implemented for {v[0]} {k}." + raise NotImplementedError(msg) + + # Store the simulation. + self.simulation = simulation + + # Translate discretize TensorMesh to pygimli-Grid. + mesh = pygimli.createGrid( + x=simulation.model.grid.nodes_x, + y=simulation.model.grid.nodes_y, + z=simulation.model.grid.nodes_z, + ) + + # Set markers. + if markers is not None: + mesh.setCellMarkers(markers.ravel('F')) + self.markers = markers + else: + self.markers = np.zeros(simulation.model.shape, dtype=int) + # Store original props; required if a region is set to ``background``. 
+ self._model = simulation.model.property_x.copy() + # Store volumes; required if a region is set to ``single``. + self._volumes = simulation.model.grid.cell_volumes.reshape( + self._model.shape, order='F') + # Set mesh. + self.setMesh(mesh) + + # Create J, store and set it. + self.J = self.Jacobian( + simulation=self.simulation, + data2gimli=self.data2gimli, + data2emg3d=self.data2emg3d, + model2gimli=self.model2gimli, + model2emg3d=self.model2emg3d, + ) + self.setJacobian(self.J) + + def response(self, model): + """Create synthetic data for provided model.""" + + # Clean emg3d-simulation, so things are recomputed + self.simulation.clean('computed') + + # Replace model + self.simulation.model.property_x = self.model2emg3d(model) + + # Compute forward model and set initial residuals. + _ = self.simulation.misfit + + # Return the responses as pyGIMLi array + return self.data2gimli(self.simulation.data.synthetic.data) + + def createStartModel(self, dataVals=None): + """Returns the model from the provided simulation.""" + return self.model2gimli(self.simulation.model.property_x) + + def createJacobian(self, model): + """Dummy to prevent pyGIMLi from doing it the hard way.""" + + def data2gimli(self, data): + """Convert an emg3d data-xarray to a pyGIMLi data array.""" + out = data[self.simulation.survey.isfinite] + if np.iscomplexobj(out): + return np.hstack((out.real, out.imag)) + else: # For standard deviation + return np.hstack((out, out)) + + def data2emg3d(self, data): + """Convert a pyGIMLi data array to an emg3d data-xarray.""" + out = np.ones( + self.simulation.survey.shape, + dtype=self.simulation.data.observed.dtype + )*np.nan + data = np.asarray(data) + ind = data.size//2 + out[self.simulation.survey.isfinite] = data[:ind] + 1j*data[ind:] + return out + + def model2gimli(self, model): + """Convert an emg3d Model property to a pyGIMLi model array. + + This function deals with the regions defined in pyGIMLi. 
+ """ + + # If the inversion model is smaller than the model, we have to + # take care of the regions. + if len(model) != self.simulation.model.size: + + out = np.empty(self.simulation.model.size) + i = 0 + + for n, v in self.regionProperties().items(): + ni = self.markers == n + if v['background'] or v['fix']: + ii = 0 + elif v['single']: + ii = 1 + out[i] = np.average(model[ni], weights=self._volumes[ni]) + else: + ii = np.sum(ni) + out[i:i+ii] = model[ni] + i += ii + + out = out[:i] + + else: + out = np.empty(model.size) + out[self.mesh().cellMarkers()] = model.ravel('F') + + return out + + def model2emg3d(self, model): + """Convert a pyGIMLi model array to an emg3d Model property. + + This function deals with the regions defined in pyGIMLi. + """ + + # If the inversion model is smaller than the model, we have to + # take care of the regions. + if len(model) != self.simulation.model.size: + + out = np.empty(self.simulation.model.shape) + i = 0 + + for n, v in self.regionProperties().items(): + ni = self.markers == n + if v['background']: + ii = 0 + out[ni] = self._model[ni] + elif v['fix']: + ii = 0 + out[ni] = v['startModel'] + elif v['single']: + ii = 1 + out[ni] = model[i] + else: + ii = np.sum(ni) + out[ni] = model[i:ii+i] + i += ii + + else: + out = np.asarray(model[self.mesh().cellMarkers()]).reshape( + self.simulation.model.shape, order='F') + + return out + + class Jacobian(pygimli.Matrix if pygimli else object): + """Return Jacobian operator for pyGIMLi(emg3d).""" + + def __init__(self, simulation, + data2gimli, data2emg3d, model2gimli, model2emg3d): + """Initiate a new Jacobian instance.""" + super().__init__() + self.simulation = simulation + self.data2gimli = data2gimli + self.data2emg3d = data2emg3d + self.model2gimli = model2gimli + self.model2emg3d = model2emg3d + + def cols(self): + """The number of columns corresponds to the model size.""" + return self.simulation.model.size + + def rows(self): + """The number of rows corresponds to 2x data-size 
(Re; Im).""" + return self.simulation.survey.count * 2 + + def mult(self, x): + """Multiply the Jacobian with a vector, Jm.""" + jvec = self.simulation.jvec(vector=self.model2emg3d(x)) + return self.data2gimli(jvec) + + def transMult(self, x): + """Multiply Jacobian transposed with a vector, Jᵀd = (dJᵀ)ᵀ.""" + jtvec = self.simulation.jtvec(self.data2emg3d(x)) + return self.model2gimli(jtvec) + + def save(self, *args): + """There is no save for this pseudo-Jacobian.""" + + +@utils._requires('pygimli') +class Inversion(pygimli.Inversion if pygimli else object): + """Thin wrapper, adding verbosity and taking care of data format.""" + + @utils._requires('pygimli') + def __init__(self, fop=None, inv=None, **kwargs): + """Initialize an Inversion instance.""" + super().__init__(fop=fop, inv=inv, **kwargs) + self._postStep = _post_step + + def run(self, dataVals=None, errorVals=None, **kwargs): + """Run the inversion.""" + + # Reset counter, start timer, print message. + _multiprocessing.process_map.count = 0 + timer = utils.Timer() + pygimli.info(":: pyGIMLi(emg3d) START ::") + + # Take data from the survey if not provided. + if dataVals is None: + dataVals = self.fop.data2gimli( + self.fop.simulation.data.observed.data) + + # Take the error from the survey if not provided. 
+ if errorVals is None: + std_dev = self.fop.data2gimli( + self.fop.simulation.survey.standard_deviation.data) + errorVals = std_dev / abs(dataVals) + + # Run the inversion + out = super().run(dataVals=dataVals, errorVals=errorVals, **kwargs) + + # Print passed time and exit + pygimli.info(f":: pyGIMLi(emg3d) END :: runtime = {timer.runtime}") + + return out + + +def _post_step(n, inv): + """Print some values for each iteration.""" + + # Store the synthetic data of this iteration in the survey; print info + sim = inv.fop.simulation + sim.survey.data[f"it{n}"] = sim.survey.data.synthetic + phi = inv.inv.getPhi() + if not hasattr(inv, 'lastphi'): + lastphi = "" + else: + lastphi = f"; Δϕ = {(1-phi/inv.lastphi)*100:.2f}%" + inv.lastphi = phi + pygimli.info( + f"{n}: " + f"χ² = {inv.inv.chi2():7.2f}; " + f"λ = {inv.inv.getLambda()}; " + f"{_multiprocessing.process_map.count:2d} kernel calls; " + f"ϕ = {inv.inv.getPhiD():.2f} + {inv.inv.getPhiM():.2f}·λ = " + f"{phi:.2f}{lastphi}" + ) + + # Reset counter + _multiprocessing.process_map.count = 0 diff --git a/emg3d/meshes.py b/emg3d/meshes.py index 66ed12e0..3ac7e0e6 100644 --- a/emg3d/meshes.py +++ b/emg3d/meshes.py @@ -180,6 +180,29 @@ def __eq__(self, mesh): return bool(equal) + def contains(self, mesh): + """Check if input mesh is a subset of this mesh. + + The provided ``mesh`` can be either an emg3d or a discretize + TensorMesh instance. + + """ + + # Check if mesh has the same class name (emg3d/discretize TensorMesh). + equal = mesh.__class__.__name__ == self.__class__.__name__ + + # Check dimensions. + if equal: + equal *= len(mesh.shape_cells) == len(self.shape_cells) + + # Check that all nodes of mesh are also nodes of this mesh. 
+ if equal: + equal *= np.isin(mesh.nodes_x, self.nodes_x).all() + equal *= np.isin(mesh.nodes_y, self.nodes_y).all() + equal *= np.isin(mesh.nodes_z, self.nodes_z).all() + + return bool(equal) + def copy(self): """Return a copy of the TensorMesh.""" return self.from_dict(self.to_dict(True)) diff --git a/emg3d/surveys.py b/emg3d/surveys.py index 1ffcf437..d8a5b59d 100644 --- a/emg3d/surveys.py +++ b/emg3d/surveys.py @@ -713,6 +713,17 @@ def _rec_types_coord(self, source): indices = self._irec_types return [tuple(self._rec_coord[source][ind].T) for ind in indices] + @property + def isfinite(self): + """Boolean mask of the finite observed data (cached).""" + if not hasattr(self, '_isfinite'): + self._isfinite = np.isfinite(self.data.observed.data) + return self._isfinite + + def finite_data(self, data='observed'): + """Return values of ``data`` where the observed data are finite.""" + return self.data[data].data[self.isfinite] + def random_noise(standard_deviation, mean_noise=0.0, ntype='white_noise'): r"""Return random noise for given inputs. diff --git a/emg3d/utils.py b/emg3d/utils.py index 356707dc..59b734eb 100644 --- a/emg3d/utils.py +++ b/emg3d/utils.py @@ -42,6 +42,8 @@ __all__ = ['Report', 'EMArray', 'Timer'] +OPTIONAL = ['xarray', 'discretize', 'h5py', 'matplotlib', 'tqdm', 'IPython'] + def __dir__(): return __all__ @@ -158,8 +160,7 @@ def __init__(self, add_pckg=None, ncol=3, text_width=80, sort=False): core = ['numpy', 'scipy', 'numba', 'emg3d', 'empymod'] # Optional packages. 
- optional = ['xarray', 'discretize', 'h5py', 'matplotlib', - 'tqdm', 'IPython'] + optional = OPTIONAL super().__init__(additional=add_pckg, core=core, optional=optional, ncol=ncol, text_width=text_width, sort=sort) @@ -171,6 +172,7 @@ class Timer: def __init__(self): """Initiate timer with a performance counter.""" self._t0 = perf_counter() + self._previous = 0 def __repr__(self): """Simple representation.""" @@ -191,6 +193,13 @@ def runtime(self): """Return elapsed time as hh:mm:ss string.""" return str(timedelta(seconds=np.round(self.elapsed))) + @property + def laptime(self): + """Return time of this lap as hh:mm:ss string.""" + previous = self._previous + self._previous = self.elapsed + return str(timedelta(seconds=np.round(self._previous-previous))) + @property def elapsed(self): """Return elapsed time in seconds.""" diff --git a/requirements-dev.txt b/requirements-dev.txt index 43002fea..f0bd1181 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -7,6 +7,7 @@ h5py xarray discretize matplotlib +pygimli # SETUP RELATED setuptools_scm diff --git a/setup.py b/setup.py index d7a0d3ca..5cba52c5 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ author_email="info@emsig.xyz", url="https://emsig.xyz", license="Apache-2.0", - packages=["emg3d", "emg3d.cli"], + packages=["emg3d", "emg3d.inversion", "emg3d.cli"], classifiers=[ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: Apache Software License", @@ -40,6 +40,7 @@ "xarray", "discretize", "matplotlib", + "pygimli", ], }, use_scm_version={ diff --git a/tests/test_meshes.py b/tests/test_meshes.py index bbaa2fe1..701d7c8e 100644 --- a/tests/test_meshes.py +++ b/tests/test_meshes.py @@ -101,6 +101,14 @@ def test_TensorMesh(self): assert mesh.cell_volumes.sum() > 69046392 + # Check contains. 
+ tgrid = meshes.TensorMesh( + [cgrid.h[0][:3], cgrid.h[1][:5], cgrid.h[2]], origin=cgrid.origin) + assert emg3dgrid.contains(tgrid) + newgrid = meshes.TensorMesh( + [np.ones(3), np.ones(3), np.ones(3)], np.zeros(3)) + assert not emg3dgrid.contains(newgrid) + def test_TensorMesh_repr(self): # Create some dummy data grid = meshes.TensorMesh( diff --git a/tests/test_time.py b/tests/test_time.py index c5e382e0..a84f5900 100644 --- a/tests/test_time.py +++ b/tests/test_time.py @@ -28,7 +28,7 @@ def test_defaults(self, capsys): assert Fourier.signal == 0 # Impulse respons assert_allclose(times, Fourier.time, 0, 0) assert Fourier.verb == 3 # Verbose by default - assert 'key' in out + assert 'ey' in out assert 'Req. freq' in out assert 'Calc. freq' in out assert Fourier.freq_compute.min() >= fmin