From 152e25635c165d62590f1b72091598d593790955 Mon Sep 17 00:00:00 2001
From: Feda Curic <feda.curic@gmail.com>
Date: Mon, 4 Dec 2023 09:30:19 +0100
Subject: [PATCH] Add GitHub workflow for benchmarking

---
 .github/workflows/benchmark.yml               |  49 ++++++
 pyproject.toml                                |   1 +
 .../analysis/test_adaptive_localization.py    |   4 +-
 tests/unit_tests/analysis/test_es_update.py   | 162 +++++++++++++++++-
 4 files changed, 214 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/benchmark.yml

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 00000000000..85b77edbced
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,49 @@
+name: Benchmark Adaptive Localization
+on:
+  push:
+    branches:
+      - benchmark
+
+permissions:
+    # deployments permission to deploy GitHub pages website
+    deployments: write
+    # contents permission to update benchmark contents in gh-pages branch
+    contents: write
+
+jobs:
+  benchmark:
+    name: Run pytest-benchmark benchmark example
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          submodules: true
+          lfs: true
+
+      - uses: actions/setup-python@v4
+        id: setup_python
+        with:
+          python-version: "3.10"
+          cache: "pip"
+          cache-dependency-path: |
+            setup.py
+            pyproject.toml
+
+      - name: Install ert with dev-deps
+        run: |
+          pip install ".[dev]"
+
+      - name: Run benchmark
+        run: |
+          pytest tests/unit_tests/analysis/test_es_update.py::test_and_benchmark_adaptive_localization_with_fields --benchmark-json output.json
+
+      - name: Store benchmark result
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: Python Benchmark with pytest-benchmark
+          tool: 'pytest'
+          output-file-path: output.json
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          auto-push: true
diff --git a/pyproject.toml b/pyproject.toml
index bd8d65ab3e6..5dd8110170c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -118,6 +118,7 @@ dev = [
     "sphinxcontrib-plantuml",
     "sphinxcontrib.datatemplates",
     "testpath",
+    "gstools",
 ]
 style = [
     "cmake-format",
diff --git a/tests/unit_tests/analysis/test_adaptive_localization.py b/tests/unit_tests/analysis/test_adaptive_localization.py
index 62dbcde8151..c0204cad3ce 100644
--- a/tests/unit_tests/analysis/test_adaptive_localization.py
+++ b/tests/unit_tests/analysis/test_adaptive_localization.py
@@ -1,4 +1,6 @@
+import uuid
 from argparse import ArgumentParser
+from functools import partial
 from textwrap import dedent
 
 import numpy as np
@@ -16,7 +18,7 @@
 def run_cli_ES_with_case(poly_config):
     config_name = poly_config.split(".")[0]
     prior_sample_name = "prior_sample" + "_" + config_name
-    posterior_sample_name = "posterior_sample" + "_" + config_name
+    posterior_sample_name = str(uuid.uuid1())
     parser = ArgumentParser(prog="test_main")
     parsed = ert_parser(
         parser,
diff --git a/tests/unit_tests/analysis/test_es_update.py b/tests/unit_tests/analysis/test_es_update.py
index 3cf7a9679af..8fb08e662ec 100644
--- a/tests/unit_tests/analysis/test_es_update.py
+++ b/tests/unit_tests/analysis/test_es_update.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 import xarray as xr
+import xtgeo
 from iterative_ensemble_smoother import SIES
 
 from ert import LibresFacade
@@ -484,7 +485,6 @@ def test_that_surfaces_retain_their_order_when_loaded_and_saved_by_ert(copy_case
     (row-major / column-major) when working with surfaces.
     """
     rng = np.random.default_rng()
-    import xtgeo
     from scipy.ndimage import gaussian_filter
 
     def sample_prior(nx, ny):
@@ -608,6 +608,166 @@ def _load_parameters(source_ens, iens_active_index, param_groups):
         assert np.trace(np.cov(posterior[prior_name])) < np.trace(np.cov(prior_data))
 
 
+def test_and_benchmark_adaptive_localization_with_fields(
+    storage, tmp_path, monkeypatch, benchmark
+):
+    from functools import partial
+
+    import gstools as gs
+    import scipy as sp
+
+    from ert.config import Field
+    from ert.field_utils import Shape
+
+    monkeypatch.chdir(tmp_path)
+
+    rng = np.random.default_rng(42)
+
+    # Number of grid-cells in x and y direction
+    nx = 40
+    # Dimensionality of the problem
+    num_parameters = nx * nx
+    num_observations = 50
+    num_ensemble = 25
+
+    diagonal = np.ones(min(num_parameters, num_observations))
+
+    # Create a tridiagonal matrix (easiest with scipy)
+    A = sp.sparse.diags(
+        [diagonal, diagonal, diagonal],
+        offsets=[-1, 0, 1],
+        shape=(num_observations, num_parameters),
+        dtype=float,
+    ).toarray()
+
+    # We add some noise that is insignificant compared to the
+    # actual local structure in the forward model
+    A = A + rng.standard_normal(size=A.shape) * 0.01
+
+    def g(X):
+        """Apply the forward model."""
+        return A @ X
+
+    model = gs.Exponential(dim=2, var=2, len_scale=8)
+
+    fields = []
+    seed = gs.random.MasterRNG(20170519)
+    for _ in range(num_ensemble):
+        srf = gs.SRF(model, seed=seed())
+        field = srf.structured([np.arange(nx), np.arange(nx)])
+        fields.append(field)
+
+    X = np.vstack([field.flatten() for field in fields]).T
+    Y = g(X)
+
+    # Create observations: obs = g(x) + N(0, 1)
+    x_true = np.linspace(-1, 1, num=num_parameters)
+    observation_noise = rng.standard_normal(size=num_observations)  # N(0, 1) noise
+    observations = g(x_true) + observation_noise
+
+    shape = Shape(nx, nx, 1)
+    grid = xtgeo.create_box_grid(dimension=(shape.nx, shape.ny, shape.nz))
+    grid.to_file("MY_EGRID.EGRID", "egrid")
+
+    resp = GenDataConfig(name="RESPONSE")
+    obs = xr.Dataset(
+        {
+            "observations": (
+                ["report_step", "index"],
+                observations.reshape((1, num_observations)),
+            ),
+            "std": (
+                ["report_step", "index"],
+                observation_noise.reshape(1, num_observations),
+            ),
+        },
+        coords={"report_step": [0], "index": np.arange(len(observations))},
+        attrs={"response": "RESPONSE"},
+    )
+
+    param_group = "PARAM_FIELD"
+    update_config = UpdateConfiguration(
+        update_steps=[
+            UpdateStep(
+                name="ALL_ACTIVE",
+                observations=["OBSERVATION"],
+                parameters=[param_group],
+            )
+        ]
+    )
+
+    config = Field.from_config_list(
+        "MY_EGRID.EGRID",
+        shape,
+        [
+            param_group,
+            param_group,
+            "param.GRDECL",
+            "INIT_FILES:param_%d.GRDECL",
+            "FORWARD_INIT:False",
+        ],
+    )
+
+    experiment = storage.create_experiment(
+        parameters=[config],
+        responses=[resp],
+        observations={"OBSERVATION": obs},
+    )
+
+    prior = storage.create_ensemble(
+        experiment,
+        ensemble_size=num_ensemble,
+        iteration=0,
+        name="prior",
+    )
+
+    for iens in range(prior.ensemble_size):
+        prior.state_map[iens] = RealizationStorageState.HAS_DATA
+        prior.save_parameters(
+            param_group,
+            iens,
+            xr.Dataset(
+                {
+                    "values": xr.DataArray(fields[iens], dims=("x", "y")),
+                }
+            ),
+        )
+
+        prior.save_response(
+            "RESPONSE",
+            xr.Dataset(
+                {"values": (["report_step", "index"], [Y[:, iens]])},
+                coords={"index": range(len(Y[:, iens])), "report_step": [0]},
+            ),
+            iens,
+        )
+
+    posterior_ens = storage.create_ensemble(
+        prior.experiment_id,
+        ensemble_size=prior.ensemble_size,
+        iteration=1,
+        name="posterior",
+        prior_ensemble=prior,
+    )
+
+    smoother_update_run = partial(
+        smoother_update,
+        prior,
+        posterior_ens,
+        "id",
+        update_config,
+        UpdateSettings(),
+        ESSettings(localization=True),
+    )
+    benchmark(smoother_update_run)
+
+    prior_da = prior.load_parameters(param_group, range(num_ensemble))
+    posterior_da = posterior_ens.load_parameters(param_group, range(num_ensemble))
+    # Because of adaptive localization, not all parameters should be updated.
+    # This would fail if with global updates.
+    assert np.isclose(prior_da, posterior_da).sum() > 0
+
+
 @pytest.mark.integration_test
 def test_gen_data_obs_data_mismatch(storage, uniform_parameter, update_config):
     resp = GenDataConfig(name="RESPONSE")