From baebba25f5d7de69bcf54554ca99fd54bdbde385 Mon Sep 17 00:00:00 2001
From: Eivind Jahren <ejah@equinor.com>
Date: Mon, 4 Sep 2023 15:43:10 +0200
Subject: [PATCH 1/2] Go back to C implementation of summary loading

This removes the following unwanted behaviors of the current
implementation:

* The call to _init_numpy_vector_interp changes some values, probably due to interpolation.
* Iterating over ecl_sum uses both ecl_smspec_node->key1 and ecl_smspec_node->key2, which
  results in duplicate entries for summary keyword types that have both keys.
* DATE is included in the values loaded

However, the following behavior is kept from the most recent implementation:

* All report steps are loaded, not just those in the time map.
---
 src/clib/lib/CMakeLists.txt                   |  1 +
 src/clib/lib/enkf/read_summary.cpp            | 50 +++++++++++++++++++
 src/ert/config/summary_config.py              | 37 ++++----------
 .../0/summary_collector_1.csv                 | 10 ++--
 tests/unit_tests/test_libres_facade.py        |  2 +-
 5 files changed, 68 insertions(+), 32 deletions(-)
 create mode 100644 src/clib/lib/enkf/read_summary.cpp
diff --git a/src/clib/lib/CMakeLists.txt b/src/clib/lib/CMakeLists.txt
index c82156d631b..63092675504 100644
--- a/src/clib/lib/CMakeLists.txt
+++ b/src/clib/lib/CMakeLists.txt
@@ -15,6 +15,7 @@ pybind11_add_module(
   job_queue/torque_driver.cpp
   ${lsb}
   enkf/enkf_obs.cpp
+  enkf/read_summary.cpp
   enkf/row_scaling.cpp)
 
 # -----------------------------------------------------------------
diff --git a/src/clib/lib/enkf/read_summary.cpp b/src/clib/lib/enkf/read_summary.cpp
new file mode 100644
index 00000000000..9cbdf018c3f
--- /dev/null
+++ b/src/clib/lib/enkf/read_summary.cpp
@@ -0,0 +1,50 @@
+#include <ert/ecl/ecl_smspec.hpp>
+#include <ert/ecl/ecl_sum.hpp>
+#include <ert/python.hpp>
+#include <fnmatch.h>
+#include <string>
+#include <tuple>
+#include <vector>
+
+static bool matches(std::vector<std::string> patterns, std::string key) {
+    bool has_key = false;
+    for (auto pattern : patterns) {
+        if (fnmatch(pattern.c_str(), key.c_str(), 0) == 0) {
+            has_key = true;
+            break;
+        }
+    }
+    return has_key;
+}
+ERT_CLIB_SUBMODULE("_read_summary", m) {
+    m.def("read_summary",
+          [](Cwrap<ecl_sum_type> summary, std::vector<std::string> keys) {
+              const int step2 = ecl_sum_get_last_report_step(summary);
+              const ecl_smspec_type *smspec = ecl_sum_get_smspec(summary);
+              std::vector<std::pair<std::string, std::vector<double>>>
+                  summary_vectors{};
+
+              for (int i = 0; i < ecl_smspec_num_nodes(smspec); i++) {
+                  const ecl::smspec_node &smspec_node =
+                      ecl_smspec_iget_node_w_node_index(smspec, i);
+                  const char *key = smspec_node.get_gen_key1();
+                  if (matches(keys, key)) {
+                      int start = ecl_sum_get_first_report_step(summary);
+                      int end = ecl_sum_get_last_report_step(summary);
+                      std::vector<double> data{};
+                      int key_index =
+                          ecl_sum_get_general_var_params_index(summary, key);
+                      for (int tstep = start; tstep <= end; tstep++) {
+                          if (ecl_sum_has_report_step(summary, tstep)) {
+                              int time_index =
+                                  ecl_sum_iget_report_end(summary, tstep);
+                              data.push_back(
+                                  ecl_sum_iget(summary, time_index, key_index));
+                          }
+                      }
+                      summary_vectors.emplace_back(key, data);
+                  }
+              }
+              return summary_vectors;
+          });
+}
diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py
index 486657fe2a3..1c4ce5ebae9 100644
--- a/src/ert/config/summary_config.py
+++ b/src/ert/config/summary_config.py
@@ -1,19 +1,18 @@
 from __future__ import annotations
 
-import ctypes
 import logging
 from dataclasses import dataclass
-from fnmatch import fnmatch
 from typing import TYPE_CHECKING
 
-import numpy as np
 import xarray as xr
 from ecl.summary import EclSum
 
+from ert._clib._read_summary import read_summary  # pylint: disable=import-error
+
 from .response_config import ResponseConfig
 
 if TYPE_CHECKING:
-    from typing import Any, List, Optional
+    from typing import List, Optional
 
 
 logger = logging.getLogger(__name__)
@@ -57,10 +56,8 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset:
                 f"file from: {run_path}/{filename}.UNSMRY",
             ) from e
 
-        data = []
-        keys = []
-        time_map = summary.alloc_time_vector(True)
-        axis = [t.datetime() for t in time_map]
+        c_time = summary.alloc_time_vector(True)
+        time_map = [t.datetime() for t in c_time]
         if self.refcase:
             existing_time_map = self.refcase.alloc_time_vector(True)
             missing = []
@@ -80,25 +77,13 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset:
                     f"from: {run_path}/{filename}.UNSMRY"
                 )
 
-        user_summary_keys = set(self.keys)
-        for key in summary:
-            if not self._should_load_summary_key(key, user_summary_keys):
-                continue
-            keys.append(key)
-
-            np_vector = np.zeros(len(time_map))
-            summary._init_numpy_vector_interp(
-                key,
-                time_map,
-                np_vector.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
-            )
-            data.append(np_vector)
+        summary_data = read_summary(summary, self.keys)
+        summary_data.sort(key=lambda x: x[0])
+        data = [d for _, d in summary_data]
+        keys = [k for k, _ in summary_data]
 
         ds = xr.Dataset(
             {"values": (["name", "time"], data)},
-            coords={"time": axis, "name": keys},
+            coords={"time": time_map, "name": keys},
         )
-        return ds.drop_duplicates(["time", "name"])
-
-    def _should_load_summary_key(self, data_key: Any, user_set_keys: set[str]) -> bool:
-        return any(fnmatch(data_key, key) for key in user_set_keys)
+        return ds.drop_duplicates(["time"])
diff --git a/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv b/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv
index e7f6e4196d2..14ef7548276 100644
--- a/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv
+++ b/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv
@@ -1,5 +1,5 @@
-Realization,Date,"BPR:1,3,8","BPR:5,5,5",BPR:445,BPR:721,FGIP,FGIPH,FGOR,FGORH,FGPR,FGPRH,FGPT,FGPTH,FOIP,FOIPH,FOPR,FOPRH,FOPT,FOPTH,FWCT,FWCTH,FWIP,FWIPH,FWPR,FWPRH,FWPT,FWPTH,WGOR:OP1,WGOR:OP2,WGORH:OP1,WGORH:OP2,WGPR:OP1,WGPR:OP2,WGPRH:OP1,WGPRH:OP2,WOPR:OP1,WOPR:OP2,WOPRH:OP1,WOPRH:OP2,WWCT:OP1,WWCT:OP2,WWCTH:OP1,WWCTH:OP2,WWPR:OP1,WWPR:OP2,WWPRH:OP1,WWPRH:OP2
-0,2010-01-10,0.9996,0.9996,0.9996,0.9996,2499.4473,2499.9956,1.0,1.0,0.0557,0.0012,0.5528,0.0044,1999.4462,1999.994,0.056,0.0017,0.5538,0.0059,0.1776,0.0002,2249.4492,2249.9998,0.0551,0.0,0.5507,0.0001,1.0,1.0,1.0,1.0,0.0557,0.0,0.0006,0.0006,0.056,0.0,0.0008,0.0008,0.3552,0.0,0.0001,0.0002,0.0551,0.0,0.0,0.0
-1,2010-01-10,0.9996,0.9996,0.9996,0.9996,2499.8467,2499.9956,1.0,1.0,0.0157,0.0012,0.1533,0.0044,1999.8458,1999.994,0.016,0.0017,0.1542,0.0059,0.0657,0.0002,2249.8489,2249.9998,0.0151,0.0,0.1512,0.0001,1.0,1.0,1.0,1.0,0.0,0.0157,0.0006,0.0006,0.0,0.016,0.0008,0.0008,0.0,0.1314,0.0001,0.0002,0.0,0.0151,0.0,0.0
-2,2010-01-10,0.9996,0.9996,0.9996,0.9996,2500.0,2499.9956,1.0,1.0,0.0,0.0012,0.0,0.0044,2000.0,1999.994,0.0,0.0017,0.0,0.0059,0.0,0.0002,2250.0,2249.9998,0.0,0.0,0.0,0.0001,1.0,1.0,1.0,1.0,0.0,0.0,0.0006,0.0006,0.0,0.0,0.0008,0.0008,0.0,0.0,0.0001,0.0002,0.0,0.0,0.0,0.0
-3,2010-01-10,0.9996,0.9996,0.9996,0.9996,2497.1733,2499.9956,0.9994,1.0,0.2835,0.0012,2.8267,0.0044,1997.1715,1999.994,0.284,0.0017,2.8285,0.0059,0.4825,0.0002,2247.1775,2249.9998,0.2823,0.0,2.8224,0.0001,1.0,0.9987,1.0,1.0,0.0879,0.1956,0.0006,0.0006,0.0882,0.1958,0.0008,0.0008,0.4661,0.4989,0.0001,0.0002,0.0873,0.195,0.0,0.0
+Realization,Date,"BPR:1,3,8","BPR:5,5,5",FGIP,FGIPH,FGOR,FGORH,FGPR,FGPRH,FGPT,FGPTH,FOIP,FOIPH,FOPR,FOPRH,FOPT,FOPTH,FWCT,FWCTH,FWIP,FWIPH,FWPR,FWPRH,FWPT,FWPTH,WGOR:OP1,WGOR:OP2,WGORH:OP1,WGORH:OP2,WGPR:OP1,WGPR:OP2,WGPRH:OP1,WGPRH:OP2,WOPR:OP1,WOPR:OP2,WOPRH:OP1,WOPRH:OP2,WWCT:OP1,WWCT:OP2,WWCTH:OP1,WWCTH:OP2,WWPR:OP1,WWPR:OP2,WWPRH:OP1,WWPRH:OP2
+0,2010-01-10,0.9996,0.9996,2499.4473,2499.9956,1.0,1.0,0.0557,0.0012,0.5528,0.0044,1999.4462,1999.994,0.056,0.0017,0.5538,0.0059,0.1776,0.0002,2249.4492,2249.9998,0.0551,0.0,0.5507,0.0001,1.0,1.0,1.0,1.0,0.0557,0.0,0.0006,0.0006,0.056,0.0,0.0008,0.0008,0.3552,0.0,0.0001,0.0002,0.0551,0.0,0.0,0.0
+1,2010-01-10,0.9996,0.9996,2499.8467,2499.9956,1.0,1.0,0.0157,0.0012,0.1533,0.0044,1999.8458,1999.994,0.016,0.0017,0.1542,0.0059,0.0657,0.0002,2249.8489,2249.9998,0.0151,0.0,0.1512,0.0001,1.0,1.0,1.0,1.0,0.0,0.0157,0.0006,0.0006,0.0,0.016,0.0008,0.0008,0.0,0.1314,0.0001,0.0002,0.0,0.0151,0.0,0.0
+2,2010-01-10,0.9996,0.9996,2500.0,2499.9956,1.0,1.0,0.0,0.0012,0.0,0.0044,2000.0,1999.994,0.0,0.0017,0.0,0.0059,0.0,0.0002,2250.0,2249.9998,0.0,0.0,0.0,0.0001,1.0,1.0,1.0,1.0,0.0,0.0,0.0006,0.0006,0.0,0.0,0.0008,0.0008,0.0,0.0,0.0001,0.0002,0.0,0.0,0.0,0.0
+3,2010-01-10,0.9996,0.9996,2497.1733,2499.9956,0.9994,1.0,0.2835,0.0012,2.8267,0.0044,1997.1715,1999.994,0.284,0.0017,2.8285,0.0059,0.4825,0.0002,2247.1775,2249.9998,0.2823,0.0,2.8224,0.0001,1.0,0.9987,1.0,1.0,0.0879,0.1956,0.0006,0.0006,0.0882,0.1958,0.0008,0.0008,0.4661,0.4989,0.0001,0.0002,0.0873,0.195,0.0,0.0
diff --git a/tests/unit_tests/test_libres_facade.py b/tests/unit_tests/test_libres_facade.py
index f052a2a73e3..48e7f8eb405 100644
--- a/tests/unit_tests/test_libres_facade.py
+++ b/tests/unit_tests/test_libres_facade.py
@@ -263,7 +263,7 @@ def test_summary_collector(
         data.iloc[:4].round(4).to_csv(),
         "summary_collector_1.csv",
     )
-    assert data.shape == (1000, 46)
+    assert data.shape == (1000, 44)
     with pytest.raises(KeyError):
         # realization 60:
         _ = data.loc[60]

From cfafc3c30229fc445261daa998c989bd1ef984c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=98yvind=20Eide=20=28EDT=20DSD=20SD2=29?=
 <oyveid@st-linrgs480.st.statoil.no>
Date: Tue, 19 Sep 2023 15:37:28 +0200
Subject: [PATCH 2/2] Remove duplicates from summary files

---
 src/clib/lib/enkf/read_summary.cpp | 7 +++++--
 src/ert/config/summary_config.py   | 5 ++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/clib/lib/enkf/read_summary.cpp b/src/clib/lib/enkf/read_summary.cpp
index 9cbdf018c3f..62ee5d5da46 100644
--- a/src/clib/lib/enkf/read_summary.cpp
+++ b/src/clib/lib/enkf/read_summary.cpp
@@ -23,12 +23,15 @@ ERT_CLIB_SUBMODULE("_read_summary", m) {
               const ecl_smspec_type *smspec = ecl_sum_get_smspec(summary);
               std::vector<std::pair<std::string, std::vector<double>>>
                   summary_vectors{};
-
+              std::vector<std::string> seen_keys{};
               for (int i = 0; i < ecl_smspec_num_nodes(smspec); i++) {
                   const ecl::smspec_node &smspec_node =
                       ecl_smspec_iget_node_w_node_index(smspec, i);
                   const char *key = smspec_node.get_gen_key1();
-                  if (matches(keys, key)) {
+                  if ((matches(keys, key)) &&
+                      !(std::find(seen_keys.begin(), seen_keys.end(), key) !=
+                        seen_keys.end())) {
+                      seen_keys.push_back(key);
                       int start = ecl_sum_get_first_report_step(summary);
                       int end = ecl_sum_get_last_report_step(summary);
                       std::vector<double> data{};
diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py
index 1c4ce5ebae9..0fed2c8a31c 100644
--- a/src/ert/config/summary_config.py
+++ b/src/ert/config/summary_config.py
@@ -77,13 +77,12 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset:
                     f"from: {run_path}/{filename}.UNSMRY"
                 )
 
-        summary_data = read_summary(summary, self.keys)
+        summary_data = read_summary(summary, list(set(self.keys)))
         summary_data.sort(key=lambda x: x[0])
         data = [d for _, d in summary_data]
         keys = [k for k, _ in summary_data]
-
         ds = xr.Dataset(
             {"values": (["name", "time"], data)},
             coords={"time": time_map, "name": keys},
         )
-        return ds.drop_duplicates(["time"])
+        return ds.drop_duplicates("time")