From baebba25f5d7de69bcf54554ca99fd54bdbde385 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Mon, 4 Sep 2023 15:43:10 +0200 Subject: [PATCH 1/2] Go back to C implementation of summary loading This removes the following unwanted behaviors of the current implementation: * The call to _init_numpy_vector_interp changes some values, probably due to interpolation. * Iterating over ecl_sum uses both ecl_smspec_node->key1 and ecl_smspec_node->key2, which results in duplicate entries for summary keyword types that have both keys. * DATE is included in the values loaded. However, the following behavior is kept from the most recent implementation: * All report steps are loaded, not just those in the time map. --- src/clib/lib/CMakeLists.txt | 1 + src/clib/lib/enkf/read_summary.cpp | 50 +++++++++++++++++++ src/ert/config/summary_config.py | 37 ++++---------- .../0/summary_collector_1.csv | 10 ++-- tests/unit_tests/test_libres_facade.py | 2 +- 5 files changed, 68 insertions(+), 32 deletions(-) create mode 100644 src/clib/lib/enkf/read_summary.cpp diff --git a/src/clib/lib/CMakeLists.txt b/src/clib/lib/CMakeLists.txt index c82156d631b..63092675504 100644 --- a/src/clib/lib/CMakeLists.txt +++ b/src/clib/lib/CMakeLists.txt @@ -15,6 +15,7 @@ pybind11_add_module( job_queue/torque_driver.cpp ${lsb} enkf/enkf_obs.cpp + enkf/read_summary.cpp enkf/row_scaling.cpp) # ----------------------------------------------------------------- diff --git a/src/clib/lib/enkf/read_summary.cpp b/src/clib/lib/enkf/read_summary.cpp new file mode 100644 index 00000000000..9cbdf018c3f --- /dev/null +++ b/src/clib/lib/enkf/read_summary.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include + +static bool matches(std::vector patterns, std::string key) { + bool has_key = false; + for (auto pattern : patterns) { + if (fnmatch(pattern.c_str(), key.c_str(), 0) == 0) { + has_key = true; + break; + } + } + return has_key; +} +ERT_CLIB_SUBMODULE("_read_summary", m) { + 
m.def("read_summary", + [](Cwrap summary, std::vector keys) { + const int step2 = ecl_sum_get_last_report_step(summary); + const ecl_smspec_type *smspec = ecl_sum_get_smspec(summary); + std::vector>> + summary_vectors{}; + + for (int i = 0; i < ecl_smspec_num_nodes(smspec); i++) { + const ecl::smspec_node &smspec_node = + ecl_smspec_iget_node_w_node_index(smspec, i); + const char *key = smspec_node.get_gen_key1(); + if (matches(keys, key)) { + int start = ecl_sum_get_first_report_step(summary); + int end = ecl_sum_get_last_report_step(summary); + std::vector data{}; + int key_index = + ecl_sum_get_general_var_params_index(summary, key); + for (int tstep = start; tstep <= end; tstep++) { + if (ecl_sum_has_report_step(summary, tstep)) { + int time_index = + ecl_sum_iget_report_end(summary, tstep); + data.push_back( + ecl_sum_iget(summary, time_index, key_index)); + } + } + summary_vectors.emplace_back(key, data); + } + } + return summary_vectors; + }); +} diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py index 486657fe2a3..1c4ce5ebae9 100644 --- a/src/ert/config/summary_config.py +++ b/src/ert/config/summary_config.py @@ -1,19 +1,18 @@ from __future__ import annotations -import ctypes import logging from dataclasses import dataclass -from fnmatch import fnmatch from typing import TYPE_CHECKING -import numpy as np import xarray as xr from ecl.summary import EclSum +from ert._clib._read_summary import read_summary # pylint: disable=import-error + from .response_config import ResponseConfig if TYPE_CHECKING: - from typing import Any, List, Optional + from typing import List, Optional logger = logging.getLogger(__name__) @@ -57,10 +56,8 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: f"file from: {run_path}/{filename}.UNSMRY", ) from e - data = [] - keys = [] - time_map = summary.alloc_time_vector(True) - axis = [t.datetime() for t in time_map] + c_time = summary.alloc_time_vector(True) + time_map = [t.datetime() for t 
in c_time] if self.refcase: existing_time_map = self.refcase.alloc_time_vector(True) missing = [] @@ -80,25 +77,13 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: f"from: {run_path}/{filename}.UNSMRY" ) - user_summary_keys = set(self.keys) - for key in summary: - if not self._should_load_summary_key(key, user_summary_keys): - continue - keys.append(key) - - np_vector = np.zeros(len(time_map)) - summary._init_numpy_vector_interp( - key, - time_map, - np_vector.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), - ) - data.append(np_vector) + summary_data = read_summary(summary, self.keys) + summary_data.sort(key=lambda x: x[0]) + data = [d for _, d in summary_data] + keys = [k for k, _ in summary_data] ds = xr.Dataset( {"values": (["name", "time"], data)}, - coords={"time": axis, "name": keys}, + coords={"time": time_map, "name": keys}, ) - return ds.drop_duplicates(["time", "name"]) - - def _should_load_summary_key(self, data_key: Any, user_set_keys: set[str]) -> bool: - return any(fnmatch(data_key, key) for key in user_set_keys) + return ds.drop_duplicates(["time"]) diff --git a/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv b/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv index e7f6e4196d2..14ef7548276 100644 --- a/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv +++ b/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv @@ -1,5 +1,5 @@ -Realization,Date,"BPR:1,3,8","BPR:5,5,5",BPR:445,BPR:721,FGIP,FGIPH,FGOR,FGORH,FGPR,FGPRH,FGPT,FGPTH,FOIP,FOIPH,FOPR,FOPRH,FOPT,FOPTH,FWCT,FWCTH,FWIP,FWIPH,FWPR,FWPRH,FWPT,FWPTH,WGOR:OP1,WGOR:OP2,WGORH:OP1,WGORH:OP2,WGPR:OP1,WGPR:OP2,WGPRH:OP1,WGPRH:OP2,WOPR:OP1,WOPR:OP2,WOPRH:OP1,WOPRH:OP2,WWCT:OP1,WWCT:OP2,WWCTH:OP1,WWCTH:OP2,WWPR:OP1,WWPR:OP2,WWPRH:OP1,WWPRH:OP2 
-0,2010-01-10,0.9996,0.9996,0.9996,0.9996,2499.4473,2499.9956,1.0,1.0,0.0557,0.0012,0.5528,0.0044,1999.4462,1999.994,0.056,0.0017,0.5538,0.0059,0.1776,0.0002,2249.4492,2249.9998,0.0551,0.0,0.5507,0.0001,1.0,1.0,1.0,1.0,0.0557,0.0,0.0006,0.0006,0.056,0.0,0.0008,0.0008,0.3552,0.0,0.0001,0.0002,0.0551,0.0,0.0,0.0 -1,2010-01-10,0.9996,0.9996,0.9996,0.9996,2499.8467,2499.9956,1.0,1.0,0.0157,0.0012,0.1533,0.0044,1999.8458,1999.994,0.016,0.0017,0.1542,0.0059,0.0657,0.0002,2249.8489,2249.9998,0.0151,0.0,0.1512,0.0001,1.0,1.0,1.0,1.0,0.0,0.0157,0.0006,0.0006,0.0,0.016,0.0008,0.0008,0.0,0.1314,0.0001,0.0002,0.0,0.0151,0.0,0.0 -2,2010-01-10,0.9996,0.9996,0.9996,0.9996,2500.0,2499.9956,1.0,1.0,0.0,0.0012,0.0,0.0044,2000.0,1999.994,0.0,0.0017,0.0,0.0059,0.0,0.0002,2250.0,2249.9998,0.0,0.0,0.0,0.0001,1.0,1.0,1.0,1.0,0.0,0.0,0.0006,0.0006,0.0,0.0,0.0008,0.0008,0.0,0.0,0.0001,0.0002,0.0,0.0,0.0,0.0 -3,2010-01-10,0.9996,0.9996,0.9996,0.9996,2497.1733,2499.9956,0.9994,1.0,0.2835,0.0012,2.8267,0.0044,1997.1715,1999.994,0.284,0.0017,2.8285,0.0059,0.4825,0.0002,2247.1775,2249.9998,0.2823,0.0,2.8224,0.0001,1.0,0.9987,1.0,1.0,0.0879,0.1956,0.0006,0.0006,0.0882,0.1958,0.0008,0.0008,0.4661,0.4989,0.0001,0.0002,0.0873,0.195,0.0,0.0 +Realization,Date,"BPR:1,3,8","BPR:5,5,5",FGIP,FGIPH,FGOR,FGORH,FGPR,FGPRH,FGPT,FGPTH,FOIP,FOIPH,FOPR,FOPRH,FOPT,FOPTH,FWCT,FWCTH,FWIP,FWIPH,FWPR,FWPRH,FWPT,FWPTH,WGOR:OP1,WGOR:OP2,WGORH:OP1,WGORH:OP2,WGPR:OP1,WGPR:OP2,WGPRH:OP1,WGPRH:OP2,WOPR:OP1,WOPR:OP2,WOPRH:OP1,WOPRH:OP2,WWCT:OP1,WWCT:OP2,WWCTH:OP1,WWCTH:OP2,WWPR:OP1,WWPR:OP2,WWPRH:OP1,WWPRH:OP2 +0,2010-01-10,0.9996,0.9996,2499.4473,2499.9956,1.0,1.0,0.0557,0.0012,0.5528,0.0044,1999.4462,1999.994,0.056,0.0017,0.5538,0.0059,0.1776,0.0002,2249.4492,2249.9998,0.0551,0.0,0.5507,0.0001,1.0,1.0,1.0,1.0,0.0557,0.0,0.0006,0.0006,0.056,0.0,0.0008,0.0008,0.3552,0.0,0.0001,0.0002,0.0551,0.0,0.0,0.0 
+1,2010-01-10,0.9996,0.9996,2499.8467,2499.9956,1.0,1.0,0.0157,0.0012,0.1533,0.0044,1999.8458,1999.994,0.016,0.0017,0.1542,0.0059,0.0657,0.0002,2249.8489,2249.9998,0.0151,0.0,0.1512,0.0001,1.0,1.0,1.0,1.0,0.0,0.0157,0.0006,0.0006,0.0,0.016,0.0008,0.0008,0.0,0.1314,0.0001,0.0002,0.0,0.0151,0.0,0.0 +2,2010-01-10,0.9996,0.9996,2500.0,2499.9956,1.0,1.0,0.0,0.0012,0.0,0.0044,2000.0,1999.994,0.0,0.0017,0.0,0.0059,0.0,0.0002,2250.0,2249.9998,0.0,0.0,0.0,0.0001,1.0,1.0,1.0,1.0,0.0,0.0,0.0006,0.0006,0.0,0.0,0.0008,0.0008,0.0,0.0,0.0001,0.0002,0.0,0.0,0.0,0.0 +3,2010-01-10,0.9996,0.9996,2497.1733,2499.9956,0.9994,1.0,0.2835,0.0012,2.8267,0.0044,1997.1715,1999.994,0.284,0.0017,2.8285,0.0059,0.4825,0.0002,2247.1775,2249.9998,0.2823,0.0,2.8224,0.0001,1.0,0.9987,1.0,1.0,0.0879,0.1956,0.0006,0.0006,0.0882,0.1958,0.0008,0.0008,0.4661,0.4989,0.0001,0.0002,0.0873,0.195,0.0,0.0 diff --git a/tests/unit_tests/test_libres_facade.py b/tests/unit_tests/test_libres_facade.py index f052a2a73e3..48e7f8eb405 100644 --- a/tests/unit_tests/test_libres_facade.py +++ b/tests/unit_tests/test_libres_facade.py @@ -263,7 +263,7 @@ def test_summary_collector( data.iloc[:4].round(4).to_csv(), "summary_collector_1.csv", ) - assert data.shape == (1000, 46) + assert data.shape == (1000, 44) with pytest.raises(KeyError): # realization 60: _ = data.loc[60] From cfafc3c30229fc445261daa998c989bd1ef984c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20Eide=20=28EDT=20DSD=20SD2=29?= Date: Tue, 19 Sep 2023 15:37:28 +0200 Subject: [PATCH 2/2] Remove duplicates from summary files --- src/clib/lib/enkf/read_summary.cpp | 7 +++++-- src/ert/config/summary_config.py | 5 ++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/clib/lib/enkf/read_summary.cpp b/src/clib/lib/enkf/read_summary.cpp index 9cbdf018c3f..62ee5d5da46 100644 --- a/src/clib/lib/enkf/read_summary.cpp +++ b/src/clib/lib/enkf/read_summary.cpp @@ -23,12 +23,15 @@ ERT_CLIB_SUBMODULE("_read_summary", m) { const ecl_smspec_type 
*smspec = ecl_sum_get_smspec(summary); std::vector>> summary_vectors{}; - + std::vector seen_keys{}; for (int i = 0; i < ecl_smspec_num_nodes(smspec); i++) { const ecl::smspec_node &smspec_node = ecl_smspec_iget_node_w_node_index(smspec, i); const char *key = smspec_node.get_gen_key1(); - if (matches(keys, key)) { + if ((matches(keys, key)) && + !(std::find(seen_keys.begin(), seen_keys.end(), key) != + seen_keys.end())) { + seen_keys.push_back(key); int start = ecl_sum_get_first_report_step(summary); int end = ecl_sum_get_last_report_step(summary); std::vector data{}; diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py index 1c4ce5ebae9..0fed2c8a31c 100644 --- a/src/ert/config/summary_config.py +++ b/src/ert/config/summary_config.py @@ -77,13 +77,12 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: f"from: {run_path}/{filename}.UNSMRY" ) - summary_data = read_summary(summary, self.keys) + summary_data = read_summary(summary, list(set(self.keys))) summary_data.sort(key=lambda x: x[0]) data = [d for _, d in summary_data] keys = [k for k, _ in summary_data] - ds = xr.Dataset( {"values": (["name", "time"], data)}, coords={"time": time_map, "name": keys}, ) - return ds.drop_duplicates(["time"]) + return ds.drop_duplicates("time")