From dfb42138e90f0f8acf6968172f3082d321430f84 Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Mon, 9 Dec 2024 08:01:00 +0100
Subject: [PATCH 01/12] pwnlayers2 initial commit

---
 src/nhflotools/pwnlayers2/__init__.py         |   0
 .../interpolate_layer_boundaries.py           | 261 +++++++
 .../interpolation_helper_functions.py         | 635 ++++++++++++++++++
 src/nhflotools/pwnlayers2/layers.py           | 344 ++++++++++
 4 files changed, 1240 insertions(+)
 create mode 100644 src/nhflotools/pwnlayers2/__init__.py
 create mode 100644 src/nhflotools/pwnlayers2/interpolate_layer_boundaries.py
 create mode 100644 src/nhflotools/pwnlayers2/interpolation_helper_functions.py
 create mode 100644 src/nhflotools/pwnlayers2/layers.py

diff --git a/src/nhflotools/pwnlayers2/__init__.py b/src/nhflotools/pwnlayers2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/nhflotools/pwnlayers2/interpolate_layer_boundaries.py b/src/nhflotools/pwnlayers2/interpolate_layer_boundaries.py
new file mode 100644
index 0000000..3770ee5
--- /dev/null
+++ b/src/nhflotools/pwnlayers2/interpolate_layer_boundaries.py
@@ -0,0 +1,261 @@
+import os
+
+import geopandas as gpd
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+from nhflotools.pwnlayers2.interpolation_helper_functions import (
+    CRS_RD,
+    get_point_values,
+    interpolate_gdf,
+    polyline_from_points,
+)
+
+try:
+    import pyvista as pv
+except ImportError as e:
+    msg = "pyvista is not installed. Please install it to run this script."
+    raise ImportError(msg) from e
+
+# Define the interpolation grid (to be replaced with the model grid in NHFLO)
+xmin, ymin = 95000, 496000
+xmax, ymax = 115000, 533000
+dx = 100.0
+xi = np.arange(xmin, xmax + dx, dx)
+yi = np.arange(ymin, ymax + dx, dx)
+X, Y = np.meshgrid(xi, yi)
+
+# Create a GeoDataFrame with the points of the interpolation
+# grid. The values are set to zero, which are used as helper
+# points in the interpolation of the thickness in the areas
+# where the layer is reported as absent by Koster (1997)
+
+pts = gpd.points_from_xy(X.ravel(), Y.ravel())
+gdf_pt = gpd.GeoDataFrame(
+    geometry=pts,
+    data={"value": [0] * len(pts)},
+    crs=CRS_RD,
+)
+
+# Names of the layers to be interpolated
+layer_names = ["S11", "S12", "S13", "S21", "S22", "S31", "S32"]
+
+# Define colours for the 3D plot
+cmap_t = mpl.colormaps["Blues"]
+colors_t = cmap_t(np.linspace(0.5, 1, len(layer_names)))
+
+cmap_b = mpl.colormaps["Oranges"]
+colors_b = cmap_b(np.linspace(0.5, 1, len(layer_names)))
+
+fig, ax = plt.subplots()
+# cf = plt.contourf(X, Y, zint_t.reshape(X.shape))
+# plt.colorbar(mappable=cf)
+# plt.plot(x, y,'o', ms=3, mec='w', mfc='none' )
+# plt.axis('square')
+# plt.show()
+
+# Define a line for creating a cross section that shows the
+# projection of the layer top/bottoms along a line.
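
+# A hedged sketch of what polyline_from_points returns (hypothetical
+# REPL session, not part of the original script): the helper packs the
+# points into a single VTK line cell [n_points, 0, 1, ..., n_points - 1],
+# so the points are visited in order as one connected polyline.
+#
+# >>> ln = polyline_from_points(np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0]]))
+# >>> ln.n_points, ln.n_cells
+# (2, 1)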
+ln_xs = polyline_from_points( + np.array([ + [(xmin + xmax) / 2, ymin, 0], + [(xmin + xmax) / 2, ymax, 0], + [(xmin + xmax) / 2, ymax, -15000], + [(xmin + xmax) / 2, ymin, -15000], + ]) +) + +# Create a plotter instance for the 3D plot +plotter = pv.Plotter() +# Same for the cross section +plotter_xs = pv.Plotter() + +overlap = np.zeros(X.shape) + +da = xr.DataArray( + data=overlap, + dims=["lat", "lon"], + coords={ + "lat": yi, + "lon": xi, + }, +) + +# Load the polygon to fill the nans below the North Sea with nearest neighbour interpolation values +fpath_shp = os.path.join("..", "gis", "kaarten_2024_voor_interpolatie", "noordzee_clip", "noordzee_clip.shp") +gdf_ns = gpd.read_file(fpath_shp) + +# Create a list with the names of the subfolders where the interpolation result will be stored +subdirs = ["top_aquitard", "dikte_aquitard", "bot_aquitard"] + +# Loop over the layers +fpath_gpkg = os.path.join("..", "gis", "kaarten_2024_voor_interpolatie", "interpolation_points.gpkg") +for c, layer_name in enumerate(layer_names): + # Create GeoDataFrames with the data points of the top and thicknesses + gdf_t = get_point_values(f"T{layer_name}") + # gdf_t.set_crs(CRS_RD) + gdf_d = get_point_values(f"D{layer_name}") + # gdf_d.set_crs(CRS_RD) + + # Read the polygons that indicate the absence of a layer (0.01 m polygons in the Koster (1997) shapefiles) + fpath_shp = os.path.join( + "..", "gis", "kaarten_2024_voor_interpolatie", "dikte_aquitard", f"D{layer_name}", f"D{layer_name}_mask.shp" + ) + gdf_msk = gpd.read_file(fpath_shp) + gdf_msk = gdf_msk[["geometry", "VALUE"]] + gdf_msk = gdf_msk.rename(columns={"VALUE": "value"}) + gdf_within = gpd.sjoin(gdf_pt, gdf_msk, predicate="within") + gdf_d = pd.concat([gdf_d, gdf_within[["geometry", "value_left"]]]) + + gdf_t = gdf_t.drop_duplicates() + gdf_d = gdf_d.drop_duplicates() + + # Store the interpolation points (layer top) so that they can be visualised in QGIS + # Experimental, commented out for the time being + # gdf_t.to_file( + # fpath_gpkg, + # driver="GPKG", + # mode="a", + # layer=layer_name, + # ) + + # gdf_d.to_file( + # fpath_gpkg, + # driver="GPKG", + # mode="a", + # layer=layer_name, + # ) + + # # Store the interpolation points (layer top) so that they can be visualised in QGIS + fpath_shp = os.path.join( + "..", + "gis", + "kaarten_2024_voor_interpolatie", + "top_aquitard", + f"T{layer_name}", + f"T{layer_name}_interpolation_points.shp", + ) + gdf_t.set_crs(CRS_RD) + # gdf_t.to_file(fpath_shp) + + # # Store the interpolation points (layer thickness) so that they can be visualised in QGIS + fpath_shp = os.path.join( + "..", + "gis", + "kaarten_2024_voor_interpolatie", + "dikte_aquitard", + f"D{layer_name}", + f"D{layer_name}_interpolation_points.shp", + ) + gdf_d.set_crs(CRS_RD) + # gdf_d.to_file(fpath_shp) + + # Interpolate the top + zint_t = interpolate_gdf(gdf_pt, gdf_t, gdf_ns=gdf_ns) + # Interpolate the thickness + zint_d = interpolate_gdf(gdf_pt, gdf_d, gdf_ns=gdf_ns, gdf_msk=gdf_msk) + + # Check if a mask exists for the Bergen area + fpath_shp = os.path.join( + "..", + "gis", + "kaarten_2024_voor_interpolatie", + "dikte_aquitard", + f"D{layer_name}", + f"D{layer_name}_mask_bergen_area.shp", + ) + if os.path.isfile(fpath_shp): + # Read the shapefile + gdf_msk_bergen = gpd.read_file(fpath_shp) + # Check which grid points are within the clipping polygons + gdf_within = gpd.sjoin(gdf_pt, gdf_msk_bergen, predicate="within") + # Convert their indices to a list + idx_msk = gdf_within.index.to_list() + # Set the interpolated values to NaN + 
zint_t[idx_msk] = np.nan
+        zint_d[idx_msk] = np.nan
+
+    # Calculate the layer bottom using the interpolated values
+    zint_b = zint_t - zint_d
+
+    # Store the interpolated values for visualization in QGIS
+    for subdir, zint in zip(subdirs, [zint_t, zint_d, zint_b], strict=False):
+        da.values = zint.reshape(X.shape)
+        fstem = f"{subdir[0].capitalize()}{layer_name}"
+        fpath = os.path.join("..", "gis", "kaarten_2024_geinterpoleerd", subdir, fstem)
+        os.makedirs(fpath, exist_ok=True)
+        fpath = os.path.join(fpath, f"{fstem}.nc")
+        da.to_netcdf(fpath)
+
+    # Determine the areas where the bottom of a layer is below the top of the underlying layer
+    if c > 0:
+        dz = zint_b0 - zint_t  # Note that zint_b0 is not defined until the first layer has been processed  # noqa: F821
+        dz[dz > 0] = np.nan
+        da.values = dz.reshape(X.shape)
+
+        fpath = os.path.join(
+            "..", "gis", "kaarten_2024_geinterpoleerd", "overlap", f"overlap_{layer_names[c - 1]}_{layer_name}.nc"
+        )
+        da.to_netcdf(fpath)
+
+    zint_b0 = zint_b  # Store the bottom of the current layer for comparison with the top of the next layer
+    # zint_t[np.isnan(zint_t)] = 0
+    # zint_b[np.isnan(zint_b)] = 0
+
+    # Add the top to the 3D plot
+    grid_t = pv.StructuredGrid(X, Y, zint_t.reshape(X.shape) * 100)
+    for i in np.where(np.isnan(zint_t))[0]:
+        grid_t.BlankPoint(i)
+
+    plotter.add_mesh(
+        grid_t,
+        color=colors_t[c],
+        style="surface",
+        show_edges=False,
+        nan_opacity=0,
+        # scalars=grid.points[:, -1],
+        # scalar_bar_args={'vertical': True},
+    )
+
+    # Add the top to the 3D cross section with the projected top and bottom elevations
+    line_slice_t = grid_t.slice_along_line(ln_xs)
+    plotter_xs.add_mesh(
+        line_slice_t,
+        line_width=1,
+        render_lines_as_tubes=False,
+        color=colors_t[c],
+    )
+
+    # Add the bottom to the 3D plot
+    grid_b = pv.StructuredGrid(X, Y, zint_b.reshape(X.shape) * 100)
+    for i in np.where(np.isnan(zint_b))[0]:
+        grid_b.BlankPoint(i)
+
+    plotter.add_mesh(
+        grid_b,
+        color=colors_b[c],
+        style="surface",
+        show_edges=False,
+        nan_opacity=0,
+        # scalars=grid.points[:, -1],
+        # scalar_bar_args={'vertical': True},
+    )
+
+    # Add the bottom to the 3D cross section with the projected top and bottom elevations
+    line_slice_b = grid_b.slice_along_line(ln_xs)
+    plotter_xs.add_mesh(
+        line_slice_b,
+        line_width=1,
+        render_lines_as_tubes=False,
+        color=colors_b[c],
+    )
+
+# Activate the 3D plots
+plotter.show_grid()
+plotter.show()
+
+plotter_xs.add_mesh(ln_xs, line_width=1, color="grey")
+plotter_xs.show()
diff --git a/src/nhflotools/pwnlayers2/interpolation_helper_functions.py b/src/nhflotools/pwnlayers2/interpolation_helper_functions.py
new file mode 100644
index 0000000..97f25e9
--- /dev/null
+++ b/src/nhflotools/pwnlayers2/interpolation_helper_functions.py
@@ -0,0 +1,635 @@
+from pathlib import Path
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import pyvista as pv
+from scipy.interpolate import griddata
+
+# Default CRS, Amersfoort RD
+CRS_RD = 28992
+
+# These dictionaries map the polygon values in the nhflo data
+# version of the Koster shapefiles to values for the contour
+# lines. The value that is assigned to the contour line is the
+# value that occurs in both lists that correspond to a polygon
+# value. So for example, a line that borders a polygon with
+# -1.50 and 0 for TS11 will be assigned a value of -0.50 m.
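
+# A minimal sketch of that lookup (hypothetical REPL session, mirroring
+# the set intersection used in assign_poly_values_to_linestrings below):
+#
+# >>> list0, list1 = LEGEND_DICTS["TS11"][-1.50], LEGEND_DICTS["TS11"][0]
+# >>> sorted({item for item in list0 if item in list1})
+# [-0.5]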

+LEGEND_DICTS = {
+    "TS11": {
+        -3.75: [-2.5],
+        -1.50: [-2.50, -0.50],
+        0: [-0.50, 0.50],
+        1.50: [0.50, 2.50],
+        3.75: [2.50, 5.00],
+        5.50: [5.00],
+    },
+    "DS11": {
+        0.13: [0.25],
+        0.38: [0.25, 0.50],
+        0.75: [0.50, 1.00],
+        1.50: [1.00, 2.00],
+        2.50: [2.00, 3.00],
+        4.00: [3.00, 5.00],
+        6.00: [5.00, 7.50],
+        7.75: [7.50],
+    },
+    "TS12": {
+        0.5: [0.00],
+        -2.50: [-5.00, 0.00],
+        -7.50: [-10.00, -5.00],
+        -12.50: [-15.00, -10.00],
+        -17.50: [-15.00],
+    },
+    "DS12": {
+        0.75: [1.00],
+        2.00: [1.00, 3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00, 20.00],
+        22.50: [20.00, 25.00],
+        27.50: [25.00],
+    },
+    "TS13": {
+        -12.50: [-15.00],
+        -17.50: [-20.00, -15.00],
+        -22.50: [-25.00, -20.00],
+        -27.50: [-30.00, -25.00],
+        -32.50: [-35.00, -30.00],
+        -36.00: [-35.00],
+    },
+    "DS13": {
+        0.25: [0.50],
+        0.75: [0.50, 1.00],
+        2.00: [1.00, 3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00, 20.00],
+        22.50: [20.00, 25.00],
+        27.50: [25.00],
+    },
+    "TS21": {
+        -25.00: [-30.00, -20.00],
+        -35.00: [-40.00, -30.00],
+        -45.00: [-50.00, -40.00],
+        -55.00: [-60.00, -50.00],
+        -65.00: [-60.00],
+    },
+    "DS21": {
+        2.00: [3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00, 20.00],
+        22.50: [20.00, 25.00],
+        27.50: [25.00, 30.00],
+        32.50: [30.00, 35.00],
+        37.50: [35.00, 40.00],
+        42.50: [40.00],
+    },
+    "TS22": {
+        -25.00: [-30.00],
+        -35.00: [-40.00, -30.00],
+        -45.00: [-50.00, -40.00],
+        -55.00: [-60.00, -50.00],
+        -65.00: [-70.00, -60.00],
+        -75.00: [-80.00, -70.00],
+        -85.00: [-90.00, -80.00],
+        -95.00: [-90.00],
+    },
+    "DS22": {
+        2.50: [5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00, 20.00],
+        22.50: [20.00, 25.00],
+        27.50: [25.00, 30.00],
+        32.50: [30.00, 35.00],
+        37.50: [35.00, 40.00],
+        42.50: [40.00, 45.00],
+        47.50: [45.00, 50.00],
+        52.50: [50.00],
+    },
+    "TS31": {
+        -45.00: [-50.00],
+        -55.00: [-60.00, -50.00],
+        -65.00: [-70.00, -60.00],
+        -75.00: [-80.00, -70.00],
+        -85.00: [-90.00, -80.00],
+        -95.00: [-100.0, -90.00],
+        -105.0: [-100.0],
+    },
+    "DS31": {
+        0.25: [0.50],
+        0.75: [0.50, 1.00],
+        2.00: [1.00, 3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00],
+    },
+    "TS32": {
+        -77.50: [-80.00],
+        -82.50: [-85.00, -80.00],
+        -87.50: [-90.00, -85.00],
+        -92.50: [-95.00, -90.00],
+        -97.50: [-100.0, -95.00],
+        -102.5: [-105.0, -100.00],
+        -107.5: [-110.0, -105.00],
+        -112.5: [-110.0],
+    },
+    "DS32": {
+        0.25: [0.50],
+        0.75: [0.50, 1.00],
+        2.00: [1.00, 3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00],
+    },
+}
+
+
+def get_internal_contour_lines(gdf_ln, gdf_pl):
+    """This function looks for linestrings in gdf_ln that do not
+    overlap with the polygon boundaries in gdf_pl. The purpose is
+    to separate the lines that represent a thickness from the lines
+    that represent the limit of occurrence of a layer. Only used
+    for thicknesses, not for the tops.
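
+
+    A hedged, hypothetical illustration (not from the original code): a
+    line on the boundary of a 0.01 m polygon is dropped, while a line in
+    its interior is kept:
+
+    >>> from shapely.geometry import LineString, box
+    >>> pl = gpd.GeoDataFrame({"VALUE": [0.01]}, geometry=[box(0, 0, 10, 10)], crs=CRS_RD)
+    >>> ln = gpd.GeoDataFrame(geometry=[LineString([(0, 0), (10, 0)]),
+    ...                                 LineString([(3, 5), (7, 5)])], crs=CRS_RD)
+    >>> len(get_internal_contour_lines(ln, pl))
+    1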
+ + Parameters + ---------- + gdf_ln : GeoDataFrame + GeoDataFrame containing the linestrings of the thickness + contours + gdf_pl : GeoDataFrame + GeoDataFrame with the polygons of the thickness + + Returns + ------- + GeoDataFrame + Returns gdf_ln without the linestrings that overlap with the + limit of occurrence + """ + # Select only the polygons which indicate the regions where the layer does not occur + idx = gdf_pl["VALUE"] == 0.01 + + # Create a new GeoDataFrame containing the polygon boundaries as (Multi)LineStrings + gdf_bnd = gpd.GeoDataFrame( + geometry=gdf_pl.loc[idx, "geometry"].boundary, + crs=CRS_RD, + ) + # Explode so that MultiLineStrings become LineStrings + gdf_bnd = gdf_bnd.explode() + # Create a buffer around the polygon boundaries because in rare cases there + # are minor differences between the line vertices and the polygon vertices + gdf_bnd["geometry"] = gdf_bnd["geometry"].buffer(2.0) + + # Do a spatial join to find out which linestrings in gdf_ln are within + # the polygons of gdf_bnd. Those that are, should not be returned by + # the function. + gdf_jn = gpd.sjoin(gdf_bnd, gdf_ln, how="left", predicate="contains") + # Use the index of gdf_ln to create an GeoSeries to slice gdf_ln with + idx = gdf_ln.index.isin(gdf_jn["index_right"]) + + # Return gdf_ln without the linestrings that overlap with the 0.01 m + # thickness polygon boundaries. + return gdf_ln.loc[~idx] + + +def assign_poly_values_to_linestrings( + gdf_ln, # GeoDataFrame with contour lines + gdf_pl, # GeoDataFrame with the Koster (1997) polygons + layer_name, # Name of the layer +): + """This function tries to identify which polygons in gdf_pl border + a contour line in gdf_ln. Ideally, a contour line forms the separation + between two polygons but this is not always the case due to topology + errors and the addition of polygons to the Koster (1997) shapefiles + from other sources (occurs mostly in the southern part of the area). + + Parameters + ---------- + gdf_ln : GeoDataFrame + GeoDataFrame containing the linestrings of the thickness + contours + gdf_pl : GeoDataFrame + GeoDataFrame with the polygons of the thickness + layer_name : str + Name of the layer being processed. This is needed to look up + the dictionary in LEGEND_DICTS that maps the Koster (1997) + legend entries to the top/thickness values assigned to the + polygons. + + Returns + ------- + GeoDataFrame + A GeoDataFrame with for each linestring the assigned value, + the number of bordering polygons found and any remarks. + """ + # Get the legend_dict for the current layer + legend_dict = LEGEND_DICTS[layer_name] + + # Check for polygons with VALUE attribute of 0.01 m, signals where layer is absent + # Only returns rows for DS files, no effect for TS files + idx = gdf_pl["VALUE"] == 0.01 + # Remove the 0.01 m polygons + gdf_pl = gdf_pl.loc[~idx] + # Renumber the index + gdf_pl = gdf_pl.reindex(index=range(len(gdf_pl))) + # Make geometries valid by using the buffer(0) trick + gdf_pl["geometry"] = gdf_pl.buffer(0) + + # Determine which linestrings in gdf_ln intersect which polygons in gdf_pl + gdf_int = gpd.sjoin(gdf_ln, gdf_pl, how="left", predicate="intersects") + + data = [] + # The index of gdf_int contains duplicates because most lines will + # intersect multiple polygons. + for i in gdf_int.index.unique(): + # Select the rows for the contour linestring and store in a separate + # GeoDataFrame + idx = gdf_int.index == i + gdf_i = gdf_int.loc[idx] + + # Determine the length of the DataFrame, i.e. 
the number of polygons that intersect the linestring
+        N = len(gdf_i)
+
+        # Default remark for N == 2
+        remark = "Value assigned automatically in Python script."
+
+        # The linestring's geometry is the same for all rows of gdf_i. Only one is
+        # needed to build the GeoDataFrame returned by the function.
+        geom = gdf_i["geometry"].values[0]
+
+        # Ideally each contour has a polygon to either side, so N == 2. This is not always the case,
+        # hence the need for these conditional statements
+        if N == 2:
+            # Get the VALUE attribute of each polygon
+            v0 = gdf_i["VALUE"].values[0]
+            v1 = gdf_i["VALUE"].values[1]
+            # Get the legend range from legend_dict
+            list0 = legend_dict.get(v0)
+            list1 = legend_dict.get(v1)
+            # The value may not correspond to a range in the original Koster (1997)
+            # as polygons from other sources were later added to the shapefiles. In
+            # that case (one of) the list(s) will be None.
+            if None in {list0, list1}:
+                # Do not assign a value to the linestring and change the remark that will
+                # appear in the shapefile attribute table.
+                v = [None]
+                remark = "Poly not in mapping dict. Assign value manually."
+            else:
+                # If both lists are not None then find the item they have in common.
+                # Ideally this is a single value. When this is not the case, no value
+                # is assigned and the remark is changed to reflect this problem.
+                v = list({item0 for item0 in list0 if item0 in list1})
+                if len(v) != 1:
+                    # Format the remark before v is overwritten, so that the
+                    # actual number of candidate values is reported.
+                    remark = f"Ambiguous result {len(v)}. Assign value manually."
+                    v = [None]
+        # A linestring can intersect more than two polygons due to
+        # - overlapping polygons in the original shapefiles
+        # - the start- and end points of the contour lines touch (digitizing mistake)
+        # - when polygons from another data source were added to the original Koster (1997) polygons.
+        # In these cases N is larger than 2 and it cannot be determined
+        # automatically what the value for the linestring must be.
+        elif N > 2:
+            v = [None]
+            remark = "Line intersects more than 2 polygons. Assign value manually."
+            # print(gdf_i["index_right"].to_list())
+            # gdf_i["index_right"].dropna().astype(int).values
+        # A line can intersect no or a single polygon. This is most frequently the case for
+        # the lines that were added to the Koster (1997) linestrings for the Bergen area
+        # (based on the Stuyfzand figures).
+        elif N < 2:
+            v = [None]
+            remark = "Line intersects less than 2 polygons. Assign value manually."
+
+        # Append one line to the data for each linestring in gdf_ln
+        data.append([*v, N, remark, geom])
+
+    # Return a GeoDataFrame of linestrings
+    return gpd.GeoDataFrame(
+        data=data,
+        columns=["value", "N", "remark", "geometry"],
+        crs=CRS_RD,
+    )
+
+
+def combine_lists(lists):
+    """Returns combinations of lists that share common items.
+
+    Parameters
+    ----------
+    lists : list
+        A list of lists to be analyzed
+
+    Returns
+    -------
+    list
+        A list in which the input lists with common items have
+        been combined.
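
+
+    Examples
+    --------
+    A hedged doctest (hypothetical input; the order of the items within
+    each combined list is not guaranteed, because the groups are built
+    from sets):
+
+    >>> combine_lists([[1, 2], [2, 3], [4, 5]])
+    [[1, 2, 3], [4, 5]]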
+ """ + # Start by assuming each list is a separate group + groups = [set(lst) for lst in lists] + + merged = True + while merged: + merged = False + for i in range(len(groups)): + for j in range(i + 1, len(groups)): + # If two groups share any common elements, merge them + if groups[i].intersection(groups[j]): + groups[i] = groups[i].union(groups[j]) + groups.pop(j) + merged = True + break + if merged: + break + + # Convert sets back to lists for final output + return [list(group) for group in groups] + + +def join_contour_line_segments(gdf_ln): + """This function combines the individual linestrings that appear + in the original Koster (1997) contour line files into larger + linestrings. + + Parameters + ---------- + gdf_ln : GeoDataFrame + GeoDataFrame with individual linestrings. + + Returns + ------- + GeoDataFrame + GeoDataFrame with the comined linestrings. + """ + # Use a spatial join to identify the linstrings with touching endpoints + gdf_tch = gpd.sjoin(gdf_ln, gdf_ln, how="left", predicate="touches") + + # Create a dictionary for every line segment, which will store the + # index numbers of the linestrings that it touches + l_dict = {i: [] for i in gdf_ln.index} + # Loop through the index items of gdf_tch + for i0 in gdf_tch.index: + # Get the rows for the current line segment + idx = gdf_tch.index == i0 + # Get the index numbers of the touching linestrings + irs = gdf_tch.loc[idx, "index_right"] + irs = irs.dropna() + # Combine the linestring index number with the + # index numbers of the touchings linestrings into a single list + i_list = [i0, *irs.astype(int).tolist()] + # Update each item in the l_dict dictionary by adding i_list + for i1 in i_list: + l_dict[i1] += i_list + + # Combine the linestring segments that form a single contour line. This + # will result in a nested list in which each item is a list containing + # the index numbers of the line segments that together form a contour line + unique_lines = combine_lists(list(l_dict.values())) + + # Loop through the list with the index numbers of the segments + # and use these to create single contour lines + lns = [] + ln_vals = [] + for idx in unique_lines: + if len(idx) == 0: + pass + else: + # Combine the line segments into a single linstring + lns.append(gdf_ln.loc[idx, "geometry"].union_all()) + # Each line segment has a top/thickness value associated + # with it. Ideally they are all the same but this is not + # guaranteed. The next two lines select the most frequently + # occurring value, which will be used as the value attribute + # in the GeoDataFrame that will be returned. + vals_lst = gdf_ln.loc[idx, "value"].to_list() + v = max(vals_lst, key=vals_lst.count) + ln_vals.append(v) + + # Return a GeoDataFrame with the combined linestrings and their + # top/thickness value. + return gpd.GeoDataFrame( + geometry=lns, + data={ + "script_value": ln_vals, + "value": ln_vals, + }, + crs=CRS_RD, + ) + + +def get_point_values(layer_name): + """This function is called from interpolate_layer_boundaries.py to convert + line segments to points and combine it with point data from the geo_daw + data (top/thickness values for borehole interpreted by Koster, 1997) and + the point values for the Bergen area (digitized from the figures in the + Stuyfzand, 1987 report). + + Parameters + ---------- + layer_name : str + Name of the layer. 
+ + Returns + ------- + GeoDataFrame + GeoDataFrame with points and their corresponding top/thickness values + """ + # Folder with the contour lines + src_dir = Path("..", "gis", "kaarten_2024_voor_interpolatie") + # Set the paths to the files to be read + if layer_name.find("T") == 0: + fpath_shp = Path(src_dir, "top_aquitard", layer_name, f"{layer_name}_union_with_values_edited.shp") + fpath_shp_ber = Path(src_dir, "top_aquitard", layer_name, f"{layer_name}_bergen_points.shp") + elif layer_name.find("D") == 0: + fpath_shp = Path(src_dir, "dikte_aquitard", layer_name, f"{layer_name}_union_with_values_edited.shp") + fpath_shp_ber = Path(src_dir, "dikte_aquitard", layer_name, f"{layer_name}_bergen_points.shp") + + # Import the contour lines + gdf_ln = gpd.read_file(fpath_shp) + # Convert any multilinestrings to linestrings + gdf_ln = gdf_ln.explode() + # Add the line vertices as a list of coordinates to each row of the GeoDataFrame + gdf_ln["points"] = gdf_ln.apply(lambda x: list(x["geometry"].coords), axis=1) + + # Convert the coordinates to points and assign values + values = [] + pts = [] + # Loop through each row of gdf_ln + for _index, row in gdf_ln.iterrows(): + # Skip NULL values that can occur for Bergen -999 polygons + if np.isnan(row["value"]): + continue + # Get the coordinates created by the lambda function above and + # convert them to Point objects + xy_arr = np.array(row["points"]) + pts_i = list(gpd.points_from_xy(x=xy_arr[:, 0], y=xy_arr[:, 1])) + # Add to the list of existing poins + pts += pts_i + # Expand the list with values + values += [row["value"]] * len(pts_i) + + # Convert to a GeoDataFrame + gdf_pts = gpd.GeoDataFrame( + data=values, + geometry=pts, + columns=["value"], + crs=CRS_RD, + ) + + # Check if a shapefile with point data exists for the Bergen area + if fpath_shp_ber.exists() is True: + # Read the file + gdf_pts_ber = gpd.read_file(fpath_shp_ber) + # Discard columns other than 'VALUE' and 'geometry' + gdf_pts_ber = gdf_pts_ber[["geometry", "VALUE"]] + # Rename the 'VALUE' column to 'value' to be compatible with gdf_pts + gdf_pts_ber = gdf_pts_ber.rename(columns={"VALUE": "value"}) + # Add the Bergen points to gdf_pts + gdf_pts = pd.concat([gdf_pts, gdf_pts_ber]) + + # Read the point data for the boreholes + fpath_daw = Path("..", "gis", "koster_1997", "daw_bestanden", "daw_data_TS_DS", "daw_data_TS_DS.shp") + gdf_daw = gpd.read_file(fpath_daw) + # Select the column for the layer being processed + gdf_daw = gdf_daw[[layer_name, "geometry"]].dropna() + # Rename the column from layer name to 'value' + gdf_daw = gdf_daw.rename(columns={layer_name: "value"}) + + # Add the points to gdf_pts + return pd.concat([gdf_pts, gdf_daw]) + + +def interpolate_gdf(gdf_pt, gdf, gdf_ns=None, gdf_msk=None): + """Interpolate the point values of a layer to a (model) grid. + + Parameters + ---------- + gdf : GeoDataFrame + GeoDataFrame with the points of the interpolation grid + gdf : GeoDataFrame + GeoDataFrame with the values to be interpolated + gdf_ns : GeoDataFrame, optional + GeoDataFrame with a polygon used to fill the grid below the North Sea + with nearest neighbour values after the interpolation. Not used + if None is passed (default). + gdf_msk : GeoDataFrame, optional + GeoDataFrame with polygons that indicates where a layer is absent, as + indicated by the 0.01 m thickness values in the Koster (1997) + shapefiles. Points inside these polygons will be assigned a zero + thickness. Should be set to None (default) when interpolating the + layer top. 
+ + Returns + ------- + _type_ + _description_ + """ + # Create 1D arrays for the interpolation points + xi = gdf_pt["geometry"].x.to_numpy() + yi = gdf_pt["geometry"].y.to_numpy() + + # Convert the data point coordinates and values to NumPy arrays + x = gdf["geometry"].x.to_numpy() + y = gdf["geometry"].y.to_numpy() + z = gdf["value"].to_numpy() + + # Call SciPy's griddata to perform the interpolation. Note that zint + # is assigned NaN outside the convex hull of the data point + # cloud + zint = griddata( + points=(x, y), + values=z, + xi=(xi[None, :], yi[None, :]), + method="linear", # Note: cubic gives very poor results + ) + + # Repeat the interpolation for the interpolation points below + # the North Sea if the polygon is supplied + if gdf_ns is not None: + # Take the existing interpolation result and make array 1D + zint = zint.ravel() + + # Identify the points in the interpolaton grid that are within + # the North Sea polygon + gdf_within = gpd.sjoin(gdf_pt, gdf_ns, predicate="within") + # Convert their indices to a list + idx_ns = gdf_within.index.to_list() + # Use the list to slice the arrays with interpolation point + # x and y values + xi_ns = xi[idx_ns] + yi_ns = yi[idx_ns] + + # Find the points in the previous interpolation result that + # have NaN values (these were outside the convex hull of the + # data points) + idx = np.isnan(zint) + # Keep only the non-NaN values for the interpolation that will + # assign the nearest neighbour value to the points below the + # North Sea + x = xi[~idx] + y = yi[~idx] + z = zint[~idx] + + # Perform interpolation + zint_ns = griddata( + points=(x, y), + values=z, + xi=(xi_ns[None, :], yi_ns[None, :]), + method="nearest", + ) + + # Replace the values of the points in zint that are below the + # North Sea to the nearest neighbour values. + zint[idx_ns] = zint_ns + + # Return the interpolated values + return zint + + +# from sklearn.neighbors import KNeighborsRegressor + +# def interpolate_gdf(X, Y, gdf): +# x = gdf["geometry"].x.to_numpy() +# y = gdf["geometry"].y.to_numpy() +# z = gdf["value"].to_numpy() + +# zint = griddata( +# points=(x, y), +# values=z, +# xi=(X.ravel()[None, :], Y.ravel()[None, :]), +# method='linear', +# ) + +# return zint + +# neigh = KNeighborsRegressor() + +# neigh.fit(X, y) + + +def polyline_from_points(points): + """_summary_. 
+ + Parameters + ---------- + points : _type_ + _description_ + + Returns + ------- + _type_ + _description_ + """ + poly = pv.PolyData() + poly.points = points + the_cell = np.arange(0, len(points), dtype=int) + the_cell = np.insert(the_cell, 0, len(points)) + poly.lines = the_cell + return poly diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py new file mode 100644 index 0000000..b1943eb --- /dev/null +++ b/src/nhflotools/pwnlayers2/layers.py @@ -0,0 +1,344 @@ +import logging + +import numpy as np +import xarray as xr + +logger = logging.getLogger(__name__) + +translate_triwaco_nhd_names_to_index = { + "W11": 0, + "S11": 1, + "W12": 2, + "S12": 3, + "W13": 4, + "S13": 5, + "W21": 6, + "S21": 7, + "W22": 8, + "S22": 9, +} + + +def get_nhd_layer_model(ds_pwn_data, fix_min_layer_thickness=True): + layer_model_nhd = xr.Dataset( + { + "top": ds_pwn_data["top"], + "botm": get_nhd_botm(ds_pwn_data, fix_min_layer_thickness=fix_min_layer_thickness), + "kh": get_nhd_kh(ds_pwn_data), + "kv": get_nhd_kv(ds_pwn_data), + }, + coords={"layer": list(translate_triwaco_nhd_names_to_index.keys())}, + attrs={ + "extent": ds_pwn_data.attrs["extent"], + "gridtype": ds_pwn_data.attrs["gridtype"], + }, + ) + mask_model_nhd = xr.Dataset( + { + "top": ds_pwn_data["top_mask"], + "botm": get_nhd_botm( + ds_pwn_data, + mask=True, + transition=False, + fix_min_layer_thickness=fix_min_layer_thickness, + ), + "kh": get_nhd_kh(ds_pwn_data, mask=True, transition=False), + "kv": get_nhd_kv(ds_pwn_data, mask=True, transition=False), + }, + coords={"layer": list(translate_triwaco_nhd_names_to_index.keys())}, + ) + transition_model_nhd = xr.Dataset( + { + "top": ds_pwn_data["top_transition"], + "botm": get_nhd_botm( + ds_pwn_data, + mask=False, + transition=True, + fix_min_layer_thickness=fix_min_layer_thickness, + ), + "kh": get_nhd_kh(ds_pwn_data, mask=False, transition=True), + "kv": get_nhd_kv(ds_pwn_data, mask=False, transition=True), + }, + coords={"layer": list(translate_triwaco_nhd_names_to_index.keys())}, + ) + + for var in ["kh", "kv", "botm"]: + layer_model_nhd[var] = layer_model_nhd[var].where(mask_model_nhd[var], np.nan) + assert ( + layer_model_nhd[var].notnull() == mask_model_nhd[var] + ).all(), f"There were nan values present in {var} in cells that should be valid" + assert ( + (mask_model_nhd[var] + transition_model_nhd[var]) <= 1 + ).all(), f"There should be no overlap between mask and transition of {var}" + + return ( + layer_model_nhd, + mask_model_nhd, + transition_model_nhd, + ) + + +def get_nhd_thickness(data, mask=False, transition=False, fix_min_layer_thickness=True): + """ + Calculate the thickness of layers in a given dataset. + + If mask is True, the function returns a boolean mask indicating the valid + thickness values, requiering all dependent values to be valid. + If transisition is True, the function returns a boolean mask indicating the + cells for which any of the dependent values is marked as a transition. + + The masks are computated with nan's for False, so that if any of the dependent + values is nan, the mask_float will be nan and mask will be False. + The transitions are computed with nan's for True, so that if any of the dependent + values is nan, the transition_float will be nan and transition will be True. + + If the dataset contains a variable 'top', the thickness is calculated + from the difference between the top and bottom of each layer. 
If the + dataset does not contain a variable 'top', the thickness is calculated + from the difference between the bottoms. + + Parameters + ---------- + data : xarray.Dataset or xarray.DataArray + Input dataset containing the layer data. + mask : bool, optional + If True, returns a boolean mask indicating the valid thickness values. + If False, returns the thickness values directly. Default is False. + transition : bool, optional + If True, treat data as a mask with True for transition cells. Default is False. + + Returns + ------- + thickness: xarray.DataArray or numpy.ndarray + If mask is True, returns a boolean mask indicating the valid thickness values. + If mask is False, returns the thickness values as a DataArray or ndarray. + + """ + botm = get_nhd_botm( + data, + mask=mask, + transition=transition, + fix_min_layer_thickness=fix_min_layer_thickness, + ) + + if mask: + _a = data[[var for var in data.variables if var.endswith("_mask")]] + a = _a.where(_a, other=np.nan) + botm_nodata_isnan = botm.where(botm, other=np.nan) + + def n(s): + return f"{s}_mask" + + elif transition: + # note the ~ operator + _a = data[[var for var in data.variables if var.endswith("_transition")]] + a = _a.where(~_a, other=np.nan).where(_a, 1.0) + botm_nodata_isnan = botm.where(~botm, other=np.nan) + + def n(s): + return f"{s}_transition" + + else: + a = data + botm_nodata_isnan = botm + + def n(s): + return s + + if "top" in data.data_vars: + top_botm = xr.concat((a[n("top")].expand_dims(dim={"layer": ["mv"]}, axis=0), botm_nodata_isnan), dim="layer") + else: + top_botm = botm + + out = -top_botm.diff(dim="layer") + + if mask: + return ~np.isnan(out) + if transition: + mask = get_nhd_thickness(data, mask=True, transition=False) + transition = np.isnan(out) + check = mask.astype(int) + transition.astype(int) + assert (check <= 1).all(), "Transition cells should not overlap with mask." + return transition + out = out.where(~np.isclose(out, 0.0), other=0.0) + + if (out < 0.0).any(): + logger.warning("Botm nhd is not monotonically decreasing. Resulting in negative conductivity values.") + return out + + +def get_nhd_kh(data, mask=False, anisotropy=5.0, transition=False): + """ + Calculate the hydraulic conductivity (kh) based on the given data. + + Values may be applied everywhere. Use mask and/or thickness to determine + where the values are valid. + + Parameters + ---------- + data : xarray.Dataset or xarray.DataArray + The input data containing the necessary variables. + mask : bool, optional + Flag indicating whether to apply a mask to the data. Default is False. + anisotropy : float, optional + Anisotropy factor to be applied to the aquitard layers. Default is 5.0. + transition : bool, optional + Flag indicating whether to treat data as a mask with True for transition cells. + + Returns + ------- + kh: xarray.DataArray + The calculated hydraulic conductivity. 
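
+
+    Notes
+    -----
+    Reference scheme kept from the original model notes (t_da, clist and
+    f_anisotropy are names from those notes, not from this module; the
+    constants do not all match the code below, e.g. kh[0] versus the 8.0
+    used for layer 0):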
+ + kh = xr.zeros_like(t_da) + kh[0] = 7.0 + kh[1] = thickness[1] / clist[0] / f_anisotropy + kh[2] = 7.0 + kh[3] = thickness[3] / clist[1] / f_anisotropy + kh[4] = 12.0 + kh[5] = thickness[5] / clist[2] / f_anisotropy + kh[6] = 15.0 + kh[7] = thickness[7] / clist[3] / f_anisotropy + kh[8] = 20.0 + + """ + if mask: + # valid value if valid thickness and valid NHD_C + out = get_nhd_thickness(data, mask=True, transition=False).rename("kh").drop_vars("layer") + out[{"layer": 1}] *= data["NHD_C1A_mask"] + out[{"layer": 3}] *= data["NHD_C1B_mask"] + out[{"layer": 5}] *= data["NHD_C1C_mask"] + out[{"layer": 7}] *= data["NHD_C1D_mask"] + out[{"layer": 9}] *= data["NHD_C2_mask"] + + elif transition: + # Valid value if valid thickness or valid NHD_C + out = get_nhd_thickness(data, mask=True, transition=False).rename("kh").drop_vars("layer") + out[{"layer": 1}] |= data["NHD_C1A_mask"] + out[{"layer": 3}] |= data["NHD_C1B_mask"] + out[{"layer": 5}] |= data["NHD_C1C_mask"] + out[{"layer": 7}] |= data["NHD_C1D_mask"] + out[{"layer": 9}] |= data["NHD_C2_mask"] + + else: + thickness = get_nhd_thickness(data, mask=mask, transition=transition).drop_vars("layer") + out = xr.ones_like(thickness).rename("kh") + + out[{"layer": [0, 2, 4, 6, 8]}] *= [[8.0], [7.0], [12.0], [15.0], [20.0]] + out[{"layer": 1}] = thickness[{"layer": 1}] / data["NHD_C1A"] * anisotropy + out[{"layer": 3}] = thickness[{"layer": 3}] / data["NHD_C1B"] * anisotropy + out[{"layer": 5}] = thickness[{"layer": 5}] / data["NHD_C1C"] * anisotropy + out[{"layer": 7}] = thickness[{"layer": 7}] / data["NHD_C1D"] * anisotropy + out[{"layer": 9}] = thickness[{"layer": 9}] / data["NHD_C2"] * anisotropy + + return out + + +def get_nhd_kv(data, mask=False, anisotropy=5.0, transition=False): + """ + Calculate the hydraulic conductivity (KV) for different aquifers and aquitards. + + Parameters + ---------- + data (xarray.Dataset): Dataset containing the necessary variables for calculation. + mask (bool, optional): Flag indicating whether to apply a mask to the data. Defaults to False. + anisotropy (float, optional): Anisotropy factor for adjusting the hydraulic conductivity. Defaults to 5.0. + + Returns + ------- + xarray.DataArray: Array containing the calculated hydraulic conductivity values for each layer. + + Example: + # Calculate hydraulic conductivity values without applying a mask + kv_values = get_nhd_kv(data) + + # Calculate hydraulic conductivity values with a mask applied + kv_values_masked = get_nhd_kv(data, mask=True) + + Note the f_anisotropy vs anisotropy + # kv[0] = kh[0] * f_anisotropy + # kv[1] = thickness[1] / clist[0] + # kv[2] = kh[2] * f_anisotropy + # kv[3] = thickness[3] / clist[1] + # kv[4] = kh[4] * f_anisotropy + # kv[5] = thickness[5] / clist[2] + # kv[6] = kh[6] * f_anisotropy + # kv[7] = thickness[7] / clist[3] + # kv[8] = kh[8] * f_anisotropy + """ + kh = get_nhd_kh(data, mask=mask, anisotropy=anisotropy, transition=transition) + + if not mask and not transition: + # bool divided by float is float + out = kh / anisotropy + else: + out = kh + + return out + + +def get_nhd_botm(data, mask=False, transition=False, fix_min_layer_thickness=True): + """ + Calculate the bottom elevation of each layer in the nhd model. + + Parameters + ---------- + data (xarray.Dataset): Dataset containing the necessary variables. + mask (bool, optional): If True, return a mask indicating the valid values. Default is False. + + Returns + ------- + out (xarray.DataArray): Array containing the bottom elevation of each layer. 
+ """ + if mask: + _a = data[[var for var in data.variables if var.endswith("_mask")]] + a = _a.where(_a, np.nan) + + def n(s): + return f"NHD_{s}_mask" + + elif transition: + # note the ~ operator + _a = data[[var for var in data.variables if var.endswith("_transition")]] + a = _a.where(~_a, np.nan).where(_a, 1.0) + + def n(s): + return f"NHD_{s}_transition" + + else: + a = data + + def n(s): + return f"NHD_{s}" + + out = xr.concat( + ( + a[n("BA1A")] + a[n("DI1A")], # Base aquifer 11 + a[n("BA1A")], # Base aquitard 11 + a[n("BA1B")] + a[n("DI1B")], # Base aquifer 12 + a[n("BA1B")], # Base aquitard 12 + a[n("BA1C")] + a[n("DI1C")], # Base aquifer 13 + a[n("BA1C")], # Base aquitard 13 + a[n("BA1D")] + a[n("DI1D")], # Base aquifer 14 + a[n("BA1D")], # Base aquitard 14 + a[n("BAq2")] + a[n("DIq2")], # Base aquifer 21 + a[n("BAq2")], # Base aquitard 21 + ), + dim="layer", + ).transpose("layer", "icell2d") + out.coords["layer"] = list(translate_triwaco_nhd_names_to_index.keys()) + + if mask: + return ~np.isnan(out) + if transition: + mask = get_nhd_botm(data, mask=True, transition=False) + transition = np.isnan(out) + check = mask.astype(int) + transition.astype(int) + assert (check <= 1).all(), "Transition cells should not overlap with mask." + return transition + if fix_min_layer_thickness: + ds = xr.Dataset({"botm": out, "top": data["top"]}) + _fix_missings_botms_and_min_layer_thickness(ds) + out = ds["botm"] + + return out From 97431d47313a4cd5139d1daac3b3760a0360a1bf Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 14 Dec 2024 09:31:21 +0100 Subject: [PATCH 02/12] Update pwn bodemlagen layers.py --- .../interpolate_layer_boundaries.py | 261 ------- .../interpolation_helper_functions.py | 635 ------------------ src/nhflotools/pwnlayers2/layers.py | 402 ++--------- 3 files changed, 74 insertions(+), 1224 deletions(-) delete mode 100644 src/nhflotools/pwnlayers2/interpolate_layer_boundaries.py delete mode 100644 src/nhflotools/pwnlayers2/interpolation_helper_functions.py diff --git a/src/nhflotools/pwnlayers2/interpolate_layer_boundaries.py b/src/nhflotools/pwnlayers2/interpolate_layer_boundaries.py deleted file mode 100644 index 3770ee5..0000000 --- a/src/nhflotools/pwnlayers2/interpolate_layer_boundaries.py +++ /dev/null @@ -1,261 +0,0 @@ -import os - -import geopandas as gpd -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import xarray as xr - -from nhflotools.pwnlayers2.interpolation_helper_functions import ( - CRS_RD, - get_point_values, - interpolate_gdf, - polyline_from_points, -) - -try: - import pyvista as pv -except ImportError as e: - msg = "pyvista is not installed. Please install it to run this script." - raise ImportError(msg) from e - -# Define the interpolation grid (to be replaced with the model grid in NHFLO) -xmin, ymin = 95000, 496000 -xmax, ymax = 115000, 533000 -dx = 100.0 -xi = np.arange(xmin, xmax + dx, dx) -yi = np.arange(ymin, ymax + dx, dx) -X, Y = np.meshgrid(xi, yi) - -# Create a GeoDataFrame with the points of the interpolation -# grid. 
The values are set to zero, which get used as helper -# points in the interpolation of the thickness in the areas -# where the layer is reported as absent by Kosten (1997) - -pts = gpd.points_from_xy(X.ravel(), Y.ravel()) -gdf_pt = gpd.GeoDataFrame( - geometry=pts, - data={"value": [0] * len(pts)}, - crs=CRS_RD, -) - -# Names of the layers to be interpolated -layer_names = ["S11", "S12", "S13", "S21", "S22", "S31", "S32"] - -# Define colours for the 3D plot -cmap_t = mpl.colormaps["Blues"] -colors_t = cmap_t(np.linspace(0.5, 1, len(layer_names))) - -cmap_b = mpl.colormaps["Oranges"] -colors_b = cmap_b(np.linspace(0.5, 1, len(layer_names))) - -fig, ax = plt.subplots() -# cf = plt.contourf(X, Y, zint_t.reshape(X.shape)) -# plt.colorbar(mappable=cf) -# plt.plot(x, y,'o', ms=3, mec='w', mfc='none' ) -# plt.axis('square') -# plt.show() - -# Define a line for creating a cross section that shows the -# projection of the layer top/bottoms along a line. -ln_xs = polyline_from_points( - np.array([ - [(xmin + xmax) / 2, ymin, 0], - [(xmin + xmax) / 2, ymax, 0], - [(xmin + xmax) / 2, ymax, -15000], - [(xmin + xmax) / 2, ymin, -15000], - ]) -) - -# Create a plotter instance for the 3D plot -plotter = pv.Plotter() -# Same for the cross section -plotter_xs = pv.Plotter() - -overlap = np.zeros(X.shape) - -da = xr.DataArray( - data=overlap, - dims=["lat", "lon"], - coords={ - "lat": yi, - "lon": xi, - }, -) - -# Load the polygon to fill the nans below the North Sea with nearest neighbour interpolation values -fpath_shp = os.path.join("..", "gis", "kaarten_2024_voor_interpolatie", "noordzee_clip", "noordzee_clip.shp") -gdf_ns = gpd.read_file(fpath_shp) - -# Create a list with the names of the subfolders where the interpolation result will be stored -subdirs = ["top_aquitard", "dikte_aquitard", "bot_aquitard"] - -# Loop over the layers -fpath_gpkg = os.path.join("..", "gis", "kaarten_2024_voor_interpolatie", "interpolation_points.gpkg") -for c, layer_name in enumerate(layer_names): - # Create GeoDataFrames with the data points of the top and thicknesses - gdf_t = get_point_values(f"T{layer_name}") - # gdf_t.set_crs(CRS_RD) - gdf_d = get_point_values(f"D{layer_name}") - # gdf_d.set_crs(CRS_RD) - - # Read the polygons that indicate the absence of a layer (0.01 m polygons in the Koster (1997) shapefiles) - fpath_shp = os.path.join( - "..", "gis", "kaarten_2024_voor_interpolatie", "dikte_aquitard", f"D{layer_name}", f"D{layer_name}_mask.shp" - ) - gdf_msk = gpd.read_file(fpath_shp) - gdf_msk = gdf_msk[["geometry", "VALUE"]] - gdf_msk = gdf_msk.rename(columns={"VALUE": "value"}) - gdf_within = gpd.sjoin(gdf_pt, gdf_msk, predicate="within") - gdf_d = pd.concat([gdf_d, gdf_within[["geometry", "value_left"]]]) - - gdf_t = gdf_t.drop_duplicates() - gdf_d = gdf_d.drop_duplicates() - - # Store the interpolation points (layer top) so that they can be visualised in QGIS - # Experimental, commented out for the time being - # gdf_t.to_file( - # fpath_gpkg, - # driver="GPKG", - # mode="a", - # layer=layer_name, - # ) - - # gdf_d.to_file( - # fpath_gpkg, - # driver="GPKG", - # mode="a", - # layer=layer_name, - # ) - - # # Store the interpolation points (layer top) so that they can be visualised in QGIS - fpath_shp = os.path.join( - "..", - "gis", - "kaarten_2024_voor_interpolatie", - "top_aquitard", - f"T{layer_name}", - f"T{layer_name}_interpolation_points.shp", - ) - gdf_t.set_crs(CRS_RD) - # gdf_t.to_file(fpath_shp) - - # # Store the interpolation points (layer thickness) so that they can be visualised in QGIS - fpath_shp 
= os.path.join( - "..", - "gis", - "kaarten_2024_voor_interpolatie", - "dikte_aquitard", - f"D{layer_name}", - f"D{layer_name}_interpolation_points.shp", - ) - gdf_d.set_crs(CRS_RD) - # gdf_d.to_file(fpath_shp) - - # Interpolate the top - zint_t = interpolate_gdf(gdf_pt, gdf_t, gdf_ns=gdf_ns) - # Interpolate the thickness - zint_d = interpolate_gdf(gdf_pt, gdf_d, gdf_ns=gdf_ns, gdf_msk=gdf_msk) - - # Check if a mask exists for the Bergen area - fpath_shp = os.path.join( - "..", - "gis", - "kaarten_2024_voor_interpolatie", - "dikte_aquitard", - f"D{layer_name}", - f"D{layer_name}_mask_bergen_area.shp", - ) - if os.path.isfile(fpath_shp): - # Read the shapefile - gdf_msk_bergen = gpd.read_file(fpath_shp) - # Check which grid points are within the clipping polygons - gdf_within = gpd.sjoin(gdf_pt, gdf_msk_bergen, predicate="within") - # Convert their indices to a list - idx_msk = gdf_within.index.to_list() - # Set the interpolated values to NaN - zint_t[idx_msk] = np.nan - zint_d[idx_msk] = np.nan - - # Calculate the layer bottom using the interpolated values - zint_b = zint_t - zint_d - - # Store the interpolated values for visualization in QGIS - for subdir, zint in zip(subdirs, [zint_t, zint_d, zint_b], strict=False): - da.values = zint.reshape(X.shape) - fstem = f"{subdir[0].capitalize()}{layer_name}" - fpath = os.path.join("..", "gis", "kaarten_2024_geinterpoleerd", subdir, fstem) - os.makedirs(fpath, exist_ok=True) - fpath = os.path.join(fpath, f"{fstem}.nc") - da.to_netcdf(fpath) - - # Determine the areas where the bottom of a layer is below the top of the underlying layer - if c > 0: - dz = zint_b0 - zint_t # Note that zint_b0 is not defined until the first layer has been processed # noqa: F821 - dz[dz > 0] = np.nan - da.values = dz.reshape(X.shape) - - fpath = os.path.join( - "..", "gis", "kaarten_2024_geinterpoleerd", "overlap", f"overlap_{layer_names[c - 1]}_{layer_name}.nc" - ) - da.to_netcdf(fpath) - - zint_b0 = zint_b # Store the bottom of the current layer for comparison with the top of the next layer - # zint_t[np.isnan(zint_t)] = 0 - # zint_b[np.isnan(zint_b)] = 0 - - # Add the top to the 3D plot - grid_t = pv.StructuredGrid(X, Y, zint_t.reshape(X.shape) * 100) - for i in np.where(np.isnan(zint_t))[0]: - grid_t.BlankPoint(i) - - plotter.add_mesh( - grid_t, - color=colors_t[c], - style="surface", - show_edges=False, - nan_opacity=0, - # scalars=grid.points[:, -1], - # scalar_bar_args={'vertical': True}, - ) - - # Add the top to the 3D cross section with the projected top and bottom elevations - line_slice_t = grid_t.slice_along_line(ln_xs) - plotter_xs.add_mesh( - line_slice_t, - line_width=1, - render_lines_as_tubes=False, - color=colors_t[c], - ) - - # Add the bottom to the 3D plot - grid_b = pv.StructuredGrid(X, Y, zint_b.reshape(X.shape) * 100) - for i in np.where(np.isnan(zint_b))[0]: - grid_b.BlankPoint(i) - - plotter.add_mesh( - grid_b, - color=colors_b[c], - style="surface", - show_edges=False, - nan_opacity=0, - # scalars=grid.points[:, -1], - # scalar_bar_args={'vertical': True}, - ) - - # Add the bottom to the 3D cross section with the projected top and bottom elevations - line_slice_b = grid_b.slice_along_line(ln_xs) - plotter_xs.add_mesh( - line_slice_b, - line_width=1, - render_lines_as_tubes=False, - color=colors_b[c], - ) - -# Activate the 3D plots -plotter.show_grid() -plotter.show() - -plotter_xs.add_mesh(ln_xs, line_width=1, color="grey") -plotter_xs.show() diff --git a/src/nhflotools/pwnlayers2/interpolation_helper_functions.py 
b/src/nhflotools/pwnlayers2/interpolation_helper_functions.py
deleted file mode 100644
index 97f25e9..0000000
--- a/src/nhflotools/pwnlayers2/interpolation_helper_functions.py
+++ /dev/null
@@ -1,635 +0,0 @@
-from pathlib import Path
-
-import geopandas as gpd
-import numpy as np
-import pandas as pd
-import pyvista as pv
-from scipy.interpolate import griddata
-
-# Default CRS, Amersfoort RD
-CRS_RD = 28992
-
-# These dictionaries map the polygon values in the nhflo data
-# version of the Koster shapefiles to values for the contour
-# lines. The value that is assigned to the contour line is the
-# value that occurs in both lists that correspond to a polygon
-# value. So for example, a line that borders a polygon with
-# -1.50 and 0 for TS11 will be assigned a value of -0.50 m.
-LEGEND_DICTS = {
-    "TS11": {
-        -3.75: [-2.5],
-        -1.50: [-2.50, -0.50],
-        0: [-0.50, 0.50],
-        1.50: [0.50, 2.50],
-        3.75: [2.50, 5.00],
-        5.50: [5.00],
-    },
-    "DS11": {
-        0.13: [0.25],
-        0.38: [0.25, 0.50],
-        0.75: [0.50, 1.00],
-        1.50: [1.00, 2.00],
-        2.50: [2.00, 3.00],
-        4.00: [3.00, 5.00],
-        6.00: [5.00, 7.50],
-        7.75: [7.50],
-    },
-    "TS12": {
-        0.5: [0.00],
-        -2.50: [-5.00, 0.00],
-        -7.50: [-10.00, -5.00],
-        -12.50: [-15.00, -10.00],
-        -17.50: [-15.00],
-    },
-    "DS12": {
-        0.75: [1.00],
-        2.00: [1.00, 3.00],
-        4.00: [3.00, 5.00],
-        7.50: [5.00, 10.00],
-        12.50: [10.00, 15.00],
-        17.50: [15.00, 20.00],
-        22.50: [20.00, 25.00],
-        27.50: [25.00],
-    },
-    "TS13": {
-        -12.50: [-15.00],
-        -17.50: [-20.00, -15.00],
-        -22.50: [-25.00, -20.00],
-        -27.50: [-30.00, -25.00],
-        -32.50: [-35.00, -30.00],
-        -36.00: [-35.00],
-    },
-    "DS13": {
-        0.25: [0.50],
-        0.75: [0.50, 1.00],
-        2.00: [1.00, 3.00],
-        4.00: [3.00, 5.00],
-        7.50: [5.00, 10.00],
-        12.50: [10.00, 15.00],
-        17.50: [15.00, 20.00],
-        22.50: [20.00, 25.00],
-        27.50: [25.00],
-    },
-    "TS21": {
-        -25.00: [-30.00, -20.00],
-        -35.00: [-40.00, -30.00],
-        -45.00: [-50.00, -40.00],
-        -55.00: [-60.00, -50.00],
-        -65.00: [-60.00],
-    },
-    "DS21": {
-        2.00: [3.00],
-        4.00: [3.00, 5.00],
-        7.50: [5.00, 10.00],
-        12.50: [10.00, 15.00],
-        17.50: [15.00, 20.00],
-        22.50: [20.00, 25.00],
-        27.50: [25.00, 30.00],
-        32.50: [30.00, 35.00],
-        37.50: [35.00, 40.00],
-        42.50: [40.00],
-    },
-    "TS22": {
-        -25.00: [-30.00],
-        -35.00: [-40.00, -30.00],
-        -45.00: [-50.00, -40.00],
-        -55.00: [-60.00, -50.00],
-        -65.00: [-70.00, -60.00],
-        -75.00: [-80.00, -70.00],
-        -85.00: [-90.00, -80.00],
-        -95.00: [-90.00],
-    },
-    "DS22": {
-        2.50: [5.00],
-        7.50: [5.00, 10.00],
-        12.50: [10.00, 15.00],
-        17.50: [15.00, 20.00],
-        22.50: [20.00, 25.00],
-        27.50: [25.00, 30.00],
-        32.50: [30.00, 35.00],
-        37.50: [35.00, 40.00],
-        42.50: [40.00, 45.00],
-        47.50: [45.00, 50.00],
-        52.50: [50.00],
-    },
-    "TS31": {
-        -45.00: [-50.00],
-        -55.00: [-60.00, -50.00],
-        -65.00: [-70.00, -60.00],
-        -75.00: [-80.00, -70.00],
-        -85.00: [-90.00, -80.00],
-        -95.00: [-100.0, -90.00],
-        -105.0: [-100.0],
-    },
-    "DS31": {
-        0.25: [0.50],
-        0.75: [0.50, 1.00],
-        2.00: [1.00, 3.00],
-        4.00: [3.00, 5.00],
-        7.50: [5.00, 10.00],
-        12.50: [10.00, 15.00],
-        17.50: [15.00],
-    },
-    "TS32": {
-        -77.50: [-80.00],
-        -82.50: [-85.00, -80.00],
-        -87.50: [-90.00, -85.00],
-        -92.50: [-95.00, -90.00],
-        -97.50: [-100.0, -95.00],
-        -102.5: [-105.0, -100.00],
-        -107.5: [-110.0, -105.00],
-        -112.5: [-110.0],
-    },
-    "DS32": {
-        0.25: [0.50],
-        0.75: [0.50, 1.00],
-        2.00: [1.00, 3.00],
-        4.00: [3.00, 5.00],
-        7.50: [5.00, 10.00],
-        12.50: [10.00, 15.00],
-        17.50: [15.00],
-    },
-}
-
-
-def get_internal_contour_lines(gdf_ln, gdf_pl):
-    """This 
function looks for linestrings in gdf_ln that do not - overlap with the polygon boundaries in gdf_pl. The purpose is - to separate the lines that represent a thickness from the lines - that represent the limit of occurrence of a layer. Only used - for thicknesses, not for the tops. - - Parameters - ---------- - gdf_ln : GeoDataFrame - GeoDataFrame containing the linestrings of the thickness - contours - gdf_pl : GeoDataFrame - GeoDataFrame with the polygons of the thickness - - Returns - ------- - GeoDataFrame - Returns gdf_ln without the linestrings that overlap with the - limit of occurrence - """ - # Select only the polygons which indicate the regions where the layer does not occur - idx = gdf_pl["VALUE"] == 0.01 - - # Create a new GeoDataFrame containing the polygon boundaries as (Multi)LineStrings - gdf_bnd = gpd.GeoDataFrame( - geometry=gdf_pl.loc[idx, "geometry"].boundary, - crs=CRS_RD, - ) - # Explode so that MultiLineStrings become LineStrings - gdf_bnd = gdf_bnd.explode() - # Create a buffer around the polygon boundaries because in rare cases there - # are minor differences between the line vertices and the polygon vertices - gdf_bnd["geometry"] = gdf_bnd["geometry"].buffer(2.0) - - # Do a spatial join to find out which linestrings in gdf_ln are within - # the polygons of gdf_bnd. Those that are, should not be returned by - # the function. - gdf_jn = gpd.sjoin(gdf_bnd, gdf_ln, how="left", predicate="contains") - # Use the index of gdf_ln to create an GeoSeries to slice gdf_ln with - idx = gdf_ln.index.isin(gdf_jn["index_right"]) - - # Return gdf_ln without the linestrings that overlap with the 0.01 m - # thickness polygon boundaries. - return gdf_ln.loc[~idx] - - -def assign_poly_values_to_linestrings( - gdf_ln, # GeoDataFrame with contour lines - gdf_pl, # GeoDataFrame with the Koster (1997) polygons - layer_name, # Name of the layer -): - """This function tries to identify which polygons in gdf_pl border - a contour line in gdf_ln. Ideally, a contour line forms the separation - between two polygons but this is not always the case due to topology - errors and the addition of polygons to the Koster (1997) shapefiles - from other sources (occurs mostly in the southern part of the area). - - Parameters - ---------- - gdf_ln : GeoDataFrame - GeoDataFrame containing the linestrings of the thickness - contours - gdf_pl : GeoDataFrame - GeoDataFrame with the polygons of the thickness - layer_name : str - Name of the layer being processed. This is needed to look up - the dictionary in LEGEND_DICTS that maps the Koster (1997) - legend entries to the top/thickness values assigned to the - polygons. - - Returns - ------- - GeoDataFrame - A GeoDataFrame with for each linestring the assigned value, - the number of bordering polygons found and any remarks. 
- """ - # Get the legend_dict for the current layer - legend_dict = LEGEND_DICTS[layer_name] - - # Check for polygons with VALUE attribute of 0.01 m, signals where layer is absent - # Only returns rows for DS files, no effect for TS files - idx = gdf_pl["VALUE"] == 0.01 - # Remove the 0.01 m polygons - gdf_pl = gdf_pl.loc[~idx] - # Renumber the index - gdf_pl = gdf_pl.reindex(index=range(len(gdf_pl))) - # Make geometries valid by using the buffer(0) trick - gdf_pl["geometry"] = gdf_pl.buffer(0) - - # Determine which linestrings in gdf_ln intersect which polygons in gdf_pl - gdf_int = gpd.sjoin(gdf_ln, gdf_pl, how="left", predicate="intersects") - - data = [] - # The index of gdf_int contains duplicates because most lines will - # intersect multiple polygons. - for i in gdf_int.index.unique(): - # Select the rows for the contour linestring and store in a separate - # GeoDataFrame - idx = gdf_int.index == i - gdf_i = gdf_int.loc[idx] - - # Determine the length of the DataFrame, i.e. the number of polygons that intersect the linestring - N = len(gdf_i) - - # Default remark for N == 2 - remark = "Value assigned automatically in Python script." - - # The linestring's geometry is the same for all rows of gdf_i. Only one is - # needed to build the GeoDataFrame returned by the function. - geom = gdf_i["geometry"].values[0] - - # Ideally each contour has a polygon to either side, so N == 2. This is not always the case, - # hence the need for these conditional statements - if N == 2: - # Get the VALUE attribute of each polygon - v0 = gdf_i["VALUE"].values[0] - v1 = gdf_i["VALUE"].values[1] - # Get the legend range from legend_dict - list0 = legend_dict.get(v0) - list1 = legend_dict.get(v1) - # The value may not correspond to a range in the original Koster (1997) - # as polygons from other sources were later added to the shapefiles. In - # that case (one of) the list(s) will be None. - if None in {list0, list1}: - # Do not assign a value to the linestring and change the remark that will - # appear in the shapefile attribute table. - v = [None] - remark = "Poly not in mapping dict. Assign value manually." - else: - # If both lists are not None then find the item they have in common. - # Ideally this is a single value. When this is not the case, no value - # is assigned and the remark is changed to reflect this problem. - v = list({item0 for item0 in list0 if item0 in list1}) - if len(v) != 1: - v = [None] - remark = f"Ambiguous result {len(v)}. Assign value manually." - # A linestring can intersect more than two polygons due to - # - overlapping polygons in the original shapefiles - # - the start- and end points of the contour lines touch (digitizing mistake) - # - when polygons from another data source were added to the original Koster (1997) polygons. - # In these cases N is larger than 2 and it cannot be determined - # automatically what the value for the linestring must be. - elif N > 2: - v = [None] - remark = "Line intersects more than 2 polygons. Assign value manually." - # print(gdf_i["index_right"].to_list()) - gdf_i["index_right"].dropna().astype(int).values - # A line can intersect no or a single polygon. This is most frequently the case for - # the lines that were added to the Koster (1997) linestrings for the Bergen area - # (based on the Stuyfzand figures). - elif N < 2: - v = [None] - remark = "Line intersects less than 2 polygons. Assign value manually." 
-
-        # Append one line to the data for each linestring in gdf_ln
-        data.append([*v, N, remark, geom])
-
-    # Return a GeoDataFrame of linestrings
-    return gpd.GeoDataFrame(
-        data=data,
-        columns=["value", "N", "remark", "geometry"],
-        crs=CRS_RD,
-    )
-
-
-def combine_lists(lists):
-    """Returns combinations of lists that share common items.
-
-    Parameters
-    ----------
-    lists : list
-        A list of lists to be analyzed
-
-    Returns
-    -------
-    list
-        A list in which the input lists with common items have
-        been combined.
-    """
-    # Start by assuming each list is a separate group
-    groups = [set(lst) for lst in lists]
-
-    merged = True
-    while merged:
-        merged = False
-        for i in range(len(groups)):
-            for j in range(i + 1, len(groups)):
-                # If two groups share any common elements, merge them
-                if groups[i].intersection(groups[j]):
-                    groups[i] = groups[i].union(groups[j])
-                    groups.pop(j)
-                    merged = True
-                    break
-            if merged:
-                break
-
-    # Convert sets back to lists for final output
-    return [list(group) for group in groups]
-
-
-def join_contour_line_segments(gdf_ln):
-    """This function combines the individual linestrings that appear
-    in the original Koster (1997) contour line files into larger
-    linestrings.
-
-    Parameters
-    ----------
-    gdf_ln : GeoDataFrame
-        GeoDataFrame with individual linestrings.
-
-    Returns
-    -------
-    GeoDataFrame
-        GeoDataFrame with the combined linestrings.
-    """
-    # Use a spatial join to identify the linestrings with touching endpoints
-    gdf_tch = gpd.sjoin(gdf_ln, gdf_ln, how="left", predicate="touches")
-
-    # Create a dictionary for every line segment, which will store the
-    # index numbers of the linestrings that it touches
-    l_dict = {i: [] for i in gdf_ln.index}
-    # Loop through the index items of gdf_tch
-    for i0 in gdf_tch.index:
-        # Get the rows for the current line segment
-        idx = gdf_tch.index == i0
-        # Get the index numbers of the touching linestrings
-        irs = gdf_tch.loc[idx, "index_right"]
-        irs = irs.dropna()
-        # Combine the linestring index number with the
-        # index numbers of the touching linestrings into a single list
-        i_list = [i0, *irs.astype(int).tolist()]
-        # Update each item in the l_dict dictionary by adding i_list
-        for i1 in i_list:
-            l_dict[i1] += i_list
-
-    # Combine the linestring segments that form a single contour line. This
-    # will result in a nested list in which each item is a list containing
-    # the index numbers of the line segments that together form a contour line
-    unique_lines = combine_lists(list(l_dict.values()))
-
-    # Loop through the list with the index numbers of the segments
-    # and use these to create single contour lines
-    lns = []
-    ln_vals = []
-    for idx in unique_lines:
-        if len(idx) > 0:
-            # Combine the line segments into a single linestring
-            lns.append(gdf_ln.loc[idx, "geometry"].union_all())
-            # Each line segment has a top/thickness value associated
-            # with it. Ideally they are all the same but this is not
-            # guaranteed. The next two lines select the most frequently
-            # occurring value, which will be used as the value attribute
-            # in the GeoDataFrame that will be returned.
-            vals_lst = gdf_ln.loc[idx, "value"].to_list()
-            v = max(vals_lst, key=vals_lst.count)
-            ln_vals.append(v)
-
-    # Return a GeoDataFrame with the combined linestrings and their
-    # top/thickness value.
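-    # The value is stored under two column names: "script_value" preserves
-    # the automatically assigned value, while "value" can be edited manually
-    # afterwards (e.g. in QGIS) without losing the original. This reading of
-    # the two columns is an assumption based on the column names.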
-    return gpd.GeoDataFrame(
-        geometry=lns,
-        data={
-            "script_value": ln_vals,
-            "value": ln_vals,
-        },
-        crs=CRS_RD,
-    )
-
-
-def get_point_values(layer_name):
-    """This function is called from interpolate_layer_boundaries.py to convert
-    line segments to points and combine them with point data from the geo_daw
-    data (top/thickness values for boreholes interpreted by Koster, 1997) and
-    the point values for the Bergen area (digitized from the figures in the
-    Stuyfzand, 1987 report).
-
-    Parameters
-    ----------
-    layer_name : str
-        Name of the layer.
-
-    Returns
-    -------
-    GeoDataFrame
-        GeoDataFrame with points and their corresponding top/thickness values
-    """
-    # Folder with the contour lines
-    src_dir = Path("..", "gis", "kaarten_2024_voor_interpolatie")
-    # Set the paths to the files to be read
-    if layer_name.startswith("T"):
-        fpath_shp = Path(src_dir, "top_aquitard", layer_name, f"{layer_name}_union_with_values_edited.shp")
-        fpath_shp_ber = Path(src_dir, "top_aquitard", layer_name, f"{layer_name}_bergen_points.shp")
-    elif layer_name.startswith("D"):
-        fpath_shp = Path(src_dir, "dikte_aquitard", layer_name, f"{layer_name}_union_with_values_edited.shp")
-        fpath_shp_ber = Path(src_dir, "dikte_aquitard", layer_name, f"{layer_name}_bergen_points.shp")
-
-    # Import the contour lines
-    gdf_ln = gpd.read_file(fpath_shp)
-    # Convert any multilinestrings to linestrings
-    gdf_ln = gdf_ln.explode()
-    # Add the line vertices as a list of coordinates to each row of the GeoDataFrame
-    gdf_ln["points"] = gdf_ln.apply(lambda x: list(x["geometry"].coords), axis=1)
-
-    # Convert the coordinates to points and assign values
-    values = []
-    pts = []
-    # Loop through each row of gdf_ln
-    for _index, row in gdf_ln.iterrows():
-        # Skip NULL values that can occur for Bergen -999 polygons
-        if np.isnan(row["value"]):
-            continue
-        # Get the coordinates created by the lambda function above and
-        # convert them to Point objects
-        xy_arr = np.array(row["points"])
-        pts_i = list(gpd.points_from_xy(x=xy_arr[:, 0], y=xy_arr[:, 1]))
-        # Add to the list of existing points
-        pts += pts_i
-        # Expand the list with values
-        values += [row["value"]] * len(pts_i)
-
-    # Convert to a GeoDataFrame
-    gdf_pts = gpd.GeoDataFrame(
-        data=values,
-        geometry=pts,
-        columns=["value"],
-        crs=CRS_RD,
-    )
-
-    # Check if a shapefile with point data exists for the Bergen area
-    if fpath_shp_ber.exists():
-        # Read the file
-        gdf_pts_ber = gpd.read_file(fpath_shp_ber)
-        # Discard columns other than 'VALUE' and 'geometry'
-        gdf_pts_ber = gdf_pts_ber[["geometry", "VALUE"]]
-        # Rename the 'VALUE' column to 'value' to be compatible with gdf_pts
-        gdf_pts_ber = gdf_pts_ber.rename(columns={"VALUE": "value"})
-        # Add the Bergen points to gdf_pts
-        gdf_pts = pd.concat([gdf_pts, gdf_pts_ber])
-
-    # Read the point data for the boreholes
-    fpath_daw = Path("..", "gis", "koster_1997", "daw_bestanden", "daw_data_TS_DS", "daw_data_TS_DS.shp")
-    gdf_daw = gpd.read_file(fpath_daw)
-    # Select the column for the layer being processed
-    gdf_daw = gdf_daw[[layer_name, "geometry"]].dropna()
-    # Rename the column from layer name to 'value'
-    gdf_daw = gdf_daw.rename(columns={layer_name: "value"})
-
-    # Add the points to gdf_pts
-    return pd.concat([gdf_pts, gdf_daw])
-
-
-def interpolate_gdf(gdf_pt, gdf, gdf_ns=None, gdf_msk=None):
-    """Interpolate the point values of a layer to a (model) grid.
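-
-    Linear interpolation (scipy.interpolate.griddata) is used inside the
-    convex hull of the data points; when gdf_ns is passed, grid points
-    below the North Sea are subsequently filled with nearest-neighbour
-    values.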
-
-    Parameters
-    ----------
-    gdf_pt : GeoDataFrame
-        GeoDataFrame with the points of the interpolation grid
-    gdf : GeoDataFrame
-        GeoDataFrame with the values to be interpolated
-    gdf_ns : GeoDataFrame, optional
-        GeoDataFrame with a polygon used to fill the grid below the North Sea
-        with nearest neighbour values after the interpolation. Not used
-        if None is passed (default).
-    gdf_msk : GeoDataFrame, optional
-        GeoDataFrame with polygons that indicate where a layer is absent, as
-        indicated by the 0.01 m thickness values in the Koster (1997)
-        shapefiles. Points inside these polygons will be assigned a zero
-        thickness. Should be set to None (default) when interpolating the
-        layer top. Note that gdf_msk is currently not used inside this
-        function.
-
-    Returns
-    -------
-    numpy.ndarray
-        The interpolated values at the points of the interpolation grid.
-        Points outside the convex hull of the data points remain NaN,
-        unless they are filled via gdf_ns.
-    """
-    # Create 1D arrays for the interpolation points
-    xi = gdf_pt["geometry"].x.to_numpy()
-    yi = gdf_pt["geometry"].y.to_numpy()
-
-    # Convert the data point coordinates and values to NumPy arrays
-    x = gdf["geometry"].x.to_numpy()
-    y = gdf["geometry"].y.to_numpy()
-    z = gdf["value"].to_numpy()
-
-    # Call SciPy's griddata to perform the interpolation. Note that zint
-    # is assigned NaN outside the convex hull of the data point
-    # cloud.
-    zint = griddata(
-        points=(x, y),
-        values=z,
-        xi=(xi[None, :], yi[None, :]),
-        method="linear",  # Note: cubic gives very poor results
-    )
-
-    # Repeat the interpolation for the interpolation points below
-    # the North Sea if the polygon is supplied
-    if gdf_ns is not None:
-        # Take the existing interpolation result and make array 1D
-        zint = zint.ravel()
-
-        # Identify the points in the interpolation grid that are within
-        # the North Sea polygon
-        gdf_within = gpd.sjoin(gdf_pt, gdf_ns, predicate="within")
-        # Convert their indices to a list
-        idx_ns = gdf_within.index.to_list()
-        # Use the list to slice the arrays with interpolation point
-        # x and y values
-        xi_ns = xi[idx_ns]
-        yi_ns = yi[idx_ns]
-
-        # Find the points in the previous interpolation result that
-        # have NaN values (these were outside the convex hull of the
-        # data points)
-        idx = np.isnan(zint)
-        # Keep only the non-NaN values for the interpolation that will
-        # assign the nearest neighbour value to the points below the
-        # North Sea
-        x = xi[~idx]
-        y = yi[~idx]
-        z = zint[~idx]
-
-        # Perform interpolation
-        zint_ns = griddata(
-            points=(x, y),
-            values=z,
-            xi=(xi_ns[None, :], yi_ns[None, :]),
-            method="nearest",
-        )
-
-        # Replace the values of the points in zint that are below the
-        # North Sea with the nearest neighbour values.
-        zint[idx_ns] = zint_ns
-
-    # Return the interpolated values
-    return zint
-
-
-# from sklearn.neighbors import KNeighborsRegressor
-
-# def interpolate_gdf(X, Y, gdf):
-#     x = gdf["geometry"].x.to_numpy()
-#     y = gdf["geometry"].y.to_numpy()
-#     z = gdf["value"].to_numpy()
-
-#     zint = griddata(
-#         points=(x, y),
-#         values=z,
-#         xi=(X.ravel()[None, :], Y.ravel()[None, :]),
-#         method='linear',
-#     )
-
-#     return zint
-
-#     neigh = KNeighborsRegressor()
-
-#     neigh.fit(X, y)
-
-
-def polyline_from_points(points):
-    """Create a PyVista PolyData polyline from an ordered array of points.
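-
-    PyVista/VTK stores a polyline cell as a flat connectivity array that
-    starts with the number of points, followed by the point indices,
-    e.g. [4, 0, 1, 2, 3] for a four-point line; the body below builds
-    exactly that array with np.insert.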
-
-    Parameters
-    ----------
-    points : numpy.ndarray
-        Array of shape (n, 3) with the ordered xyz coordinates of the
-        polyline vertices.
-
-    Returns
-    -------
-    pyvista.PolyData
-        PolyData object containing a single polyline cell that connects
-        the points in the given order.
-    """
-    poly = pv.PolyData()
-    poly.points = points
-    the_cell = np.arange(0, len(points), dtype=int)
-    the_cell = np.insert(the_cell, 0, len(points))
-    poly.lines = the_cell
-    return poly
diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py
index b1943eb..96c3bcf 100644
--- a/src/nhflotools/pwnlayers2/layers.py
+++ b/src/nhflotools/pwnlayers2/layers.py
@@ -1,344 +1,90 @@
+"""Module containing functions to retrieve PWN bodemlagen."""
 import logging
+from pathlib import Path
 
+import geopandas as gpd
+import nlmod
 import numpy as np
+import pykrige.ok
 import xarray as xr
+from shapely.ops import unary_union
 
 logger = logging.getLogger(__name__)
 
-translate_triwaco_nhd_names_to_index = {
-    "W11": 0,
-    "S11": 1,
-    "W12": 2,
-    "S12": 3,
-    "W13": 4,
-    "S13": 5,
-    "W21": 6,
-    "S21": 7,
-    "W22": 8,
-    "S22": 9,
-}
-
-
-def get_nhd_layer_model(ds_pwn_data, fix_min_layer_thickness=True):
-    layer_model_nhd = xr.Dataset(
-        {
-            "top": ds_pwn_data["top"],
-            "botm": get_nhd_botm(ds_pwn_data, fix_min_layer_thickness=fix_min_layer_thickness),
-            "kh": get_nhd_kh(ds_pwn_data),
-            "kv": get_nhd_kv(ds_pwn_data),
-        },
-        coords={"layer": list(translate_triwaco_nhd_names_to_index.keys())},
-        attrs={
-            "extent": ds_pwn_data.attrs["extent"],
-            "gridtype": ds_pwn_data.attrs["gridtype"],
-        },
-    )
-    mask_model_nhd = xr.Dataset(
-        {
-            "top": ds_pwn_data["top_mask"],
-            "botm": get_nhd_botm(
-                ds_pwn_data,
-                mask=True,
-                transition=False,
-                fix_min_layer_thickness=fix_min_layer_thickness,
-            ),
-            "kh": get_nhd_kh(ds_pwn_data, mask=True, transition=False),
-            "kv": get_nhd_kv(ds_pwn_data, mask=True, transition=False),
-        },
-        coords={"layer": list(translate_triwaco_nhd_names_to_index.keys())},
-    )
-    transition_model_nhd = xr.Dataset(
-        {
-            "top": ds_pwn_data["top_transition"],
-            "botm": get_nhd_botm(
-                ds_pwn_data,
-                mask=False,
-                transition=True,
-                fix_min_layer_thickness=fix_min_layer_thickness,
-            ),
-            "kh": get_nhd_kh(ds_pwn_data, mask=False, transition=True),
-            "kv": get_nhd_kv(ds_pwn_data, mask=False, transition=True),
-        },
-        coords={"layer": list(translate_triwaco_nhd_names_to_index.keys())},
-    )
-
-    for var in ["kh", "kv", "botm"]:
-        layer_model_nhd[var] = layer_model_nhd[var].where(mask_model_nhd[var], np.nan)
-        assert (
-            layer_model_nhd[var].notnull() == mask_model_nhd[var]
-        ).all(), f"There were nan values present in {var} in cells that should be valid"
-        assert (
-            (mask_model_nhd[var] + transition_model_nhd[var]) <= 1
-        ).all(), f"There should be no overlap between mask and transition of {var}"
-
-    return (
-        layer_model_nhd,
-        mask_model_nhd,
-        transition_model_nhd,
-    )
-
-
-def get_nhd_thickness(data, mask=False, transition=False, fix_min_layer_thickness=True):
+def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, transition_length: float) -> dict:
     """
-    Calculate the thickness of layers in a given dataset.
-
-    If mask is True, the function returns a boolean mask indicating the valid
-    thickness values, requiring all dependent values to be valid.
-    If transition is True, the function returns a boolean mask indicating the
-    cells for which any of the dependent values is marked as a transition.
+    Interpolate the thickness of the aquitard layers and the top of the aquitard layers using Kriging.
 
-    The masks are computed with nan's for False, so that if any of the dependent
-    values is nan, the mask_float will be nan and mask will be False.
-    The transitions are computed with nan's for True, so that if any of the dependent
-    values is nan, the transition_float will be nan and transition will be True.
-
-    If the dataset contains a variable 'top', the thickness is calculated
-    from the difference between the top and bottom of each layer. If the
-    dataset does not contain a variable 'top', the thickness is calculated
-    from the difference between the bottoms.
+    The thickness of the aquitard layers is interpolated using the points in the file
+    `dikte_aquitard/D{layer_name}/D{layer_name}_interpolation_points.geojson`.
+    The top of the aquitard layers is interpolated using the points in the file
+    `top_aquitard/T{layer_name}/T{layer_name}_interpolation_points.geojson`.
+    The mask of the aquitard layers is defined in the file
+    `dikte_aquitard/D{layer_name}/D{layer_name}_mask_combined.geojson`.
 
     Parameters
     ----------
-    data : xarray.Dataset or xarray.DataArray
-        Input dataset containing the layer data.
-    mask : bool, optional
-        If True, returns a boolean mask indicating the valid thickness values.
-        If False, returns the thickness values directly. Default is False.
-    transition : bool, optional
-        If True, treat data as a mask with True for transition cells. Default is False.
+    data_dir : Path
+        The directory containing the data. Contains folders `dikte_aquitard` and `top_aquitard`.
+    ds_regis : xr.Dataset
+        The REGIS modellayer that contains the vertex grid.
+    ix : nlmod.Index
+        The index of the model grid.
+    transition_length : float
+        The length of the transition zone in meters.
 
     Returns
    -------
-    thickness : xarray.DataArray or numpy.ndarray
-        If mask is True, returns a boolean mask indicating the valid thickness values.
-        If mask is False, returns the thickness values as a DataArray or ndarray.
-
+    dict
+        A dictionary containing the interpolated values of the aquitard layers.
     """
-    botm = get_nhd_botm(
-        data,
-        mask=mask,
-        transition=transition,
-        fix_min_layer_thickness=fix_min_layer_thickness,
-    )
-
-    if mask:
-        _a = data[[var for var in data.variables if var.endswith("_mask")]]
-        a = _a.where(_a, other=np.nan)
-        botm_nodata_isnan = botm.where(botm, other=np.nan)
-
-        def n(s):
-            return f"{s}_mask"
-
-    elif transition:
-        # note the ~ operator
-        _a = data[[var for var in data.variables if var.endswith("_transition")]]
-        a = _a.where(~_a, other=np.nan).where(_a, 1.0)
-        botm_nodata_isnan = botm.where(~botm, other=np.nan)
-
-        def n(s):
-            return f"{s}_transition"
-
-    else:
-        a = data
-        botm_nodata_isnan = botm
-
-        def n(s):
-            return s
-
-    if "top" in data.data_vars:
-        top_botm = xr.concat((a[n("top")].expand_dims(dim={"layer": ["mv"]}, axis=0), botm_nodata_isnan), dim="layer")
-    else:
-        top_botm = botm
-
-    out = -top_botm.diff(dim="layer")
-
-    if mask:
-        return ~np.isnan(out)
-    if transition:
-        mask = get_nhd_thickness(data, mask=True, transition=False)
-        transition = np.isnan(out)
-        check = mask.astype(int) + transition.astype(int)
-        assert (check <= 1).all(), "Transition cells should not overlap with mask."
-        return transition
-    out = out.where(~np.isclose(out, 0.0), other=0.0)
-
-    if (out < 0.0).any():
-        logger.warning("Botm nhd is not monotonically decreasing, resulting in negative conductivity values.")
-    return out
-
-
-def get_nhd_kh(data, mask=False, anisotropy=5.0, transition=False):
-    """
-    Calculate the hydraulic conductivity (kh) based on the given data.
-
-    Values may be applied everywhere. Use mask and/or thickness to determine
-    where the values are valid.
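-
-    For the aquitard layers, kh is derived from the hydraulic resistance
-    values (NHD_C*) in the dataset as kh = thickness / c * anisotropy,
-    as the assignments further down show.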
- - Parameters - ---------- - data : xarray.Dataset or xarray.DataArray - The input data containing the necessary variables. - mask : bool, optional - Flag indicating whether to apply a mask to the data. Default is False. - anisotropy : float, optional - Anisotropy factor to be applied to the aquitard layers. Default is 5.0. - transition : bool, optional - Flag indicating whether to treat data as a mask with True for transition cells. - - Returns - ------- - kh: xarray.DataArray - The calculated hydraulic conductivity. - - kh = xr.zeros_like(t_da) - kh[0] = 7.0 - kh[1] = thickness[1] / clist[0] / f_anisotropy - kh[2] = 7.0 - kh[3] = thickness[3] / clist[1] / f_anisotropy - kh[4] = 12.0 - kh[5] = thickness[5] / clist[2] / f_anisotropy - kh[6] = 15.0 - kh[7] = thickness[7] / clist[3] / f_anisotropy - kh[8] = 20.0 - - """ - if mask: - # valid value if valid thickness and valid NHD_C - out = get_nhd_thickness(data, mask=True, transition=False).rename("kh").drop_vars("layer") - out[{"layer": 1}] *= data["NHD_C1A_mask"] - out[{"layer": 3}] *= data["NHD_C1B_mask"] - out[{"layer": 5}] *= data["NHD_C1C_mask"] - out[{"layer": 7}] *= data["NHD_C1D_mask"] - out[{"layer": 9}] *= data["NHD_C2_mask"] - - elif transition: - # Valid value if valid thickness or valid NHD_C - out = get_nhd_thickness(data, mask=True, transition=False).rename("kh").drop_vars("layer") - out[{"layer": 1}] |= data["NHD_C1A_mask"] - out[{"layer": 3}] |= data["NHD_C1B_mask"] - out[{"layer": 5}] |= data["NHD_C1C_mask"] - out[{"layer": 7}] |= data["NHD_C1D_mask"] - out[{"layer": 9}] |= data["NHD_C2_mask"] - - else: - thickness = get_nhd_thickness(data, mask=mask, transition=transition).drop_vars("layer") - out = xr.ones_like(thickness).rename("kh") - - out[{"layer": [0, 2, 4, 6, 8]}] *= [[8.0], [7.0], [12.0], [15.0], [20.0]] - out[{"layer": 1}] = thickness[{"layer": 1}] / data["NHD_C1A"] * anisotropy - out[{"layer": 3}] = thickness[{"layer": 3}] / data["NHD_C1B"] * anisotropy - out[{"layer": 5}] = thickness[{"layer": 5}] / data["NHD_C1C"] * anisotropy - out[{"layer": 7}] = thickness[{"layer": 7}] / data["NHD_C1D"] * anisotropy - out[{"layer": 9}] = thickness[{"layer": 9}] / data["NHD_C2"] * anisotropy - - return out - - -def get_nhd_kv(data, mask=False, anisotropy=5.0, transition=False): - """ - Calculate the hydraulic conductivity (KV) for different aquifers and aquitards. - - Parameters - ---------- - data (xarray.Dataset): Dataset containing the necessary variables for calculation. - mask (bool, optional): Flag indicating whether to apply a mask to the data. Defaults to False. - anisotropy (float, optional): Anisotropy factor for adjusting the hydraulic conductivity. Defaults to 5.0. - - Returns - ------- - xarray.DataArray: Array containing the calculated hydraulic conductivity values for each layer. 
- - Example: - # Calculate hydraulic conductivity values without applying a mask - kv_values = get_nhd_kv(data) - - # Calculate hydraulic conductivity values with a mask applied - kv_values_masked = get_nhd_kv(data, mask=True) - - Note the f_anisotropy vs anisotropy - # kv[0] = kh[0] * f_anisotropy - # kv[1] = thickness[1] / clist[0] - # kv[2] = kh[2] * f_anisotropy - # kv[3] = thickness[3] / clist[1] - # kv[4] = kh[4] * f_anisotropy - # kv[5] = thickness[5] / clist[2] - # kv[6] = kh[6] * f_anisotropy - # kv[7] = thickness[7] / clist[3] - # kv[8] = kh[8] * f_anisotropy - """ - kh = get_nhd_kh(data, mask=mask, anisotropy=anisotropy, transition=transition) - - if not mask and not transition: - # bool divided by float is float - out = kh / anisotropy - else: - out = kh - - return out - - -def get_nhd_botm(data, mask=False, transition=False, fix_min_layer_thickness=True): - """ - Calculate the bottom elevation of each layer in the nhd model. - - Parameters - ---------- - data (xarray.Dataset): Dataset containing the necessary variables. - mask (bool, optional): If True, return a mask indicating the valid values. Default is False. - - Returns - ------- - out (xarray.DataArray): Array containing the bottom elevation of each layer. - """ - if mask: - _a = data[[var for var in data.variables if var.endswith("_mask")]] - a = _a.where(_a, np.nan) - - def n(s): - return f"NHD_{s}_mask" - - elif transition: - # note the ~ operator - _a = data[[var for var in data.variables if var.endswith("_transition")]] - a = _a.where(~_a, np.nan).where(_a, 1.0) - - def n(s): - return f"NHD_{s}_transition" - - else: - a = data - - def n(s): - return f"NHD_{s}" - - out = xr.concat( - ( - a[n("BA1A")] + a[n("DI1A")], # Base aquifer 11 - a[n("BA1A")], # Base aquitard 11 - a[n("BA1B")] + a[n("DI1B")], # Base aquifer 12 - a[n("BA1B")], # Base aquitard 12 - a[n("BA1C")] + a[n("DI1C")], # Base aquifer 13 - a[n("BA1C")], # Base aquitard 13 - a[n("BA1D")] + a[n("DI1D")], # Base aquifer 14 - a[n("BA1D")], # Base aquitard 14 - a[n("BAq2")] + a[n("DIq2")], # Base aquifer 21 - a[n("BAq2")], # Base aquitard 21 - ), - dim="layer", - ).transpose("layer", "icell2d") - out.coords["layer"] = list(translate_triwaco_nhd_names_to_index.keys()) - - if mask: - return ~np.isnan(out) - if transition: - mask = get_nhd_botm(data, mask=True, transition=False) - transition = np.isnan(out) - check = mask.astype(int) + transition.astype(int) - assert (check <= 1).all(), "Transition cells should not overlap with mask." 
- return transition - if fix_min_layer_thickness: - ds = xr.Dataset({"botm": out, "top": data["top"]}) - _fix_missings_botms_and_min_layer_thickness(ds) - out = ds["botm"] - - return out + layer_names = ["S11", "S12", "S13", "S21", "S22", "S31", "S32"] + data = {} + + for name in layer_names: + # Compute where the layer is _not_ present + fp_mask = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_mask_combined.geojson" + gdf_mask = gpd.read_file(fp_mask) + + multipolygon = unary_union(gdf_mask.geometry) + ids = ix.intersect(multipolygon, contains_centroid=False, min_area_fraction=0.5).cellids.astype(int) + data[f"{name}_mask"] = np.zeros(ds_regis.sizes["icell2d"], dtype=bool) + data[f"{name}_mask"][ids] = True + + # Compute where the layer transitions to REGIS + multipolygon_transition = multipolygon.buffer(transition_length).difference(multipolygon) + ids_trans = ix.intersect(multipolygon_transition, contains_centroid=False, min_area_fraction=0.5).cellids.astype(int) + data[f"{name}_transition"] = np.zeros(ds_regis.sizes["icell2d"], dtype=bool) + data[f"{name}_transition"][ids_trans] = True + + # Interpolate thickness points using Krieging + fp_pts = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_interpolation_points.geojson" + gdf_pts = gpd.read_file(fp_pts) + ok = pykrige.ok.OrdinaryKriging( + gdf_pts.geometry.x.values, + gdf_pts.geometry.y.values, + gdf_pts.value.values, + variogram_model="linear", + verbose=False, + enable_plotting=False, + ) + xq = ds_regis.x.values[~data[f"{name}_mask"]] + yq = ds_regis.y.values[~data[f"{name}_mask"]] + data[f"D{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) + data[f"D{name}_value"][~data[f"{name}_mask"]] = ok.execute("points", xq, yq)[0] + + # Interpolate top aquitard points using Krieging + fp_pts = data_dir / "top_aquitard" / f"T{name}" / f"T{name}_interpolation_points.geojson" + gdf_pts = gpd.read_file(fp_pts) + ok = pykrige.ok.OrdinaryKriging( + gdf_pts.geometry.x.values, + gdf_pts.geometry.y.values, + gdf_pts.value.values, + variogram_model="linear", + verbose=False, + enable_plotting=False, + ) + data[f"T{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) + data[f"T{name}_value"][~data[f"D{name}_mask"]] = ok.execute("points", xq, yq)[0] + + return data From ffe798877206a367a1d0a630e60bb6144aed91dc Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 14 Dec 2024 09:33:09 +0100 Subject: [PATCH 03/12] Format layers.py --- src/nhflotools/pwnlayers2/layers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py index 96c3bcf..105a7fe 100644 --- a/src/nhflotools/pwnlayers2/layers.py +++ b/src/nhflotools/pwnlayers2/layers.py @@ -1,4 +1,5 @@ """Module containing functions to retrieve PWN bodemlagen.""" + import logging from pathlib import Path @@ -11,6 +12,7 @@ logger = logging.getLogger(__name__) + def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, transition_length: float) -> dict: """ Interpolate the thickness of the aquitard layers and the top of the aquitard layers using Kriging. 
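For reference, the kriging step that the preceding commit introduced (and that the hunks below merely reformat) reduces to the following minimal, self-contained sketch; the coordinates and values are hypothetical and not taken from the PWN data:

import numpy as np
import pykrige.ok

# Hypothetical observation points (x, y) with thickness values z.
x = np.array([0.0, 500.0, 1000.0, 250.0])
y = np.array([0.0, 250.0, 800.0, 900.0])
z = np.array([2.0, 3.5, 1.0, 2.5])

# Ordinary kriging with a linear variogram, as in get_pwn_aquitard_data.
ok = pykrige.ok.OrdinaryKriging(x, y, z, variogram_model="linear")

# execute("points", ...) returns the estimates and the kriging variance;
# later commits in this series store the variance as the *_value_unc entries.
zhat, zvar = ok.execute("points", np.array([300.0, 600.0]), np.array([200.0, 700.0]))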
@@ -53,7 +55,9 @@ def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, # Compute where the layer transitions to REGIS multipolygon_transition = multipolygon.buffer(transition_length).difference(multipolygon) - ids_trans = ix.intersect(multipolygon_transition, contains_centroid=False, min_area_fraction=0.5).cellids.astype(int) + ids_trans = ix.intersect( + multipolygon_transition, contains_centroid=False, min_area_fraction=0.5 + ).cellids.astype(int) data[f"{name}_transition"] = np.zeros(ds_regis.sizes["icell2d"], dtype=bool) data[f"{name}_transition"][ids_trans] = True @@ -86,5 +90,4 @@ def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, ) data[f"T{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) data[f"T{name}_value"][~data[f"D{name}_mask"]] = ok.execute("points", xq, yq)[0] - return data From c78206a9190c10220f39b16371bf0270fb2c0ea7 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 15 Dec 2024 16:38:19 +0100 Subject: [PATCH 04/12] Update pwnlayers2/layers.py --- src/.DS_Store | Bin 0 -> 6148 bytes src/nhflotools/.DS_Store | Bin 0 -> 6148 bytes src/nhflotools/pwnlayers2/layers.py | 22 +++++++++++++++------- 3 files changed, 15 insertions(+), 7 deletions(-) create mode 100644 src/.DS_Store create mode 100644 src/nhflotools/.DS_Store diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..148ece91c2a9f0147c678ee905a90bd71783cce1 GIT binary patch literal 6148 zcmeHKQA+|r5S~@byCm?Tz{i7Lg)~HxUJkQA&_z8|*j;CNblrw`=7R|LTz{xv((h?z zcNIf>iwK#4nQz>g*<-$iT?YU}cN{hWssLc25*BPUYlPyYD^f5kMMRKO3iAe{@a1E^xzO?)$-IG??_Od#5tV{3scWR6!IDFy-bdNEeBgW^oiz0TI0cL<1*aQa5xdR98^NrV6IVJ z9ca`DfLOw1CFs*EK{bv+*I=#@BPc?bBI;6MMhv0LaosU_uEAWRE(c+T4`IG6%!DG; z*KvPGrGs!avSkLCf$t2^?1xgN_y6qY`ClcmVFs9i&1687dS0)Ky_vUlrAvBi9n?Ei q63WXpE=tf*Pci1wQ(Qw;g6j@Bh_1n0Bd(zE9|1)J8)o278TbOuomGSY literal 0 HcmV?d00001 diff --git a/src/nhflotools/.DS_Store b/src/nhflotools/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..db115a1b9a6f4cb11c5ef34cc93e9815f2c966ea GIT binary patch literal 6148 zcmeHKJ5EC}5S)b+5ouCV`U-FYD@snl1^9&Si69~r(Y=auaWrN>1<`{pG|{ZI9(%oG z%Tv643&2+I+Z$jBU`}_$rw>!}efNo7RK$pMp7DSWJmU>}+)lE84>=Ac7rF#q<|EV0#ZNlL3=wRQ41ueFVSPxqWJx*O*~;SlAR80DA? fFUQxBlzGkP-0y{BV$c~6I#E9Zu8T|x{I>$%knI&% literal 0 HcmV?d00001 diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py index 105a7fe..b5f5461 100644 --- a/src/nhflotools/pwnlayers2/layers.py +++ b/src/nhflotools/pwnlayers2/layers.py @@ -4,16 +4,16 @@ from pathlib import Path import geopandas as gpd -import nlmod import numpy as np import pykrige.ok import xarray as xr +from flopy.utils.gridintersect import GridIntersect from shapely.ops import unary_union logger = logging.getLogger(__name__) -def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, transition_length: float) -> dict: +def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersect, transition_length: float) -> dict: """ Interpolate the thickness of the aquitard layers and the top of the aquitard layers using Kriging. @@ -26,11 +26,11 @@ def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, Parameters ---------- - data_dir : Path - The directory containing the data. Contains folders `dikte_aquitard` and `top_aquitard`. ds_regis : xr.Dataset The REGIS modellayer that contains the vertex grid. - ix : nlmod.Index + data_dir : Path + The directory containing the data. 
Contains folders `dikte_aquitard` and `top_aquitard`. + ix : flopy.utils.GridIntersect The index of the model grid. transition_length : float The length of the transition zone in meters. @@ -45,6 +45,7 @@ def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, for name in layer_names: # Compute where the layer is _not_ present + logger.info(f"Interpolating aquitard layer {name} data and its transition zone") fp_mask = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_mask_combined.geojson" gdf_mask = gpd.read_file(fp_mask) @@ -74,8 +75,11 @@ def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, ) xq = ds_regis.x.values[~data[f"{name}_mask"]] yq = ds_regis.y.values[~data[f"{name}_mask"]] + krieging_result = ok.execute("points", xq, yq) data[f"D{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"D{name}_value"][~data[f"{name}_mask"]] = ok.execute("points", xq, yq)[0] + data[f"D{name}_value"][~data[f"{name}_mask"]] = krieging_result[0] + data[f"D{name}_value_unc"] = np.zeros(ds_regis.sizes["icell2d"]) + data[f"D{name}_value_unc"][~data[f"{name}_mask"]] = krieging_result[1] # Interpolate top aquitard points using Krieging fp_pts = data_dir / "top_aquitard" / f"T{name}" / f"T{name}_interpolation_points.geojson" @@ -88,6 +92,10 @@ def get_pwn_aquitard_data(data_dir: Path, ds_regis: xr.Dataset, ix: nlmod.Index, verbose=False, enable_plotting=False, ) + krieging_result = ok.execute("points", xq, yq) data[f"T{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"T{name}_value"][~data[f"D{name}_mask"]] = ok.execute("points", xq, yq)[0] + data[f"T{name}_value"][~data[f"D{name}_mask"]] = krieging_result[0] + data[f"T{name}_value_unc"] = np.zeros(ds_regis.sizes["icell2d"]) + data[f"T{name}_value_unc"][~data[f"D{name}_mask"]] = krieging_result[1] return data + From 90296f458a463c2180e3a8ff11487e430377410b Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 15 Dec 2024 16:41:34 +0100 Subject: [PATCH 05/12] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9f0700e..3f357c9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ scratch +.DS_Store # Byte-compiled / optimized / DLL files __pycache__/ From c5d8cae9a976f8f395e7faaddc0272c57d3bda79 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 15 Dec 2024 19:55:41 +0100 Subject: [PATCH 06/12] Removed .DS_Store --- src/.DS_Store | Bin 6148 -> 0 bytes src/nhflotools/.DS_Store | Bin 6148 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/.DS_Store delete mode 100644 src/nhflotools/.DS_Store diff --git a/src/.DS_Store b/src/.DS_Store deleted file mode 100644 index 148ece91c2a9f0147c678ee905a90bd71783cce1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKQA+|r5S~@byCm?Tz{i7Lg)~HxUJkQA&_z8|*j;CNblrw`=7R|LTz{xv((h?z zcNIf>iwK#4nQz>g*<-$iT?YU}cN{hWssLc25*BPUYlPyYD^f5kMMRKO3iAe{@a1E^xzO?)$-IG??_Od#5tV{3scWR6!IDFy-bdNEeBgW^oiz0TI0cL<1*aQa5xdR98^NrV6IVJ z9ca`DfLOw1CFs*EK{bv+*I=#@BPc?bBI;6MMhv0LaosU_uEAWRE(c+T4`IG6%!DG; z*KvPGrGs!avSkLCf$t2^?1xgN_y6qY`ClcmVFs9i&1687dS0)Ky_vUlrAvBi9n?Ei q63WXpE=tf*Pci1wQ(Qw;g6j@Bh_1n0Bd(zE9|1)J8)o278TbOuomGSY diff --git a/src/nhflotools/.DS_Store b/src/nhflotools/.DS_Store deleted file mode 100644 index db115a1b9a6f4cb11c5ef34cc93e9815f2c966ea..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ5EC}5S)b+5ouCV`U-FYD@snl1^9&Si69~r(Y=auaWrN>1<`{pG|{ZI9(%oG 
z%Tv643&2+I+Z$jBU`}_$rw>!}efNo7RK$pMp7DSWJmU>}+)lE84>=Ac7rF#q<|EV0#ZNlL3=wRQ41ueFVSPxqWJx*O*~;SlAR80DA? fFUQxBlzGkP-0y{BV$c~6I#E9Zu8T|x{I>$%knI&% From e6cbdee830c3fdaed6e510fd140a98a9c792029b Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 15 Dec 2024 20:00:33 +0100 Subject: [PATCH 07/12] Krieging -> kriging --- src/nhflotools/pwnlayers2/layers.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py index b5f5461..36de30d 100644 --- a/src/nhflotools/pwnlayers2/layers.py +++ b/src/nhflotools/pwnlayers2/layers.py @@ -62,7 +62,7 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec data[f"{name}_transition"] = np.zeros(ds_regis.sizes["icell2d"], dtype=bool) data[f"{name}_transition"][ids_trans] = True - # Interpolate thickness points using Krieging + # Interpolate thickness points using Kriging fp_pts = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_interpolation_points.geojson" gdf_pts = gpd.read_file(fp_pts) ok = pykrige.ok.OrdinaryKriging( @@ -75,13 +75,13 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec ) xq = ds_regis.x.values[~data[f"{name}_mask"]] yq = ds_regis.y.values[~data[f"{name}_mask"]] - krieging_result = ok.execute("points", xq, yq) + kriging_result = ok.execute("points", xq, yq) data[f"D{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"D{name}_value"][~data[f"{name}_mask"]] = krieging_result[0] + data[f"D{name}_value"][~data[f"{name}_mask"]] = kriging_result[0] data[f"D{name}_value_unc"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"D{name}_value_unc"][~data[f"{name}_mask"]] = krieging_result[1] + data[f"D{name}_value_unc"][~data[f"{name}_mask"]] = kriging_result[1] - # Interpolate top aquitard points using Krieging + # Interpolate top aquitard points using Kriging fp_pts = data_dir / "top_aquitard" / f"T{name}" / f"T{name}_interpolation_points.geojson" gdf_pts = gpd.read_file(fp_pts) ok = pykrige.ok.OrdinaryKriging( @@ -92,10 +92,10 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec verbose=False, enable_plotting=False, ) - krieging_result = ok.execute("points", xq, yq) + kriging_result = ok.execute("points", xq, yq) data[f"T{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"T{name}_value"][~data[f"D{name}_mask"]] = krieging_result[0] + data[f"T{name}_value"][~data[f"D{name}_mask"]] = kriging_result[0] data[f"T{name}_value_unc"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"T{name}_value_unc"][~data[f"D{name}_mask"]] = krieging_result[1] + data[f"T{name}_value_unc"][~data[f"D{name}_mask"]] = kriging_result[1] return data From 0d487d9c4dfae7ee4ae35c800ef9427a154fb153 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 15 Dec 2024 21:01:13 +0100 Subject: [PATCH 08/12] Pipe kriging verbose print statements to logger --- src/nhflotools/pwnlayers2/layers.py | 51 +++++++++++++++-------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py index 36de30d..994a71a 100644 --- a/src/nhflotools/pwnlayers2/layers.py +++ b/src/nhflotools/pwnlayers2/layers.py @@ -1,5 +1,6 @@ """Module containing functions to retrieve PWN bodemlagen.""" +from contextlib import redirect_stdout import logging from pathlib import Path @@ -65,37 +66,37 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec # Interpolate thickness points using Kriging 
fp_pts = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_interpolation_points.geojson" gdf_pts = gpd.read_file(fp_pts) - ok = pykrige.ok.OrdinaryKriging( - gdf_pts.geometry.x.values, - gdf_pts.geometry.y.values, - gdf_pts.value.values, - variogram_model="linear", - verbose=False, - enable_plotting=False, - ) + + with redirect_stdout(logging.StreamHandler(logger)): + ok = pykrige.ok.OrdinaryKriging( + gdf_pts.geometry.x.values, + gdf_pts.geometry.y.values, + gdf_pts.value.values, + variogram_model="linear", + verbose=logger.level <= logging.DEBUG, + enable_plotting=logger.level <= logging.DEBUG, + ) xq = ds_regis.x.values[~data[f"{name}_mask"]] yq = ds_regis.y.values[~data[f"{name}_mask"]] kriging_result = ok.execute("points", xq, yq) - data[f"D{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"D{name}_value"][~data[f"{name}_mask"]] = kriging_result[0] - data[f"D{name}_value_unc"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"D{name}_value_unc"][~data[f"{name}_mask"]] = kriging_result[1] + data[f"D{name}_value"] = np.where(~data[f"{name}_mask"], kriging_result[0], 0.) + data[f"D{name}_value_unc"] = np.where(~data[f"{name}_mask"], kriging_result[1], np.nan) # Interpolate top aquitard points using Kriging fp_pts = data_dir / "top_aquitard" / f"T{name}" / f"T{name}_interpolation_points.geojson" gdf_pts = gpd.read_file(fp_pts) - ok = pykrige.ok.OrdinaryKriging( - gdf_pts.geometry.x.values, - gdf_pts.geometry.y.values, - gdf_pts.value.values, - variogram_model="linear", - verbose=False, - enable_plotting=False, - ) + + with redirect_stdout(logging.StreamHandler(logger)): + ok = pykrige.ok.OrdinaryKriging( + gdf_pts.geometry.x.values, + gdf_pts.geometry.y.values, + gdf_pts.value.values, + variogram_model="linear", + verbose=logger.level <= logging.DEBUG, + enable_plotting=logger.level <= logging.DEBUG, + ) kriging_result = ok.execute("points", xq, yq) - data[f"T{name}_value"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"T{name}_value"][~data[f"D{name}_mask"]] = kriging_result[0] - data[f"T{name}_value_unc"] = np.zeros(ds_regis.sizes["icell2d"]) - data[f"T{name}_value_unc"][~data[f"D{name}_mask"]] = kriging_result[1] - return data + data[f"T{name}_value"] = np.where(~data[f"{name}_mask"], kriging_result[0], np.nan) + data[f"T{name}_value_unc"] = np.where(~data[f"{name}_mask"], kriging_result[1], np.nan) + return data From 893ea1516e07e5b6c178e8c2f6c10cfdd28c9361 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 15 Dec 2024 21:05:47 +0100 Subject: [PATCH 09/12] Refactor aquitard data interpolation logging and verbosity handling --- src/nhflotools/pwnlayers2/layers.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py index 994a71a..4a68206 100644 --- a/src/nhflotools/pwnlayers2/layers.py +++ b/src/nhflotools/pwnlayers2/layers.py @@ -1,7 +1,7 @@ """Module containing functions to retrieve PWN bodemlagen.""" -from contextlib import redirect_stdout import logging +from contextlib import redirect_stdout from pathlib import Path import geopandas as gpd @@ -41,12 +41,14 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec dict A dictionary containing the interpolated values of the aquitard layers. 
""" + verbose = logger.level <= logging.DEBUG + layer_names = ["S11", "S12", "S13", "S21", "S22", "S31", "S32"] data = {} for name in layer_names: # Compute where the layer is _not_ present - logger.info(f"Interpolating aquitard layer {name} data and its transition zone") + logger.info("Interpolating aquitard layer %s data and its transition zone", name) fp_mask = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_mask_combined.geojson" gdf_mask = gpd.read_file(fp_mask) @@ -73,8 +75,8 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec gdf_pts.geometry.y.values, gdf_pts.value.values, variogram_model="linear", - verbose=logger.level <= logging.DEBUG, - enable_plotting=logger.level <= logging.DEBUG, + verbose=verbose, + enable_plotting=verbose, ) xq = ds_regis.x.values[~data[f"{name}_mask"]] yq = ds_regis.y.values[~data[f"{name}_mask"]] @@ -92,8 +94,8 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec gdf_pts.geometry.y.values, gdf_pts.value.values, variogram_model="linear", - verbose=logger.level <= logging.DEBUG, - enable_plotting=logger.level <= logging.DEBUG, + verbose=verbose, + enable_plotting=verbose, ) kriging_result = ok.execute("points", xq, yq) data[f"T{name}_value"] = np.where(~data[f"{name}_mask"], kriging_result[0], np.nan) From b1fe7aa186a27af01c32f2318b75f28e6dcf54fe Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Wed, 18 Dec 2024 10:10:14 +0100 Subject: [PATCH 10/12] Flexibility towards getting your grid from ds_regis, ix, modelgrid --- src/nhflotools/pwnlayers2/layers.py | 74 ++++++++++++++++------------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py index 4a68206..e9b185d 100644 --- a/src/nhflotools/pwnlayers2/layers.py +++ b/src/nhflotools/pwnlayers2/layers.py @@ -1,20 +1,21 @@ """Module containing functions to retrieve PWN bodemlagen.""" import logging -from contextlib import redirect_stdout from pathlib import Path import geopandas as gpd import numpy as np import pykrige.ok import xarray as xr +from flopy.discretization.vertexgrid import VertexGrid from flopy.utils.gridintersect import GridIntersect +from nlmod.dims.grid import modelgrid_from_ds from shapely.ops import unary_union logger = logging.getLogger(__name__) -def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersect, transition_length: float) -> dict: +def get_pwn_aquitard_data(ds_regis: xr.Dataset, ix: GridIntersect, modelgrid: VertexGrid, data_dir: Path, transition_length: float) -> dict: """ Interpolate the thickness of the aquitard layers and the top of the aquitard layers using Kriging. @@ -29,10 +30,12 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec ---------- ds_regis : xr.Dataset The REGIS modellayer that contains the vertex grid. - data_dir : Path - The directory containing the data. Contains folders `dikte_aquitard` and `top_aquitard`. ix : flopy.utils.GridIntersect The index of the model grid. + modelgrid : flopy.discretization.VertexGrid + The model grid. + data_dir : Path + The directory containing the data. Contains folders `dikte_aquitard` and `top_aquitard`. transition_length : float The length of the transition zone in meters. 
@@ -43,6 +46,13 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec """ verbose = logger.level <= logging.DEBUG + if ix is None and modelgrid is None and ds_regis is not None: + modelgrid = modelgrid_from_ds(ds_regis) + + if ix is None and modelgrid is not None: + ix = GridIntersect(modelgrid, method="vertex") + + ncell = len(ix.mfgrid.cell2d) layer_names = ["S11", "S12", "S13", "S21", "S22", "S31", "S32"] data = {} @@ -54,7 +64,7 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec multipolygon = unary_union(gdf_mask.geometry) ids = ix.intersect(multipolygon, contains_centroid=False, min_area_fraction=0.5).cellids.astype(int) - data[f"{name}_mask"] = np.zeros(ds_regis.sizes["icell2d"], dtype=bool) + data[f"{name}_mask"] = np.zeros(ncell, dtype=bool) data[f"{name}_mask"][ids] = True # Compute where the layer transitions to REGIS @@ -62,43 +72,43 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, data_dir: Path, ix: GridIntersec ids_trans = ix.intersect( multipolygon_transition, contains_centroid=False, min_area_fraction=0.5 ).cellids.astype(int) - data[f"{name}_transition"] = np.zeros(ds_regis.sizes["icell2d"], dtype=bool) + data[f"{name}_transition"] = np.zeros(ncell, dtype=bool) data[f"{name}_transition"][ids_trans] = True # Interpolate thickness points using Kriging fp_pts = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_interpolation_points.geojson" gdf_pts = gpd.read_file(fp_pts) - - with redirect_stdout(logging.StreamHandler(logger)): - ok = pykrige.ok.OrdinaryKriging( - gdf_pts.geometry.x.values, - gdf_pts.geometry.y.values, - gdf_pts.value.values, - variogram_model="linear", - verbose=verbose, - enable_plotting=verbose, - ) - xq = ds_regis.x.values[~data[f"{name}_mask"]] - yq = ds_regis.y.values[~data[f"{name}_mask"]] + ok = pykrige.ok.OrdinaryKriging( + gdf_pts.geometry.x.values, + gdf_pts.geometry.y.values, + gdf_pts.value.values, + variogram_model="linear", + verbose=verbose, + enable_plotting=False, + ) + xq = ix.mfgrid.xcellcenters[~data[f"{name}_mask"]] + yq = ix.mfgrid.ycellcenters[~data[f"{name}_mask"]] kriging_result = ok.execute("points", xq, yq) - data[f"D{name}_value"] = np.where(~data[f"{name}_mask"], kriging_result[0], 0.) 
- data[f"D{name}_value_unc"] = np.where(~data[f"{name}_mask"], kriging_result[1], np.nan) + data[f"D{name}_value"] = np.zeros(ncell) + data[f"D{name}_value"][~data[f"{name}_mask"]] = kriging_result[0] + data[f"D{name}_value_unc"] = np.zeros(ncell) + data[f"D{name}_value_unc"][~data[f"{name}_mask"]] = kriging_result[1] # Interpolate top aquitard points using Kriging fp_pts = data_dir / "top_aquitard" / f"T{name}" / f"T{name}_interpolation_points.geojson" gdf_pts = gpd.read_file(fp_pts) - - with redirect_stdout(logging.StreamHandler(logger)): - ok = pykrige.ok.OrdinaryKriging( - gdf_pts.geometry.x.values, - gdf_pts.geometry.y.values, - gdf_pts.value.values, - variogram_model="linear", - verbose=verbose, - enable_plotting=verbose, - ) + ok = pykrige.ok.OrdinaryKriging( + gdf_pts.geometry.x.values, + gdf_pts.geometry.y.values, + gdf_pts.value.values, + variogram_model="linear", + verbose=verbose, + enable_plotting=False, + ) kriging_result = ok.execute("points", xq, yq) - data[f"T{name}_value"] = np.where(~data[f"{name}_mask"], kriging_result[0], np.nan) - data[f"T{name}_value_unc"] = np.where(~data[f"{name}_mask"], kriging_result[1], np.nan) + data[f"T{name}_value"] = np.zeros(ncell) + data[f"T{name}_value"][~data[f"{name}_mask"]] = kriging_result[0] + data[f"T{name}_value_unc"] = np.zeros(ncell) + data[f"T{name}_value_unc"][~data[f"{name}_mask"]] = kriging_result[1] return data From 9c85653fde0a07893a38e3a0904f1519337ca87a Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Wed, 18 Dec 2024 10:12:16 +0100 Subject: [PATCH 11/12] Rename parameter ds_regis to ds in get_pwn_aquitard_data function for clarity --- src/nhflotools/pwnlayers2/layers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nhflotools/pwnlayers2/layers.py b/src/nhflotools/pwnlayers2/layers.py index e9b185d..32ae454 100644 --- a/src/nhflotools/pwnlayers2/layers.py +++ b/src/nhflotools/pwnlayers2/layers.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -def get_pwn_aquitard_data(ds_regis: xr.Dataset, ix: GridIntersect, modelgrid: VertexGrid, data_dir: Path, transition_length: float) -> dict: +def get_pwn_aquitard_data(ds: xr.Dataset, ix: GridIntersect, modelgrid: VertexGrid, data_dir: Path, transition_length: float) -> dict: """ Interpolate the thickness of the aquitard layers and the top of the aquitard layers using Kriging. @@ -28,8 +28,8 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, ix: GridIntersect, modelgrid: Ve Parameters ---------- - ds_regis : xr.Dataset - The REGIS modellayer that contains the vertex grid. + ds : xr.Dataset + The model dataset that contains the vertex grid information. ix : flopy.utils.GridIntersect The index of the model grid. 
modelgrid : flopy.discretization.VertexGrid @@ -46,8 +46,8 @@ def get_pwn_aquitard_data(ds_regis: xr.Dataset, ix: GridIntersect, modelgrid: Ve """ verbose = logger.level <= logging.DEBUG - if ix is None and modelgrid is None and ds_regis is not None: - modelgrid = modelgrid_from_ds(ds_regis) + if ix is None and modelgrid is None and ds is not None: + modelgrid = modelgrid_from_ds(ds) if ix is None and modelgrid is not None: ix = GridIntersect(modelgrid, method="vertex") From d784cb586c5ba64076e52016663d6deb5ae40c76 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 27 Dec 2024 09:50:31 +0100 Subject: [PATCH 12/12] Fix top required in cache2d scripts --- .gitignore | 2 +- src/nhflotools/pwnlayers/io.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 3f357c9..d82364a 100644 --- a/.gitignore +++ b/.gitignore @@ -133,6 +133,6 @@ dmypy.json # Pyre type checker .pyre/ - bin/ *.code-workspace +**/model_ws diff --git a/src/nhflotools/pwnlayers/io.py b/src/nhflotools/pwnlayers/io.py index 0afa8dd..0446bc0 100644 --- a/src/nhflotools/pwnlayers/io.py +++ b/src/nhflotools/pwnlayers/io.py @@ -54,7 +54,7 @@ def read_pwn_data2( ds_mask_transition : xarray Dataset mask dataset. True in transition zone. """ - modelgrid = nlmod.dims.grid.modelgrid_from_ds(ds) + modelgrid = nlmod.dims.grid.modelgrid_from_ds(ds, rotated=False) ix = GridIntersect(modelgrid, method="vertex") ds_out = xr.Dataset( @@ -163,7 +163,7 @@ def read_pwn_data2( return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_bergen_basis_aquitards( ds, pathname=None, @@ -271,7 +271,7 @@ def _read_bergen_basis_aquitards( return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_bergen_c_aquitards(ds, pathname, length_transition=100.0, ix=None): """Read vertical resistance of layers. @@ -313,7 +313,7 @@ def _read_bergen_c_aquitards(ds, pathname, length_transition=100.0, ix=None): return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_bergen_thickness_aquitards( ds, pathname=None, @@ -419,7 +419,7 @@ def _read_bergen_thickness_aquitards( return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_top_of_aquitards(ds, pathname, length_transition=100.0, ix=None): """Read top of aquitards. @@ -461,7 +461,7 @@ def _read_top_of_aquitards(ds, pathname, length_transition=100.0, ix=None): return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_thickness_of_aquitards(ds, pathname, length_transition=100.0, ix=None): """Read thickness of aquitards. @@ -505,7 +505,7 @@ def _read_thickness_of_aquitards(ds, pathname, length_transition=100.0, ix=None) return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_kd_of_aquitards(ds, pathname, length_transition=100.0, ix=None): """Read kd of aquitards. @@ -546,7 +546,7 @@ def _read_kd_of_aquitards(ds, pathname, length_transition=100.0, ix=None): return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_mask_of_aquifers(ds, pathname, length_transition=100.0, ix=None): """Read mask of aquifers. 
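The pattern in these hunks: the decorated readers depend on ds["top"], so the cache must treat top as an input. Declaring datavars=["top"] presumably makes nlmod include the top variable when writing and validating the cached NetCDF, so stale caches are detected when top changes; this reading follows from the commit subject and is an assumption, not verified against nlmod's documentation. A minimal hypothetical usage (the function name and body are illustrative only):

from nlmod import cache

@cache.cache_netcdf(datavars=["top"], coords_2d=True)
def _read_example(ds, pathname):
    # Hypothetical reader that depends on ds["top"], mirroring the
    # decorated functions in this file; assumed to return an xr.Dataset.
    return (ds["top"] * 0.0).to_dataset(name="example")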
@@ -593,7 +593,7 @@ def _read_mask_of_aquifers(ds, pathname, length_transition=100.0, ix=None): return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_layer_kh(ds, pathname, length_transition=100.0, ix=None): """Read hydraulic conductivity of layers. @@ -636,7 +636,7 @@ def _read_layer_kh(ds, pathname, length_transition=100.0, ix=None): return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_kv_area(ds, pathname, length_transition=100.0, ix=None): # noqa: ARG001 """Read vertical resistance of layers. @@ -711,7 +711,7 @@ def _read_kv_area(ds, pathname, length_transition=100.0, ix=None): # noqa: ARG0 return ds_out -@cache.cache_netcdf(coords_2d=True) +@cache.cache_netcdf(datavars=["top"], coords_2d=True) def _read_topsysteem(ds, pathname): """Read topsysteem.