From a9cfc7a47517c391832308438978c7f98fabc1aa Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Sun, 15 Dec 2024 16:36:49 +0100
Subject: [PATCH] Add the functions to create the GIS files

---
 .../v1.0.0/interpolate_layer_boundaries.py    | 255 +++++++
 .../v1.0.0/interpolation_helper_functions.py  | 635 ++++++++++++++++++
 .../bodemlagen_pwn_2024/v1.0.0/merge_masks.py |  35 +
 3 files changed, 925 insertions(+)
 create mode 100644 src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/interpolate_layer_boundaries.py
 create mode 100644 src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/interpolation_helper_functions.py
 create mode 100644 src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/merge_masks.py

diff --git a/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/interpolate_layer_boundaries.py b/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/interpolate_layer_boundaries.py
new file mode 100644
index 0000000..6dc80d2
--- /dev/null
+++ b/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/interpolate_layer_boundaries.py
@@ -0,0 +1,255 @@
+import os
+
+import geopandas as gpd
+import matplotlib as mpl
+import numpy as np
+import pandas as pd
+import xarray as xr
+from nhflotools.pwnlayers2.prepare_data.interpolation_helper_functions import (
+    CRS_RD,
+    get_point_values,
+    interpolate_gdf,
+    polyline_from_points,
+)
+
+try:
+    import pyvista as pv
+except ImportError as e:
+    msg = "pyvista is not installed. Please install it to run this script."
+    raise ImportError(msg) from e
+
+# Define the interpolation grid (to be replaced with the model grid in NHFLO)
+xmin, ymin = 95000, 496000
+xmax, ymax = 115000, 533000
+dx = 100.0
+xi = np.arange(xmin, xmax + dx, dx)
+yi = np.arange(ymin, ymax + dx, dx)
+X, Y = np.meshgrid(xi, yi)
+
+# Create a GeoDataFrame with the points of the interpolation
+# grid. The values are set to zero and are used as helper
+# points in the interpolation of the thickness in the areas
+# where the layer is reported as absent by Koster (1997).
+
+pts = gpd.points_from_xy(X.ravel(), Y.ravel())
+gdf_pt = gpd.GeoDataFrame(
+    geometry=pts,
+    data={"value": [0] * len(pts)},
+    crs=CRS_RD,
+)
+
+# Names of the layers to be interpolated
+layer_names = ["S11", "S12", "S13", "S21", "S22", "S31", "S32"]
+
+# Define colours for the 3D plot
+cmap_t = mpl.colormaps["Blues"]
+colors_t = cmap_t(np.linspace(0.5, 1, len(layer_names)))
+
+cmap_b = mpl.colormaps["Oranges"]
+colors_b = cmap_b(np.linspace(0.5, 1, len(layer_names)))
+
+# Define a line for creating a cross section that shows the
+# projection of the layer top/bottoms along a line.
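+# The four points below trace a vertical plane at x = (xmin + xmax) / 2
+# that spans the full y range of the grid; the z range of 0 to -15000
+# accommodates the layer elevations after the 100x vertical exaggeration
+# that is applied before plotting.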
+ln_xs = polyline_from_points(
+    np.array([
+        [(xmin + xmax) / 2, ymin, 0],
+        [(xmin + xmax) / 2, ymax, 0],
+        [(xmin + xmax) / 2, ymax, -15000],
+        [(xmin + xmax) / 2, ymin, -15000],
+    ])
+)
+
+# Create a plotter instance for the 3D plot
+plotter = pv.Plotter()
+# Same for the cross section
+plotter_xs = pv.Plotter()
+
+overlap = np.zeros(X.shape)
+
+da = xr.DataArray(
+    data=overlap,
+    dims=["lat", "lon"],
+    coords={
+        "lat": yi,
+        "lon": xi,
+    },
+)
+
+# Load the polygon to fill the NaNs below the North Sea with nearest neighbour interpolation values
+fpath_shp = os.path.join(os.path.dirname(__file__), "noordzee_clip", "noordzee_clip.shp")
+gdf_ns = gpd.read_file(fpath_shp)
+
+# Create a list with the names of the subfolders where the interpolation result will be stored
+subdirs = ["top_aquitard", "dikte_aquitard", "bot_aquitard"]
+
+# Loop over the layers
+fpath_gpkg = os.path.join(os.path.dirname(__file__), "interpolation_points.gpkg")
+for c, layer_name in enumerate(layer_names):
+    # Create GeoDataFrames with the data points of the top and thicknesses
+    gdf_t = get_point_values(f"T{layer_name}")
+    gdf_d = get_point_values(f"D{layer_name}")
+
+    # Read the polygons that indicate the absence of a layer (0.01 m polygons in the Koster (1997) shapefiles)
+    fpath_shp = os.path.join(os.path.dirname(__file__), "dikte_aquitard", f"D{layer_name}", f"D{layer_name}_mask.shp")
+    gdf_msk = gpd.read_file(fpath_shp)
+    gdf_msk = gdf_msk[["geometry", "VALUE"]]
+    gdf_msk = gdf_msk.rename(columns={"VALUE": "value"})
+    # Add the zero-valued grid points inside the mask polygons as helper points.
+    # The sjoin suffixes the "value" columns of both frames, so rename
+    # "value_left" (the grid-point values) back to "value" before concatenating.
+    gdf_within = gpd.sjoin(gdf_pt, gdf_msk, predicate="within")
+    gdf_within = gdf_within.rename(columns={"value_left": "value"})
+    gdf_d = pd.concat([gdf_d, gdf_within[["geometry", "value"]]])
+
+    gdf_t = gdf_t.drop_duplicates()
+    gdf_d = gdf_d.drop_duplicates()
+
+    # Store the interpolation points (layer top) so that they can be visualised in QGIS
+    # Experimental, commented out for the time being
+    # gdf_t.to_file(
+    #     fpath_gpkg,
+    #     driver="GPKG",
+    #     mode="a",
+    #     layer=layer_name,
+    # )
+
+    # gdf_d.to_file(
+    #     fpath_gpkg,
+    #     driver="GPKG",
+    #     mode="a",
+    #     layer=layer_name,
+    # )
+
+    # Store the interpolation points (layer top) so that they can be visualised in QGIS
+    fpath_shp = os.path.join(
+        os.path.dirname(__file__),
+        "top_aquitard",
+        f"T{layer_name}",
+        f"T{layer_name}_interpolation_points.shp",
+    )
+    # Ensure the CRS is set (allow_override avoids an error when a CRS is already present)
+    gdf_t = gdf_t.set_crs(CRS_RD, allow_override=True)
+    # gdf_t.to_file(fpath_shp)
+
+    # Store the interpolation points (layer thickness) so that they can be visualised in QGIS
+    fpath_shp = os.path.join(
+        os.path.dirname(__file__),
+        "dikte_aquitard",
+        f"D{layer_name}",
+        f"D{layer_name}_interpolation_points.shp",
+    )
+    gdf_d = gdf_d.set_crs(CRS_RD, allow_override=True)
+    # gdf_d.to_file(fpath_shp)
+
+    # Interpolate the top
+    zint_t = interpolate_gdf(gdf_pt, gdf_t, gdf_ns=gdf_ns)
+    # Interpolate the thickness
+    zint_d = interpolate_gdf(gdf_pt, gdf_d, gdf_ns=gdf_ns, gdf_msk=gdf_msk)
+
+    # Check if a mask exists for the Bergen area
+    fpath_shp = os.path.join(
+        os.path.dirname(__file__),
+        "dikte_aquitard",
+        f"D{layer_name}",
+        f"D{layer_name}_mask_bergen_area.shp",
+    )
+    if os.path.isfile(fpath_shp):
+        # Read the shapefile
+        gdf_msk_bergen = gpd.read_file(fpath_shp)
+        # Check which grid points are within the clipping polygons
+        gdf_within = gpd.sjoin(gdf_pt, gdf_msk_bergen, predicate="within")
+        # Convert their indices to a list
+        idx_msk = gdf_within.index.to_list()
+        # Set the interpolated values to NaN
+        zint_t[idx_msk] = np.nan
+        zint_d[idx_msk] = np.nan
+
+    # Calculate the layer bottom using the interpolated values
+    zint_b = zint_t - zint_d
+
+    # Store the interpolated values for visualization in QGIS
+    for subdir, zint in zip(subdirs, [zint_t, zint_d, zint_b], strict=False):
+        da.values = zint.reshape(X.shape)
+        fstem = f"{subdir[0].capitalize()}{layer_name}"
+        fpath = os.path.join("..", "gis", "kaarten_2024_geinterpoleerd", subdir, fstem)
+        os.makedirs(fpath, exist_ok=True)
+        fpath = os.path.join(fpath, f"{fstem}.nc")
+        da.to_netcdf(fpath)
+
+    # Determine the areas where the bottom of a layer is below the top of the underlying layer
+    if c > 0:
+        dz = zint_b0 - zint_t  # Note that zint_b0 is not defined until the first layer has been processed  # noqa: F821
+        dz[dz > 0] = np.nan
+        da.values = dz.reshape(X.shape)
+
+        fpath = os.path.join(
+            "..", "gis", "kaarten_2024_geinterpoleerd", "overlap", f"overlap_{layer_names[c - 1]}_{layer_name}.nc"
+        )
+        # Make sure the overlap subfolder exists before writing the netCDF file
+        os.makedirs(os.path.dirname(fpath), exist_ok=True)
+        da.to_netcdf(fpath)
+
+    zint_b0 = zint_b  # Store the bottom of the current layer for comparison with the top of the next layer
+    # zint_t[np.isnan(zint_t)] = 0
+    # zint_b[np.isnan(zint_b)] = 0
+
+    # Add the top to the 3D plot (the factor 100 applies vertical exaggeration)
+    grid_t = pv.StructuredGrid(X, Y, zint_t.reshape(X.shape) * 100)
+    for i in np.where(np.isnan(zint_t))[0]:
+        grid_t.BlankPoint(i)
+
+    plotter.add_mesh(
+        grid_t,
+        color=colors_t[c],
+        style="surface",
+        show_edges=False,
+        nan_opacity=0,
+        # scalars=grid.points[:, -1],
+        # scalar_bar_args={'vertical': True},
+    )
+
+    # Add the top to the 3D cross section with the projected top and bottom elevations
+    line_slice_t = grid_t.slice_along_line(ln_xs)
+    plotter_xs.add_mesh(
+        line_slice_t,
+        line_width=1,
+        render_lines_as_tubes=False,
+        color=colors_t[c],
+    )
+
+    # Add the bottom to the 3D plot
+    grid_b = pv.StructuredGrid(X, Y, zint_b.reshape(X.shape) * 100)
+    for i in np.where(np.isnan(zint_b))[0]:
+        grid_b.BlankPoint(i)
+
+    plotter.add_mesh(
+        grid_b,
+        color=colors_b[c],
+        style="surface",
+        show_edges=False,
+        nan_opacity=0,
+        # scalars=grid.points[:, -1],
+        # scalar_bar_args={'vertical': True},
+    )
+
+    # Add the bottom to the 3D cross section with the projected top and bottom elevations
+    line_slice_b = grid_b.slice_along_line(ln_xs)
+    plotter_xs.add_mesh(
+        line_slice_b,
+        line_width=1,
+        render_lines_as_tubes=False,
+        color=colors_b[c],
+    )
+
+# Show the 3D plots
+plotter.show_grid()
+plotter.show()
+
+plotter_xs.add_mesh(ln_xs, line_width=1, color="grey")
+plotter_xs.show()
diff --git a/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/interpolation_helper_functions.py b/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/interpolation_helper_functions.py
new file mode 100644
index 0000000..ac11feb
--- /dev/null
+++ b/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/interpolation_helper_functions.py
@@ -0,0 +1,635 @@
+from pathlib import Path
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import pyvista as pv
+from scipy.interpolate import griddata
+
+# Default CRS, Amersfoort RD
+CRS_RD = 28992
+
+# These dictionaries map the polygon values in the nhflo data
+# version of the Koster shapefiles to values for the contour
+# lines. The value that is assigned to the contour line is the
+# value that occurs in both lists that correspond to a polygon
+# value. So for example, a line that borders polygons with values
+# -1.50 and 0 for TS11 will be assigned a value of -0.50 m.
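+#
+# In code, the shared item can be found with a set intersection, e.g.
+#
+#     set(LEGEND_DICTS["TS11"][-1.50]) & set(LEGEND_DICTS["TS11"][0])  # {-0.5}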
+LEGEND_DICTS = {
+    "TS11": {
+        -3.75: [-2.5],
+        -1.50: [-2.50, -0.50],
+        0: [-0.50, 0.50],
+        1.50: [0.50, 2.50],
+        3.75: [2.50, 5.00],
+        5.50: [5.00],
+    },
+    "DS11": {
+        0.13: [0.25],
+        0.38: [0.25, 0.50],
+        0.75: [0.50, 1.00],
+        1.50: [1.00, 2.00],
+        2.50: [2.00, 3.00],
+        4.00: [3.00, 5.00],
+        6.00: [5.00, 7.50],
+        7.75: [7.50],
+    },
+    "TS12": {
+        0.5: [0.00],
+        -2.50: [-5.00, 0.00],
+        -7.50: [-10.00, -5.00],
+        -12.50: [-15.00, -10.00],
+        -17.50: [-15.00],
+    },
+    "DS12": {
+        0.75: [1.00],
+        2.00: [1.00, 3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00, 20.00],
+        22.50: [20.00, 25.00],
+        27.50: [25.00],
+    },
+    "TS13": {
+        -12.50: [-15.00],
+        -17.50: [-20.00, -15.00],
+        -22.50: [-25.00, -20.00],
+        -27.50: [-30.00, -25.00],
+        -32.50: [-35.00, -30.00],
+        -36.00: [-35.00],
+    },
+    "DS13": {
+        0.25: [0.50],
+        0.75: [0.50, 1.00],
+        2.00: [1.00, 3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00, 20.00],
+        22.50: [20.00, 25.00],
+        27.50: [25.00],
+    },
+    "TS21": {
+        -25.00: [-30.00, -20.00],
+        -35.00: [-40.00, -30.00],
+        -45.00: [-50.00, -40.00],
+        -55.00: [-60.00, -50.00],
+        -65.00: [-60.00],
+    },
+    "DS21": {
+        2.00: [3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00, 20.00],
+        22.50: [20.00, 25.00],
+        27.50: [25.00, 30.00],
+        32.50: [30.00, 35.00],
+        37.50: [35.00, 40.00],
+        42.50: [40.00],
+    },
+    "TS22": {
+        -25.00: [-30.00],
+        -35.00: [-40.00, -30.00],
+        -45.00: [-50.00, -40.00],
+        -55.00: [-60.00, -50.00],
+        -65.00: [-70.00, -60.00],
+        -75.00: [-80.00, -70.00],
+        -85.00: [-90.00, -80.00],
+        -95.00: [-90.00],
+    },
+    "DS22": {
+        2.50: [5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00, 20.00],
+        22.50: [20.00, 25.00],
+        27.50: [25.00, 30.00],
+        32.50: [30.00, 35.00],
+        37.50: [35.00, 40.00],
+        42.50: [40.00, 45.00],
+        47.50: [45.00, 50.00],
+        52.50: [50.00],
+    },
+    "TS31": {
+        -45.00: [-50.00],
+        -55.00: [-60.00, -50.00],
+        -65.00: [-70.00, -60.00],
+        -75.00: [-80.00, -70.00],
+        -85.00: [-90.00, -80.00],
+        -95.00: [-100.0, -90.00],
+        -105.0: [-100.0],
+    },
+    "DS31": {
+        0.25: [0.50],
+        0.75: [0.50, 1.00],
+        2.00: [1.00, 3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00],
+    },
+    "TS32": {
+        -77.50: [-80.00],
+        -82.50: [-85.00, -80.00],
+        -87.50: [-90.00, -85.00],
+        -92.50: [-95.00, -90.00],
+        -97.50: [-100.0, -95.00],
+        -102.5: [-105.0, -100.0],
+        -107.5: [-110.0, -105.0],
+        -112.5: [-110.0],
+    },
+    "DS32": {
+        0.25: [0.50],
+        0.75: [0.50, 1.00],
+        2.00: [1.00, 3.00],
+        4.00: [3.00, 5.00],
+        7.50: [5.00, 10.00],
+        12.50: [10.00, 15.00],
+        17.50: [15.00],
+    },
+}
+
+
+def get_internal_contour_lines(gdf_ln, gdf_pl):
+    """This function looks for linestrings in gdf_ln that do not
+    overlap with the polygon boundaries in gdf_pl. The purpose is
+    to separate the lines that represent a thickness from the lines
+    that represent the limit of occurrence of a layer. Only used
+    for thicknesses, not for the tops.
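+
+    The check is done by buffering the boundaries of the 0.01 m (layer
+    absent) polygons and discarding the linestrings that fall within the
+    buffered boundaries.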
+
+    Parameters
+    ----------
+    gdf_ln : GeoDataFrame
+        GeoDataFrame containing the linestrings of the thickness
+        contours
+    gdf_pl : GeoDataFrame
+        GeoDataFrame with the polygons of the thickness
+
+    Returns
+    -------
+    GeoDataFrame
+        Returns gdf_ln without the linestrings that overlap with the
+        limit of occurrence
+    """
+    # Select only the polygons which indicate the regions where the layer does not occur
+    idx = gdf_pl["VALUE"] == 0.01
+
+    # Create a new GeoDataFrame containing the polygon boundaries as (Multi)LineStrings
+    gdf_bnd = gpd.GeoDataFrame(
+        geometry=gdf_pl.loc[idx, "geometry"].boundary,
+        crs=CRS_RD,
+    )
+    # Explode so that MultiLineStrings become LineStrings
+    gdf_bnd = gdf_bnd.explode()
+    # Create a buffer around the polygon boundaries because in rare cases there
+    # are minor differences between the line vertices and the polygon vertices
+    gdf_bnd["geometry"] = gdf_bnd["geometry"].buffer(2.0)
+
+    # Do a spatial join to find out which linestrings in gdf_ln are within
+    # the polygons of gdf_bnd. Those that are, should not be returned by
+    # the function.
+    gdf_jn = gpd.sjoin(gdf_bnd, gdf_ln, how="left", predicate="contains")
+    # Use the index of gdf_ln to create a boolean mask to slice gdf_ln with
+    idx = gdf_ln.index.isin(gdf_jn["index_right"])
+
+    # Return gdf_ln without the linestrings that overlap with the 0.01 m
+    # thickness polygon boundaries.
+    return gdf_ln.loc[~idx]
+
+
+def assign_poly_values_to_linestrings(
+    gdf_ln,  # GeoDataFrame with contour lines
+    gdf_pl,  # GeoDataFrame with the Koster (1997) polygons
+    layer_name,  # Name of the layer
+):
+    """This function tries to identify which polygons in gdf_pl border
+    a contour line in gdf_ln.
+
+    Ideally, a contour line forms the separation between two polygons,
+    but this is not always the case due to topology errors and the
+    addition of polygons to the Koster (1997) shapefiles from other
+    sources (this occurs mostly in the southern part of the area).
+
+    Parameters
+    ----------
+    gdf_ln : GeoDataFrame
+        GeoDataFrame containing the linestrings of the thickness
+        contours
+    gdf_pl : GeoDataFrame
+        GeoDataFrame with the polygons of the thickness
+    layer_name : str
+        Name of the layer being processed. This is needed to look up
+        the dictionary in LEGEND_DICTS that maps the Koster (1997)
+        legend entries to the top/thickness values assigned to the
+        polygons.
+
+    Returns
+    -------
+    GeoDataFrame
+        A GeoDataFrame with, for each linestring, the assigned value,
+        the number of bordering polygons found and any remarks.
+    """
+    # Get the legend_dict for the current layer
+    legend_dict = LEGEND_DICTS[layer_name]
+
+    # Check for polygons with a VALUE attribute of 0.01 m, which signals where the layer is absent.
+    # This only matches rows for DS files and has no effect for TS files.
+    idx = gdf_pl["VALUE"] == 0.01
+    # Remove the 0.01 m polygons
+    gdf_pl = gdf_pl.loc[~idx]
+    # Renumber the index (reset_index, unlike reindex, does not introduce
+    # NaN rows for the index labels that were just removed)
+    gdf_pl = gdf_pl.reset_index(drop=True)
+    # Make geometries valid by using the buffer(0) trick
+    gdf_pl["geometry"] = gdf_pl.buffer(0)
+
+    # Determine which linestrings in gdf_ln intersect which polygons in gdf_pl
+    gdf_int = gpd.sjoin(gdf_ln, gdf_pl, how="left", predicate="intersects")
+
+    data = []
+    # The index of gdf_int contains duplicates because most lines will
+    # intersect multiple polygons.
+    for i in gdf_int.index.unique():
+        # Select the rows for the contour linestring and store them in a
+        # separate GeoDataFrame
+        idx = gdf_int.index == i
+        gdf_i = gdf_int.loc[idx]
+
+        # Determine the length of the DataFrame, i.e. the number of polygons that intersect the linestring
+        N = len(gdf_i)
+
+        # Default remark for N == 2
+        remark = "Value assigned automatically in Python script."
+
+        # The linestring's geometry is the same for all rows of gdf_i. Only one is
+        # needed to build the GeoDataFrame returned by the function.
+        geom = gdf_i["geometry"].values[0]
+
+        # Ideally each contour has a polygon to either side, so N == 2. This is not always the case,
+        # hence the need for these conditional statements
+        if N == 2:
+            # Get the VALUE attribute of each polygon
+            v0 = gdf_i["VALUE"].values[0]
+            v1 = gdf_i["VALUE"].values[1]
+            # Get the legend range from legend_dict
+            list0 = legend_dict.get(v0)
+            list1 = legend_dict.get(v1)
+            # The value may not correspond to a range in the original Koster (1997)
+            # legend, as polygons from other sources were later added to the shapefiles.
+            # In that case (one of) the list(s) will be None.
+            if None in {list0, list1}:
+                # Do not assign a value to the linestring and change the remark that will
+                # appear in the shapefile attribute table.
+                v = [None]
+                remark = "Poly not in mapping dict. Assign value manually."
+            else:
+                # If both lists are not None then find the item they have in common.
+                # Ideally this is a single value. When this is not the case, no value
+                # is assigned and the remark is changed to reflect this problem.
+                v = list({item0 for item0 in list0 if item0 in list1})
+                if len(v) != 1:
+                    # Record the number of common items before discarding them
+                    remark = f"Ambiguous result {len(v)}. Assign value manually."
+                    v = [None]
+        # A linestring can intersect more than two polygons due to
+        # - overlapping polygons in the original shapefiles
+        # - the start and end points of the contour lines touching (digitizing mistake)
+        # - polygons from another data source that were added to the original Koster (1997) polygons.
+        # In these cases N is larger than 2 and it cannot be determined
+        # automatically what the value for the linestring must be.
+        elif N > 2:
+            v = [None]
+            remark = "Line intersects more than 2 polygons. Assign value manually."
+        # A line can also intersect no polygon or a single polygon. This is most
+        # frequently the case for the lines that were added to the Koster (1997)
+        # linestrings for the Bergen area (based on the Stuyfzand figures).
+        elif N < 2:
+            v = [None]
+            remark = "Line intersects less than 2 polygons. Assign value manually."
+
+        # Append one line to the data for each linestring in gdf_ln
+        data.append([*v, N, remark, geom])
+
+    # Return a GeoDataFrame of linestrings
+    return gpd.GeoDataFrame(
+        data=data,
+        columns=["value", "N", "remark", "geometry"],
+        crs=CRS_RD,
+    )
+
+
+def combine_lists(lists):
+    """Combine lists that share common items.
+
+    Parameters
+    ----------
+    lists : list
+        A list of lists to be analyzed
+
+    Returns
+    -------
+    list
+        A list in which the input lists with common items have
+        been combined.
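+
+        For example, ``combine_lists([[1, 2], [2, 3], [4]])`` returns
+        ``[[1, 2, 3], [4]]``; the first two lists are merged because they
+        share the item 2. The order of items within a group is not
+        guaranteed, because sets are used internally.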
+    """
+    # Start by assuming each list is a separate group
+    groups = [set(lst) for lst in lists]
+
+    merged = True
+    while merged:
+        merged = False
+        for i in range(len(groups)):
+            for j in range(i + 1, len(groups)):
+                # If two groups share any common elements, merge them
+                if groups[i].intersection(groups[j]):
+                    groups[i] = groups[i].union(groups[j])
+                    groups.pop(j)
+                    merged = True
+                    break
+            if merged:
+                break
+
+    # Convert sets back to lists for final output
+    return [list(group) for group in groups]
+
+
+def join_contour_line_segments(gdf_ln):
+    """This function combines the individual linestrings that appear
+    in the original Koster (1997) contour line files into larger
+    linestrings.
+
+    Parameters
+    ----------
+    gdf_ln : GeoDataFrame
+        GeoDataFrame with individual linestrings.
+
+    Returns
+    -------
+    GeoDataFrame
+        GeoDataFrame with the combined linestrings.
+    """
+    # Use a spatial join to identify the linestrings with touching endpoints
+    gdf_tch = gpd.sjoin(gdf_ln, gdf_ln, how="left", predicate="touches")
+
+    # Create a dictionary for every line segment, which will store the
+    # index numbers of the linestrings that it touches
+    l_dict = {i: [] for i in gdf_ln.index}
+    # Loop through the index items of gdf_tch
+    for i0 in gdf_tch.index:
+        # Get the rows for the current line segment
+        idx = gdf_tch.index == i0
+        # Get the index numbers of the touching linestrings
+        irs = gdf_tch.loc[idx, "index_right"]
+        irs = irs.dropna()
+        # Combine the linestring index number with the index numbers
+        # of the touching linestrings into a single list
+        i_list = [i0, *irs.astype(int).tolist()]
+        # Update each item in the l_dict dictionary by adding i_list
+        for i1 in i_list:
+            l_dict[i1] += i_list
+
+    # Combine the linestring segments that form a single contour line. This
+    # will result in a nested list in which each item is a list containing
+    # the index numbers of the line segments that together form a contour line
+    unique_lines = combine_lists(list(l_dict.values()))
+
+    # Loop through the list with the index numbers of the segments
+    # and use these to create single contour lines
+    lns = []
+    ln_vals = []
+    for idx in unique_lines:
+        if len(idx) == 0:
+            continue
+        # Combine the line segments into a single linestring
+        lns.append(gdf_ln.loc[idx, "geometry"].union_all())
+        # Each line segment has a top/thickness value associated
+        # with it. Ideally they are all the same but this is not
+        # guaranteed. The next two lines select the most frequently
+        # occurring value, which will be used as the value attribute
+        # in the GeoDataFrame that will be returned.
+        vals_lst = gdf_ln.loc[idx, "value"].to_list()
+        v = max(vals_lst, key=vals_lst.count)
+        ln_vals.append(v)
+
+    # Return a GeoDataFrame with the combined linestrings and their
+    # top/thickness value.
+    return gpd.GeoDataFrame(
+        geometry=lns,
+        data={
+            "script_value": ln_vals,
+            "value": ln_vals,
+        },
+        crs=CRS_RD,
+    )
+
+
+def get_point_values(layer_name):
+    """This function is called from interpolate_layer_boundaries.py to convert
+    line segments to points and combine them with the point data from the geo_daw
+    data (top/thickness values for boreholes interpreted by Koster, 1997) and
+    the point values for the Bergen area (digitized from the figures in the
+    Stuyfzand, 1987 report).
+
+    Parameters
+    ----------
+    layer_name : str
+        Name of the layer.
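+        For example "TS11" for the top of aquitard S11 or "DS11" for its
+        thickness.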
+
+    Returns
+    -------
+    GeoDataFrame
+        GeoDataFrame with points and their corresponding top/thickness values
+    """
+    # Folder with the contour lines
+    src_dir = Path("..", "gis", "kaarten_2024_voor_interpolatie")
+    # Set the paths to the files to be read
+    if layer_name.startswith("T"):
+        fpath_shp = Path(src_dir, "top_aquitard", layer_name, f"{layer_name}_union_with_values_edited.shp")
+        fpath_shp_ber = Path(src_dir, "top_aquitard", layer_name, f"{layer_name}_bergen_points.shp")
+    elif layer_name.startswith("D"):
+        fpath_shp = Path(src_dir, "dikte_aquitard", layer_name, f"{layer_name}_union_with_values_edited.shp")
+        fpath_shp_ber = Path(src_dir, "dikte_aquitard", layer_name, f"{layer_name}_bergen_points.shp")
+
+    # Import the contour lines
+    gdf_ln = gpd.read_file(fpath_shp)
+    # Convert any multilinestrings to linestrings
+    gdf_ln = gdf_ln.explode()
+    # Add the line vertices as a list of coordinates to each row of the GeoDataFrame
+    gdf_ln["points"] = gdf_ln.apply(lambda x: list(x["geometry"].coords), axis=1)
+
+    # Convert the coordinates to points and assign values
+    values = []
+    pts = []
+    # Loop through each row of gdf_ln
+    for _index, row in gdf_ln.iterrows():
+        # Skip NULL values that can occur for Bergen -999 polygons
+        if np.isnan(row["value"]):
+            continue
+        # Get the coordinates created by the lambda function above and
+        # convert them to Point objects
+        xy_arr = np.array(row["points"])
+        pts_i = list(gpd.points_from_xy(x=xy_arr[:, 0], y=xy_arr[:, 1]))
+        # Add to the list of existing points
+        pts += pts_i
+        # Expand the list with values
+        values += [row["value"]] * len(pts_i)
+
+    # Convert to a GeoDataFrame
+    gdf_pts = gpd.GeoDataFrame(
+        data=values,
+        geometry=pts,
+        columns=["value"],
+        crs=CRS_RD,
+    )
+
+    # Check if a shapefile with point data exists for the Bergen area
+    if fpath_shp_ber.exists():
+        # Read the file
+        gdf_pts_ber = gpd.read_file(fpath_shp_ber)
+        # Discard columns other than 'VALUE' and 'geometry'
+        gdf_pts_ber = gdf_pts_ber[["geometry", "VALUE"]]
+        # Rename the 'VALUE' column to 'value' to be compatible with gdf_pts
+        gdf_pts_ber = gdf_pts_ber.rename(columns={"VALUE": "value"})
+        # Add the Bergen points to gdf_pts
+        gdf_pts = pd.concat([gdf_pts, gdf_pts_ber])
+
+    # Read the point data for the boreholes
+    fpath_daw = Path("..", "gis", "koster_1997", "daw_bestanden", "daw_data_TS_DS", "daw_data_TS_DS.shp")
+    gdf_daw = gpd.read_file(fpath_daw)
+    # Select the column for the layer being processed
+    gdf_daw = gdf_daw[[layer_name, "geometry"]].dropna()
+    # Rename the column from layer name to 'value'
+    gdf_daw = gdf_daw.rename(columns={layer_name: "value"})
+
+    # Add the points to gdf_pts
+    return pd.concat([gdf_pts, gdf_daw])
+
+
+def interpolate_gdf(gdf_pt, gdf, gdf_ns=None, gdf_msk=None):
+    """Interpolate the point values of a layer to a (model) grid.
+
+    Parameters
+    ----------
+    gdf_pt : GeoDataFrame
+        GeoDataFrame with the points of the interpolation grid
+    gdf : GeoDataFrame
+        GeoDataFrame with the values to be interpolated
+    gdf_ns : GeoDataFrame, optional
+        GeoDataFrame with a polygon used to fill the grid below the North Sea
+        with nearest neighbour values after the interpolation. Not used
+        if None is passed (default).
+    gdf_msk : GeoDataFrame, optional
+        GeoDataFrame with polygons that indicate where a layer is absent, as
+        indicated by the 0.01 m thickness values in the Koster (1997)
+        shapefiles. Points inside these polygons receive a zero thickness.
+        Note that this argument is currently not used inside the function:
+        the zero-thickness helper points are added to gdf by the caller.
+        Should be set to None (default) when interpolating the layer top.
+
+    Returns
+    -------
+    numpy.ndarray
+        1D array with the interpolated values for the points in gdf_pt.
+        Points outside the convex hull of the data points are NaN, unless
+        they were filled with nearest neighbour values below the North Sea.
+    """
+    # Create 1D arrays for the interpolation points
+    xi = gdf_pt["geometry"].x.to_numpy()
+    yi = gdf_pt["geometry"].y.to_numpy()
+
+    # Convert the data point coordinates and values to NumPy arrays
+    x = gdf["geometry"].x.to_numpy()
+    y = gdf["geometry"].y.to_numpy()
+    z = gdf["value"].to_numpy()
+
+    # Call SciPy's griddata to perform the interpolation. Note that zint
+    # is assigned NaN outside the convex hull of the data point cloud
+    zint = griddata(
+        points=(x, y),
+        values=z,
+        xi=(xi[None, :], yi[None, :]),
+        method="linear",  # Note: cubic gives very poor results
+    )
+
+    # Repeat the interpolation for the interpolation points below
+    # the North Sea if the polygon is supplied
+    if gdf_ns is not None:
+        # Take the existing interpolation result and make the array 1D
+        zint = zint.ravel()
+
+        # Identify the points in the interpolation grid that are within
+        # the North Sea polygon
+        gdf_within = gpd.sjoin(gdf_pt, gdf_ns, predicate="within")
+        # Convert their indices to a list
+        idx_ns = gdf_within.index.to_list()
+        # Use the list to slice the arrays with interpolation point
+        # x and y values
+        xi_ns = xi[idx_ns]
+        yi_ns = yi[idx_ns]
+
+        # Find the points in the previous interpolation result that
+        # have NaN values (these were outside the convex hull of the
+        # data points)
+        idx = np.isnan(zint)
+        # Keep only the non-NaN values for the interpolation that will
+        # assign the nearest neighbour value to the points below the
+        # North Sea
+        x = xi[~idx]
+        y = yi[~idx]
+        z = zint[~idx]
+
+        # Perform the nearest neighbour interpolation
+        zint_ns = griddata(
+            points=(x, y),
+            values=z,
+            xi=(xi_ns[None, :], yi_ns[None, :]),
+            method="nearest",
+        )
+
+        # Replace the values of the points in zint that are below the
+        # North Sea with the nearest neighbour values.
+        zint[idx_ns] = zint_ns
+
+    # Return the interpolated values
+    return zint
+
+
+def polyline_from_points(points):
+    """Create a PyVista PolyData object with a single polyline through the given points.
+
+    Parameters
+    ----------
+    points : array_like
+        Array of shape (npoints, 3) with the x, y and z coordinates of
+        the polyline vertices.
+
+    Returns
+    -------
+    pyvista.PolyData
+        PolyData object in which the points are connected by a single
+        polyline.
+    """
+    poly = pv.PolyData()
+    poly.points = points
+    # VTK cell connectivity: the first item is the number of points in the
+    # cell, followed by the indices of the points that make up the polyline
+    the_cell = np.arange(0, len(points), dtype=int)
+    the_cell = np.insert(the_cell, 0, len(points))
+    poly.lines = the_cell
+    return poly
diff --git a/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/merge_masks.py b/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/merge_masks.py
new file mode 100644
index 0000000..e17a192
--- /dev/null
+++ b/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0/merge_masks.py
@@ -0,0 +1,35 @@
+"""Merge the masks of the aquitard thickness layers from Koster (1997) and Stuyfzand (1970) into a single file."""
+
+from pathlib import Path
+
+import geopandas as gpd
+import pandas as pd
+
+data_dir = Path("/Users/bdestombe/Projects/NHFLO/data/src/nhflodata/data/mockup/bodemlagen_pwn_2024/v1.0.0")
+layer_names = ["S11", "S12", "S13", "S21", "S22", "S31", "S32"]
+
+# Load and merge the masks
+for name in layer_names:
+    # Thickness (dikte) of the aquitard
+    fp_koster = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_mask.geojson"
+    koster_mask = gpd.read_file(fp_koster, columns=["geometry", "VALUE"])
+    koster_mask["source"] = "Koster (1997)"
+    koster_mask = koster_mask.rename(columns={"VALUE": "value"})
+
+    fp_out = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_mask_combined.geojson"
+    fp_stuyfzand = data_dir / "dikte_aquitard" / f"D{name}" / f"D{name}_mask_bergen_area.geojson"
+    if fp_stuyfzand.exists():
+        stuyfzand_mask = gpd.read_file(fp_stuyfzand, columns=["geometry", "value"])
+        stuyfzand_mask["source"] = "Stuyfzand (1970)"
+
+        # Combine the masks and write them to a single file
+        pd.concat((koster_mask, stuyfzand_mask)).to_file(
+            fp_out,
+            driver="GeoJSON",
+        )
+    else:
+        # No Bergen-area mask for this layer; write the Koster mask as-is
+        koster_mask.to_file(
+            fp_out,
+            driver="GeoJSON",
+        )