diff --git a/sgeop/nodes.py b/sgeop/nodes.py index 95c2b98..c96d521 100644 --- a/sgeop/nodes.py +++ b/sgeop/nodes.py @@ -1,3 +1,5 @@ +import typing + import geopandas as gpd import momepy import networkx as nx @@ -35,17 +37,21 @@ def split( split_points = gpd.GeoSeries(split_points, crs=crs) for split in split_points.drop_duplicates(): _, ix = cleaned_roads.sindex.nearest(split, max_distance=eps) - edge = cleaned_roads.geometry.iloc[ix] + row = cleaned_roads.iloc[ix] + edge = row.geometry if edge.shape[0] == 1: + row = row.iloc[0] lines_split = _snap_n_split(edge.item(), split, eps) if lines_split.shape[0] > 1: gdf_split = gpd.GeoDataFrame(geometry=lines_split, crs=crs) + for c in row.index.drop(["geometry", "_status"], errors="ignore"): + gdf_split[c] = row[c] gdf_split["_status"] = "changed" cleaned_roads = pd.concat( [cleaned_roads.drop(edge.index[0]), gdf_split], ignore_index=True, ) - else: + elif edge.shape[0] > 1: to_be_dropped = [] to_be_added = [] for i, e in edge.items(): @@ -55,14 +61,26 @@ def split( to_be_added.append(lines_split) if to_be_added: - gdf_split = gpd.GeoDataFrame( - geometry=np.concatenate(to_be_added), crs=crs + gdf_split = pd.DataFrame( + {"geometry": to_be_added, "_orig": to_be_dropped} + ).explode("geometry") + gdf_split = pd.concat( + [ + gdf_split.drop(columns="_orig").reset_index(drop=True), + row.drop(columns="geometry") + .loc[gdf_split["_orig"]] + .reset_index(drop=True), + ], + axis=1, ) gdf_split["_status"] = "changed" cleaned_roads = pd.concat( [cleaned_roads.drop(to_be_dropped), gdf_split], ignore_index=True, ) + cleaned_roads = gpd.GeoDataFrame( + cleaned_roads, geometry="geometry", crs=crs + ) return cleaned_roads.reset_index(drop=True) @@ -485,7 +503,7 @@ def consolidate_nodes( # TODO: It is temporarily fixed by that explode in return geom.iloc[inds] = geom.iloc[inds].difference(cookie) - status.iloc[inds] = "snapped" + status.iloc[inds] = "changed" midpoint = np.mean(shapely.get_coordinates(cluster), axis=0) midpoints.append(midpoint) mids = np.array([midpoint] * len(pts)) @@ -504,7 +522,12 @@ def consolidate_nodes( geoms = np.hstack(spiders) gdf = pd.concat([gdf, gpd.GeoDataFrame(geometry=geoms, crs=geom.crs)]) + agg: dict[str, str | typing.Callable] = {"_status": _status} + for c in gdf.columns.drop(gdf.active_geometry_name): + if c != "_status": + agg[c] = "first" return remove_false_nodes( gdf[~gdf.geometry.is_empty].explode(), - aggfunc={"_status": _status}, + # NOTE: this aggfunc needs to be able to process all the columns + aggfunc=agg, ) diff --git a/sgeop/simplify.py b/sgeop/simplify.py index 0b99343..bc4d1c9 100644 --- a/sgeop/simplify.py +++ b/sgeop/simplify.py @@ -1,4 +1,5 @@ import logging +import typing import warnings import geopandas as gpd @@ -253,7 +254,8 @@ def simplify_singletons( stacklevel=2, ) - # Split lines on new nodes + cleaned_roads = roads.drop(to_drop) + # split lines on new nodes cleaned_roads = split(split_points, roads.drop(to_drop), roads.crs) if to_add: @@ -266,12 +268,20 @@ def simplify_singletons( new["_status"] = "new" new.geometry = new.simplify(max_segment_length * simplification_factor) new_roads = pd.concat([cleaned_roads, new], ignore_index=True) + agg: dict[str, str | typing.Callable] = {"_status": _status} + for c in cleaned_roads.columns.drop(cleaned_roads.active_geometry_name): + if c != "_status": + agg[c] = "first" non_empties = new_roads[~(new_roads.is_empty | new_roads.geometry.isna())] - new_roads = remove_false_nodes(non_empties, aggfunc={"_status": _status}) + new_roads = remove_false_nodes(non_empties, aggfunc=agg) - return new_roads + final = new_roads else: - return cleaned_roads + final = cleaned_roads + + if "coins_group" in final.columns: + final = final.drop(columns=[c for c in roads.columns if c.startswith("coins_")]) + return final def simplify_pairs( @@ -358,6 +368,17 @@ def simplify_pairs( # Determine typology dispatch if artifacts are present if not artifacts_w_info.empty: + agg = { + "coins_group": "first", + "coins_end": lambda x: x.any(), + "_status": _status, + } + for c in roads.columns.drop( + [roads.active_geometry_name, "coins_count"], errors="ignore" + ): + if c not in agg: + agg[c] = "first" + sol_drop = "solution == 'drop_interline'" sol_iter = "solution == 'iterate'" @@ -368,11 +389,7 @@ def simplify_pairs( # Re-run node cleaning on subset of fresh road edges roads_cleaned = remove_false_nodes( _drop_roads, - aggfunc={ - "coins_group": "first", - "coins_end": lambda x: x.any(), - "_status": _status, - }, + aggfunc=agg, ) # Isolate drops to create merged pairs @@ -398,9 +415,7 @@ def simplify_pairs( _1st = pd.DataFrame() _2nd = pd.DataFrame() for_skeleton = pd.DataFrame() - roads_cleaned = roads[ - ["coins_group", "coins_end", "_status", roads.geometry.name] - ] + roads_cleaned = roads # Generate counts of COINs groups for edges coins_count = ( @@ -536,8 +551,12 @@ def simplify_clusters( max_segment_length * simplification_factor ) new_roads = pd.concat([cleaned_roads, new], ignore_index=True).explode() + agg: dict[str, str | typing.Callable] = {"_status": _status} + for c in new_roads.columns.drop(new_roads.active_geometry_name): + if c != "_status": + agg[c] = "first" new_roads = remove_false_nodes( - new_roads[~new_roads.is_empty], aggfunc={"_status": _status} + new_roads[~new_roads.is_empty], aggfunc=agg ).drop_duplicates("geometry") return new_roads @@ -718,9 +737,10 @@ def simplify_network( geopandas.GeoDataFrame The final, simplified road network line data. """ - + # NOTE: this keeps attributes but resets index roads = fix_topology(roads, eps=eps) # Merge nearby nodes (up to double of distance used in skeleton). + # NOTE: this drops attributes and resets index roads = consolidate_nodes(roads, tolerance=max_segment_length * 2.1) # Identify artifacts @@ -859,6 +879,7 @@ def simplify_loop( clusters = artifacts.loc[artifacts["comp"].isin(counts[counts > 2].index)].copy() if not singles.empty: + # NOTE: this drops attributes roads = simplify_singletons( singles, roads, @@ -887,4 +908,6 @@ def simplify_loop( consolidation_tolerance=consolidation_tolerance, ) + if "coins_group" in roads.columns: + roads = roads.drop(columns=[c for c in roads.columns if c.startswith("coins_")]) return roads diff --git a/sgeop/tests/data/apalachicola_simplified_exclusion_mask.parquet b/sgeop/tests/data/apalachicola_simplified_exclusion_mask.parquet index bae6f0d..db095d7 100644 Binary files a/sgeop/tests/data/apalachicola_simplified_exclusion_mask.parquet and b/sgeop/tests/data/apalachicola_simplified_exclusion_mask.parquet differ diff --git a/sgeop/tests/data/apalachicola_simplified_standard.parquet b/sgeop/tests/data/apalachicola_simplified_standard.parquet index 399fc40..4303893 100644 Binary files a/sgeop/tests/data/apalachicola_simplified_standard.parquet and b/sgeop/tests/data/apalachicola_simplified_standard.parquet differ diff --git a/sgeop/tests/test_simplify.py b/sgeop/tests/test_simplify.py index 1a1a1ed..b8687c8 100644 --- a/sgeop/tests/test_simplify.py +++ b/sgeop/tests/test_simplify.py @@ -87,6 +87,7 @@ def test_simplify_network_full_fua(aoi, tol, known_length): geopandas.read_parquet(full_fua_data / aoi / "original.parquet") ) observed_length = observed.geometry.length.sum() + assert "highway" in observed.columns # storing GH artifacts artifact_dir = ci_artifacts / aoi