Skip to content

Commit

Permalink
ENH: keep the attributes around (#132)
Browse files Browse the repository at this point in the history
* basic passthrough using 'first'

* fix split

* fix issues

* lint

* overwritten _status in agg func

* fix lost attributes when there are no doubles to deal with

* assert presence of highway

* fix mypy typing

---------

Co-authored-by: James Gaboardi <[email protected]>
  • Loading branch information
martinfleis and jGaboardi authored Dec 4, 2024
1 parent 903a253 commit 59344c5
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 20 deletions.
35 changes: 29 additions & 6 deletions sgeop/nodes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import typing

import geopandas as gpd
import momepy
import networkx as nx
Expand Down Expand Up @@ -35,17 +37,21 @@ def split(
split_points = gpd.GeoSeries(split_points, crs=crs)
for split in split_points.drop_duplicates():
_, ix = cleaned_roads.sindex.nearest(split, max_distance=eps)
edge = cleaned_roads.geometry.iloc[ix]
row = cleaned_roads.iloc[ix]
edge = row.geometry
if edge.shape[0] == 1:
row = row.iloc[0]
lines_split = _snap_n_split(edge.item(), split, eps)
if lines_split.shape[0] > 1:
gdf_split = gpd.GeoDataFrame(geometry=lines_split, crs=crs)
for c in row.index.drop(["geometry", "_status"], errors="ignore"):
gdf_split[c] = row[c]
gdf_split["_status"] = "changed"
cleaned_roads = pd.concat(
[cleaned_roads.drop(edge.index[0]), gdf_split],
ignore_index=True,
)
else:
elif edge.shape[0] > 1:
to_be_dropped = []
to_be_added = []
for i, e in edge.items():
Expand All @@ -55,14 +61,26 @@ def split(
to_be_added.append(lines_split)

if to_be_added:
gdf_split = gpd.GeoDataFrame(
geometry=np.concatenate(to_be_added), crs=crs
gdf_split = pd.DataFrame(
{"geometry": to_be_added, "_orig": to_be_dropped}
).explode("geometry")
gdf_split = pd.concat(
[
gdf_split.drop(columns="_orig").reset_index(drop=True),
row.drop(columns="geometry")
.loc[gdf_split["_orig"]]
.reset_index(drop=True),
],
axis=1,
)
gdf_split["_status"] = "changed"
cleaned_roads = pd.concat(
[cleaned_roads.drop(to_be_dropped), gdf_split],
ignore_index=True,
)
cleaned_roads = gpd.GeoDataFrame(
cleaned_roads, geometry="geometry", crs=crs
)

return cleaned_roads.reset_index(drop=True)

Expand Down Expand Up @@ -485,7 +503,7 @@ def consolidate_nodes(
# TODO: It is temporarily fixed by that explode in return
geom.iloc[inds] = geom.iloc[inds].difference(cookie)

status.iloc[inds] = "snapped"
status.iloc[inds] = "changed"
midpoint = np.mean(shapely.get_coordinates(cluster), axis=0)
midpoints.append(midpoint)
mids = np.array([midpoint] * len(pts))
Expand All @@ -504,7 +522,12 @@ def consolidate_nodes(
geoms = np.hstack(spiders)
gdf = pd.concat([gdf, gpd.GeoDataFrame(geometry=geoms, crs=geom.crs)])

agg: dict[str, str | typing.Callable] = {"_status": _status}
for c in gdf.columns.drop(gdf.active_geometry_name):
if c != "_status":
agg[c] = "first"
return remove_false_nodes(
gdf[~gdf.geometry.is_empty].explode(),
aggfunc={"_status": _status},
# NOTE: this aggfunc needs to be able to process all the columns
aggfunc=agg,
)
51 changes: 37 additions & 14 deletions sgeop/simplify.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import typing
import warnings

import geopandas as gpd
Expand Down Expand Up @@ -253,7 +254,8 @@ def simplify_singletons(
stacklevel=2,
)

# Split lines on new nodes
cleaned_roads = roads.drop(to_drop)
# split lines on new nodes
cleaned_roads = split(split_points, roads.drop(to_drop), roads.crs)

if to_add:
Expand All @@ -266,12 +268,20 @@ def simplify_singletons(
new["_status"] = "new"
new.geometry = new.simplify(max_segment_length * simplification_factor)
new_roads = pd.concat([cleaned_roads, new], ignore_index=True)
agg: dict[str, str | typing.Callable] = {"_status": _status}
for c in cleaned_roads.columns.drop(cleaned_roads.active_geometry_name):
if c != "_status":
agg[c] = "first"
non_empties = new_roads[~(new_roads.is_empty | new_roads.geometry.isna())]
new_roads = remove_false_nodes(non_empties, aggfunc={"_status": _status})
new_roads = remove_false_nodes(non_empties, aggfunc=agg)

return new_roads
final = new_roads
else:
return cleaned_roads
final = cleaned_roads

if "coins_group" in final.columns:
final = final.drop(columns=[c for c in roads.columns if c.startswith("coins_")])
return final


def simplify_pairs(
Expand Down Expand Up @@ -358,6 +368,17 @@ def simplify_pairs(

# Determine typology dispatch if artifacts are present
if not artifacts_w_info.empty:
agg = {
"coins_group": "first",
"coins_end": lambda x: x.any(),
"_status": _status,
}
for c in roads.columns.drop(
[roads.active_geometry_name, "coins_count"], errors="ignore"
):
if c not in agg:
agg[c] = "first"

sol_drop = "solution == 'drop_interline'"
sol_iter = "solution == 'iterate'"

Expand All @@ -368,11 +389,7 @@ def simplify_pairs(
# Re-run node cleaning on subset of fresh road edges
roads_cleaned = remove_false_nodes(
_drop_roads,
aggfunc={
"coins_group": "first",
"coins_end": lambda x: x.any(),
"_status": _status,
},
aggfunc=agg,
)

# Isolate drops to create merged pairs
Expand All @@ -398,9 +415,7 @@ def simplify_pairs(
_1st = pd.DataFrame()
_2nd = pd.DataFrame()
for_skeleton = pd.DataFrame()
roads_cleaned = roads[
["coins_group", "coins_end", "_status", roads.geometry.name]
]
roads_cleaned = roads

# Generate counts of COINs groups for edges
coins_count = (
Expand Down Expand Up @@ -536,8 +551,12 @@ def simplify_clusters(
max_segment_length * simplification_factor
)
new_roads = pd.concat([cleaned_roads, new], ignore_index=True).explode()
agg: dict[str, str | typing.Callable] = {"_status": _status}
for c in new_roads.columns.drop(new_roads.active_geometry_name):
if c != "_status":
agg[c] = "first"
new_roads = remove_false_nodes(
new_roads[~new_roads.is_empty], aggfunc={"_status": _status}
new_roads[~new_roads.is_empty], aggfunc=agg
).drop_duplicates("geometry")

return new_roads
Expand Down Expand Up @@ -718,9 +737,10 @@ def simplify_network(
geopandas.GeoDataFrame
The final, simplified road network line data.
"""

# NOTE: this keeps attributes but resets index
roads = fix_topology(roads, eps=eps)
# Merge nearby nodes (up to double of distance used in skeleton).
# NOTE: this drops attributes and resets index
roads = consolidate_nodes(roads, tolerance=max_segment_length * 2.1)

# Identify artifacts
Expand Down Expand Up @@ -859,6 +879,7 @@ def simplify_loop(
clusters = artifacts.loc[artifacts["comp"].isin(counts[counts > 2].index)].copy()

if not singles.empty:
# NOTE: this drops attributes
roads = simplify_singletons(
singles,
roads,
Expand Down Expand Up @@ -887,4 +908,6 @@ def simplify_loop(
consolidation_tolerance=consolidation_tolerance,
)

if "coins_group" in roads.columns:
roads = roads.drop(columns=[c for c in roads.columns if c.startswith("coins_")])
return roads
Binary file modified sgeop/tests/data/apalachicola_simplified_exclusion_mask.parquet
Binary file not shown.
Binary file modified sgeop/tests/data/apalachicola_simplified_standard.parquet
Binary file not shown.
1 change: 1 addition & 0 deletions sgeop/tests/test_simplify.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def test_simplify_network_full_fua(aoi, tol, known_length):
geopandas.read_parquet(full_fua_data / aoi / "original.parquet")
)
observed_length = observed.geometry.length.sum()
assert "highway" in observed.columns

# storing GH artifacts
artifact_dir = ci_artifacts / aoi
Expand Down

0 comments on commit 59344c5

Please sign in to comment.