mypy type checking review, workflow, hook (#134)
* mypy - conftest.py

* mypy - conftest.py [2]

* mypy - continuity.py, simplify.py

* mypy - nodes.py

* mypy - geometry.py

* mypy - artifacts.py

* add mypy workflow & pre-commit

* single -> double quotes

* typo in ci path

* remove mypy from pre-commit - bump ruff version
jGaboardi authored Dec 4, 2024
1 parent ed6bf40 commit 903a253
Showing 9 changed files with 95 additions and 45 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/mypy.yml
@@ -0,0 +1,40 @@
name: MyPy Type Checking

on:
  push:
    branches: [main]
  pull_request:
    branches:
      - "*"
  schedule:
    - cron: "59 23 * * *"
  workflow_dispatch:
    inputs:
      version:
        description: Manual Type Checking
        default: type_checking
        required: false

jobs:
  mypy:
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}

    steps:
      - uses: actions/checkout@v4

      - name: setup micromamba
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-file: ci/py313_sgeop-latest.yaml
          create-args: >-
            mypy

      - name: Install package
        run: pip install .

      - name: Check package
        run: |
          mypy sgeop/ --ignore-missing-imports --install-types --non-interactive
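
Aside (a sketch, not part of this commit): the workflow's final step can be reproduced locally with the same flags. mypy also exposes a small programmatic API, so the check can be driven from Python, assuming mypy is installed in the active environment:

    # Sketch: reproduce the CI check locally via mypy's programmatic API.
    # Flags mirror the "Check package" step above.
    from mypy import api

    report, errors, exit_status = api.run(
        ["sgeop/", "--ignore-missing-imports", "--install-types", "--non-interactive"]
    )
    print(report)         # per-file findings, same text as the CLI's stdout
    if exit_status != 0:  # non-zero exit means type errors were found
        print(errors)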
2 changes: 1 addition & 1 deletion .github/workflows/testing.yml
@@ -7,7 +7,7 @@
     branches:
       - "*"
   schedule:
-    - cron: '59 23 * * *'
+    - cron: "59 23 * * *"
   workflow_dispatch:
     inputs:
       version:
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,7 +1,7 @@
 files: "sgeop\/"
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.8.0"
+    rev: "v0.8.1"
     hooks:
       - id: ruff
       - id: ruff-format
27 changes: 14 additions & 13 deletions sgeop/artifacts.py
@@ -1,4 +1,5 @@
 import logging
+import typing
 import warnings

 import geopandas as gpd
@@ -74,7 +75,7 @@ def get_artifacts(
         May also be the returned value of ``threshold`` or ``threshold_fallback``.
     """

-    def _relate(neighs: tuple, cond: callable) -> bool:
+    def _relate(neighs: tuple, cond: typing.Callable) -> bool:
         """Helper for relating artifacts."""
         return len(neighs) > 0 and cond(polys.loc[list(neighs), "is_artifact"])
@@ -186,7 +187,7 @@ def ccss_special_case(
     Returns
     -------
-    np.ndarray
+    numpy.ndarray
         New linestrings for reconnections.
     """

@@ -243,15 +244,15 @@ def ccss_special_case(
         shapely.shortest_line(missing.geometry, combined_linework).tolist()
     )

-    return new_connections
+    return np.array(new_connections)


 def filter_connections(
     primes: gpd.GeoSeries,
     relevant_targets: gpd.GeoDataFrame,
     conts_groups: gpd.GeoDataFrame,
     new_connections: np.ndarray,
-) -> tuple[np.ndarray]:
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
     """The skeleton returns connections to all the nodes. We need to keep only
     some, if there are multiple connections to a single C. We don't touch the other.
@@ -268,7 +269,7 @@
     Returns
     -------
-    tuple[np.ndarray]
+    tuple[np.ndarray, np.ndarray, np.ndarray]
         - Updated ``new_connections``
         - Connections intersecting ``C``
         - Connections intersecting ``primes``
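
Aside on the widened ``tuple`` annotations here and below (a sketch, not part of this commit): under PEP 585 generics, ``tuple[np.ndarray]`` describes a 1-tuple, so returning three arrays requires one member type per position (or ``...`` for a variable-length homogeneous tuple):

    # Sketch: to mypy, `tuple[int]` is a tuple of exactly one int, so a
    # three-element return needs all three member types spelled out.
    def fixed() -> tuple[int, int, int]:
        return 1, 2, 3

    def variadic() -> tuple[int, ...]:  # any number of ints
        return 1, 2, 3, 4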
@@ -615,7 +616,7 @@ def one_remaining_c(
     split_points: list,
     clip_limit: float | int,
     consolidation_tolerance: float | int = 10,
-) -> list:
+) -> np.ndarray:
     """Resolve situations where there is 1 highest hierarchy and 1 remaining node.

     This function is called within ``artifacts.nx_gx()``:
         * first SUBRANCH of BRANCH 3:
@@ -651,7 +652,7 @@ def one_remaining_c(
     Returns
     -------
-    geopandas.GeoDataFrame
+    numpy.ndarray
         Newly resolved edges. The ``split_points`` parameter is also updated inplace.
     """

@@ -744,7 +745,7 @@ def loop(
         logger.debug("SNAP TO primes")
         snap_to = primes

-    possible_dangle, splitters = voronoi_skeleton(
+    _possible_dangle, splitters = voronoi_skeleton(
         segments,  # use edges that are being dropped
         poly=artifact.geometry,
         snap_to=snap_to,
@@ -755,7 +756,7 @@
     split_points.extend(splitters)

     possible_dangle = gpd.GeoDataFrame(
-        geometry=possible_dangle[shapely.disjoint(possible_dangle, dropped)]
+        geometry=_possible_dangle[shapely.disjoint(_possible_dangle, dropped)]
     )
     if not possible_dangle.empty:
         comps = graph.Graph.build_contiguity(
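
Aside on the ``_possible_dangle`` rename (and ``_skel`` below; a sketch, not part of this commit): mypy pins a variable to the type of its first assignment, so rebinding the same name to a different type is reported as an incompatible assignment; a second name keeps each binding's inferred type stable:

    # Sketch: rebinding one name from ndarray to GeoDataFrame fails mypy;
    # the underscore-prefixed intermediate name sidesteps the rebind.
    import geopandas as gpd
    import numpy as np
    import shapely

    _lines = np.array([shapely.LineString([(0, 0), (1, 1)])])  # np.ndarray
    # lines = np.array(...); lines = gpd.GeoDataFrame(...)  # incompatible rebind
    lines = gpd.GeoDataFrame(geometry=_lines)  # GeoDataFrame under its own name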
@@ -1448,11 +1449,11 @@ def nx_gx_cluster(
     # if we used only segments, we need to remove dangles
     if len(connection) == 1:
         connection = connection.item()
-        skel = gpd.GeoSeries(skel)
-        skel = skel[
-            skel.disjoint(edges_on_boundary.union_all()) | skel.intersects(connection)
+        _skel = gpd.GeoSeries(skel)
+        _skel = _skel[
+            _skel.disjoint(edges_on_boundary.union_all()) | _skel.intersects(connection)
         ]
-        welded = gpd.GeoSeries(weld_edges(skel))
+        welded = gpd.GeoSeries(weld_edges(_skel))
         skel = welded[
             ~(
                 ((welded.length < min_dangle_length) & (is_dangle(welded)))
2 changes: 1 addition & 1 deletion sgeop/continuity.py
@@ -47,7 +47,7 @@ def continuity(
 def get_stroke_info(
     artifacts: geopandas.GeoSeries | geopandas.GeoDataFrame,
     roads: geopandas.GeoSeries | geopandas.GeoDataFrame,
-) -> tuple[list[int]]:
+) -> tuple[list[int], list[int], list[int], list[int]]:
     """Generate information about strokes within ``artifacts`` and the
     resulting lists can be assigned as columns to ``artifacts``. Classifies
     the strokes within the CES typology.
26 changes: 15 additions & 11 deletions sgeop/geometry.py
@@ -14,7 +14,9 @@
 from .nodes import consolidate_nodes


-def _is_within(line: np.ndarray, poly: shapely.Polygon, rtol: float = 1e-4) -> bool:
+def _is_within(
+    line: np.ndarray, poly: shapely.Polygon, rtol: float = 1e-4
+) -> np.ndarray:
     """Check if the line is within a polygon with a set relative tolerance.

     Parameters
@@ -28,7 +30,7 @@
     Returns
     -------
-    np.ndarray[bool]
+    np.ndarray
         ``True`` if ``line`` is either entirely within ``poly`` or if
         ``line`` is within `poly`` based on a relaxed ``rtol`` relative tolerance.
     """
@@ -104,7 +106,7 @@ def voronoi_skeleton(
     secondary_snap_to: None | gpd.GeoSeries = None,
     clip_limit: None | float | int = 2,
     consolidation_tolerance: None | float | int = None,
-) -> tuple[np.ndarray]:
+) -> tuple[np.ndarray, np.ndarray]:
     """
     Returns average geometry.
@@ -171,7 +173,7 @@
     rigde_vertices = np.array(voronoi_diagram.ridge_vertices)

     # iterate over segment-pairs and keep rigdes between input geometries
-    edgelines = []
+    _edgelines = []
     to_add = []
     splitters = []
@@ -234,9 +236,9 @@
             ),
         )
         # add final edgeline to the list
-        edgelines.append(edgeline)
+        _edgelines.append(edgeline)

-    edgelines = np.array(edgelines)[~(shapely.is_empty(edgelines))]
+    edgelines = np.array(_edgelines)[~(shapely.is_empty(_edgelines))]

     if edgelines.shape[0] > 0:
         # if there is no explicit snapping target, snap to the boundary of the polygon
@@ -270,7 +272,7 @@
         edgelines = _as_parts(edgelines)
         edgelines = _consolidate(edgelines, consolidation_tolerance)

-    return edgelines, splitters
+    return edgelines, np.array(splitters)


 def _remove_sliver(
@@ -326,8 +328,8 @@ def snap_to_targets(
         Lines to add and points where to split.
     """

-    to_add = []
-    to_split = []
+    to_add: list = []
+    to_split: list = []

     # generate graph from lines
     comp_labels, comp_counts, components = _prep_components(edgelines)
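
Aside on the annotated empty lists here and in ``simplify.py`` (a sketch, not part of this commit): a bare ``[]`` gives mypy no element type to infer, which triggers its ``var-annotated`` error; even an unparameterized ``list`` annotation, as above, satisfies the check:

    # Sketch: an empty literal has no inferable element type, so mypy
    # asks for an annotation ("Need type annotation for ...").
    import shapely

    points: list[shapely.Point] = []  # unannotated `points = []` would error
    points.append(shapely.Point(0, 0))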
@@ -377,7 +379,7 @@
     return to_add, to_split


-def _prep_components(lines: np.ndarray) -> tuple[pd.Series, pd.Series, gpd.GeoSeries]:
+def _prep_components(
+    lines: np.ndarray | gpd.GeoSeries,
+) -> tuple[pd.Series, pd.Series, gpd.GeoSeries]:
     """Helper for preparing graph components & labels in PySAL."""

     # cast edgelines to gdf
@@ -398,7 +402,7 @@
     return comp_labels, comp_counts, components


-def _split_add(line: shapely.LineString, splits: list, adds: list) -> tuple[list]:
+def _split_add(line: shapely.LineString, splits: list, adds: list) -> tuple[list, list]:
     """Helper for preparing splitter points & added lines."""
     splits.append(shapely.get_point(line, -1))
     adds.append(line)
8 changes: 5 additions & 3 deletions sgeop/nodes.py
@@ -143,7 +143,7 @@ def get_components(
 def weld_edges(
     edgelines: list | np.ndarray | gpd.GeoSeries,
     ignore: None | gpd.GeoSeries = None,
-) -> list:
+) -> list | np.ndarray | gpd.GeoSeries:
     """Combine lines sharing an endpoint (if only 2 lines share that point).

     Lightweight version of ``remove_false_nodes()``.
@@ -221,7 +221,7 @@ def _identify_degree_mismatch(

 def _makes_loop_contact(
     edges: gpd.GeoDataFrame, sindex_kws: dict
-) -> tuple[gpd.GeoSeries]:
+) -> tuple[gpd.GeoSeries, gpd.GeoSeries]:
     """Helper to identify:

     1. loop nodes intersecting non-loops
     2. loop nodes intersecting other loops
@@ -247,7 +247,9 @@
     return nodes_non_loops.geometry, nodes_loops.geometry


-def _loops_and_non_loops(edges: gpd.GeoDataFrame) -> tuple[gpd.GeoDataFrame]:
+def _loops_and_non_loops(
+    edges: gpd.GeoDataFrame,
+) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
     """Bifurcate edge gdf into loops and non-loops."""
     loop_mask = edges.is_ring
     not_loops = edges[~loop_mask]
15 changes: 8 additions & 7 deletions sgeop/simplify.py
@@ -5,6 +5,7 @@
 import momepy
 import numpy as np
 import pandas as pd
+import shapely
 from libpysal import graph
 from scipy import sparse
@@ -34,8 +35,8 @@ def _link_nodes_artifacts(
     step: str,
     roads: gpd.GeoDataFrame,
     artifacts: gpd.GeoDataFrame,
-    eps: float,
-) -> tuple[gpd.GeoDataFrame]:
+    eps: None | float,
+) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
     """Helper to prep nodes & artifacts when simplifying singletons & pairs."""

     # Get nodes from the network
@@ -179,9 +180,9 @@ def simplify_singletons(
artifacts["ces_type"] = ces_type

# Collect changes
to_drop = []
to_add = []
split_points = []
to_drop: list[int] = []
to_add: list[int] = []
split_points: list[shapely.Point] = []

# Isolate planar artifacts
planar = artifacts[~artifacts["non_planar"]].copy()
@@ -500,8 +501,8 @@ def simplify_clusters(
     nodes = momepy.nx_to_gdf(momepy.node_degree(momepy.gdf_to_nx(roads)), lines=False)

     # Collect changes
-    to_drop = []
-    to_add = []
+    to_drop: list[int] = []
+    to_add: list[int] = []

     for _, artifact in artifacts.groupby("comp"):
         # Get artifact cluster polygon
18 changes: 10 additions & 8 deletions sgeop/tests/conftest.py
@@ -12,7 +12,7 @@
 # See gh#121
 sgeop.simplify.DEBUGGING = True

-line_collection = (
+line_collection = (  # type: ignore[valid-type, misc]
     list[shapely.LineString]
     | tuple[shapely.LineString]
     | numpy.ndarray
@@ -50,7 +50,8 @@


 def polygonize(
-    collection: line_collection, as_geom: bool = True
+    collection: line_collection,  # type: ignore[valid-type]
+    as_geom: bool = True,  # type: ignore[valid-type]
 ) -> shapely.Polygon | geopandas.GeoSeries:
     """Testing helper -- Create polygon from collection of lines."""
     if isinstance(collection, pandas.Series | geopandas.GeoSeries):
@@ -63,13 +64,13 @@ def polygonize(
     return shapely.polygonize(collection).buffer(0)


-def is_geopandas(collection: geometry_collection) -> bool:
+def is_geopandas(collection: geometry_collection) -> bool:  # type: ignore[valid-type]
     return isinstance(collection, geopandas.GeoSeries | geopandas.GeoDataFrame)


 def geom_test(
-    collection1: geometry_collection,
-    collection2: geometry_collection,
+    collection1: geometry_collection,  # type: ignore[valid-type]
+    collection2: geometry_collection,  # type: ignore[valid-type]
     tolerance: float = 1e-1,
     aoi: None | str = None,
 ) -> bool:
@@ -81,8 +82,8 @@ def geom_test(
     if not is_geopandas(collection2):
         collection2 = geopandas.GeoSeries(collection2)

-    geoms1 = collection1.geometry.normalize()
-    geoms2 = collection2.geometry.normalize()
+    geoms1 = collection1.geometry.normalize()  # type: ignore[attr-defined]
+    geoms2 = collection2.geometry.normalize()  # type: ignore[attr-defined]

     if aoi and aoi.startswith("apalachicola"):
         # Varied index order across OSs.
@@ -99,7 +100,7 @@
         g2 = geoms2.loc[ix]
         if (
             not shapely.equals_exact(g1, g2, tolerance=tolerance)
-            and ix not in KNOWN_BAD_GEOMS[aoi]
+            and ix not in KNOWN_BAD_GEOMS[aoi]  # type: ignore[index]
         ):
             unexpected_bad[ix] = {
                 "n_coords": {
@@ -112,6 +113,7 @@
         raise AssertionError(
             f"Problem in '{aoi}' – check locs: {unexpected_bad}"
         ) from None
+    return True


 def pytest_addoption(parser):
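
Aside on the ``# type: ignore[valid-type]`` comments above (a sketch, not part of this commit): ``line_collection`` and ``geometry_collection`` are module-level unions used as annotations, and without an explicit alias declaration mypy treats them as ordinary variables, flagging every use; declaring them with ``typing.TypeAlias`` (Python 3.10+) is the alternative these ignores trade off against. A simplified illustration with an assumed two-member union:

    # Sketch: an explicit TypeAlias lets mypy accept the union in
    # annotations without per-use `# type: ignore` comments.
    import typing

    import geopandas

    geometry_collection: typing.TypeAlias = (
        geopandas.GeoSeries | geopandas.GeoDataFrame
    )

    def is_geopandas(collection: geometry_collection) -> bool:
        return isinstance(collection, geopandas.GeoSeries | geopandas.GeoDataFrame)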
