Skip to content

Commit

Permalink
feat: implement distance_to_anomaly
Browse files Browse the repository at this point in the history
The existing distance_computation implementation has performance issues.
These can be avoided by using the alternative functionality, which relies on
the gdal_proximity function instead. The GDAL implementation is orders of
magnitude faster.
  • Loading branch information
nialov committed Feb 14, 2024
1 parent d280035 commit c3c40b9
Show file tree
Hide file tree
Showing 6 changed files with 544 additions and 7 deletions.
3 changes: 3 additions & 0 deletions docs/raster_processing/distance_to_anomaly.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Distance to anomaly

::: eis_toolkit.raster_processing.distance_to_anomaly
199 changes: 199 additions & 0 deletions eis_toolkit/raster_processing/distance_to_anomaly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
from itertools import chain
from pathlib import Path
from tempfile import TemporaryDirectory

import geopandas as gpd
import numpy as np
import rasterio
from beartype import beartype
from beartype.typing import Literal, Tuple, Union
from rasterio import profiles, transform

from eis_toolkit.exceptions import InvalidParameterValueException
from eis_toolkit.utilities.miscellaneous import row_points, toggle_gdal_exceptions
from eis_toolkit.vector_processing.distance_computation import distance_computation

# Value(s) that define the anomaly threshold: either a single float or a pair of floats (a range).
THRESHOLD_CRITERIA_VALUE_TYPE = Union[Tuple[float, float], float]
# Names of the supported ways to compare raster values against the threshold value(s).
THRESHOLD_CRITERIA_TYPE = Literal["lower", "higher", "in_between", "outside"]


@beartype
def distance_to_anomaly(
    raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_data: np.ndarray,
    threshold_criteria_value: THRESHOLD_CRITERIA_VALUE_TYPE,
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
) -> np.ndarray:
    """Calculate the distance from each raster cell to the nearest anomalous cell.

    What counts as anomalous is defined by a threshold value and a criteria
    name. With a single number, use "lower" or "higher". With a range of two
    numbers, use "in_between" to mark values inside the range as anomalous or
    "outside" to mark values outside of it.

    Args:
        raster_profile: Profile of the raster for which the distances to the
            nearest anomalous value are determined. Must contain integer
            "width" and "height" and an affine "transform".
        anomaly_raster_profile: Profile of the raster that contains the
            anomalies.
        anomaly_raster_data: Values of the anomaly raster.
        threshold_criteria_value: Value(s) used to define what is anomalous.
        threshold_criteria: Method used to define what is anomalous.

    Returns:
        A 2D numpy array with the distances to anomalies computed.

    Raises:
        InvalidParameterValueException: raster_profile does not contain
            integer width and height or an affine transformation.
    """
    dimensions = (raster_profile.get("width"), raster_profile.get("height"))
    if not all(isinstance(dimension, int) for dimension in dimensions):
        raise InvalidParameterValueException(
            f"Expected raster_profile to contain integer width and height. {raster_profile}"
        )

    if not isinstance(raster_profile.get("transform"), transform.Affine):
        raise InvalidParameterValueException(
            f"Expected raster_profile to contain an affine transformation. {raster_profile}"
        )

    # The profile is valid; the actual computation lives in the private helper.
    distances = _distance_to_anomaly(
        raster_profile=raster_profile,
        anomaly_raster_profile=anomaly_raster_profile,
        anomaly_raster_data=anomaly_raster_data,
        threshold_criteria_value=threshold_criteria_value,
        threshold_criteria=threshold_criteria,
    )
    return distances


@beartype
def distance_to_anomaly_gdal(
    anomaly_raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_data: np.ndarray,
    threshold_criteria_value: THRESHOLD_CRITERIA_VALUE_TYPE,
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
    output_path: Path,
    verbose: bool = False,
) -> Path:
    """Calculate the distance from each raster cell to the nearest anomalous cell using GDAL.

    The distance is calculated for every cell of the anomaly raster and the
    result is saved as a new raster at output_path.

    What counts as anomalous is defined by a threshold value and a criteria
    name. With a single number, use "lower" or "higher". With a range of two
    numbers, use "in_between" to mark values inside the range as anomalous or
    "outside" to mark values outside of it.

    Does not work on Windows.

    Args:
        anomaly_raster_profile: Profile of the raster that contains the
            anomalies.
        anomaly_raster_data: Values of the anomaly raster.
        threshold_criteria_value: Value(s) used to define what is anomalous.
        threshold_criteria: Method used to define what is anomalous.
        output_path: Path where the raster with the calculated distances is
            written.
        verbose: Whether to print gdal_proximity output.

    Returns:
        The path to the raster with the distances to anomalies calculated.
    """
    # Argument types are checked by beartype; the work is done by the private helper.
    written_path = _distance_to_anomaly_gdal(
        anomaly_raster_profile=anomaly_raster_profile,
        anomaly_raster_data=anomaly_raster_data,
        threshold_criteria_value=threshold_criteria_value,
        threshold_criteria=threshold_criteria,
        output_path=output_path,
        verbose=verbose,
    )
    return written_path


def _fits_criteria(
    threshold_criteria_value: THRESHOLD_CRITERIA_VALUE_TYPE,
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
    anomaly_raster_data: np.ndarray,
) -> np.ndarray:
    """Build a boolean mask of the cells that are anomalous.

    Args:
        threshold_criteria_value: A single number for the "lower" and "higher"
            criteria, or a tuple of two numbers (lower bound, upper bound) for
            the "in_between" and "outside" criteria.
        threshold_criteria: One of "lower", "higher", "in_between" or
            "outside". All comparisons are strict.
        anomaly_raster_data: Raster values to test.

    Returns:
        Boolean array with the same shape as anomaly_raster_data. True marks
        cells that fit the criteria. NaN cells are always False.

    Raises:
        InvalidParameterValueException: The form of threshold_criteria_value
            does not match threshold_criteria.
    """
    value_is_tuple = isinstance(threshold_criteria_value, tuple)
    if threshold_criteria in ("in_between", "outside"):
        if not value_is_tuple or len(threshold_criteria_value) != 2:
            raise InvalidParameterValueException(
                f"Expected threshold_criteria_value to be a tuple of two numbers with "
                f"threshold_criteria {threshold_criteria}. Got: {threshold_criteria_value}"
            )
        lower_bound, upper_bound = threshold_criteria_value
    else:
        if value_is_tuple:
            raise InvalidParameterValueException(
                f"Expected threshold_criteria_value to be a single number with "
                f"threshold_criteria {threshold_criteria}. Got: {threshold_criteria_value}"
            )
        lower_bound = upper_bound = threshold_criteria_value

    # Every entry returns a boolean array shaped like its input. The range
    # criteria compare against the bounds of threshold_criteria_value (not the
    # criteria name) and return the mask itself rather than index arrays.
    criteria_dict = {
        "lower": lambda data: data < upper_bound,
        "higher": lambda data: data > lower_bound,
        "in_between": lambda data: np.logical_and(data > lower_bound, data < upper_bound),
        "outside": lambda data: np.logical_or(data < lower_bound, data > upper_bound),
    }
    fits = criteria_dict[threshold_criteria](anomaly_raster_data)

    # NaN cells carry no value to compare, so they are never anomalous.
    return np.where(np.isnan(anomaly_raster_data), False, fits)


def _write_binary_anomaly_raster(tmp_dir: Path, anomaly_raster_profile, data_fits_criteria: np.ndarray):
    """Write the anomaly mask as a single-band uint8 raster into tmp_dir.

    Args:
        tmp_dir: Directory in which the raster file is created.
        anomaly_raster_profile: Profile of the anomaly raster. It is used as
            the base of the written raster's profile and is not modified.
        data_fits_criteria: Boolean mask of anomalous cells. It is written as
            band 1 after conversion to uint8.

    Returns:
        Path to the written raster, "anomaly_raster_binary.tif" in tmp_dir.
    """
    anomaly_raster_binary_path = tmp_dir / "anomaly_raster_binary.tif"

    # Build a new mapping instead of updating the passed profile in place, so
    # the caller's profile keeps its original dtype, band count and nodata.
    anomaly_raster_binary_profile = {
        **anomaly_raster_profile,
        "dtype": rasterio.uint8,
        "count": 1,
        "nodata": None,
    }
    with rasterio.open(anomaly_raster_binary_path, mode="w", **anomaly_raster_binary_profile) as anomaly_raster_binary:
        anomaly_raster_binary.write(data_fits_criteria.astype(rasterio.uint8), 1)

    return anomaly_raster_binary_path


def _distance_to_anomaly_gdal(
    anomaly_raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_data: np.ndarray,
    threshold_criteria_value: Union[Tuple[float, float], float],
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
    output_path: Path,
    verbose: bool,
):
    """Compute distances to anomalies with gdal_proximity and write them to output_path.

    The anomaly mask is first written to a temporary binary raster, which
    gdal_proximity then reads as its source.

    Args:
        anomaly_raster_profile: Profile of the raster that contains the anomalies.
        anomaly_raster_data: Values of the anomaly raster.
        threshold_criteria_value: Value(s) used to define what is anomalous.
        threshold_criteria: Method used to define what is anomalous.
        output_path: Destination path given to gdal_proximity.
        verbose: If False, gdal_proximity is run in quiet mode.

    Returns:
        output_path, unchanged.
    """
    # Imported here rather than at module level, so importing this module does not require osgeo_utils.
    from osgeo_utils import gdal_proximity

    anomaly_mask = _fits_criteria(
        threshold_criteria_value=threshold_criteria_value,
        threshold_criteria=threshold_criteria,
        anomaly_raster_data=anomaly_raster_data,
    )

    # VALUES=1: cells with value 1 in the binary raster are the proximity targets.
    # DISTUNITS=GEO: distances are reported in georeferenced units instead of pixels.
    proximity_options = ("VALUES=1", "DISTUNITS=GEO")

    with TemporaryDirectory() as tmp_dir_str:
        binary_raster_path = _write_binary_anomaly_raster(
            tmp_dir=Path(tmp_dir_str),
            anomaly_raster_profile=anomaly_raster_profile,
            data_fits_criteria=anomaly_mask,
        )
        with toggle_gdal_exceptions():
            gdal_proximity.gdal_proximity(
                src_filename=str(binary_raster_path),
                dst_filename=str(output_path),
                alg_options=proximity_options,
                quiet=not verbose,
            )

    return output_path


def _distance_to_anomaly(
    raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_data: np.ndarray,
    threshold_criteria_value: Union[Tuple[float, float], float],
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
) -> np.ndarray:
    """Compute distances to anomalies by turning anomalous cells into points.

    Every anomalous cell of the anomaly raster is converted to a point using
    the anomaly raster's transform. The points are then passed as geometries
    to distance_computation together with raster_profile.

    Args:
        raster_profile: Profile passed on to distance_computation.
        anomaly_raster_profile: Profile of the anomaly raster. Its "transform"
            and "crs" entries are used.
        anomaly_raster_data: Values of the anomaly raster.
        threshold_criteria_value: Value(s) used to define what is anomalous.
        threshold_criteria: Method used to define what is anomalous.

    Returns:
        The array returned by distance_computation.
    """
    anomaly_mask = _fits_criteria(
        threshold_criteria_value=threshold_criteria_value,
        threshold_criteria=threshold_criteria,
        anomaly_raster_data=anomaly_raster_data,
    )

    column_indices = np.arange(anomaly_raster_data.shape[1])
    row_indices = np.arange(anomaly_raster_data.shape[0])

    # For each row, keep only the columns where the mask is True and convert those cells to points.
    anomaly_points = list(
        chain.from_iterable(
            row_points(
                row=row_index,
                cols=column_indices[anomaly_mask[row_index]],
                raster_transform=anomaly_raster_profile["transform"],
            )
            for row_index in row_indices
        )
    )
    anomaly_points_gdf = gpd.GeoDataFrame(geometry=anomaly_points, crs=anomaly_raster_profile["crs"])

    return distance_computation(raster_profile=raster_profile, geometries=anomaly_points_gdf)
50 changes: 50 additions & 0 deletions eis_toolkit/utilities/miscellaneous.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from contextlib import contextmanager
from numbers import Number

import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Any, List, Optional, Sequence, Tuple, Union
from osgeo import gdal
from rasterio import transform
from shapely.geometry import Point

from eis_toolkit.exceptions import InvalidColumnException, InvalidColumnIndexException
from eis_toolkit.utilities.checks.dataframe import check_columns_valid
Expand Down Expand Up @@ -327,3 +331,49 @@ def rename_columns(df: pd.DataFrame, colnames=Sequence[str]) -> pd.DataFrame:
names[columns[i]] = colnames[i]

return df.rename(columns=names)


def row_points(
    row: int,
    cols: np.ndarray,
    raster_transform: transform.Affine,
) -> List[Point]:
    """Transform cells of one raster row to shapely Points.

    Args:
        row: Row index of the cells to transform.
        cols: Array of column indexes of the cells to transform.
        raster_transform: Affine transformation matrix of the raster.

    Returns:
        List of shapely Points, one per column index in cols and in the same
        order. The list is empty if cols is empty.
    """
    # The 0.5 offsets shift each (col, row) index by half a cell before the
    # affine transformation is applied, so the point is placed at the middle
    # of the cell rather than at the position of its integer index.
    return [Point(*(raster_transform * (col + 0.5, row + 0.5))) for col in cols]


@contextmanager
def toggle_gdal_exceptions():
    """Enable GDAL exceptions inside the with block using a context manager.

    If exceptions are already enabled when the block is entered, nothing is
    changed on entry or exit. Otherwise they are enabled on entry and disabled
    again on exit, also when the block raises.
    """
    # Read the current state BEFORE enabling exceptions. Reading it afterwards
    # would always report "enabled" and the previous state would never be restored.
    already_has_exceptions_enabled = gdal.GetUseExceptions() != 0
    if not already_has_exceptions_enabled:
        gdal.UseExceptions()
    try:
        yield
    finally:
        if not already_has_exceptions_enabled:
            gdal.DontUseExceptions()
14 changes: 7 additions & 7 deletions eis_toolkit/vector_processing/distance_computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
from beartype import beartype
from beartype.typing import Union
from rasterio import profiles, transform
from shapely.geometry import Point
from shapely.geometry.base import BaseGeometry, BaseMultipartGeometry

from eis_toolkit.exceptions import EmptyDataFrameException, InvalidParameterValueException, NonMatchingCrsException
from eis_toolkit.utilities.miscellaneous import row_points


@beartype
Expand Down Expand Up @@ -53,12 +53,12 @@ def _calculate_row_distances(
raster_transform: transform.Affine,
geometries_unary_union: Union[BaseGeometry, BaseMultipartGeometry],
) -> np.ndarray:
# transform.xy accepts either cols or rows as an array. The other then has
# to be an integer. The resulting x and y point coordinates are therefore
# in a 1D array
point_xs, point_ys = transform.xy(transform=raster_transform, cols=cols, rows=row)
row_points = [Point(x, y) for x, y in zip(point_xs, point_ys)]
row_distances = np.array([point.distance(geometries_unary_union) for point in row_points])
row_distances = np.array(
[
point.distance(geometries_unary_union)
for point in row_points(row=row, cols=cols, raster_transform=raster_transform)
]
)
return row_distances


Expand Down
160 changes: 160 additions & 0 deletions notebooks/distance_to_anomaly.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit c3c40b9

Please sign in to comment.