Skip to content

Commit

Permalink
Merge pull request #324 from nialov/209-add-distance-to-anomaly-v2
Browse files Browse the repository at this point in the history
Implement distance_to_anomaly (Closes #209)
  • Loading branch information
nmaarnio authored Mar 15, 2024
2 parents f3d84ed + fea0258 commit e01bf58
Show file tree
Hide file tree
Showing 7 changed files with 920 additions and 19 deletions.
3 changes: 3 additions & 0 deletions docs/raster_processing/distance_to_anomaly.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Distance to anomaly

::: eis_toolkit.raster_processing.distance_to_anomaly
235 changes: 235 additions & 0 deletions eis_toolkit/raster_processing/distance_to_anomaly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
from itertools import chain
from numbers import Number
from pathlib import Path
from tempfile import TemporaryDirectory

import geopandas as gpd
import numpy as np
import rasterio
from beartype import beartype
from beartype.typing import Literal, Optional, Tuple, Union
from rasterio import profiles

from eis_toolkit.exceptions import EmptyDataException, InvalidParameterValueException
from eis_toolkit.utilities.checks.raster import check_raster_profile
from eis_toolkit.utilities.miscellaneous import row_points, toggle_gdal_exceptions
from eis_toolkit.vector_processing.distance_computation import distance_computation


def _check_threshold_criteria_and_value(threshold_criteria, threshold_criteria_value):
if threshold_criteria in {"lower", "higher"} and not isinstance(threshold_criteria_value, Number):
raise InvalidParameterValueException(
f"Expected threshold_criteria_value {threshold_criteria_value} "
"to be a single number rather than a tuple."
)
if threshold_criteria in {"in_between", "outside"}:
if not isinstance(threshold_criteria_value, tuple):
raise InvalidParameterValueException(
f"Expected threshold_criteria_value ({threshold_criteria_value}) to be a tuple rather than a number."
)
if threshold_criteria_value[0] >= threshold_criteria_value[1]:
raise InvalidParameterValueException(
f"Expected first value in threshold_criteria_value ({threshold_criteria_value})"
"tuple to be lower than the second."
)


@beartype
def distance_to_anomaly(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
threshold_criteria_value: Union[Tuple[Number, Number], Number],
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]:
"""Calculate distance from raster cell to nearest anomaly.
The criteria for what is anomalous can be defined as a single number and
criteria text of "higher" or "lower". Alternatively, the definition can be
a range where values inside (criteria text of "within") or outside are
marked as anomalous (criteria text of "outside"). If anomaly_raster_profile does
contain "nodata" key, np.nan is assumed to correspond to nodata values.
Args:
anomaly_raster_profile: The raster profile in which the distances
to the nearest anomalous value are determined.
anomaly_raster_data: The raster data in which the distances
to the nearest anomalous value are determined.
threshold_criteria_value: Value(s) used to define anomalous.
If the threshold criteria requires a tuple of values,
the first value should be the minimum and the second
the maximum value.
threshold_criteria: Method to define anomalous.
Returns:
A 2D numpy array with the distances to anomalies computed
and the original anomaly raster profile.
"""
check_raster_profile(raster_profile=anomaly_raster_profile)
_check_threshold_criteria_and_value(
threshold_criteria=threshold_criteria, threshold_criteria_value=threshold_criteria_value
)

out_image = _distance_to_anomaly(
anomaly_raster_profile=anomaly_raster_profile,
anomaly_raster_data=anomaly_raster_data,
threshold_criteria=threshold_criteria,
threshold_criteria_value=threshold_criteria_value,
)
return out_image, anomaly_raster_profile


@beartype
def distance_to_anomaly_gdal(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
threshold_criteria_value: Union[Tuple[Number, Number], Number],
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
output_path: Path,
verbose: bool = False,
) -> Path:
"""Calculate distance from raster cell to nearest anomaly.
Distance is calculated for each cell in the anomaly raster and saved to a
new raster at output_path. The criteria for what is anomalous can be
defined as a single number and criteria text of "higher" or "lower".
Alternatively, the definition can be a range where values inside
(criteria text of "within") or outside are marked as anomalous
(criteria text of "outside"). If anomaly_raster_profile does
contain "nodata" key, np.nan is assumed to correspond to nodata values.
Does not work on Windows.
Args:
anomaly_raster_profile: The raster profile in which the distances
to the nearest anomalous value are determined.
anomaly_raster_data: The raster data in which the distances
to the nearest anomalous value are determined.
threshold_criteria_value: Value(s) used to define anomalous.
threshold_criteria: Method to define anomalous.
output_path: The path to the raster with the distances to anomalies
calculated.
verbose: Whether to print gdal_proximity output.
Returns:
The path to the raster with the distances to anomalies calculated.
"""
check_raster_profile(raster_profile=anomaly_raster_profile)
_check_threshold_criteria_and_value(
threshold_criteria=threshold_criteria, threshold_criteria_value=threshold_criteria_value
)

return _distance_to_anomaly_gdal(
output_path=output_path,
anomaly_raster_profile=anomaly_raster_profile,
anomaly_raster_data=anomaly_raster_data,
threshold_criteria=threshold_criteria,
threshold_criteria_value=threshold_criteria_value,
verbose=verbose,
)


def _fits_criteria(
threshold_criteria_value: Union[Tuple[Number, Number], Number],
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
anomaly_raster_data: np.ndarray,
nodata_value: Optional[Number],
) -> np.ndarray:

criteria_dict = {
"lower": lambda anomaly_raster_data: anomaly_raster_data < threshold_criteria_value,
"higher": lambda anomaly_raster_data: anomaly_raster_data > threshold_criteria_value,
"in_between": lambda anomaly_raster_data: np.logical_and(
anomaly_raster_data > threshold_criteria_value[0], # type: ignore
anomaly_raster_data < threshold_criteria_value[1], # type: ignore
),
"outside": lambda anomaly_raster_data: np.logical_or(
anomaly_raster_data < threshold_criteria_value[0], # type: ignore
anomaly_raster_data > threshold_criteria_value[1], # type: ignore
),
}
mask = anomaly_raster_data == nodata_value if nodata_value is not None else np.isnan(anomaly_raster_data)

return np.where(mask, False, criteria_dict[threshold_criteria](anomaly_raster_data))


def _write_binary_anomaly_raster(tmp_dir: Path, anomaly_raster_profile, data_fits_criteria: np.ndarray):
anomaly_raster_binary_path = tmp_dir / "anomaly_raster_binary.tif"

anomaly_raster_binary_profile = {**anomaly_raster_profile, **dict(dtype=rasterio.uint8, count=1, nodata=None)}
with rasterio.open(anomaly_raster_binary_path, mode="w", **anomaly_raster_binary_profile) as anomaly_raster_binary:
anomaly_raster_binary.write(data_fits_criteria.astype(rasterio.uint8), 1)

return anomaly_raster_binary_path


def _distance_to_anomaly_gdal(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
threshold_criteria_value: Union[Tuple[Number, Number], Number],
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
output_path: Path,
verbose: bool,
):
from osgeo_utils import gdal_proximity

data_fits_criteria = _fits_criteria(
threshold_criteria=threshold_criteria,
threshold_criteria_value=threshold_criteria_value,
anomaly_raster_data=anomaly_raster_data,
nodata_value=anomaly_raster_profile.get("nodata"),
)

with TemporaryDirectory() as tmp_dir_str:
tmp_dir = Path(tmp_dir_str)
anomaly_raster_binary_path = _write_binary_anomaly_raster(
tmp_dir=tmp_dir, anomaly_raster_profile=anomaly_raster_profile, data_fits_criteria=data_fits_criteria
)
with toggle_gdal_exceptions():
gdal_proximity.gdal_proximity(
src_filename=str(anomaly_raster_binary_path),
dst_filename=str(output_path),
alg_options=("VALUES=1", "DISTUNITS=GEO"),
quiet=not verbose,
)

return output_path


def _distance_to_anomaly(
anomaly_raster_profile: Union[profiles.Profile, dict],
anomaly_raster_data: np.ndarray,
threshold_criteria_value: Union[Tuple[Number, Number], Number],
threshold_criteria: Literal["lower", "higher", "in_between", "outside"],
) -> np.ndarray:
data_fits_criteria = _fits_criteria(
threshold_criteria=threshold_criteria,
threshold_criteria_value=threshold_criteria_value,
anomaly_raster_data=anomaly_raster_data,
nodata_value=anomaly_raster_profile.get("nodata"),
)
if np.sum(data_fits_criteria) == 0:
raise EmptyDataException(
" ".join(
[
"Expected the passed threshold criteria to match at least some data.",
f"Check that the values of threshold_criteria ({threshold_criteria})",
f"and threshold_criteria_value {threshold_criteria_value}",
"match at least part of the data.",
]
)
)

cols = np.arange(anomaly_raster_data.shape[1])
rows = np.arange(anomaly_raster_data.shape[0])

all_points_by_rows = [
row_points(row=row, cols=cols[data_fits_criteria[row]], raster_transform=anomaly_raster_profile["transform"])
for row in rows
]
all_points = list(chain(*all_points_by_rows))
all_points_gdf = gpd.GeoDataFrame(geometry=all_points, crs=anomaly_raster_profile["crs"])

distance_array = distance_computation(raster_profile=anomaly_raster_profile, geometries=all_points_gdf)

return distance_array
29 changes: 29 additions & 0 deletions eis_toolkit/utilities/checks/raster.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import rasterio
import rasterio.profiles
import rasterio.transform
from beartype import beartype
from beartype.typing import Iterable, Sequence, Union

from eis_toolkit.exceptions import InvalidParameterValueException


@beartype
def check_matching_cell_size(
Expand Down Expand Up @@ -166,3 +170,28 @@ def check_quadratic_pixels(raster: rasterio.io.DatasetReader) -> bool:
return True
else:
return False


@beartype
def check_raster_profile(
raster_profile: Union[rasterio.profiles.Profile, dict],
):
"""Check raster profile values.
Checks that width and height are sensible and that the profile contains a
transform.
"""
raster_width = raster_profile.get("width")
raster_height = raster_profile.get("height")

if not isinstance(raster_width, int) or not isinstance(raster_height, int):
raise InvalidParameterValueException(
f"Expected raster_profile to contain integer width and height. {raster_profile}"
)

raster_transform = raster_profile.get("transform")

if not isinstance(raster_transform, rasterio.transform.Affine):
raise InvalidParameterValueException(
f"Expected raster_profile to contain an affine transformation. {raster_profile}"
)
48 changes: 48 additions & 0 deletions eis_toolkit/utilities/miscellaneous.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from contextlib import contextmanager
from numbers import Number

import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Any, List, Optional, Sequence, Tuple, Union
from osgeo import gdal
from rasterio import transform
from shapely.geometry import Point

from eis_toolkit.exceptions import InvalidColumnException, InvalidColumnIndexException, InvalidDataShapeException
from eis_toolkit.utilities.checks.dataframe import check_columns_valid
Expand Down Expand Up @@ -362,3 +366,47 @@ def rename_columns(df: pd.DataFrame, colnames=Sequence[str]) -> pd.DataFrame:
names[columns[i]] = colnames[i]

return df.rename(columns=names)


def row_points(
row: int,
cols: np.ndarray,
raster_transform: transform.Affine,
) -> List[Point]:
"""Transform raster row cells to shapely Points.
Args:
row: Row index of cells to transfer
cols: Array of column indexes to transfer
raster_transform: Affine transformation matrix of the raster
Returns:
List of shapely Points
"""
# transform.xy accepts either cols or rows as an array. The other then has
# to be an integer. The resulting x and y point coordinates are therefore
# in a 1D array

if len(cols) == 0:
return []

point_xs, point_ys = zip(*[raster_transform * (col + 0.5, row + 0.5) for col in cols])
# point_xs, point_ys = transform.xy(transform=raster_transform, cols=cols, rows=row)
return [Point(x, y) for x, y in zip(point_xs, point_ys)]


@contextmanager
def toggle_gdal_exceptions():
"""Toggle GDAL exceptions using a context manager.
If the exceptions are already enabled, this function will do nothing.
"""
already_has_exceptions_enabled = False
try:
if gdal.GetUseExceptions() != 0:
already_has_exceptions_enabled = True
gdal.UseExceptions()
yield
finally:
if not already_has_exceptions_enabled:
gdal.DontUseExceptions()
30 changes: 11 additions & 19 deletions eis_toolkit/vector_processing/distance_computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
from beartype import beartype
from beartype.typing import Union
from rasterio import profiles, transform
from shapely.geometry import Point
from shapely.geometry.base import BaseGeometry, BaseMultipartGeometry

from eis_toolkit.exceptions import EmptyDataFrameException, InvalidParameterValueException, NonMatchingCrsException
from eis_toolkit.exceptions import EmptyDataFrameException, NonMatchingCrsException
from eis_toolkit.utilities.checks.raster import check_raster_profile
from eis_toolkit.utilities.miscellaneous import row_points


@beartype
Expand All @@ -27,21 +28,12 @@ def distance_computation(raster_profile: Union[profiles.Profile, dict], geometri
if geometries.shape[0] == 0:
raise EmptyDataFrameException("Expected GeoDataFrame to not be empty.")

check_raster_profile(raster_profile=raster_profile)

raster_width = raster_profile.get("width")
raster_height = raster_profile.get("height")

if not isinstance(raster_width, int) or not isinstance(raster_height, int):
raise InvalidParameterValueException(
f"Expected raster_profile to contain integer width and height. {raster_profile}"
)

raster_transform = raster_profile.get("transform")

if not isinstance(raster_transform, transform.Affine):
raise InvalidParameterValueException(
f"Expected raster_profile to contain an affine transformation. {raster_profile}"
)

return _distance_computation(
raster_width=raster_width, raster_height=raster_height, raster_transform=raster_transform, geometries=geometries
)
Expand All @@ -53,12 +45,12 @@ def _calculate_row_distances(
raster_transform: transform.Affine,
geometries_unary_union: Union[BaseGeometry, BaseMultipartGeometry],
) -> np.ndarray:
# transform.xy accepts either cols or rows as an array. The other then has
# to be an integer. The resulting x and y point coordinates are therefore
# in a 1D array
point_xs, point_ys = transform.xy(transform=raster_transform, cols=cols, rows=row)
row_points = [Point(x, y) for x, y in zip(point_xs, point_ys)]
row_distances = np.array([point.distance(geometries_unary_union) for point in row_points])
row_distances = np.array(
[
point.distance(geometries_unary_union)
for point in row_points(row=row, cols=cols, raster_transform=raster_transform)
]
)
return row_distances


Expand Down
Loading

0 comments on commit e01bf58

Please sign in to comment.