-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The existing distance_computation implementation has performance issues. They can be avoided by using the alternative functionality added here, which calls the gdal_proximity function instead. The GDAL implementation is orders of magnitude faster.
- Loading branch information
Showing
6 changed files
with
544 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Distance computation | ||
|
||
::: eis_toolkit.vector_processing.distance_computation |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
from itertools import chain | ||
from pathlib import Path | ||
from tempfile import TemporaryDirectory | ||
|
||
import geopandas as gpd | ||
import numpy as np | ||
import rasterio | ||
from beartype import beartype | ||
from beartype.typing import Literal, Tuple, Union | ||
from rasterio import profiles, transform | ||
|
||
from eis_toolkit.exceptions import InvalidParameterValueException | ||
from eis_toolkit.utilities.miscellaneous import row_points, toggle_gdal_exceptions | ||
from eis_toolkit.vector_processing.distance_computation import distance_computation | ||
|
||
# Threshold value accepted by the public functions: either a single number or
# a pair of numbers describing a range.
THRESHOLD_CRITERIA_VALUE_TYPE = Union[Tuple[float, float], float]
# Names of the supported ways to compare raster values against the threshold.
THRESHOLD_CRITERIA_TYPE = Literal["lower", "higher", "in_between", "outside"]
|
||
|
||
@beartype
def distance_to_anomaly(
    raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_data: np.ndarray,
    threshold_criteria_value: THRESHOLD_CRITERIA_VALUE_TYPE,
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
) -> np.ndarray:
    """Calculate distance from raster cell to nearest anomaly.

    The criteria for what is anomalous can be defined as a single number and
    criteria text of "higher" or "lower". Alternatively, the definition can be
    a range (a tuple of two numbers) where values inside the range (criteria
    text of "in_between") or outside of it (criteria text of "outside") are
    marked as anomalous.

    Args:
        raster_profile: The raster profile of which the distances
            to the nearest anomalous value are determined. Must contain
            integer "width" and "height" and an affine "transform".
        anomaly_raster_profile: The profile of the anomaly raster.
        anomaly_raster_data: The values of the anomaly raster in which
            anomalous cells are looked for.
        threshold_criteria_value: Value(s) used to define anomalous. A single
            number for "lower" and "higher", a tuple of two numbers for
            "in_between" and "outside".
        threshold_criteria: Method to define anomalous.

    Returns:
        A 2D numpy array with the distances to anomalies computed.

    Raises:
        InvalidParameterValueException: If raster_profile lacks integer width
            and height or an affine transform, or if the kind of
            threshold_criteria_value (single number or tuple) does not match
            threshold_criteria.
    """
    raster_width = raster_profile.get("width")
    raster_height = raster_profile.get("height")

    if not isinstance(raster_width, int) or not isinstance(raster_height, int):
        raise InvalidParameterValueException(
            f"Expected raster_profile to contain integer width and height. {raster_profile}"
        )

    raster_transform = raster_profile.get("transform")

    if not isinstance(raster_transform, transform.Affine):
        raise InvalidParameterValueException(
            f"Expected raster_profile to contain an affine transformation. {raster_profile}"
        )

    # A range criteria needs a (first, second) pair and a single-sided criteria
    # needs one number; reject mismatches here instead of failing later with an
    # unrelated indexing or broadcasting error.
    criteria_uses_range = threshold_criteria in ("in_between", "outside")
    if criteria_uses_range != isinstance(threshold_criteria_value, tuple):
        expected_kind = "a tuple of two numbers" if criteria_uses_range else "a single number"
        raise InvalidParameterValueException(
            f"Expected threshold_criteria_value to be {expected_kind} when threshold_criteria is "
            f"{threshold_criteria!r}. Got: {threshold_criteria_value}"
        )

    return _distance_to_anomaly(
        raster_profile=raster_profile,
        anomaly_raster_profile=anomaly_raster_profile,
        anomaly_raster_data=anomaly_raster_data,
        threshold_criteria=threshold_criteria,
        threshold_criteria_value=threshold_criteria_value,
    )
|
||
|
||
@beartype
def distance_to_anomaly_gdal(
    anomaly_raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_data: np.ndarray,
    threshold_criteria_value: THRESHOLD_CRITERIA_VALUE_TYPE,
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
    output_path: Path,
    verbose: bool = False,
) -> Path:
    """Calculate distance from raster cell to nearest anomaly.

    Distance is calculated for each cell in the anomaly raster and saved to a
    new raster at output_path. The criteria for what is anomalous can be
    defined as a single number and criteria text of "higher" or "lower".
    Alternatively, the definition can be a range (a tuple of two numbers)
    where values inside the range (criteria text of "in_between") or outside
    of it (criteria text of "outside") are marked as anomalous.

    Does not work on Windows.

    Args:
        anomaly_raster_profile: The profile of the anomaly raster.
        anomaly_raster_data: The values of the anomaly raster in which
            the distances to the nearest anomalous value are determined.
        threshold_criteria_value: Value(s) used to define anomalous. A single
            number for "lower" and "higher", a tuple of two numbers for
            "in_between" and "outside".
        threshold_criteria: Method to define anomalous.
        output_path: The path to the raster with the distances to anomalies
            calculated.
        verbose: Whether to print gdal_proximity output.

    Returns:
        The path to the raster with the distances to anomalies calculated.

    Raises:
        InvalidParameterValueException: If the kind of
            threshold_criteria_value (single number or tuple) does not match
            threshold_criteria.
    """
    # A range criteria needs a (first, second) pair and a single-sided criteria
    # needs one number; reject mismatches before any raster is written.
    criteria_uses_range = threshold_criteria in ("in_between", "outside")
    if criteria_uses_range != isinstance(threshold_criteria_value, tuple):
        expected_kind = "a tuple of two numbers" if criteria_uses_range else "a single number"
        raise InvalidParameterValueException(
            f"Expected threshold_criteria_value to be {expected_kind} when threshold_criteria is "
            f"{threshold_criteria!r}. Got: {threshold_criteria_value}"
        )

    return _distance_to_anomaly_gdal(
        output_path=output_path,
        anomaly_raster_profile=anomaly_raster_profile,
        anomaly_raster_data=anomaly_raster_data,
        threshold_criteria=threshold_criteria,
        threshold_criteria_value=threshold_criteria_value,
        verbose=verbose,
    )
|
||
|
||
def _fits_criteria(
    threshold_criteria_value: "THRESHOLD_CRITERIA_VALUE_TYPE",
    threshold_criteria: "THRESHOLD_CRITERIA_TYPE",
    anomaly_raster_data: np.ndarray,
) -> np.ndarray:
    """Build a boolean mask of the cells that count as anomalous.

    Args:
        threshold_criteria_value: A single number for "lower" and "higher",
            or a pair of numbers for "in_between" and "outside". For a pair,
            the first item is compared as the lower bound and the second as
            the upper bound.
        threshold_criteria: Which comparison to apply.
        anomaly_raster_data: The raster values to test.

    Returns:
        A boolean array with the same shape as anomaly_raster_data. True marks
        a cell that satisfies the criteria. All comparisons are strict, and
        NaN cells are always False.
    """
    # Every entry returns an element-wise boolean mask. The range variants
    # read their bounds from threshold_criteria_value (not from the criteria
    # name) and return the mask itself, not the index tuple that
    # single-argument np.where would produce.
    criteria_dict = {
        "lower": lambda data: data < threshold_criteria_value,
        "higher": lambda data: data > threshold_criteria_value,
        "in_between": lambda data: np.logical_and(
            data > threshold_criteria_value[0], data < threshold_criteria_value[1]
        ),
        "outside": lambda data: np.logical_or(
            data < threshold_criteria_value[0], data > threshold_criteria_value[1]
        ),
    }
    fits_mask = criteria_dict[threshold_criteria](anomaly_raster_data)

    # NaN cells are explicitly never anomalous.
    return np.where(np.isnan(anomaly_raster_data), False, fits_mask)
|
||
|
||
def _write_binary_anomaly_raster(tmp_dir: Path, anomaly_raster_profile, data_fits_criteria: np.ndarray) -> Path:
    """Write the anomaly mask to a single-band uint8 raster inside tmp_dir.

    Args:
        tmp_dir: Directory in which the raster file is created.
        anomaly_raster_profile: Profile of the anomaly raster. It is used as
            the template for the written raster and is not modified.
        data_fits_criteria: Mask of anomalous cells. It is cast to uint8
            before being written as band 1.

    Returns:
        The path of the written raster (tmp_dir / "anomaly_raster_binary.tif").
    """
    anomaly_raster_binary_path = tmp_dir / "anomaly_raster_binary.tif"

    # Build a separate mapping instead of calling update() on the caller's
    # profile, which would overwrite its dtype, count and nodata in place.
    anomaly_raster_binary_profile = {
        **anomaly_raster_profile,
        "dtype": rasterio.uint8,
        "count": 1,
        "nodata": None,
    }
    with rasterio.open(anomaly_raster_binary_path, mode="w", **anomaly_raster_binary_profile) as anomaly_raster_binary:
        anomaly_raster_binary.write(data_fits_criteria.astype(rasterio.uint8), 1)

    return anomaly_raster_binary_path
|
||
|
||
def _distance_to_anomaly_gdal(
    anomaly_raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_data: np.ndarray,
    threshold_criteria_value: Union[Tuple[float, float], float],
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
    output_path: Path,
    verbose: bool,
):
    """Compute distances to anomalous cells with gdal_proximity.

    The anomaly mask is written to a temporary binary raster, which is then
    handed to gdal_proximity as the source. The result is written to
    output_path, and that same path is returned.
    """
    # Imported lazily so that the module can be imported without osgeo_utils.
    from osgeo_utils import gdal_proximity

    anomaly_mask = _fits_criteria(
        anomaly_raster_data=anomaly_raster_data,
        threshold_criteria_value=threshold_criteria_value,
        threshold_criteria=threshold_criteria,
    )
    # Cells with value 1 in the binary raster are the proximity targets.
    proximity_options = ("VALUES=1", "DISTUNITS=GEO")

    # The binary raster is only an intermediate input, so it is kept in a
    # temporary directory that is removed once gdal_proximity has run.
    with TemporaryDirectory() as scratch_dir:
        binary_raster_path = _write_binary_anomaly_raster(
            tmp_dir=Path(scratch_dir),
            anomaly_raster_profile=anomaly_raster_profile,
            data_fits_criteria=anomaly_mask,
        )
        with toggle_gdal_exceptions():
            gdal_proximity.gdal_proximity(
                src_filename=str(binary_raster_path),
                dst_filename=str(output_path),
                alg_options=proximity_options,
                quiet=not verbose,
            )

    return output_path
|
||
|
||
def _distance_to_anomaly(
    raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_profile: Union[profiles.Profile, dict],
    anomaly_raster_data: np.ndarray,
    threshold_criteria_value: Union[Tuple[float, float], float],
    threshold_criteria: THRESHOLD_CRITERIA_TYPE,
) -> np.ndarray:
    """Compute distances to anomalous cells with distance_computation.

    Every anomalous cell is converted to a point geometry with row_points.
    The points are collected into a GeoDataFrame, which is passed to
    distance_computation together with raster_profile.
    """
    anomaly_mask = _fits_criteria(
        anomaly_raster_data=anomaly_raster_data,
        threshold_criteria_value=threshold_criteria_value,
        threshold_criteria=threshold_criteria,
    )

    col_indices = np.arange(anomaly_raster_data.shape[1])
    row_indices = np.arange(anomaly_raster_data.shape[0])

    # Walk the raster row by row, keeping only the columns flagged by the mask.
    anomaly_points = []
    for row_index in row_indices:
        anomaly_points.extend(
            row_points(
                row=row_index,
                cols=col_indices[anomaly_mask[row_index]],
                raster_transform=anomaly_raster_profile["transform"],
            )
        )

    anomaly_points_gdf = gpd.GeoDataFrame(geometry=anomaly_points, crs=anomaly_raster_profile["crs"])

    return distance_computation(raster_profile=raster_profile, geometries=anomaly_points_gdf)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.