def defrate_per_class(
        fcc_file,
        vulnerability_file,
        time_interval,
        period="calibration",
        tab_file_defrate="defrate_per_class.csv",
        blk_rows=128,
        verbose=True):
    """Compute deforestation rates per vulnerability class.

    This function computes the historical deforestation rates for each
    vulnerability class.

    A ``.csv`` file with deforestation rates for each vulnerability
    class is created (see ``tab_file_defrate``).

    :param fcc_file: Input raster file of forest cover change at three
        dates (123). 1: first period deforestation, 2: second period
        deforestation, 3: remaining forest at the end of the second
        period. No data value must be 0 (zero).

    :param vulnerability_file: Input file with vulnerability classes.

    :param time_interval: Time interval (in years) for forest cover
        change observations.

    :param period: Either "calibration" (from t1 to t2), "validation"
        (or "confirmation", from t2 to t3), or "historical" (full
        historical period from t1 to t3). Default to "calibration".

    :param tab_file_defrate: Path to the ``.csv`` output file with
        estimates of deforestation rates for each vulnerability class.

    :param blk_rows: If > 0, number of rows for computation by block.

    :param verbose: Logical. Whether to print messages or not. Default
        to ``True``.

    :raises ValueError: If ``period`` is not one of "calibration",
        "validation", "confirmation", or "historical".

    """

    # Validate period upfront: with an unknown value the original
    # elif-chain left data_for/data_defor unbound and crashed with a
    # NameError inside the block loop.
    valid_periods = ("calibration", "validation", "confirmation",
                     "historical")
    if period not in valid_periods:
        raise ValueError(
            f"'period' must be one of {valid_periods}, got {period!r}.")

    # ==============================================================
    # Input rasters
    # ==============================================================

    # Get fcc raster data
    fcc_ds = gdal.Open(fcc_file)
    fcc_band = fcc_ds.GetRasterBand(1)

    # Landscape variables: pixel size from the geotransform
    # (gt[5] is negative for north-up rasters, hence the sign flip).
    gt = fcc_ds.GetGeoTransform()
    xres = gt[1]
    yres = -gt[5]

    # Get vulnerability class raster data
    defor_cat_ds = gdal.Open(vulnerability_file)
    defor_cat_band = defor_cat_ds.GetRasterBand(1)

    # Make blocks for block-wise processing
    blockinfo = makeblock(fcc_file, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]

    # ==============================================
    # Compute deforestation rates per cat
    # ==============================================

    # Maximum number of deforestation categories.
    # NOTE(review): 30999 presumably matches the class coding
    # "distance class * 1000 + subjurisdiction id" used when building
    # the vulnerability raster -- confirm against vulnerability_classes().
    n_cat_max = 30999
    cat = list(range(1, n_cat_max + 1))

    # Table accumulating per-class forest ("nfor") and deforestation
    # ("ndefor") pixel counts over all blocks.
    data = {"cat": cat, "nfor": 0, "ndefor": 0,
            "rate_obs": 0.0, "rate_mod": 0.0}
    df = pd.DataFrame(data)

    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        if verbose:
            progress_bar(nblock, b + 1)
        # Position of the block in the grid of blocks
        px = b % nblock_x
        py = b // nblock_x
        # Data
        fcc_data = fcc_band.ReadAsArray(x[px], y[py], nx[px], ny[py])
        defor_cat_data = defor_cat_band.ReadAsArray(
            x[px], y[py], nx[px], ny[py])
        # Select forest and deforested pixels for the requested period
        if period == "calibration":
            data_for = defor_cat_data[fcc_data > 0]
            data_defor = defor_cat_data[fcc_data == 1]
        elif period in ("validation", "confirmation"):
            data_for = defor_cat_data[fcc_data > 1]
            data_defor = defor_cat_data[fcc_data == 2]
        else:  # "historical" (validated above)
            data_for = defor_cat_data[fcc_data > 0]
            data_defor = defor_cat_data[np.isin(fcc_data, [1, 2])]
        # Count forest pixels per category
        # (Categorical.value_counts returns counts in category order,
        # aligned with df rows).
        cat_for = pd.Categorical(data_for.flatten(), categories=cat)
        df["nfor"] += cat_for.value_counts().values
        # Count deforested pixels per category
        cat_defor = pd.Categorical(data_defor.flatten(), categories=cat)
        df["ndefor"] += cat_defor.value_counts().values

    # Remove classes with no forest (avoids division by zero below)
    df = df[df["nfor"] != 0]

    # Annual deforestation rates per category
    df["rate_obs"] = 1 - (1 - df["ndefor"] / df["nfor"]) ** (1 / time_interval)

    # Relative spatial deforestation probability from model.
    # The benchmark model uses the observed rate directly.
    df["rate_mod"] = df["rate_obs"]

    # Correction factor: ndefor / sum_i p_i
    sum_ndefor = df["ndefor"].sum()
    sum_pi = (df["nfor"] * df["rate_mod"]).sum()
    correction_factor = sum_ndefor / sum_pi

    # Absolute deforestation probability
    df["rate_abs"] = df["rate_mod"] * correction_factor

    # Time interval
    df["time_interval"] = time_interval

    # Pixel area (ha)
    pixel_area = xres * yres / 10000
    df["pixel_area"] = pixel_area

    # Deforestation density (ha/pixel/yr)
    df["defor_dens"] = df["rate_abs"] * pixel_area / time_interval

    # Export the table of results
    # (index_label dropped: it is meaningless when index=False).
    df.to_csv(tab_file_defrate, sep=",", header=True, index=False)

    # Dereference datasets to flush and close them
    del fcc_ds, defor_cat_ds
def download_gadm(iso3, output_file):
    """Download GADM data for a country.

    Download GADM (Global Administrative Areas) data for a specific
    country. See `GADM <https://gadm.org>`_.

    :param iso3: Country ISO 3166-1 alpha-3 code.

    :param output_file: Path to output GPKG file. If the file already
        exists, nothing is downloaded.

    """

    # Skip the download when the file is already present
    if not os.path.isfile(output_file):
        # Download the file from gadm.org
        url = ("https://geodata.ucdavis.edu/gadm/gadm4.1/"
               f"gpkg/gadm41_{iso3}.gpkg")
        urlretrieve(url, output_file)


def rasterize_subjurisdictions(input_file, fcc_file, output_file,
                               verbose=False):
    """Rasterize subjurisdictions.

    :param input_file: Input GPKG vector file with subjurisdictions.

    :param fcc_file: Input fcc raster file used for resolution,
        extent, and projection.

    :param output_file: Output raster file with an integer id for
        each subjurisdiction.

    :param verbose: Logical. Whether to print messages or not.
        Default to ``False``.

    """

    # GDAL progress callback (0 disables progress reporting)
    cback = gdal.TermProgress if verbose else 0

    # Raster info from the fcc raster: extent, resolution, projection
    fcc_ds = gdal.Open(fcc_file, gdal.GA_ReadOnly)
    gt = fcc_ds.GetGeoTransform()
    xmin = gt[0]
    xres = gt[1]
    ymax = gt[3]
    yres = -gt[5]
    xmax = xmin + xres * fcc_ds.RasterXSize
    ymin = ymax - yres * fcc_ds.RasterYSize
    extent = (xmin, ymin, xmax, ymax)
    proj = fcc_ds.GetProjectionRef()

    # SQL statement assigning a sequential integer id to each
    # admin-level-1 feature of the GADM GPKG file.
    sql_statement = ("select *, row_number() over () "
                     "as id from adm_adm_1")

    # Rasterize.
    # NOTE(review): GDT_Byte limits ids to 255 subjurisdictions --
    # confirm this is enough for all target jurisdictions.
    param = gdal.RasterizeOptions(
        outputBounds=extent,
        targetAlignedPixels=True,
        attribute="id",
        outputSRS=proj,
        noData=0,
        xRes=xres,
        yRes=yres,
        SQLStatement=sql_statement,
        SQLDialect="SQLite",
        outputType=gdal.GDT_Byte,
        creationOptions=["COMPRESS=DEFLATE", "BIGTIFF=YES"],
        callback=cback)
    # callback is already carried by the options object; passing it a
    # second time to gdal.Rasterize was redundant (and ignored).
    gdal.Rasterize(output_file, input_file, options=param)
def vulnerability_classes(
        dist_file,
        dist_thresh,
        subj_file,
        output_file="dist_edge_cat.tif",
        blk_rows=128,
        verbose=True):
    """Map with vulnerability classes.

    A geometric classification is used to convert distance to forest
    edge into vulnerability classes. A raster file with vulnerability
    classes is created combining distance to forest edge classes and
    subjurisdiction identifiers. High values indicate higher
    vulnerability. Raster type is UInt16 ([0, 65535]). NoData value is
    set to 0.

    :param dist_file: Input file of distance to forest edge.

    :param dist_thresh: The distance threshold. Pixels beyond this
        distance are the least vulnerable (category 1).

    :param subj_file: Input raster file with subjurisdiction
        identifiers.

    :param output_file: Output raster file with vulnerability
        classes. Default to "dist_edge_cat.tif".

    :param blk_rows: If > 0, number of rows for computation by block.

    :param verbose: Logical. Whether to print messages or not. Default
        to ``True``.

    :return: Bins used to categorize the deforestation risk based on
        the distance to forest edge.

    """

    # ==============================================================
    # Input rasters
    # ==============================================================

    # Distance to forest edge raster file
    dist_ds = gdal.Open(dist_file)
    dist_band = dist_ds.GetRasterBand(1)
    # Raster size
    xsize = dist_band.XSize
    ysize = dist_band.YSize

    # Subjurisdictions raster file
    subj_ds = gdal.Open(subj_file)
    subj_band = subj_ds.GetRasterBand(1)

    # Minimal distance: one pixel side (smallest observable distance)
    gt = dist_ds.GetGeoTransform()
    xres = gt[1]
    yres = -gt[5]
    dist_min = min(xres, yres)

    # Make blocks for block-wise processing
    blockinfo = makeblock(dist_file, blk_rows=blk_rows)
    nblock = blockinfo[0]
    nblock_x = blockinfo[1]
    x = blockinfo[3]
    y = blockinfo[4]
    nx = blockinfo[5]
    ny = blockinfo[6]

    # =================================================
    # Categorical raster file for vulnerability classes
    # =================================================

    # Create categorical (cat) raster file for deforestation risk
    driver = gdal.GetDriverByName("GTiff")
    cat_ds = driver.Create(output_file, xsize, ysize, 1,
                           gdal.GDT_UInt16,
                           ["COMPRESS=DEFLATE", "PREDICTOR=2",
                            "BIGTIFF=YES"])
    cat_ds.SetProjection(dist_ds.GetProjection())
    cat_ds.SetGeoTransform(dist_ds.GetGeoTransform())
    cat_band = cat_ds.GetRasterBand(1)
    cat_band.SetNoDataValue(0)

    # =================
    # Compute bins
    # =================

    # Distance classes with geometric classification: bin edges form
    # a geometric progression from dist_min up to dist_thresh.
    n_classes = 29
    ratio = math.pow(dist_min / dist_thresh, 1 / n_classes)
    bins = [dist_thresh * math.pow(ratio, n_classes - i)
            for i in range(n_classes + 1)]
    # Correction for dist_min (avoid floating point drift at the
    # lowest edge)
    bins[0] = dist_min

    # =================
    # Categorizing
    # =================

    # Loop on blocks of data
    for b in range(nblock):
        # Progress bar
        if verbose:
            progress_bar(nblock, b + 1)
        # Position of the block in the grid of blocks
        px = b % nblock_x
        py = b // nblock_x
        # Data
        dist_data = dist_band.ReadAsArray(x[px], y[py], nx[px], ny[py])
        subj_data = subj_band.ReadAsArray(x[px], y[py], nx[px], ny[py])
        # Categorize distances; labels=False yields 0-based bin
        # indices (NaN outside the bins, resolved below).
        cat_data = pd.cut(dist_data.flatten(), bins=bins,
                          labels=False, include_lowest=True,
                          right=True)
        cat_data = cat_data.reshape(dist_data.shape)
        # Invert so that short distances (high vulnerability) get
        # high class values.
        cat_data = n_classes + 1 - cat_data
        # Class 0: non-forest (distance 0); class 1: beyond threshold
        # (least vulnerable). This also overwrites the NaN produced
        # by pd.cut for distances above dist_thresh.
        # NOTE(review): distances strictly between 0 and dist_min
        # would stay NaN -- presumably impossible for a pixel-based
        # distance raster; confirm upstream.
        cat_data[dist_data == 0] = 0
        cat_data[dist_data > dist_thresh] = 1
        # Combine with subjurisdiction info:
        # value = distance class * 1000 + subjurisdiction id.
        cat_data = cat_data * 1000 + subj_data
        # Pixels with class <= 1000 have no subjurisdiction id: NoData
        cat_data[cat_data <= 1000] = 0
        # Write to file
        cat_band.WriteArray(cat_data, x[px], y[py])

    # Compute statistics
    cat_band.FlushCache()
    cb = gdal.TermProgress if verbose else 0
    cat_band.ComputeStatistics(False, cb)

    # Dereference bands and datasets to flush and close them
    cat_band = None
    dist_band = None
    subj_band = None
    del cat_ds, dist_ds, subj_ds

    return bins
100644 --- a/riskmapjnr/misc/__init__.py +++ b/riskmapjnr/misc/__init__.py @@ -1,17 +1,9 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# =================================================================== -# author :Ghislain Vieilledent -# email :ghislain.vieilledent@cirad.fr, ghislainv@gmail.com -# web :https://ecology.ghislainv.fr -# python_version :>=3 -# license :GPLv3 -# =================================================================== +"""Miscellaneous functions.""" from .miscellaneous import invlogit, make_dir, tree from .miscellaneous import makeblock, progress_bar from .miscellaneous import make_square, rescale +from .get_vector_extent import get_vector_extent from .countpix import countpix # EOF diff --git a/riskmapjnr/misc/get_vector_extent.py b/riskmapjnr/misc/get_vector_extent.py new file mode 100644 index 0000000..969a455 --- /dev/null +++ b/riskmapjnr/misc/get_vector_extent.py @@ -0,0 +1,52 @@ +"""Get the extent of a shapefile.""" + +from osgeo import ogr + + +def get_vector_extent(input_file): + """Compute the extent of a vector file. + + This function computes the extent (xmin, ymin, xmax, ymax) of a + shapefile. + + :param input_file: Path to the input vector file. + + :return: The extent as a tuple (xmin, ymin, xmax, ymax). + + """ + + in_data_dource = ogr.Open(input_file) + in_layer = in_data_dource.GetLayer() + extent = in_layer.GetExtent() + extent = (extent[0], extent[2], extent[1], extent[3]) + + return extent # (xmin, ymin, xmax, ymax) + + +def extent_shp(input_file): + """Compute the extent of a vector file. + + .. deprecated:: 1.2 + Use :func:`forestatrisk.get_vector_extent`. + + .. warning:: + Will be removed in future versions. + + This function computes the extent (xmin, ymin, xmax, ymax) of a + shapefile. + + :param input_file: Path to the input vector file. + + :return: The extent as a tuple (xmin, ymin, xmax, ymax). 
+ + """ + + in_data_dource = ogr.Open(input_file) + in_layer = in_data_dource.GetLayer() + extent = in_layer.GetExtent() + extent = (extent[0], extent[2], extent[1], extent[3]) + + return extent # (xmin, ymin, xmax, ymax) + + +# End diff --git a/setup.py b/setup.py index f1dbbbc..70fa9fb 100644 --- a/setup.py +++ b/setup.py @@ -16,15 +16,17 @@ # find_version -def find_version(): +def find_version(pkg_name): """Finding package version.""" - with open("riskmapjnr/__init__.py", encoding="utf-8") as init_file: + with open(f"{pkg_name}/__init__.py", encoding="utf-8") as init_file: init_text = init_file.read() - far_version = re.search('^__version__\\s*=\\s*"(.*)"', init_text, re.M).group(1) - return far_version + _version = (re.search('^__version__\\s*=\\s*"(.*)"', + init_text, re.M) + .group(1)) + return _version -version = find_version() +version = find_version("riskmapjnr") # reStructuredText README file with io.open("README.rst", encoding="utf-8") as f: @@ -57,7 +59,7 @@ def find_version(): "redd risk tropics vcs", python_requires=">=3.6", packages=find_packages(), - package_dir={"riskmapjnr": "./riskmapjnr"}, + package_dir={"riskmapjnr": "riskmapjnr"}, package_data={ "riskmapjnr": ["data/fcc123_GLP.tif", "data/ctry_border_GLP.gpkg"] @@ -66,8 +68,13 @@ def find_version(): entry_points={ "console_scripts": ["riskmapjnr = riskmapjnr.riskmapjnr:main"] }, - install_requires=["gdal", "numpy", "matplotlib", - "pandas", "scipy"], + install_requires=[ + "gdal", + "numpy", + "matplotlib", + "pandas", + "scipy", + ], extras_require={ "interactive": ["jupyter", "geopandas", "descartes", "folium", "tabulate"]