Wrap grdhisteq (#1433)

Meghan Jones · weiji14 · seisman · web-flow · commit 418640059915 · 2022-03-10T00:07:01.000+08:00
Co-authored-by: Wei Ji &lt;23487320+weiji14@users.noreply.github.com&gt;
Co-authored-by: Dongdong Tian &lt;seisman.info@gmail.com&gt;
diff --git a/doc/api/index.rst b/doc/api/index.rst
@@ -138,6 +138,9 @@ Operations on raster data
     grdfill
     grdfilter
     grdgradient
+    grdhisteq
+    grdhisteq.equalize_grid
+    grdhisteq.compute_bins
     grdlandmask
     grdproject
     grdsample
diff --git a/pygmt/__init__.py b/pygmt/__init__.py
@@ -40,6 +40,7 @@
     grdfill,
     grdfilter,
     grdgradient,
+    grdhisteq,
     grdinfo,
     grdlandmask,
     grdproject,
diff --git a/pygmt/src/__init__.py b/pygmt/src/__init__.py
@@ -17,6 +17,7 @@
 from pygmt.src.grdfill import grdfill
 from pygmt.src.grdfilter import grdfilter
 from pygmt.src.grdgradient import grdgradient
+from pygmt.src.grdhisteq import grdhisteq
 from pygmt.src.grdimage import grdimage
 from pygmt.src.grdinfo import grdinfo
 from pygmt.src.grdlandmask import grdlandmask
diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py
@@ -0,0 +1,349 @@
+"""
+grdhisteq - Perform histogram equalization for a grid.
+"""
+import warnings
+
+import numpy as np
+import pandas as pd
+from pygmt.clib import Session
+from pygmt.exceptions import GMTInvalidInput
+from pygmt.helpers import (
+    GMTTempFile,
+    build_arg_string,
+    fmt_docstring,
+    kwargs_to_strings,
+    use_alias,
+)
+from pygmt.io import load_dataarray
+
+__doctest_skip__ = ["grdhisteq.*"]
+
+
+class grdhisteq:  # pylint: disable=invalid-name
+    r"""
+    Perform histogram equalization for a grid.
+
+    Two common use cases of :meth:`pygmt.grdhisteq` are to find data values
+    that divide a grid into patches of equal area
+    (:meth:`pygmt.grdhisteq.compute_bins`) or to write a grid with
+    statistics based on some kind of cumulative distribution function
+    (:meth:`pygmt.grdhisteq.equalize_grid`).
+
+    Histogram equalization provides a way to highlight data that has most
+    values clustered in a small portion of the dynamic range, such as a
+    grid of flat topography with a mountain in the middle. Ordinary gray
+    shading of this grid (using :meth:`pygmt.Figure.grdimage` or
+    :meth:`pygmt.Figure.grdview`) with a linear mapping from topography to
+    graytone will result in most of the image being very dark gray, with the
+    mountain being almost white. :meth:`pygmt.grdhisteq.compute_bins` can
+    provide a list of data values that divide the data range into divisions
+    which have an equal area in the image [Default is 16 if ``divisions`` is
+    not set]. The :class:`pandas.DataFrame` or ASCII file output can be used to
+    make a colormap with :meth:`pygmt.makecpt` and an image with
+    :meth:`pygmt.Figure.grdimage` that has all levels of gray occuring
+    equally.
+
+    :meth:`pygmt.grdhisteq.equalize_grid` provides a way to write a grid with
+    statistics based on a cumulative distribution function. In this
+    application, the ``outgrid`` has relative highs and lows in the same
+    (x,y) locations as the ``grid``, but the values are changed to reflect
+    their place in the cumulative distribution.
+    """
+
+    @staticmethod
+    @fmt_docstring
+    @use_alias(
+        C="divisions",
+        D="outfile",
+        G="outgrid",
+        R="region",
+        N="gaussian",
+        Q="quadratic",
+        V="verbose",
+        h="header",
+    )
+    @kwargs_to_strings(R="sequence")
+    def _grdhisteq(grid, output_type, **kwargs):
+        r"""
+        Perform histogram equalization for a grid.
+
+        Must provide ``outfile`` or ``outgrid``.
+
+        Full option list at :gmt-docs:`grdhisteq.html`
+
+        {aliases}
+
+        Parameters
+        ----------
+        grid : str or xarray.DataArray
+            The file name of the input grid or the grid loaded as a DataArray.
+        outgrid : str or bool or None
+            The name of the output netCDF file with extension .nc to store the
+            grid in.
+        outfile : str or bool or None
+            The name of the output ASCII file to store the results of the
+            histogram equalization in.
+        output_type: str
+            Determines the output type. Use "file", "xarray", "pandas", or
+            "numpy".
+        divisions : int
+            Set the number of divisions of the data range [Default is 16].
+
+        {R}
+        {V}
+        {h}
+
+        Returns
+        -------
+        ret: pandas.DataFrame or xarray.DataArray or None
+            Return type depends on whether the ``outgrid`` parameter is set:
+
+            - xarray.DataArray if ``output_type`` is "xarray""
+            - numpy.ndarray if ``output_type`` is "numpy"
+            - pandas.DataFrame if ``output_type`` is "pandas"
+            - None if ``output_type`` is "file" (output is stored in
+              ``outgrid`` or ``outfile``)
+
+        See Also
+        -------
+        :meth:`pygmt.grd2cpt`
+        """
+
+        with Session() as lib:
+            file_context = lib.virtualfile_from_data(check_kind="raster", data=grid)
+            with file_context as infile:
+                arg_str = " ".join([infile, build_arg_string(kwargs)])
+                lib.call_module("grdhisteq", arg_str)
+
+        if output_type == "file":
+            return None
+        if output_type == "xarray":
+            return load_dataarray(kwargs["G"])
+
+        result = pd.read_csv(
+            filepath_or_buffer=kwargs["D"],
+            sep="\t",
+            header=None,
+            names=["start", "stop", "bin_id"],
+            dtype={
+                "start": np.float32,
+                "stop": np.float32,
+                "bin_id": np.uint32,
+            },
+        )
+        if output_type == "numpy":
+            return result.to_numpy()
+
+        return result.set_index("bin_id")
+
+    @staticmethod
+    @fmt_docstring
+    def equalize_grid(
+        grid,
+        *,
+        outgrid=True,
+        divisions=None,
+        region=None,
+        gaussian=None,
+        quadratic=None,
+        verbose=None,
+    ):
+        r"""
+        Perform histogram equalization for a grid.
+
+        :meth:`pygmt.grdhisteq.equalize_grid` provides a way to write a grid
+        with statistics based on a cumulative distribution function. The
+        ``outgrid`` has relative highs and lows in the same (x,y) locations as
+        the ``grid``, but the values are changed to reflect their place in the
+        cumulative distribution.
+
+        Full option list at :gmt-docs:`grdhisteq.html`
+
+        Parameters
+        ----------
+        grid : str or xarray.DataArray
+            The file name of the input grid or the grid loaded as a DataArray.
+        outgrid : str or bool or None
+            The name of the output netCDF file with extension .nc to store the
+            grid in.
+        divisions : int
+            Set the number of divisions of the data range.
+        gaussian : bool or int or float
+            *norm*.
+            Produce an output grid with standard normal scores using
+            ``gaussian=True`` or force the scores to fall in the ±\ *norm*
+            range.
+        quadratic: bool
+            Perform quadratic equalization [Default is linear].
+        {R}
+        {V}
+
+        Returns
+        -------
+        ret: xarray.DataArray or None
+            Return type depends on the ``outgrid`` parameter:
+
+            - xarray.DataArray if ``outgrid`` is True or None
+            - None if ``outgrid`` is a str (grid output is stored in
+              ``outgrid``)
+
+        Example
+        -------
+        >>> import pygmt
+        >>> # Load a grid of @earth_relief_30m data, with an x-range of 10 to
+        >>> # 30, and a y-range of 15 to 25
+        >>> grid = pygmt.datasets.load_earth_relief(
+        ...     resolution="30m", region=[10, 30, 15, 25]
+        ... )
+        >>> # Create a new grid with a Gaussian data distribution
+        >>> grid = pygmt.grdhisteq.equalize_grid(grid=grid, gaussian=True)
+
+        See Also
+        -------
+        :meth:`pygmt.grd2cpt`
+
+        Notes
+        -----
+        This method does a weighted histogram equalization for geographic
+        grids to account for node area varying with latitude.
+        """
+        # Return an xarray.DataArray if ``outgrid`` is not set
+        with GMTTempFile(suffix=".nc") as tmpfile:
+            if isinstance(outgrid, str):
+                output_type = "file"
+            else:
+                output_type = "xarray"
+                outgrid = tmpfile.name
+            return grdhisteq._grdhisteq(
+                grid=grid,
+                output_type=output_type,
+                outgrid=outgrid,
+                divisions=divisions,
+                region=region,
+                gaussian=gaussian,
+                quadratic=quadratic,
+                verbose=verbose,
+            )
+
+    @staticmethod
+    @fmt_docstring
+    def compute_bins(
+        grid,
+        *,
+        output_type="pandas",
+        outfile=None,
+        divisions=None,
+        quadratic=None,
+        verbose=None,
+        region=None,
+        header=None,
+    ):
+        r"""
+        Perform histogram equalization for a grid.
+
+        Histogram equalization provides a way to highlight data that has most
+        values clustered in a small portion of the dynamic range, such as a
+        grid of flat topography with a mountain in the middle. Ordinary gray
+        shading of this grid (using :meth:`pygmt.Figure.grdimage` or
+        :meth:`pygmt.Figure.grdview`) with a linear mapping from topography to
+        graytone will result in most of the image being very dark gray, with
+        the mountain being almost white. :meth:`pygmt.grdhisteq.compute_bins`
+        can provide a list of data values that divide the data range into
+        divisions which have an equal area in the image [Default is 16 if
+        ``divisions`` is not set]. The :class:`pandas.DataFrame` or ASCII file
+        output can be used to make a colormap with :meth:`pygmt.makecpt` and an
+        image with :meth:`pygmt.Figure.grdimage` that has all levels of gray
+        occuring equally.
+
+        Full option list at :gmt-docs:`grdhisteq.html`
+
+        Parameters
+        ----------
+        grid : str or xarray.DataArray
+            The file name of the input grid or the grid loaded as a DataArray.
+        outfile : str or bool or None
+            The name of the output ASCII file to store the results of the
+            histogram equalization in.
+        output_type : str
+            Determine the format the xyz data will be returned in [Default is
+            ``pandas``]:
+
+                - ``numpy`` - :class:`numpy.ndarray`
+                - ``pandas``- :class:`pandas.DataFrame`
+                - ``file`` - ASCII file (requires ``outfile``)
+        divisions : int
+            Set the number of divisions of the data range.
+        quadratic : bool
+            Perform quadratic equalization [Default is linear].
+        {R}
+        {V}
+        {h}
+
+        Returns
+        -------
+        ret: pandas.DataFrame or None
+            Return type depends on the ``outfile`` parameter:
+
+            - pandas.DataFrame if ``outfile`` is True or None
+            - None if ``outfile`` is a str (file output is stored in
+              ``outfile``)
+
+        Example
+        -------
+        >>> import pygmt
+        >>> # Load a grid of @earth_relief_30m data, with an x-range of 10 to
+        >>> # 30, and a y-range of 15 to 25
+        >>> grid = pygmt.datasets.load_earth_relief(
+        ...     resolution="30m", region=[10, 30, 15, 25]
+        ... )
+        >>> # Find elevation intervals that splits the data range into 5
+        >>> # divisions, each of which have an equal area in the original grid.
+        >>> bins = pygmt.grdhisteq.compute_bins(grid=grid, divisions=5)
+        >>> print(bins)
+                start    stop
+        bin_id
+        0       179.0   397.5
+        1       397.5   475.5
+        2       475.5   573.5
+        3       573.5   710.5
+        4       710.5  2103.0
+
+        See Also
+        -------
+        :meth:`pygmt.grd2cpt`
+
+        Notes
+        -----
+        This method does a weighted histogram equalization for geographic
+        grids to account for node area varying with latitude.
+        """
+        # Return a pandas.DataFrame if ``outfile`` is not set
+        if output_type not in ["numpy", "pandas", "file"]:
+            raise GMTInvalidInput(
+                "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'."
+            )
+
+        if header is not None and output_type != "file":
+            raise GMTInvalidInput("'header' is only allowed with output_type='file'.")
+
+        if isinstance(outfile, str) and output_type != "file":
+            msg = (
+                f"Changing 'output_type' from '{output_type}' to 'file' "
+                "since 'outfile' parameter is set. Please use output_type='file' "
+                "to silence this warning."
+            )
+            warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
+            output_type = "file"
+        with GMTTempFile(suffix=".txt") as tmpfile:
+            if output_type != "file":
+                outfile = tmpfile.name
+            return grdhisteq._grdhisteq(
+                grid,
+                output_type=output_type,
+                outfile=outfile,
+                divisions=divisions,
+                quadratic=quadratic,
+                verbose=verbose,
+                region=region,
+                header=header,
+            )
diff --git a/pygmt/tests/test_grdhisteq.py b/pygmt/tests/test_grdhisteq.py