From 86fad60eee4f9dc90df5549ceab96a9e02a7a9ff Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 11 Mar 2024 13:12:23 +0800 Subject: [PATCH 1/6] pygmt.grd2xyz: Refactor using the virtualfile_to_dataset method and get rid of temporary output files --- pygmt/src/grd2xyz.py | 79 +++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index eade93473c2..72d9d09f946 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -2,12 +2,13 @@ grd2xyz - Convert grid to data table """ +from typing import Literal + import pandas as pd import xarray as xr from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( - GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, @@ -33,7 +34,12 @@ s="skiprows", ) @kwargs_to_strings(R="sequence", o="sequence_comma") -def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): +def grd2xyz( + grid, + output_type: Literal["pandas", "numpy", "file"] = "pandas", + outfile: str | None = None, + **kwargs, +) -> pd.DataFrame | xr.DataArray | None: r""" Convert grid to data table. @@ -47,15 +53,14 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): Parameters ---------- {grid} - output_type : str - Determine the format the xyz data will be returned in [Default is - ``pandas``]: - - - ``numpy`` - :class:`numpy.ndarray` - - ``pandas``- :class:`pandas.DataFrame` - - ``file`` - ASCII file (requires ``outfile``) - outfile : str - The file name for the output ASCII file. + output_type + Desired output type of the result data. + - ``pandas`` will return a :class:`pandas.DataFrame` object. + - ``numpy`` will return a :class:`numpy.ndarray` object. + - ``file`` will save the result to the file given by the ``outfile`` parameter. + outfile + The file name for saving the result. If specified, ``output_type`` will be + forced to be ``"file"``. cstyle : str [**f**\|\ **i**]. 
Replace the x- and y-coordinates on output with the corresponding @@ -118,13 +123,11 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): Returns ------- - ret : pandas.DataFrame or numpy.ndarray or None + ret Return type depends on ``outfile`` and ``output_type``: - - - None if ``outfile`` is set (output will be stored in file set by - ``outfile``) - - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is - not set (depends on ``output_type``) + - None if ``outfile`` is set (output will be stored in file set by ``outfile``) + - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set + (depends on ``output_type``) Example ------- @@ -149,31 +152,23 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): "or 'file'." ) - # Set the default column names for the pandas dataframe header - dataframe_header = ["x", "y", "z"] # Let output pandas column names match input DataArray dimension names - if isinstance(grid, xr.DataArray) and output_type == "pandas": + if output_type == "pandas" and isinstance(grid, xr.DataArray): # Reverse the dims because it is rows, columns ordered. - dataframe_header = [grid.dims[1], grid.dims[0], grid.name] - - with GMTTempFile() as tmpfile: - with Session() as lib: - with lib.virtualfile_in(check_kind="raster", data=grid) as vingrd: - if outfile is None: - outfile = tmpfile.name - lib.call_module( - module="grd2xyz", - args=build_arg_string(kwargs, infile=vingrd, outfile=outfile), - ) - - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - result = pd.read_csv( - tmpfile.name, sep="\t", names=dataframe_header, comment=">" + column_names = [grid.dims[1], grid.dims[0], grid.name] + else: + # Set the default column names for the pandas dataframe header. 
+ column_names = ["x", "y", "z"] + + with Session() as lib: + with ( + lib.virtualfile_in(check_kind="raster", data=grid) as vingrd, + lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl, + ): + lib.call_module( + module="grd2xyz", + args=build_arg_string(kwargs, infile=vingrd, outfile=vouttbl), + ) + return lib.virtualfile_to_dataset( + output_type=output_type, vfname=vouttbl, column_names=column_names ) - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None - - if output_type == "numpy": - result = result.to_numpy() - return result From 9999818ff056507b683b827007b045c419c9e399 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 11 Mar 2024 14:08:11 +0800 Subject: [PATCH 2/6] Remove tests that are not directly related to grd2xyz --- pygmt/tests/test_grd2xyz.py | 41 ------------------------------------- 1 file changed, 41 deletions(-) diff --git a/pygmt/tests/test_grd2xyz.py b/pygmt/tests/test_grd2xyz.py index b6f8e92c1ea..40a22ff339d 100644 --- a/pygmt/tests/test_grd2xyz.py +++ b/pygmt/tests/test_grd2xyz.py @@ -2,14 +2,11 @@ Test pygmt.grd2xyz. """ -from pathlib import Path - import numpy as np import pandas as pd import pytest from pygmt import grd2xyz from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import GMTTempFile from pygmt.helpers.testing import load_static_earth_relief @@ -52,44 +49,6 @@ def test_grd2xyz_format(grid): assert list(xyz_df.columns) == ["lon", "lat", "z"] -def test_grd2xyz_file_output(grid): - """ - Test that grd2xyz returns a file output when it is specified. - """ - with GMTTempFile(suffix=".xyz") as tmpfile: - result = grd2xyz(grid=grid, outfile=tmpfile.name, output_type="file") - assert result is None # return value is None - assert Path(tmpfile.name).stat().st_size > 0 # check that outfile exists - - -def test_grd2xyz_invalid_format(grid): - """ - Test that grd2xyz fails with incorrect format. 
- """ - with pytest.raises(GMTInvalidInput): - grd2xyz(grid=grid, output_type=1) - - -def test_grd2xyz_no_outfile(grid): - """ - Test that grd2xyz fails when a string output is set with no outfile. - """ - with pytest.raises(GMTInvalidInput): - grd2xyz(grid=grid, output_type="file") - - -def test_grd2xyz_outfile_incorrect_output_type(grid): - """ - Test that grd2xyz raises a warning when an outfile filename is set but the - output_type is not set to 'file'. - """ - with pytest.warns(RuntimeWarning): - with GMTTempFile(suffix=".xyz") as tmpfile: - result = grd2xyz(grid=grid, outfile=tmpfile.name, output_type="numpy") - assert result is None # return value is None - assert Path(tmpfile.name).stat().st_size > 0 # check that outfile exists - - def test_grd2xyz_pandas_output_with_o(grid): """ Test that grd2xyz fails when outcols is set and output_type is set to 'pandas'. From 868529ce456378494888c29cfed5054e7238d9e4 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 11 Mar 2024 21:11:31 +0800 Subject: [PATCH 3/6] Fix docstring list formatting and clarify the outfile description --- pygmt/src/grd2xyz.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 72d9d09f946..7cf5a249cca 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -55,12 +55,13 @@ def grd2xyz( {grid} output_type Desired output type of the result data. + - ``pandas`` will return a :class:`pandas.DataFrame` object. - ``numpy`` will return a :class:`numpy.ndarray` object. - ``file`` will save the result to the file given by the ``outfile`` parameter. outfile - The file name for saving the result. If specified, ``output_type`` will be - forced to be ``"file"``. + File name for saving the result data. Required if ``output_type`` is ``"file"``. + If specified, ``output_type`` will be forced to be ``"file"``. cstyle : str [**f**\|\ **i**]. 
Replace the x- and y-coordinates on output with the corresponding @@ -125,6 +126,7 @@ def grd2xyz( ------- ret Return type depends on ``outfile`` and ``output_type``: + - None if ``outfile`` is set (output will be stored in file set by ``outfile``) - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set (depends on ``output_type``) From 37e48b8dce433c2326a679fbb7fc679cb8790ca5 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 11 Mar 2024 23:14:10 +0800 Subject: [PATCH 4/6] Combine two grd2xyz tests into a single one --- pygmt/tests/test_grd2xyz.py | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/pygmt/tests/test_grd2xyz.py b/pygmt/tests/test_grd2xyz.py index 40a22ff339d..ab3feccf80c 100644 --- a/pygmt/tests/test_grd2xyz.py +++ b/pygmt/tests/test_grd2xyz.py @@ -21,32 +21,17 @@ def fixture_grid(): @pytest.mark.benchmark def test_grd2xyz(grid): """ - Make sure grd2xyz works as expected. + Test the basic functionality of grd2xyz. """ - xyz_data = grd2xyz(grid=grid, output_type="numpy") - assert xyz_data.shape == (112, 3) - + xyz_df = grd2xyz(grid=grid) + assert isinstance(xyz_df, pd.DataFrame) + assert list(xyz_df.columns) == ["lon", "lat", "z"] + assert xyz_df.shape == (112, 3) -def test_grd2xyz_format(grid): - """ - Test that correct formats are returned. 
- """ - lon = -50.5 - lat = -18.5 + lon, lat = -50.5, -18.5 orig_val = grid.sel(lon=lon, lat=lat).to_numpy() - xyz_default = grd2xyz(grid=grid) - xyz_val = xyz_default[(xyz_default["lon"] == lon) & (xyz_default["lat"] == lat)][ - "z" - ].to_numpy() - assert isinstance(xyz_default, pd.DataFrame) - assert orig_val.size == 1 - assert xyz_val.size == 1 + xyz_val = xyz_df[(xyz_df["lon"] == lon) & (xyz_df["lat"] == lat)]["z"].to_numpy() np.testing.assert_allclose(orig_val, xyz_val) - xyz_array = grd2xyz(grid=grid, output_type="numpy") - assert isinstance(xyz_array, np.ndarray) - xyz_df = grd2xyz(grid=grid, output_type="pandas", outcols=None) - assert isinstance(xyz_df, pd.DataFrame) - assert list(xyz_df.columns) == ["lon", "lat", "z"] def test_grd2xyz_pandas_output_with_o(grid): From 218efcfefd1fa5678b5e6504d4606cd66ca9ef43 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 12 Mar 2024 09:56:05 +0800 Subject: [PATCH 5/6] Type hints for column names --- pygmt/src/grd2xyz.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 7cf5a249cca..7e289d4b587 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -2,7 +2,7 @@ grd2xyz - Convert grid to data table """ -from typing import Literal +from typing import TYPE_CHECKING, Literal import pandas as pd import xarray as xr @@ -16,6 +16,9 @@ validate_output_table_type, ) +if TYPE_CHECKING: + from collections.abc import Hashable + __doctest_skip__ = ["grd2xyz"] @@ -154,13 +157,12 @@ def grd2xyz( "or 'file'." ) + # Set the default column names for the pandas dataframe header. + column_names: list[Hashable] = ["x", "y", "z"] # Let output pandas column names match input DataArray dimension names if output_type == "pandas" and isinstance(grid, xr.DataArray): # Reverse the dims because it is rows, columns ordered. column_names = [grid.dims[1], grid.dims[0], grid.name] - else: - # Set the default column names for the pandas dataframe header. 
- column_names = ["x", "y", "z"] with Session() as lib: with ( From 5bfe4032cb14ec3eadd4d3c9b82fb7b6b55326db Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 13 Mar 2024 13:10:56 +0800 Subject: [PATCH 6/6] Use placeholders for output_type and outfile so that the docstrings can be reused --- pygmt/helpers/decorators.py | 12 ++++++++++++ pygmt/src/grd2xyz.py | 11 ++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pygmt/helpers/decorators.py b/pygmt/helpers/decorators.py index 046cffa5514..28041911d23 100644 --- a/pygmt/helpers/decorators.py +++ b/pygmt/helpers/decorators.py @@ -254,6 +254,18 @@ input and skip trailing text. **Note**: If ``incols`` is also used then the columns given to ``outcols`` correspond to the order after the ``incols`` selection has taken place.""", + "outfile": """ + outfile + File name for saving the result data. Required if ``output_type="file"``. + If specified, ``output_type`` will be forced to be ``"file"``.""", + "output_type": """ + output_type + Desired output type of the result data. + + - ``pandas`` will return a :class:`pandas.DataFrame` object. + - ``numpy`` will return a :class:`numpy.ndarray` object. + - ``file`` will save the result to the file specified by the ``outfile`` + parameter.""", "outgrid": """ outgrid : str or None Name of the output netCDF grid file. For writing a specific grid diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 7e289d4b587..17cfcb246bc 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -56,15 +56,8 @@ def grd2xyz( Parameters ---------- {grid} - output_type - Desired output type of the result data. - - - ``pandas`` will return a :class:`pandas.DataFrame` object. - - ``numpy`` will return a :class:`numpy.ndarray` object. - - ``file`` will save the result to the file given by the ``outfile`` parameter. - outfile - File name for saving the result data. Required if ``output_type`` is ``"file"``. 
- If specified, ``output_type`` will be forced to be ``"file"``. + {output_type} + {outfile} cstyle : str [**f**\|\ **i**]. Replace the x- and y-coordinates on output with the corresponding