Skip to content

Refactor info to allow datetime inputs from xarray.Dataset and pandas.DataFrame tables #619

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 21, 2020
18 changes: 11 additions & 7 deletions pygmt/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ def info(table, **kwargs):

Parameters
----------
table : pandas.DataFrame or np.ndarray or str
Either a pandas dataframe, a 1D/2D numpy.ndarray or a file name to an
ASCII data table.
table : str or np.ndarray or pandas.DataFrame or xarray.Dataset
Pass in either a file name to an ASCII data table, a 1D/2D numpy array,
a pandas dataframe, or an xarray dataset made up of 1D xarray.DataArray
data variables.
per_column : bool
Report the min/max values per column in separate columns.
spacing : str
Expand All @@ -107,10 +108,13 @@ def info(table, **kwargs):
if kind == "file":
file_context = dummy_context(table)
elif kind == "matrix":
_table = np.asanyarray(table)
if table.ndim == 1: # 1D arrays need to be 2D and transposed
_table = np.transpose(np.atleast_2d(_table))
file_context = lib.virtualfile_from_matrix(_table)
try:
# pandas.DataFrame and xarray.Dataset types
arrays = [array for _, array in table.items()]
except AttributeError:
# Python lists, tuples, and numpy ndarray types
arrays = np.atleast_2d(np.asanyarray(table).T)
file_context = lib.virtualfile_from_vectors(*arrays)
else:
raise GMTInvalidInput(f"Unrecognized data type: {type(table)}")

Expand Down
53 changes: 49 additions & 4 deletions pygmt/tests/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@
import pandas as pd
import pytest
import xarray as xr
from packaging.version import Version

from .. import info
from .. import clib, info
from ..exceptions import GMTInvalidInput

TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
POINTS_DATA = os.path.join(TEST_DATA_DIR, "points.txt")

# Look up the GMT library version once at import time; the xfail markers on
# the datetime tests below compare against it.
with clib.Session() as _lib:
gmt_version = Version(_lib.info["version"])


def test_info():
"Make sure info works on file name inputs"
Expand All @@ -33,7 +37,48 @@ def test_info_dataframe():
table = pd.read_csv(POINTS_DATA, sep=" ", header=None)
output = info(table=table)
expected_output = (
"<matrix memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
"<vector memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
)
assert output == expected_output


@pytest.mark.xfail(
    condition=gmt_version <= Version("6.1.1"),
    reason="UNIX timestamps returned instead of ISO datetime, should work on GMT 6.2.0 "
    "after https://github.com/GenericMappingTools/gmt/issues/4241 is resolved",
)
def test_info_pandas_dataframe_time_column():
    """
    Check that info reports an ISO datetime range for a pandas.DataFrame
    that contains a datetime column alongside a numeric column.
    """
    # Five consecutive daily timestamps starting at 2020-01-01.
    timestamps = pd.date_range(start="2020-01-01", periods=5)
    frame = pd.DataFrame(data={"z": [10, 13, 12, 15, 14], "time": timestamps})

    result = info(table=frame)

    assert result == (
        "<vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )
Comment on lines +58 to +62
Copy link
Member Author

@weiji14 weiji14 Sep 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this test currently fails, giving what looks like a UNIX timestamp rather than an ISO datetime output:

E       AssertionError: assert '<vector memo...1578182400>\n' == '<vector memo...5T00:00:00>\n'
E         - <vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>
E         + <vector memory>: N = 5 <10/15> <1577836800/1578182400>

Will need to investigate, but any idea why this is happening? Or is this expected behaviour?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GMT_Put_Vector started to accept datetime strings in PR GenericMappingTools/gmt#3396. In this implementation, all datetime strings are converted to double internally.

Looks like a GMT API bug to me.

Copy link
Member Author

@weiji14 weiji14 Sep 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the inputs are read correctly here: we can pass in datetime types using the GMT_DATETIME enum. The output is also the correct UNIX timestamp range, but I doubt it's what users would expect.

I suppose we can xfail this for 6.1.1, but I would prefer to have this reported and/or fixed in GMT master first (i.e. for 6.2.0). Edit: issue opened at GenericMappingTools/gmt#4241. A workaround might be to use np.datetime64(1577836800, 's') to get numpy.datetime64('2020-01-01T00:00:00'), but this isn't a nice solution.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose we can xfail this for 6.1.1,

That sounds fine to me.



@pytest.mark.xfail(
    condition=gmt_version <= Version("6.1.1"),
    reason="UNIX timestamp returned instead of ISO datetime, should work on GMT 6.2.0 "
    "after https://github.com/GenericMappingTools/gmt/issues/4241 is resolved",
)
def test_info_xarray_dataset_time_column():
    """
    Check that info reports an ISO datetime range for an xarray.Dataset whose
    1D data variables include a datetime variable.
    """
    # Both data variables share the single "index" dimension.
    z_values = [10, 13, 12, 15, 14]
    timestamps = pd.date_range(start="2020-01-01", periods=5)
    dataset = xr.Dataset(
        coords={"index": [0, 1, 2, 3, 4]},
        data_vars={"z": ("index", z_values), "time": ("index", timestamps)},
    )

    result = info(table=dataset)

    assert result == (
        "<vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )

Expand All @@ -43,15 +88,15 @@ def test_info_2d_array():
table = np.loadtxt(POINTS_DATA)
output = info(table=table)
expected_output = (
"<matrix memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
"<vector memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
)
assert output == expected_output


def test_info_1d_array():
"Make sure info works on 1D numpy.ndarray inputs"
output = info(table=np.arange(20))
expected_output = "<matrix memory>: N = 20 <0/19>\n"
expected_output = "<vector memory>: N = 20 <0/19>\n"
assert output == expected_output


Expand Down