Skip to content

clib.conversion._to_numpy: Add tests for PyArrow's timestamp type #3621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 28, 2024
54 changes: 54 additions & 0 deletions pygmt/tests/test_clib_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@

_HAS_PYARROW = True
except ImportError:

class pa: # noqa: N801
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

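pa.timestamp() with a tz has no string alias, so we can't pass a string like timestamp[s, UTC] as the dtype. We therefore have to define a dummy pa class with a dummy timestamp() function here, so that the parametrize list can still be evaluated when pyarrow is not installed.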

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we avoid this dummy class by starting from a pandas.Timestamp instead of a Python datetime? The timezone info can be carried over:

from zoneinfo import ZoneInfo

import pandas as pd
import pyarrow as pa

# Build timezone-aware pandas timestamps so that PyArrow infers the
# timezone-aware timestamp type on its own (no explicit `type=` needed).
new_york = ZoneInfo("America/New_York")
stamps = [
    pd.Timestamp(2024, 1, 2, 3, 4, second, tzinfo=new_york) for second in (5, 6)
]
array = pa.array(stamps)
print(array.type)
# timestamp[us, tz=America/New_York]

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can, but I feel it will make the pytest parametrize more complicated.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, let's keep this dummy class then.

"""
A dummy class to mimic pyarrow.
"""

@staticmethod
def timestamp(unit: str, tz: str | None = None):
"""
A dummy function to mimic pyarrow.timestamp.
"""

_HAS_PYARROW = False


Expand Down Expand Up @@ -244,6 +256,7 @@ def test_to_numpy_pandas_series_pyarrow_dtypes_date(dtype, expected_dtype):
# - Date types:
# - date32[day]
# - date64[ms]
# - Timestamp types: timestamp[unit], timestamp[unit, tz]
#
# In PyArrow, array types can be specified in two ways:
#
Expand Down Expand Up @@ -366,3 +379,44 @@ def test_to_numpy_pyarrow_array_pyarrow_dtypes_date(dtype, expected_dtype):
result,
np.array(["2024-01-01", "2024-01-02", "2024-01-03"], dtype=expected_dtype),
)


@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        # No explicit type: PyArrow picks the type for the datetime objects.
        pytest.param(None, "datetime64[us]", id="None"),
        # Timezone-naive timestamp types can be given by their string aliases.
        pytest.param("timestamp[s]", "datetime64[s]", id="timestamp[s]"),
        pytest.param("timestamp[ms]", "datetime64[ms]", id="timestamp[ms]"),
        pytest.param("timestamp[us]", "datetime64[us]", id="timestamp[us]"),
        pytest.param("timestamp[ns]", "datetime64[ns]", id="timestamp[ns]"),
        # pa.timestamp with tz has no string alias, so type objects are passed.
        pytest.param(
            pa.timestamp("s", tz="UTC"),
            "datetime64[s]",
            id="timestamp[s, tz=UTC]",
        ),
        pytest.param(
            pa.timestamp("s", tz="America/New_York"),
            "datetime64[s]",
            id="timestamp[s, tz=America/New_York]",
        ),
        pytest.param(
            pa.timestamp("s", tz="+07:30"),
            "datetime64[s]",
            id="timestamp[s, tz=+07:30]",
        ),
    ],
)
def test_to_numpy_pyarrow_timestamp(dtype, expected_dtype):
    """
    Check that _to_numpy handles PyArrow arrays of PyArrow timestamp types.

    Two naive datetime objects are wrapped in a PyArrow array of the
    parametrized type (or the type PyArrow chooses when ``dtype`` is None), and
    the converted result is compared against the expected NumPy dtype and
    values.

    pyarrow.timestamp(unit, tz=None) can accept units "s", "ms", "us", and "ns".

    Reference: https://arrow.apache.org/docs/python/generated/pyarrow.timestamp.html
    """
    expected_values = ("2024-01-02T03:04:05", "2024-01-02T03:04:06")
    naive_datetimes = [datetime(2024, 1, 2, 3, 4, second) for second in (5, 6)]
    converted = _to_numpy(pa.array(naive_datetimes, type=dtype))
    _check_result(converted, np.datetime64)
    assert converted.dtype == expected_dtype
    # Index explicitly so a too-short result still fails, as with direct lookups.
    for index, expected in enumerate(expected_values):
        assert converted[index] == np.datetime64(expected)