From ce2b8309f39115f4cc98ed67c099f75a7475a720 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 11 Nov 2024 22:57:34 +0800 Subject: [PATCH 1/5] clib.conversion._to_numpy: Add tests for PyArrow's timestamp type --- pygmt/tests/test_clib_to_numpy.py | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 4c95277f072..248536eb9cb 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -17,8 +17,10 @@ import pyarrow as pa _HAS_PYARROW = True + pa_timestamp = pa.timestamp except ImportError: _HAS_PYARROW = False + pa_timestamp = None def _check_result(result, expected_dtype): @@ -235,6 +237,7 @@ def test_to_numpy_pandas_series_pyarrow_dtypes_date(dtype, expected_dtype): # - Date types: # - date32[day] # - date64[ms] +# - Datetime types: timestamp # # In PyArrow, array types can be specified in two ways: # @@ -357,3 +360,43 @@ def test_to_numpy_pyarrow_array_pyarrow_dtypes_date(dtype, expected_dtype): result, np.array(["2024-01-01", "2024-01-02", "2024-01-03"], dtype=expected_dtype), ) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed") +@pytest.mark.parametrize( + ("dtype", "expected_dtype"), + [ + pytest.param(None, "datetime64[us]", id="None"), + pytest.param("timestamp[s]", "datetime64[s]", id="timestamp[s]"), + pytest.param("timestamp[ms]", "datetime64[ms]", id="timestamp[ms]"), + pytest.param("timestamp[us]", "datetime64[us]", id="timestamp[us]"), + pytest.param("timestamp[ns]", "datetime64[ns]", id="timestamp[ns]"), + pytest.param( + pa_timestamp("s", tz="UTC"), "datetime64[s]", id="timestamp[s, tz=UTC]" + ), # pa.timestamp with tz has no string alias. 
+ pytest.param( + pa_timestamp("s", tz="America/New_York"), + "datetime64[s]", + id="timestamp[s, tz=America/New_York]", + ), + pytest.param( + pa_timestamp("s", tz="+07:30"), + "datetime64[s]", + id="timestamp[s, tz=+07:30]", + ), + ], +) +def test_to_numpy_pyarrow_timestamp(dtype, expected_dtype): + """ + Test the _to_numpy function with PyArrow arrays of PyArrow datetime types. + + pyarrow.timestamp(unit, tz=None) can accept units "s", "ms", "us", and "ns". + + Reference: https://arrow.apache.org/docs/python/generated/pyarrow.timestamp.html + """ + data = [datetime(2024, 1, 2, 3, 4, 5), datetime(2024, 1, 2, 3, 4, 6)] + array = pa.array(data, type=dtype) + result = _to_numpy(array) + _check_result(result, np.datetime64) + assert result.dtype == expected_dtype + npt.assert_array_equal(result, array) From 214f060a8c71ad9597b2dec3223033e97dfbee59 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 16 Nov 2024 15:37:54 +0800 Subject: [PATCH 2/5] Add a dummy function to mimic pyarrow --- pygmt/tests/test_clib_to_numpy.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 248536eb9cb..17540a449df 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -17,10 +17,20 @@ import pyarrow as pa _HAS_PYARROW = True - pa_timestamp = pa.timestamp except ImportError: + + class pa: # noqa: N801 + """ + A dummy class to mimic pyarrow. + """ + + @staticmethod + def timestamp(*args, **kwargs): + """ + A dummy function to mimic pyarrow.timestamp. 
+ """ + _HAS_PYARROW = False - pa_timestamp = None def _check_result(result, expected_dtype): @@ -372,15 +382,15 @@ def test_to_numpy_pyarrow_array_pyarrow_dtypes_date(dtype, expected_dtype): pytest.param("timestamp[us]", "datetime64[us]", id="timestamp[us]"), pytest.param("timestamp[ns]", "datetime64[ns]", id="timestamp[ns]"), pytest.param( - pa_timestamp("s", tz="UTC"), "datetime64[s]", id="timestamp[s, tz=UTC]" + pa.timestamp("s", tz="UTC"), "datetime64[s]", id="timestamp[s, tz=UTC]" ), # pa.timestamp with tz has no string alias. pytest.param( - pa_timestamp("s", tz="America/New_York"), + pa.timestamp("s", tz="America/New_York"), "datetime64[s]", id="timestamp[s, tz=America/New_York]", ), pytest.param( - pa_timestamp("s", tz="+07:30"), + pa.timestamp("s", tz="+07:30"), "datetime64[s]", id="timestamp[s, tz=+07:30]", ), From c6224385d916245180dee196b50e71ee00fcafef Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 28 Nov 2024 15:44:35 +0800 Subject: [PATCH 3/5] Explicitly check each array element since np.assert_array_equal needs to do conversion --- pygmt/tests/test_clib_to_numpy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 4c0d5da6b41..b3ae1a05925 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -418,4 +418,5 @@ def test_to_numpy_pyarrow_timestamp(dtype, expected_dtype): result = _to_numpy(array) _check_result(result, np.datetime64) assert result.dtype == expected_dtype - npt.assert_array_equal(result, array) + assert result[0] == np.datetime64("2024-01-02T03:04:05") + assert result[1] == np.datetime64("2024-01-02T03:04:06") From 7bc1f6fdfbad4e51b2f1813630f6b20a5b0cf1a8 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 28 Nov 2024 17:41:56 +0800 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/tests/test_clib_to_numpy.py | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index b3ae1a05925..6604fd17f4d 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -25,7 +25,7 @@ class pa: # noqa: N801 """ @staticmethod - def timestamp(*args, **kwargs): + def timestamp(unit: str, tz: str | None = None): """ A dummy function to mimic pyarrow.timestamp. """ @@ -407,7 +407,7 @@ def test_to_numpy_pyarrow_array_pyarrow_dtypes_date(dtype, expected_dtype): ) def test_to_numpy_pyarrow_timestamp(dtype, expected_dtype): """ - Test the _to_numpy function with PyArrow arrays of PyArrow datetime types. + Test the _to_numpy function with PyArrow arrays of PyArrow timestamp types. pyarrow.timestamp(unit, tz=None) can accept units "s", "ms", "us", and "ns". From 5c2eb46139b9c6cf08e141106db170e63c101c65 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 28 Nov 2024 17:47:40 +0800 Subject: [PATCH 5/5] Improve comments about timestamp types --- pygmt/tests/test_clib_to_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/tests/test_clib_to_numpy.py b/pygmt/tests/test_clib_to_numpy.py index 6604fd17f4d..d67381b8045 100644 --- a/pygmt/tests/test_clib_to_numpy.py +++ b/pygmt/tests/test_clib_to_numpy.py @@ -256,7 +256,7 @@ def test_to_numpy_pandas_series_pyarrow_dtypes_date(dtype, expected_dtype): # - Date types: # - date32[day] # - date64[ms] -# - Datetime types: timestamp +# - Timestamp types: timestamp[unit], timestamp[unit, tz] # # In PyArrow, array types can be specified in two ways: #