Skip to content

Commit 25914d8

Browse files
weiji14 and seisman
authored
pyarrow: Check compatibility of pyarrow-backed pandas objects with numeric dtypes (#2774)
* Ensure that pyarrow-backed pandas.Series can be read Install pyarrow as an optional dependency, and check that pandas.Series objects backed by pyarrow dtypes (e.g. 'uint8[pyarrow]') can be read by virtualfile_from_vectors. * Ensure that pygmt.info can work with pyarrow int64/float64 dtypes Check that pandas.Series and pandas.DataFrame objects backed by pyarrow dtypes (e.g. 'int64[pyarrow]' and 'float64[pyarrow]') can be read by pygmt.info. * Add xfail test for test_geopandas_plot_int_dtypes casting to pyarrow int Geopandas doesn't support casting to pyarrow dtypes like 'int32[pyarrow]' and 'int64[pyarrow]' yet, but adding an xfail test so that we don't forget to test in the future. * Clarify reason for test_geopandas_plot_int_dtypes xfail Actually, casting to pyarrow integer dtypes works, but writing to the temporary OGR_GMT file doesn't. * Add optional pyarrow dependency to ci_test_dev and ci_tests_legacy Ensure that previous and future versions of GMT are compatible with PyArrow too. * Add note about support of PyArrow dtypes to doc/install.rst Mention that PyGMT does have some initial support of Pandas objects backed by PyArrow-dtype arrays, but only uint/int/float dtypes for now. * Use importlib.util.find_spec instead of try-except block Cleaner way to check if pyarrow is installed or not. --------- Co-authored-by: Dongdong Tian <[email protected]>
1 parent 66d4d5c commit 25914d8

File tree

7 files changed

+57
-10
lines changed

7 files changed

+57
-10
lines changed

.github/workflows/ci_tests.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
optional-packages: ''
7272
- python-version: '3.12'
7373
numpy-version: '1.26'
74-
optional-packages: ' contextily geopandas ipython rioxarray sphinx-gallery'
74+
optional-packages: ' contextily geopandas ipython pyarrow rioxarray sphinx-gallery'
7575

7676
timeout-minutes: 30
7777
defaults:

.github/workflows/ci_tests_dev.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ jobs:
153153
python -m pip install --pre --prefer-binary \
154154
--extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
155155
numpy pandas xarray netCDF4 packaging \
156-
build contextily dvc geopandas ipython rioxarray \
156+
build contextily dvc geopandas ipython pyarrow rioxarray \
157157
'pytest>=6.0' pytest-cov pytest-doctestplus pytest-mpl \
158158
sphinx-gallery
159159

.github/workflows/ci_tests_legacy.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ jobs:
7272
contextily
7373
geopandas
7474
ipython
75+
pyarrow
7576
rioxarray
7677
sphinx-gallery
7778
build

doc/install.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,15 @@ The following are optional dependencies:
107107
* `GeoPandas <https://geopandas.org>`__: For using and plotting GeoDataFrame objects.
108108
* `RioXarray <https://corteva.github.io/rioxarray>`__: For saving multi-band rasters to GeoTIFFs.
109109

110+
.. note::
111+
112+
If you have `PyArrow <https://arrow.apache.org/docs/python/index.html>`__
113+
installed, PyGMT does have some initial support for ``pandas.Series`` and
114+
``pandas.DataFrame`` objects with Apache Arrow-backed arrays. Specifically,
115+
only uint/int/float dtypes are supported for now. Support for datetime and
116+
string Arrow dtypes is still a work in progress. For more details, see
117+
`issue #2800 <https://github.com/GenericMappingTools/pygmt/issues/2800>`__.
118+
110119
Installing GMT and other dependencies
111120
-------------------------------------
112121

pygmt/tests/test_clib_virtualfiles.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Test the C API functions related to virtual files.
33
"""
44
import os
5+
from importlib.util import find_spec
56
from itertools import product
67

78
import numpy as np
@@ -321,16 +322,21 @@ def test_virtualfile_from_matrix_slice(dtypes):
321322

322323
def test_virtualfile_from_vectors_pandas(dtypes):
323324
"""
324-
Pass vectors to a dataset using pandas Series.
325+
Pass vectors to a dataset using pandas.Series, checking both numpy and
326+
pyarrow dtypes.
325327
"""
326328
size = 13
329+
if find_spec("pyarrow") is not None:
330+
dtypes.extend([f"{dtype}[pyarrow]" for dtype in dtypes])
331+
327332
for dtype in dtypes:
328333
data = pd.DataFrame(
329334
data={
330-
"x": np.arange(size, dtype=dtype),
331-
"y": np.arange(size, size * 2, 1, dtype=dtype),
332-
"z": np.arange(size * 2, size * 3, 1, dtype=dtype),
333-
}
335+
"x": np.arange(size),
336+
"y": np.arange(size, size * 2, 1),
337+
"z": np.arange(size * 2, size * 3, 1),
338+
},
339+
dtype=dtype,
334340
)
335341
with clib.Session() as lib:
336342
with lib.virtualfile_from_vectors(data.x, data.y, data.z) as vfile:

pygmt/tests/test_geopandas.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pandas as pd
66
import pytest
77
from pygmt import Figure, info, makecpt, which
8+
from pygmt.helpers.testing import skip_if_no
89

910
gpd = pytest.importorskip("geopandas")
1011
shapely = pytest.importorskip("shapely")
@@ -161,6 +162,24 @@ def test_geopandas_plot3d_non_default_circle():
161162
"int64",
162163
pd.Int32Dtype(),
163164
pd.Int64Dtype(),
165+
pytest.param(
166+
"int32[pyarrow]",
167+
marks=[
168+
skip_if_no(package="pyarrow"),
169+
pytest.mark.xfail(
170+
reason="geopandas doesn't support writing columns with pyarrow dtypes to OGR_GMT yet."
171+
),
172+
],
173+
),
174+
pytest.param(
175+
"int64[pyarrow]",
176+
marks=[
177+
skip_if_no(package="pyarrow"),
178+
pytest.mark.xfail(
179+
reason="geopandas doesn't support writing columns with pyarrow dtypes to OGR_GMT yet."
180+
),
181+
],
182+
),
164183
],
165184
)
166185
@pytest.mark.mpl_image_compare(filename="test_geopandas_plot_int_dtypes.png")

pygmt/tests/test_info.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import xarray as xr
1313
from pygmt import info
1414
from pygmt.exceptions import GMTInvalidInput
15+
from pygmt.helpers.testing import skip_if_no
1516

1617
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
1718
POINTS_DATA = os.path.join(TEST_DATA_DIR, "points.txt")
@@ -74,16 +75,27 @@ def test_info_2d_list():
7475
assert output == expected_output
7576

7677

77-
def test_info_series():
78+
@pytest.mark.parametrize(
79+
"dtype",
80+
["int64", pytest.param("int64[pyarrow]", marks=skip_if_no(package="pyarrow"))],
81+
)
82+
def test_info_series(dtype):
7883
"""
7984
Make sure info works on a pandas.Series input.
8085
"""
81-
output = info(pd.Series(data=[0, 4, 2, 8, 6]))
86+
output = info(pd.Series(data=[0, 4, 2, 8, 6], dtype=dtype))
8287
expected_output = "<vector memory>: N = 5 <0/8>\n"
8388
assert output == expected_output
8489

8590

86-
def test_info_dataframe():
91+
@pytest.mark.parametrize(
92+
"dtype",
93+
[
94+
"float64",
95+
pytest.param("float64[pyarrow]", marks=skip_if_no(package="pyarrow")),
96+
],
97+
)
98+
def test_info_dataframe(dtype):
8799
"""
88100
Make sure info works on pandas.DataFrame inputs.
89101
"""

0 commit comments

Comments
 (0)