Skip to content

Commit 70fc9e4

Browse files
committed
Refactor the data_kind and the virtualfile_from_data functions
1 parent b7b11c5 commit 70fc9e4

File tree

4 files changed

+99
-105
lines changed

4 files changed

+99
-105
lines changed

pygmt/clib/session.py

+16-19
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
fmt_docstring,
3333
tempfile_from_geojson,
3434
tempfile_from_image,
35+
validate_data_input,
3536
)
3637

3738
FAMILIES = [
@@ -1474,11 +1475,8 @@ def virtualfile_from_data(
14741475
self,
14751476
check_kind=None,
14761477
data=None,
1477-
x=None,
1478-
y=None,
1479-
z=None,
1480-
extra_arrays=None,
1481-
required_z=False,
1478+
vectors=None,
1479+
ncols=1,
14821480
required_data=True,
14831481
):
14841482
"""
@@ -1497,13 +1495,11 @@ def virtualfile_from_data(
14971495
Any raster or vector data format. This could be a file name or
14981496
path, a raster grid, a vector matrix/arrays, or other supported
14991497
data input.
1500-
x/y/z : 1-D arrays or None
1501-
x, y, and z columns as numpy arrays.
1502-
extra_arrays : list of 1-D arrays
1503-
Optional. A list of numpy arrays in addition to x, y, and z.
1504-
All of these arrays must be of the same size as the x/y/z arrays.
1505-
required_z : bool
1506-
State whether the 'z' column is required.
1498+
vectors : list of 1-D arrays or None
1499+
A list of 1-D arrays. Each array will be a column in the table.
1500+
All of these arrays must be of the same size.
1501+
ncols : int
1502+
The minimum number of columns required for the data.
15071503
required_data : bool
15081504
Set to True when 'data' is required, or False when dealing with
15091505
optional virtual files. [Default is True].
@@ -1537,8 +1533,13 @@ def virtualfile_from_data(
15371533
...
15381534
<vector memory>: N = 3 <7/9> <4/6> <1/3>
15391535
"""
1540-
kind = data_kind(
1541-
data, x, y, z, required_z=required_z, required_data=required_data
1536+
kind = data_kind(data, required=required_data)
1537+
validate_data_input(
1538+
data=data,
1539+
vectors=vectors,
1540+
ncols=ncols,
1541+
required_data=required_data,
1542+
kind=kind,
15421543
)
15431544

15441545
if check_kind:
@@ -1579,11 +1580,7 @@ def virtualfile_from_data(
15791580
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
15801581
_data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),)
15811582
elif kind == "vectors":
1582-
_data = [np.atleast_1d(x), np.atleast_1d(y)]
1583-
if z is not None:
1584-
_data.append(np.atleast_1d(z))
1585-
if extra_arrays:
1586-
_data.extend(extra_arrays)
1583+
_data = [np.atleast_1d(v) for v in vectors]
15871584
elif kind == "matrix": # turn 2-D arrays into list of vectors
15881585
try:
15891586
# pandas.Series will be handled below like a 1-D numpy.ndarray

pygmt/helpers/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,5 @@
2020
is_nonstr_iter,
2121
launch_external_viewer,
2222
non_ascii_to_octal,
23+
validate_data_input,
2324
)

pygmt/helpers/utils.py

+81-85
Original file line numberDiff line numberDiff line change
@@ -15,127 +15,133 @@
1515
from pygmt.exceptions import GMTInvalidInput
1616

1717

18-
def _validate_data_input(
19-
data=None, x=None, y=None, z=None, required_z=False, required_data=True, kind=None
18+
def validate_data_input(
19+
data=None, vectors=None, ncols=2, required_data=True, kind=None
2020
):
2121
"""
22-
Check if the combination of data/x/y/z is valid.
22+
Check if the data input is valid.
2323
2424
Examples
2525
--------
26-
>>> _validate_data_input(data="infile")
27-
>>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6])
28-
>>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], z=[7, 8, 9])
29-
>>> _validate_data_input(data=None, required_data=False)
30-
>>> _validate_data_input()
26+
>>> validate_data_input(data="infile")
27+
>>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6]], ncols=2)
28+
>>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], ncols=3)
29+
>>> validate_data_input(data=None, required_data=False)
30+
>>> validate_data_input()
3131
Traceback (most recent call last):
3232
...
3333
pygmt.exceptions.GMTInvalidInput: No input data provided.
34-
>>> _validate_data_input(x=[1, 2, 3])
34+
>>> validate_data_input(vectors=[[1, 2, 3], None], ncols=2)
3535
Traceback (most recent call last):
3636
...
37-
pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
38-
>>> _validate_data_input(y=[4, 5, 6])
37+
pygmt.exceptions.GMTInvalidInput: The 'y' column can't be None.
38+
>>> validate_data_input(vectors=[None, [4, 5, 6]], ncols=2)
3939
Traceback (most recent call last):
4040
...
41-
pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
42-
>>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], required_z=True)
41+
pygmt.exceptions.GMTInvalidInput: The 'x' column can't be None.
42+
>>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6], None], ncols=3)
4343
Traceback (most recent call last):
4444
...
45-
pygmt.exceptions.GMTInvalidInput: Must provide x, y, and z.
45+
pygmt.exceptions.GMTInvalidInput: The 'z' column can't be None.
4646
>>> import numpy as np
4747
>>> import pandas as pd
4848
>>> import xarray as xr
4949
>>> data = np.arange(8).reshape((4, 2))
50-
>>> _validate_data_input(data=data, required_z=True, kind="matrix")
50+
>>> validate_data_input(data=data, ncols=3, kind="matrix")
5151
Traceback (most recent call last):
5252
...
53-
pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
54-
>>> _validate_data_input(
53+
pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
54+
>>> validate_data_input(
5555
... data=pd.DataFrame(data, columns=["x", "y"]),
56-
... required_z=True,
56+
... ncols=3,
5757
... kind="matrix",
5858
... )
5959
Traceback (most recent call last):
6060
...
61-
pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
62-
>>> _validate_data_input(
61+
pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
62+
>>> validate_data_input(
6363
... data=xr.Dataset(pd.DataFrame(data, columns=["x", "y"])),
64-
... required_z=True,
64+
... ncols=3,
6565
... kind="matrix",
6666
... )
6767
Traceback (most recent call last):
6868
...
69-
pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
70-
>>> _validate_data_input(data="infile", x=[1, 2, 3])
69+
pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
70+
>>> validate_data_input(data="infile", vectors=[[1, 2, 3], None])
7171
Traceback (most recent call last):
7272
...
73-
pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
74-
>>> _validate_data_input(data="infile", y=[4, 5, 6])
73+
pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505
74+
>>> validate_data_input(data="infile", vectors=[None, [4, 5, 6]])
7575
Traceback (most recent call last):
7676
...
77-
pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
78-
>>> _validate_data_input(data="infile", z=[7, 8, 9])
77+
pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505
78+
>>> validate_data_input(data="infile", vectors=[None, None, [7, 8, 9]])
7979
Traceback (most recent call last):
8080
...
81-
pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
81+
pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505
8282
8383
Raises
8484
------
8585
GMTInvalidInput
8686
If the data input is not valid.
8787
"""
88-
if data is None: # data is None
89-
if x is None and y is None: # both x and y are None
90-
if required_data: # data is not optional
91-
raise GMTInvalidInput("No input data provided.")
92-
elif x is None or y is None: # either x or y is None
93-
raise GMTInvalidInput("Must provide both x and y.")
94-
if required_z and z is None: # both x and y are not None, now check z
95-
raise GMTInvalidInput("Must provide x, y, and z.")
96-
else: # data is not None
97-
if x is not None or y is not None or z is not None:
98-
raise GMTInvalidInput("Too much data. Use either data or x/y/z.")
99-
# For 'matrix' kind, check if data has the required z column
100-
if kind == "matrix" and required_z:
88+
if kind is None:
89+
kind = data_kind(data=data, required=required_data)
90+
91+
if kind == "vectors": # From data_kind, we know that data is None
92+
if vectors is None:
93+
raise GMTInvalidInput("No input data provided.")
94+
if len(vectors) < ncols:
95+
raise GMTInvalidInput(
96+
f"Requires {ncols} 1-D arrays but got {len(vectors)}."
97+
)
98+
for i, v in enumerate(vectors[:ncols]):
99+
if v is None:
100+
if i < 3:
101+
msg = f"The '{'xyz'[i]}' column can't be None."
102+
else:
103+
msg = "Column {i} can't be None."
104+
raise GMTInvalidInput(msg)
105+
else:
106+
if vectors is not None and any(v is not None for v in vectors):
107+
raise GMTInvalidInput("Too much data. Pass in either 'data' or 1-D arrays.")
108+
if kind == "matrix": # check number of columns for matrix-like data
101109
if hasattr(data, "shape"): # np.ndarray or pd.DataFrame
102-
if len(data.shape) == 1 and data.shape[0] < 3:
103-
raise GMTInvalidInput("data must provide x, y, and z columns.")
104-
if len(data.shape) > 1 and data.shape[1] < 3:
105-
raise GMTInvalidInput("data must provide x, y, and z columns.")
106-
if hasattr(data, "data_vars") and len(data.data_vars) < 3: # xr.Dataset
107-
raise GMTInvalidInput("data must provide x, y, and z columns.")
110+
if len(data.shape) == 1 and data.shape[0] < ncols:
111+
raise GMTInvalidInput(f"data must have at least {ncols} columns.")
112+
if len(data.shape) > 1 and data.shape[1] < ncols:
113+
raise GMTInvalidInput(f"data must have at least {ncols} columns.")
114+
if hasattr(data, "data_vars") and len(data.data_vars) < ncols: # xr.Dataset
115+
raise GMTInvalidInput(f"data must have at least {ncols} columns.")
108116

109117

110-
def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data=True):
118+
def data_kind(data=None, required=True):
111119
"""
112-
Check what kind of data is provided to a module.
120+
Determine the kind of data that will be passed to a module.
113121
114-
Possible types:
122+
It checks the type of the ``data`` argument and determines the kind of
123+
data. Falls back to ``"vectors"`` if ``data`` is None but required.
115124
116-
* a file name provided as 'data'
117-
* a pathlib.PurePath object provided as 'data'
118-
* an xarray.DataArray object provided as 'data'
119-
* a 2-D matrix provided as 'data'
120-
* 1-D arrays x and y (and z, optionally)
121-
* an optional argument (None, bool, int or float) provided as 'data'
125+
Possible data kinds:
122126
123-
Arguments should be ``None`` if not used. If doesn't fit any of these
124-
categories (or fits more than one), will raise an exception.
127+
- ``'file'``: a file name or a pathlib.PurePath object provided as 'data'
128+
- ``'arg'``: an optional argument (None, bool, int or float) provided
129+
as 'data'
130+
- ``'grid'``: an xarray.DataArray with 2 dimensions provided as 'data'
131+
- ``'image'``: an xarray.DataArray with 3 dimensions provided as 'data'
132+
- ``'geojson'``: a geo-like Python object that implements
133+
``__geo_interface__`` (geopandas.GeoDataFrame or shapely.geometry)
134+
provided as 'data'
135+
- ``'matrix'``: a 2-D array provided as 'data'
136+
- ``'vectors'``: a list of 1-D arrays provided as 'vectors'
125137
126138
Parameters
127139
----------
128140
data : str, pathlib.PurePath, None, bool, xarray.DataArray or {table-like}
129141
Pass in either a file name or :class:`pathlib.Path` to an ASCII data
130142
table, an :class:`xarray.DataArray`, a 1-D/2-D
131143
{table-classes} or an optional argument.
132-
x/y : 1-D arrays or None
133-
x and y columns as numpy arrays.
134-
z : 1-D array or None
135-
z column as numpy array. To be used optionally when x and y are given.
136-
required_z : bool
137-
State whether the 'z' column is required.
138-
required_data : bool
144+
required : bool
139145
Set to True when 'data' is required, or False when dealing with
140146
optional virtual files. [Default is True].
141147
@@ -151,49 +157,39 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data
151157
>>> import numpy as np
152158
>>> import xarray as xr
153159
>>> import pathlib
154-
>>> data_kind(data=None, x=np.array([1, 2, 3]), y=np.array([4, 5, 6]))
160+
>>> data_kind(data=None)
155161
'vectors'
156-
>>> data_kind(data=np.arange(10).reshape((5, 2)), x=None, y=None)
162+
>>> data_kind(data=np.arange(10).reshape((5, 2)))
157163
'matrix'
158-
>>> data_kind(data="my-data-file.txt", x=None, y=None)
164+
>>> data_kind(data="my-data-file.txt")
159165
'file'
160-
>>> data_kind(data=pathlib.Path("my-data-file.txt"), x=None, y=None)
166+
>>> data_kind(data=pathlib.Path("my-data-file.txt"))
161167
'file'
162-
>>> data_kind(data=None, x=None, y=None, required_data=False)
168+
>>> data_kind(data=None, required=False)
163169
'arg'
164-
>>> data_kind(data=2.0, x=None, y=None, required_data=False)
170+
>>> data_kind(data=2.0, required=False)
165171
'arg'
166-
>>> data_kind(data=True, x=None, y=None, required_data=False)
172+
>>> data_kind(data=True, required=False)
167173
'arg'
168174
>>> data_kind(data=xr.DataArray(np.random.rand(4, 3)))
169175
'grid'
170176
>>> data_kind(data=xr.DataArray(np.random.rand(3, 4, 5)))
171177
'image'
172178
"""
173-
# determine the data kind
174179
if isinstance(data, (str, pathlib.PurePath)):
175180
kind = "file"
176-
elif isinstance(data, (bool, int, float)) or (data is None and not required_data):
181+
elif isinstance(data, (bool, int, float)) or (data is None and not required):
177182
kind = "arg"
178183
elif isinstance(data, xr.DataArray):
179184
kind = "image" if len(data.dims) == 3 else "grid"
180185
elif hasattr(data, "__geo_interface__"):
181186
# geo-like Python object that implements ``__geo_interface__``
182187
# (geopandas.GeoDataFrame or shapely.geometry)
183188
kind = "geojson"
184-
elif data is not None:
189+
elif data is not None: # anything but None is taken as a matrix
185190
kind = "matrix"
186-
else:
191+
else: # fallback to vectors if data is None but required
187192
kind = "vectors"
188-
_validate_data_input(
189-
data=data,
190-
x=x,
191-
y=y,
192-
z=z,
193-
required_z=required_z,
194-
required_data=required_data,
195-
kind=kind,
196-
)
197193
return kind
198194

199195

pygmt/src/contour.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def contour(self, data=None, x=None, y=None, z=None, **kwargs):
116116

117117
with Session() as lib:
118118
file_context = lib.virtualfile_from_data(
119-
check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
119+
check_kind="vector", data=data, vectors=[x, y, z], ncols=3
120120
)
121121
with file_context as fname:
122122
lib.call_module(

0 commit comments

Comments
 (0)