Skip to content

Commit 7eb93f2

Browse files
authored
clib.Session.virtualfile_from_vectors: Now takes a sequence of vectors as its single argument (Passing multiple arguments will be unsupported in v0.16.0) (#3522)
1 parent 0482394 commit 7eb93f2

File tree

2 files changed

+92
-58
lines changed

2 files changed

+92
-58
lines changed

pygmt/clib/session.py

Lines changed: 62 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1324,37 +1324,36 @@ def open_virtual_file(self, family, geometry, direction, data):
13241324
return self.open_virtualfile(family, geometry, direction, data)
13251325

13261326
@contextlib.contextmanager
1327-
def virtualfile_from_vectors(self, *vectors):
1327+
def virtualfile_from_vectors(
1328+
self, vectors: Sequence, *args
1329+
) -> Generator[str, None, None]:
13281330
"""
1329-
Store 1-D arrays as columns of a table inside a virtual file.
1331+
Store a sequence of 1-D vectors as columns of a dataset inside a virtual file.
13301332
1331-
Use the virtual file name to pass in the data in your vectors to a GMT
1332-
module.
1333+
Use the virtual file name to pass the dataset with your vectors to a GMT module.
13331334
1334-
Context manager (use in a ``with`` block). Yields the virtual file name
1335-
that you can pass as an argument to a GMT module call. Closes the
1336-
virtual file upon exit of the ``with`` block.
1335+
Context manager (use in a ``with`` block). Yields the virtual file name that you
1336+
can pass as an argument to a GMT module call. Closes the virtual file upon exit
1337+
of the ``with`` block.
13371338
1338-
Use this instead of creating the data container and virtual file by
1339-
hand with :meth:`pygmt.clib.Session.create_data`,
1340-
:meth:`pygmt.clib.Session.put_vector`, and
1341-
:meth:`pygmt.clib.Session.open_virtualfile`.
1339+
Use this instead of creating the data container and virtual file by hand with
1340+
:meth:`pygmt.clib.Session.create_data`, :meth:`pygmt.clib.Session.put_vector`,
1341+
and :meth:`pygmt.clib.Session.open_virtualfile`.
13421342
1343-
If the arrays are C contiguous blocks of memory, they will be passed
1344-
without copying to GMT. If they are not (e.g., they are columns of a
1345-
2-D array), they will need to be copied to a contiguous block.
1343+
If the arrays are C contiguous blocks of memory, they will be passed without
1344+
copying to GMT. If they are not (e.g., they are columns of a 2-D array), they
1345+
will need to be copied to a contiguous block.
13461346
13471347
Parameters
13481348
----------
1349-
vectors : 1-D arrays
1350-
The vectors that will be included in the array. All must be of the
1349+
vectors
1350+
A sequence of vectors that will be stored in the dataset. All must be of the
13511351
same size.
13521352
13531353
Yields
13541354
------
1355-
fname : str
1356-
The name of virtual file. Pass this as a file name argument to a
1357-
GMT module.
1355+
fname
1356+
The name of the virtual file. Pass this as a file name argument to a GMT module.
13581357
13591358
Examples
13601359
--------
@@ -1366,34 +1365,49 @@ def virtualfile_from_vectors(self, *vectors):
13661365
>>> y = np.array([4, 5, 6])
13671366
>>> z = pd.Series([7, 8, 9])
13681367
>>> with Session() as ses:
1369-
... with ses.virtualfile_from_vectors(x, y, z) as fin:
1368+
... with ses.virtualfile_from_vectors((x, y, z)) as fin:
13701369
... # Send the output to a file so that we can read it
13711370
... with GMTTempFile() as fout:
13721371
... ses.call_module("info", [fin, f"->{fout.name}"])
13731372
... print(fout.read().strip())
13741373
<vector memory>: N = 3 <1/3> <4/6> <7/9>
13751374
"""
1376-
# Conversion to a C-contiguous array needs to be done here and not in
1377-
# put_vector or put_strings because we need to maintain a reference to
1378-
# the copy while it is being used by the C API. Otherwise, the array
1379-
# would be garbage collected and the memory freed. Creating it in this
1380-
# context manager guarantees that the copy will be around until the
1381-
# virtual file is closed. The conversion is implicit in
1375+
# "*args" is added in v0.14.0 for backward-compatibility with the deprecated
1376+
# syntax of passing multiple vectors as positional arguments.
1377+
# Remove it in v0.16.0.
1378+
if len(args) > 0:
1379+
msg = (
1380+
"Passing multiple arguments to Session.virtualfile_from_vectors is "
1381+
"deprecated since v0.14.0 and will be unsupported in v0.16.0. "
1382+
"Put all vectors in a sequence (a tuple or a list) instead and pass "
1383+
"the sequence as the single argument to this function. "
1384+
"E.g., use `with lib.virtualfile_from_vectors((x, y, z)) as vfile` "
1385+
"instead of `with lib.virtualfile_from_vectors(x, y, z) as vfile`."
1386+
)
1387+
warnings.warn(message=msg, category=FutureWarning, stacklevel=3)
1388+
vectors = (vectors, *args)
1389+
1390+
# Conversion to a C-contiguous array needs to be done here and not in put_vector
1391+
# or put_strings because we need to maintain a reference to the copy while it is
1392+
# being used by the C API. Otherwise, the array would be garbage collected and
1393+
# the memory freed. Creating it in this context manager guarantees that the copy
1394+
# will be around until the virtual file is closed. The conversion is implicit in
13821395
# vectors_to_arrays.
13831396
arrays = vectors_to_arrays(vectors)
13841397

13851398
columns = len(arrays)
1386-
# Find arrays that are of string dtype from column 3 onwards
1387-
# Assumes that first 2 columns contains coordinates like longitude
1388-
# latitude, or datetime string types.
1399+
# Find arrays that are of string dtype from column 3 onwards. Assumes that first
1400+
# 2 columns contain coordinates like longitude, latitude, or datetime string
1401+
# types.
13891402
for col, array in enumerate(arrays[2:]):
13901403
if pd.api.types.is_string_dtype(array.dtype):
13911404
columns = col + 2
13921405
break
13931406

13941407
rows = len(arrays[0])
13951408
if not all(len(i) == rows for i in arrays):
1396-
raise GMTInvalidInput("All arrays must have same size.")
1409+
msg = "All arrays must have same size."
1410+
raise GMTInvalidInput(msg)
13971411

13981412
family = "GMT_IS_DATASET|GMT_VIA_VECTOR"
13991413
geometry = "GMT_IS_POINT"
@@ -1406,8 +1420,8 @@ def virtualfile_from_vectors(self, *vectors):
14061420
for col, array in enumerate(arrays[:columns]):
14071421
self.put_vector(dataset, column=col, vector=array)
14081422

1409-
# Use put_strings for last column(s) with string type data
1410-
# Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings
1423+
# Use put_strings for last column(s) with string type data.
1424+
# Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings.
14111425
string_arrays = arrays[columns:]
14121426
if string_arrays:
14131427
if len(string_arrays) == 1:
@@ -1682,7 +1696,7 @@ def virtualfile_from_stringio(
16821696
seg.header = None
16831697
seg.text = None
16841698

1685-
def virtualfile_in( # noqa: PLR0912
1699+
def virtualfile_in(
16861700
self,
16871701
check_kind=None,
16881702
data=None,
@@ -1781,19 +1795,18 @@ def virtualfile_in( # noqa: PLR0912
17811795
"vectors": self.virtualfile_from_vectors,
17821796
}[kind]
17831797

1784-
# Ensure the data is an iterable (Python list or tuple).
1798+
# "_data" is the data that will be passed to the _virtualfile_from function.
1799+
# "_data" defaults to "data" but should be adjusted for some cases.
1800+
_data = data
17851801
match kind:
1786-
case "arg" | "file" | "geojson" | "grid" | "image" | "stringio":
1787-
_data = (data,)
1788-
if kind == "image" and data.dtype != "uint8":
1789-
msg = (
1790-
f"Input image has dtype: {data.dtype} which is unsupported, "
1791-
"and may result in an incorrect output. Please recast image "
1792-
"to a uint8 dtype and/or scale to 0-255 range, e.g. "
1793-
"using a histogram equalization function like "
1794-
"skimage.exposure.equalize_hist."
1795-
)
1796-
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
1802+
case "image" if data.dtype != "uint8":
1803+
msg = (
1804+
f"Input image has dtype: {data.dtype} which is unsupported, and "
1805+
"may result in an incorrect output. Please recast image to a uint8 "
1806+
"dtype and/or scale to 0-255 range, e.g. using a histogram "
1807+
"equalization function like skimage.exposure.equalize_hist."
1808+
)
1809+
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
17971810
case "empty": # data is None, so data must be given via x/y/z.
17981811
_data = [x, y]
17991812
if z is not None:
@@ -1808,19 +1821,17 @@ def virtualfile_in( # noqa: PLR0912
18081821
else:
18091822
# Python list, tuple, numpy.ndarray, and pandas.Series types
18101823
_data = np.atleast_2d(np.asanyarray(data).T)
1811-
case "matrix":
1824+
case "matrix" if data.dtype.kind not in "iuf":
18121825
# GMT can only accept a 2-D matrix of signed integer (i),
18131826
# unsigned integer (u) or floating point (f) types. For other data
18141827
# types, we need to use virtualfile_from_vectors instead, which turns
18151828
# the matrix into a list of vectors and allows for better handling of
18161829
# non-integer/float type inputs (e.g. for string or datetime data types)
1817-
_data = (data,)
1818-
if data.dtype.kind not in "iuf":
1819-
_virtualfile_from = self.virtualfile_from_vectors
1820-
_data = data.T
1830+
_virtualfile_from = self.virtualfile_from_vectors
1831+
_data = data.T
18211832

18221833
# Finally create the virtualfile from the data, to be passed into GMT
1823-
file_context = _virtualfile_from(*_data)
1834+
file_context = _virtualfile_from(_data)
18241835
return file_context
18251836

18261837
def virtualfile_from_data(

pygmt/tests/test_clib_virtualfile_from_vectors.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def test_virtualfile_from_vectors(dtypes):
4444
y = np.arange(size, size * 2, 1, dtype=dtype)
4545
z = np.arange(size * 2, size * 3, 1, dtype=dtype)
4646
with clib.Session() as lib:
47-
with lib.virtualfile_from_vectors(x, y, z) as vfile:
47+
with lib.virtualfile_from_vectors((x, y, z)) as vfile:
4848
with GMTTempFile() as outfile:
4949
lib.call_module("info", [vfile, f"->{outfile.name}"])
5050
output = outfile.read(keep_tabs=True)
@@ -64,7 +64,7 @@ def test_virtualfile_from_vectors_one_string_or_object_column(dtype):
6464
y = np.arange(size, size * 2, 1, dtype=np.int32)
6565
strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=dtype)
6666
with clib.Session() as lib:
67-
with lib.virtualfile_from_vectors(x, y, strings) as vfile:
67+
with lib.virtualfile_from_vectors((x, y, strings)) as vfile:
6868
with GMTTempFile() as outfile:
6969
lib.call_module("convert", [vfile, f"->{outfile.name}"])
7070
output = outfile.read(keep_tabs=True)
@@ -86,7 +86,7 @@ def test_virtualfile_from_vectors_two_string_or_object_columns(dtype):
8686
strings1 = np.array(["a", "bc", "def", "ghij", "klmnolooong"], dtype=dtype)
8787
strings2 = np.array(["pqrst", "uvwx", "yz!", "@#", "$"], dtype=dtype)
8888
with clib.Session() as lib:
89-
with lib.virtualfile_from_vectors(x, y, strings1, strings2) as vfile:
89+
with lib.virtualfile_from_vectors((x, y, strings1, strings2)) as vfile:
9090
with GMTTempFile() as outfile:
9191
lib.call_module("convert", [vfile, f"->{outfile.name}"])
9292
output = outfile.read(keep_tabs=True)
@@ -105,7 +105,7 @@ def test_virtualfile_from_vectors_transpose(dtypes):
105105
for dtype in dtypes:
106106
data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
107107
with clib.Session() as lib:
108-
with lib.virtualfile_from_vectors(*data.T) as vfile:
108+
with lib.virtualfile_from_vectors(data.T) as vfile:
109109
with GMTTempFile() as outfile:
110110
lib.call_module("info", [vfile, "-C", f"->{outfile.name}"])
111111
output = outfile.read(keep_tabs=True)
@@ -122,7 +122,7 @@ def test_virtualfile_from_vectors_diff_size():
122122
y = np.arange(6)
123123
with clib.Session() as lib:
124124
with pytest.raises(GMTInvalidInput):
125-
with lib.virtualfile_from_vectors(x, y):
125+
with lib.virtualfile_from_vectors((x, y)):
126126
pass
127127

128128

@@ -143,7 +143,7 @@ def test_virtualfile_from_vectors_pandas(dtypes_pandas):
143143
dtype=dtype,
144144
)
145145
with clib.Session() as lib:
146-
with lib.virtualfile_from_vectors(data.x, data.y, data.z) as vfile:
146+
with lib.virtualfile_from_vectors((data.x, data.y, data.z)) as vfile:
147147
with GMTTempFile() as outfile:
148148
lib.call_module("info", [vfile, f"->{outfile.name}"])
149149
output = outfile.read(keep_tabs=True)
@@ -163,10 +163,33 @@ def test_virtualfile_from_vectors_arraylike():
163163
y = tuple(range(size, size * 2, 1))
164164
z = range(size * 2, size * 3, 1)
165165
with clib.Session() as lib:
166-
with lib.virtualfile_from_vectors(x, y, z) as vfile:
166+
with lib.virtualfile_from_vectors((x, y, z)) as vfile:
167167
with GMTTempFile() as outfile:
168168
lib.call_module("info", [vfile, f"->{outfile.name}"])
169169
output = outfile.read(keep_tabs=True)
170170
bounds = "\t".join([f"<{min(i):.0f}/{max(i):.0f}>" for i in (x, y, z)])
171171
expected = f"<vector memory>: N = {size}\t{bounds}\n"
172172
assert output == expected
173+
174+
175+
def test_virtualfile_from_vectors_args():
176+
"""
177+
Test the backward compatibility of the deprecated syntax for passing multiple
178+
vectors.
179+
180+
This test is the same as test_virtualfile_from_vectors_arraylike, but using the
181+
old syntax.
182+
"""
183+
size = 13
184+
x = list(range(0, size, 1))
185+
y = tuple(range(size, size * 2, 1))
186+
z = range(size * 2, size * 3, 1)
187+
with pytest.warns(FutureWarning, match="virtualfile_from_vectors"):
188+
with clib.Session() as lib:
189+
with lib.virtualfile_from_vectors(x, y, z) as vfile:
190+
with GMTTempFile() as outfile:
191+
lib.call_module("info", [vfile, f"->{outfile.name}"])
192+
output = outfile.read(keep_tabs=True)
193+
bounds = "\t".join([f"<{min(i):.0f}/{max(i):.0f}>" for i in (x, y, z)])
194+
expected = f"<vector memory>: N = {size}\t{bounds}\n"
195+
assert output == expected

0 commit comments

Comments
 (0)