Skip to content

Commit 83b1a12

Browse files
authored
pygmt.select: Improve performance by storing output in virtual files (#3108)
1 parent fa3e0e7 commit 83b1a12

File tree

1 file changed

+38
-31
lines changed

1 file changed

+38
-31
lines changed

pygmt/src/select.py

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@
 select - Select data table subsets based on multiple spatial criteria.
 """
 
+from typing import Literal
+
+import numpy as np
 import pandas as pd
 from pygmt.clib import Session
 from pygmt.helpers import (
-    GMTTempFile,
     build_arg_string,
     fmt_docstring,
     kwargs_to_strings,
     use_alias,
+    validate_output_table_type,
 )
 
 __doctest_skip__ = ["select"]
@@ -41,7 +44,12 @@
     w="wrap",
 )
 @kwargs_to_strings(M="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
-def select(data=None, outfile=None, **kwargs):
+def select(
+    data=None,
+    output_type: Literal["pandas", "numpy", "file"] = "pandas",
+    outfile: str | None = None,
+    **kwargs,
+) -> pd.DataFrame | np.ndarray | None:
     r"""
     Select data table subsets based on multiple spatial criteria.
 
@@ -70,8 +78,8 @@ def select(data=None, outfile=None, **kwargs):
     data : str, {table-like}
         Pass in either a file name to an ASCII data table, a 2-D
         {table-classes}.
-    outfile : str
-        The file name for the output ASCII file.
+    {output_type}
+    {outfile}
     {area_thresh}
     dist2pt : str
         *pointfile*\|\ *lon*/*lat*\ **+d**\ *dist*.
@@ -180,12 +188,13 @@ def select(data=None, outfile=None, **kwargs):
 
     Returns
     -------
-    output : pandas.DataFrame or None
-        Return type depends on whether the ``outfile`` parameter is set:
+    ret
+        Return type depends on ``outfile`` and ``output_type``:
 
-        - :class:`pandas.DataFrame` table if ``outfile`` is not set.
-        - None if ``outfile`` is set (filtered output will be stored in file
-          set by ``outfile``).
+        - ``None`` if ``outfile`` is set (output will be stored in file set by
+          ``outfile``)
+        - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
+          (depends on ``output_type``)
 
     Example
     -------
@@ -196,25 +205,23 @@ def select(data=None, outfile=None, **kwargs):
     >>> # longitudes 246 and 247 and latitudes 20 and 21
     >>> out = pygmt.select(data=ship_data, region=[246, 247, 20, 21])
     """
-
-    with GMTTempFile(suffix=".csv") as tmpfile:
-        with Session() as lib:
-            with lib.virtualfile_in(check_kind="vector", data=data) as vintbl:
-                if outfile is None:
-                    outfile = tmpfile.name
-                lib.call_module(
-                    module="select",
-                    args=build_arg_string(kwargs, infile=vintbl, outfile=outfile),
-                )
-
-        # Read temporary csv output to a pandas table
-        if outfile == tmpfile.name:  # if user did not set outfile, return pd.DataFrame
-            try:
-                column_names = data.columns.to_list()
-                result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)
-            except AttributeError:  # 'str' object has no attribute 'columns'
-                result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">")
-        elif outfile != tmpfile.name:  # return None if outfile set, output in outfile
-            result = None
-
-    return result
+    output_type = validate_output_table_type(output_type, outfile=outfile)
+
+    column_names = None
+    if output_type == "pandas" and isinstance(data, pd.DataFrame):
+        column_names = data.columns.to_list()
+
+    with Session() as lib:
+        with (
+            lib.virtualfile_in(check_kind="vector", data=data) as vintbl,
+            lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl,
+        ):
+            lib.call_module(
+                module="select",
+                args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl),
+            )
+        return lib.virtualfile_to_dataset(
+            output_type=output_type,
+            vfname=vouttbl,
+            column_names=column_names,
+        )

0 commit comments

Comments
 (0)