Skip to content

Commit 0cdb3cb

Browse files
committed
pygmt.select: Improve performance by storing output in virtual files
1 parent 3a507a8 commit 0cdb3cb

File tree

1 file changed

+37
-31
lines changed

1 file changed

+37
-31
lines changed

pygmt/src/select.py

+37 -31
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,17 @@
22
select - Select data table subsets based on multiple spatial criteria.
33
"""
44

5+
from typing import Literal
6+
7+
import numpy as np
58
import pandas as pd
69
from pygmt.clib import Session
710
from pygmt.helpers import (
8-
GMTTempFile,
911
build_arg_string,
1012
fmt_docstring,
1113
kwargs_to_strings,
1214
use_alias,
15+
validate_output_table_type,
1316
)
1417

1518
__doctest_skip__ = ["select"]
@@ -41,7 +44,12 @@
4144
w="wrap",
4245
)
4346
@kwargs_to_strings(M="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
44-
def select(data=None, outfile=None, **kwargs):
47+
def select(
48+
data=None,
49+
output_type: Literal["pandas", "numpy", "file"] = "pandas",
50+
outfile: str | None = None,
51+
**kwargs,
52+
) -> pd.DataFrame | np.ndarray | None:
4553
r"""
4654
Select data table subsets based on multiple spatial criteria.
4755
@@ -70,8 +78,8 @@ def select(data=None, outfile=None, **kwargs):
7078
data : str, {table-like}
7179
Pass in either a file name to an ASCII data table, a 2-D
7280
{table-classes}.
73-
outfile : str
74-
The file name for the output ASCII file.
81+
{output_type}
82+
{outfile}
7583
{area_thresh}
7684
dist2pt : str
7785
*pointfile*\|\ *lon*/*lat*\ **+d**\ *dist*.
@@ -180,12 +188,12 @@ def select(data=None, outfile=None, **kwargs):
180188
181189
Returns
182190
-------
183-
output : pandas.DataFrame or None
184-
Return type depends on whether the ``outfile`` parameter is set:
191+
ret
192+
Return type depends on ``outfile`` and ``output_type``:
185193
186-
- :class:`pandas.DataFrame` table if ``outfile`` is not set.
187-
- None if ``outfile`` is set (filtered output will be stored in file
188-
set by ``outfile``).
194+
- None if ``outfile`` is set (output will be stored in file set by ``outfile``)
195+
- :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
196+
(depends on ``output_type``)
189197
190198
Example
191199
-------
@@ -196,25 +204,23 @@ def select(data=None, outfile=None, **kwargs):
196204
>>> # longitudes 246 and 247 and latitudes 20 and 21
197205
>>> out = pygmt.select(data=ship_data, region=[246, 247, 20, 21])
198206
"""
199-
200-
with GMTTempFile(suffix=".csv") as tmpfile:
201-
with Session() as lib:
202-
with lib.virtualfile_in(check_kind="vector", data=data) as vintbl:
203-
if outfile is None:
204-
outfile = tmpfile.name
205-
lib.call_module(
206-
module="select",
207-
args=build_arg_string(kwargs, infile=vintbl, outfile=outfile),
208-
)
209-
210-
# Read temporary csv output to a pandas table
211-
if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame
212-
try:
213-
column_names = data.columns.to_list()
214-
result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)
215-
except AttributeError: # 'str' object has no attribute 'columns'
216-
result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">")
217-
elif outfile != tmpfile.name: # return None if outfile set, output in outfile
218-
result = None
219-
220-
return result
207+
output_type = validate_output_table_type(output_type, outfile=outfile)
208+
209+
column_names = None
210+
if output_type == "pandas" and isinstance(data, pd.DataFrame):
211+
column_names = data.columns.to_list()
212+
213+
with Session() as lib:
214+
with (
215+
lib.virtualfile_in(check_kind="vector", data=data) as vintbl,
216+
lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl,
217+
):
218+
lib.call_module(
219+
module="select",
220+
args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl),
221+
)
222+
return lib.virtualfile_to_dataset(
223+
output_type=output_type,
224+
vfile=vouttbl,
225+
column_names=column_names,
226+
)

0 commit comments

Comments
 (0)