Skip to content

Commit 0b46aad

Browse files
seisman and weiji14 authored
clib: Add virtualfile_to_dataset method for converting virtualfile to a dataset (#3083)
Co-authored-by: Wei Ji <[email protected]>
1 parent 28e3513 commit 0b46aad

File tree

2 files changed

+124
-2
lines changed

2 files changed

+124
-2
lines changed

doc/api/index.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,8 @@ the :meth:`~pygmt.clib.Session.call_module` method:
283283

284284
Passing memory blocks between Python data objects (e.g. :class:`numpy.ndarray`,
285285
:class:`pandas.Series`, :class:`xarray.DataArray`, etc) and GMT happens through
286-
*virtual files*. These methods are context managers that automate the
287-
conversion of Python variables to GMT virtual files:
286+
*virtual files*. These methods are context managers that automate the conversion of
287+
Python objects to and from GMT virtual files:
288288

289289
.. autosummary::
290290
:toctree: generated
@@ -294,6 +294,7 @@ conversion of Python variables to GMT virtual files:
294294
clib.Session.virtualfile_from_grid
295295
clib.Session.virtualfile_in
296296
clib.Session.virtualfile_out
297+
clib.Session.virtualfile_to_dataset
297298

298299
Low level access (these are mostly used by the :mod:`pygmt.clib` package):
299300

pygmt/clib/session.py

+121
Original file line numberDiff line numberDiff line change
@@ -1738,6 +1738,127 @@ def read_virtualfile(
17381738
dtype = {"dataset": _GMT_DATASET, "grid": _GMT_GRID}[kind]
17391739
return ctp.cast(pointer, ctp.POINTER(dtype))
17401740

1741+
def virtualfile_to_dataset(
1742+
self,
1743+
output_type: Literal["pandas", "numpy", "file"],
1744+
vfname: str,
1745+
column_names: list[str] | None = None,
1746+
) -> pd.DataFrame | np.ndarray | None:
1747+
"""
1748+
Output a tabular dataset stored in a virtual file to a different format.
1749+
1750+
The format of the dataset is determined by the ``output_type`` parameter.
1751+
1752+
Parameters
1753+
----------
1754+
output_type
1755+
Desired output type of the result data.
1756+
1757+
- ``"pandas"`` will return a :class:`pandas.DataFrame` object.
1758+
- ``"numpy"`` will return a :class:`numpy.ndarray` object.
1759+
- ``"file"`` means the result was saved to a file and will return ``None``.
1760+
vfname
1761+
The virtual file name that stores the result data. Required for ``"pandas"``
1762+
and ``"numpy"`` output type.
1763+
column_names
1764+
The column names for the :class:`pandas.DataFrame` output.
1765+
1766+
Returns
1767+
-------
1768+
result
1769+
The result dataset. If ``output_type="file"`` returns ``None``.
1770+
1771+
Examples
1772+
--------
1773+
>>> from pathlib import Path
1774+
>>> import numpy as np
1775+
>>> import pandas as pd
1776+
>>>
1777+
>>> from pygmt.helpers import GMTTempFile
1778+
>>> from pygmt.clib import Session
1779+
>>>
1780+
>>> with GMTTempFile(suffix=".txt") as tmpfile:
1781+
... # prepare the sample data file
1782+
... with open(tmpfile.name, mode="w") as fp:
1783+
... print(">", file=fp)
1784+
... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
1785+
... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
1786+
... print(">", file=fp)
1787+
... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp)
1788+
... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp)
1789+
...
1790+
... # file output
1791+
... with Session() as lib:
1792+
... with GMTTempFile(suffix=".txt") as outtmp:
1793+
... with lib.virtualfile_out(
1794+
... kind="dataset", fname=outtmp.name
1795+
... ) as vouttbl:
1796+
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1797+
... result = lib.virtualfile_to_dataset(
1798+
... output_type="file", vfname=vouttbl
1799+
... )
1800+
... assert result is None
1801+
... assert Path(outtmp.name).stat().st_size > 0
1802+
...
1803+
... # numpy output
1804+
... with Session() as lib:
1805+
... with lib.virtualfile_out(kind="dataset") as vouttbl:
1806+
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1807+
... outnp = lib.virtualfile_to_dataset(
1808+
... output_type="numpy", vfname=vouttbl
1809+
... )
1810+
... assert isinstance(outnp, np.ndarray)
1811+
...
1812+
... # pandas output
1813+
... with Session() as lib:
1814+
... with lib.virtualfile_out(kind="dataset") as vouttbl:
1815+
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1816+
... outpd = lib.virtualfile_to_dataset(
1817+
... output_type="pandas", vfname=vouttbl
1818+
... )
1819+
... assert isinstance(outpd, pd.DataFrame)
1820+
...
1821+
... # pandas output with specified column names
1822+
... with Session() as lib:
1823+
... with lib.virtualfile_out(kind="dataset") as vouttbl:
1824+
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1825+
... outpd2 = lib.virtualfile_to_dataset(
1826+
... output_type="pandas",
1827+
... vfname=vouttbl,
1828+
... column_names=["col1", "col2", "col3", "coltext"],
1829+
... )
1830+
... assert isinstance(outpd2, pd.DataFrame)
1831+
>>> outnp
1832+
array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'],
1833+
[4.0, 5.0, 6.0, 'TEXT4 TEXT567'],
1834+
[7.0, 8.0, 9.0, 'TEXT8 TEXT90'],
1835+
[10.0, 11.0, 12.0, 'TEXT123 TEXT456789']], dtype=object)
1836+
>>> outpd
1837+
0 1 2 3
1838+
0 1.0 2.0 3.0 TEXT1 TEXT23
1839+
1 4.0 5.0 6.0 TEXT4 TEXT567
1840+
2 7.0 8.0 9.0 TEXT8 TEXT90
1841+
3 10.0 11.0 12.0 TEXT123 TEXT456789
1842+
>>> outpd2
1843+
col1 col2 col3 coltext
1844+
0 1.0 2.0 3.0 TEXT1 TEXT23
1845+
1 4.0 5.0 6.0 TEXT4 TEXT567
1846+
2 7.0 8.0 9.0 TEXT8 TEXT90
1847+
3 10.0 11.0 12.0 TEXT123 TEXT456789
1848+
"""
1849+
if output_type == "file": # Already written to file, so return None
1850+
return None
1851+
1852+
# Read the virtual file as a GMT dataset and convert to pandas.DataFrame
1853+
result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe()
1854+
if output_type == "numpy": # numpy.ndarray output
1855+
return result.to_numpy()
1856+
1857+
# Assign column names
1858+
if column_names is not None:
1859+
result.columns = column_names
1860+
return result # pandas.DataFrame output
1861+
17411862
def extract_region(self):
17421863
"""
17431864
Extract the WESN bounding box of the currently active figure.

0 commit comments

Comments
 (0)