@@ -1738,6 +1738,127 @@ def read_virtualfile(
1738
1738
dtype = {"dataset" : _GMT_DATASET , "grid" : _GMT_GRID }[kind ]
1739
1739
return ctp .cast (pointer , ctp .POINTER (dtype ))
1740
1740
1741
+ def virtualfile_to_dataset (
1742
+ self ,
1743
+ output_type : Literal ["pandas" , "numpy" , "file" ],
1744
+ vfname : str ,
1745
+ column_names : list [str ] | None = None ,
1746
+ ) -> pd .DataFrame | np .ndarray | None :
1747
+ """
1748
+ Output a tabular dataset stored in a virtual file to a different format.
1749
+
1750
+ The format of the dataset is determined by the ``output_type`` parameter.
1751
+
1752
+ Parameters
1753
+ ----------
1754
+ output_type
1755
+ Desired output type of the result data.
1756
+
1757
+ - ``"pandas"`` will return a :class:`pandas.DataFrame` object.
1758
+ - ``"numpy"`` will return a :class:`numpy.ndarray` object.
1759
+ - ``"file"`` means the result was saved to a file and will return ``None``.
1760
+ vfname
1761
+ The virtual file name that stores the result data. Required for ``"pandas"``
1762
+ and ``"numpy"`` output type.
1763
+ column_names
1764
+ The column names for the :class:`pandas.DataFrame` output.
1765
+
1766
+ Returns
1767
+ -------
1768
+ result
1769
+ The result dataset. If ``output_type="file"`` returns ``None``.
1770
+
1771
+ Examples
1772
+ --------
1773
+ >>> from pathlib import Path
1774
+ >>> import numpy as np
1775
+ >>> import pandas as pd
1776
+ >>>
1777
+ >>> from pygmt.helpers import GMTTempFile
1778
+ >>> from pygmt.clib import Session
1779
+ >>>
1780
+ >>> with GMTTempFile(suffix=".txt") as tmpfile:
1781
+ ... # prepare the sample data file
1782
+ ... with open(tmpfile.name, mode="w") as fp:
1783
+ ... print(">", file=fp)
1784
+ ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
1785
+ ... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
1786
+ ... print(">", file=fp)
1787
+ ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp)
1788
+ ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp)
1789
+ ...
1790
+ ... # file output
1791
+ ... with Session() as lib:
1792
+ ... with GMTTempFile(suffix=".txt") as outtmp:
1793
+ ... with lib.virtualfile_out(
1794
+ ... kind="dataset", fname=outtmp.name
1795
+ ... ) as vouttbl:
1796
+ ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1797
+ ... result = lib.virtualfile_to_dataset(
1798
+ ... output_type="file", vfname=vouttbl
1799
+ ... )
1800
+ ... assert result is None
1801
+ ... assert Path(outtmp.name).stat().st_size > 0
1802
+ ...
1803
+ ... # numpy output
1804
+ ... with Session() as lib:
1805
+ ... with lib.virtualfile_out(kind="dataset") as vouttbl:
1806
+ ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1807
+ ... outnp = lib.virtualfile_to_dataset(
1808
+ ... output_type="numpy", vfname=vouttbl
1809
+ ... )
1810
+ ... assert isinstance(outnp, np.ndarray)
1811
+ ...
1812
+ ... # pandas output
1813
+ ... with Session() as lib:
1814
+ ... with lib.virtualfile_out(kind="dataset") as vouttbl:
1815
+ ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1816
+ ... outpd = lib.virtualfile_to_dataset(
1817
+ ... output_type="pandas", vfname=vouttbl
1818
+ ... )
1819
+ ... assert isinstance(outpd, pd.DataFrame)
1820
+ ...
1821
+ ... # pandas output with specified column names
1822
+ ... with Session() as lib:
1823
+ ... with lib.virtualfile_out(kind="dataset") as vouttbl:
1824
+ ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1825
+ ... outpd2 = lib.virtualfile_to_dataset(
1826
+ ... output_type="pandas",
1827
+ ... vfname=vouttbl,
1828
+ ... column_names=["col1", "col2", "col3", "coltext"],
1829
+ ... )
1830
+ ... assert isinstance(outpd2, pd.DataFrame)
1831
+ >>> outnp
1832
+ array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'],
1833
+ [4.0, 5.0, 6.0, 'TEXT4 TEXT567'],
1834
+ [7.0, 8.0, 9.0, 'TEXT8 TEXT90'],
1835
+ [10.0, 11.0, 12.0, 'TEXT123 TEXT456789']], dtype=object)
1836
+ >>> outpd
1837
+ 0 1 2 3
1838
+ 0 1.0 2.0 3.0 TEXT1 TEXT23
1839
+ 1 4.0 5.0 6.0 TEXT4 TEXT567
1840
+ 2 7.0 8.0 9.0 TEXT8 TEXT90
1841
+ 3 10.0 11.0 12.0 TEXT123 TEXT456789
1842
+ >>> outpd2
1843
+ col1 col2 col3 coltext
1844
+ 0 1.0 2.0 3.0 TEXT1 TEXT23
1845
+ 1 4.0 5.0 6.0 TEXT4 TEXT567
1846
+ 2 7.0 8.0 9.0 TEXT8 TEXT90
1847
+ 3 10.0 11.0 12.0 TEXT123 TEXT456789
1848
+ """
1849
+ if output_type == "file" : # Already written to file, so return None
1850
+ return None
1851
+
1852
+ # Read the virtual file as a GMT dataset and convert to pandas.DataFrame
1853
+ result = self .read_virtualfile (vfname , kind = "dataset" ).contents .to_dataframe ()
1854
+ if output_type == "numpy" : # numpy.ndarray output
1855
+ return result .to_numpy ()
1856
+
1857
+ # Assign column names
1858
+ if column_names is not None :
1859
+ result .columns = column_names
1860
+ return result # pandas.DataFrame output
1861
+
1741
1862
def extract_region (self ):
1742
1863
"""
1743
1864
Extract the WESN bounding box of the currently active figure.
0 commit comments