Skip to content

Commit 917b3aa

Browse files
seisman and weiji14 authored
Wrap the GMT API function GMT_Read_Data to read data into GMT data containers (#3324)
Co-authored-by: Wei Ji <[email protected]>
1 parent 6c436a3 commit 917b3aa

File tree

4 files changed

+239
-2
lines changed

4 files changed

+239
-2
lines changed

doc/api/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ Low level access (these are mostly used by the :mod:`pygmt.clib` package):
309309
clib.Session.put_matrix
310310
clib.Session.put_strings
311311
clib.Session.put_vector
312+
clib.Session.read_data
312313
clib.Session.write_data
313314
clib.Session.open_virtualfile
314315
clib.Session.read_virtualfile

pygmt/clib/conversion.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,9 @@ def as_c_contiguous(array):
247247
return array
248248

249249

250-
def sequence_to_ctypes_array(sequence: Sequence, ctype, size: int) -> ctp.Array | None:
250+
def sequence_to_ctypes_array(
251+
sequence: Sequence | None, ctype, size: int
252+
) -> ctp.Array | None:
251253
"""
252254
Convert a sequence of numbers into a ctypes array variable.
253255

pygmt/clib/session.py

+94-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pathlib
1111
import sys
1212
import warnings
13-
from collections.abc import Generator
13+
from collections.abc import Generator, Sequence
1414
from typing import Literal
1515

1616
import numpy as np
@@ -1067,6 +1067,99 @@ def put_matrix(self, dataset, matrix, pad=0):
10671067
if status != 0:
10681068
raise GMTCLibError(f"Failed to put matrix of type {matrix.dtype}.")
10691069

1070+
def read_data(
    self,
    infile: str,
    kind: Literal["dataset", "grid"],
    family: str | None = None,
    geometry: str | None = None,
    mode: str = "GMT_READ_NORMAL",
    region: Sequence[float] | None = None,
    data=None,
):
    """
    Read a data file into a GMT data container.

    Wraps ``GMT_Read_Data`` but only allows reading from a file. The function
    definition is different from the original C API function.

    Parameters
    ----------
    infile
        The input file name.
    kind
        The data kind of the input file. Valid values are ``"dataset"`` and
        ``"grid"``. It selects the default family and geometry, and always
        decides the container type that the returned pointer is cast to.
    family
        A valid GMT data family name (e.g., ``"GMT_IS_DATASET"``). See the
        ``FAMILIES`` attribute for valid names. If ``None``, will determine the data
        family from the ``kind`` parameter.
    geometry
        A valid GMT data geometry name (e.g., ``"GMT_IS_POINT"``). See the
        ``GEOMETRIES`` attribute for valid names. If ``None``, will determine the
        data geometry from the ``kind`` parameter.
    mode
        How the data is to be read from the file. This option varies depending on
        the given family. See the
        :gmt-docs:`GMT API documentation <devdocs/api.html#import-from-a-file-stream-or-handle>`
        for details. Default is ``GMT_READ_NORMAL`` which corresponds to the default
        read mode value of 0 in the ``GMT_enum_read`` enum.
    region
        Subregion of the data, in the form of [xmin, xmax, ymin, ymax, zmin, zmax].
        If ``None``, the whole data is read.
    data
        ``None`` or the pointer returned by this function after a first call. It's
        useful when reading grids/images/cubes in two steps (get a grid/image/cube
        structure with a header, then read the data).

    Returns
    -------
    Pointer to the data container, cast to a ctypes pointer of the container type
    that corresponds to ``kind``. Errors are reported by raising an exception; this
    function never returns ``None``.

    Raises
    ------
    GMTCLibError
        If the GMT API function fails to read the data.
    KeyError
        If ``kind`` is not one of the valid values.
    """  # noqa: W505
    c_read_data = self.get_libgmt_func(
        "GMT_Read_Data",
        argtypes=[
            ctp.c_void_p,  # V_API
            ctp.c_uint,  # family
            ctp.c_uint,  # method
            ctp.c_uint,  # geometry
            ctp.c_uint,  # mode
            ctp.POINTER(ctp.c_double),  # wesn
            ctp.c_char_p,  # infile
            ctp.c_void_p,  # data
        ],
        restype=ctp.c_void_p,  # data_ptr
    )

    # Determine the family, geometry and data container from kind
    _family, _geometry, dtype = {
        "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP", _GMT_DATASET),
        "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE", _GMT_GRID),
    }[kind]
    if family is None:
        family = _family
    if geometry is None:
        geometry = _geometry

    data_ptr = c_read_data(
        self.session_pointer,
        self[family],
        self["GMT_IS_FILE"],  # Reading from a file
        self[geometry],
        self[mode],
        sequence_to_ctypes_array(region, ctp.c_double, 6),
        infile.encode(),
        data,
    )
    # With restype=c_void_p, ctypes hands back None for a NULL pointer.
    if data_ptr is None:
        # Name the requested kind so grid failures are not reported as datasets.
        raise GMTCLibError(f"Failed to read {kind} from '{infile}'.")
    return ctp.cast(data_ptr, ctp.POINTER(dtype))
1162+
10701163
def write_data(self, family, geometry, mode, wesn, output, data):
10711164
"""
10721165
Write a GMT data container to a file.

pygmt/tests/test_clib_read_data.py

+141
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
"""
2+
Test the Session.read_data method.
3+
"""
4+
5+
from pathlib import Path
6+
7+
import pandas as pd
8+
import pytest
9+
import xarray as xr
10+
from pygmt.clib import Session
11+
from pygmt.exceptions import GMTCLibError
12+
from pygmt.helpers import GMTTempFile
13+
from pygmt.io import load_dataarray
14+
from pygmt.src import which
15+
16+
try:
17+
import rioxarray # noqa: F401
18+
19+
_HAS_RIOXARRAY = True
20+
except ImportError:
21+
_HAS_RIOXARRAY = False
22+
23+
24+
@pytest.fixture(scope="module", name="expected_xrgrid")
def fixture_expected_xrgrid():
    """
    Load the static_earth_relief.nc file as the reference xr.DataArray object.
    """
    grid_path = which("@static_earth_relief.nc")
    return load_dataarray(grid_path)
30+
31+
32+
def test_clib_read_data_dataset():
    """
    Check that Session.read_data reads a text table into a dataset container.

    The sample file has a header line, two segments and trailing text on each row.
    """
    sample_lines = (
        "# x y z name",
        ">",
        "1.0 2.0 3.0 TEXT1 TEXT23",
        "4.0 5.0 6.0 TEXT4 TEXT567",
        ">",
        "7.0 8.0 9.0 TEXT8 TEXT90",
        "10.0 11.0 12.0 TEXT123 TEXT456789",
    )
    with GMTTempFile(suffix=".txt") as tmpfile:
        # Write the sample data file, one record per line
        with Path(tmpfile.name).open(mode="w", encoding="utf-8") as fp:
            for line in sample_lines:
                print(line, file=fp)

        with Session() as lib:
            dataset = lib.read_data(tmpfile.name, kind="dataset").contents
            actual = dataset.to_dataframe(header=0)

            # The trailing text of each row is expected as a single string column
            trailing_text = pd.Series(
                [
                    "TEXT1 TEXT23",
                    "TEXT4 TEXT567",
                    "TEXT8 TEXT90",
                    "TEXT123 TEXT456789",
                ],
                dtype=pd.StringDtype(),
            )
            expected = pd.DataFrame(
                data={
                    "x": [1.0, 4.0, 7.0, 10.0],
                    "y": [2.0, 5.0, 8.0, 11.0],
                    "z": [3.0, 6.0, 9.0, 12.0],
                    "name": trailing_text,
                }
            )
            pd.testing.assert_frame_equal(actual, expected)
67+
68+
69+
def test_clib_read_data_grid(expected_xrgrid):
    """
    Check that Session.read_data reads a grid file in a single call.
    """
    with Session() as lib:
        grid_ptr = lib.read_data("@static_earth_relief.nc", kind="grid")
        grid = grid_ptr.contents
        actual = grid.to_dataarray()
        xr.testing.assert_equal(actual, expected_xrgrid)
        # Explicitly check n_bands
        n_bands = grid.header.contents.n_bands
        assert n_bands == 1
78+
79+
80+
def test_clib_read_data_grid_two_steps(expected_xrgrid):
    """
    Check that Session.read_data can read a grid in two calls: the header first,
    then the data into the same container.
    """
    infile = "@static_earth_relief.nc"
    with Session() as lib:
        # Step 1: header only
        grid_ptr = lib.read_data(infile, kind="grid", mode="GMT_CONTAINER_ONLY")
        container = grid_ptr.contents
        hdr = container.header.contents
        assert hdr.n_rows == 14
        assert hdr.n_columns == 8
        assert hdr.wesn[:] == [-55.0, -47.0, -24.0, -10.0]
        assert hdr.z_min == 190.0
        assert hdr.z_max == 981.0
        assert hdr.n_bands == 1  # Explicitly check n_bands
        assert not container.data  # The data is not read yet

        # Step 2: fill the existing container with the data
        lib.read_data(infile, kind="grid", mode="GMT_DATA_ONLY", data=grid_ptr)
        actual = grid_ptr.contents.to_dataarray()
        xr.testing.assert_equal(actual, expected_xrgrid)
103+
104+
105+
def test_clib_read_data_grid_actual_image():
    """
    Check Session.read_data with kind="grid" when the file is actually an image.
    """
    infile = "@earth_day_01d_p"
    with Session() as lib:
        image_ptr = lib.read_data(infile, kind="grid", mode="GMT_CONTAINER_AND_DATA")
        image = image_ptr.contents
        hdr = image.header.contents
        assert hdr.n_rows == 180
        assert hdr.n_columns == 360
        assert hdr.wesn[:] == [-180.0, 180.0, -90.0, 90.0]
        # Explicitly check n_bands. Only one band is read for 3-band images.
        assert hdr.n_bands == 1

        if _HAS_RIOXARRAY:  # Full check if rioxarray is installed.
            actual = image.to_dataarray()
            reference = xr.open_dataarray(which(infile), engine="rasterio")
            assert reference.band.size == 3  # 3-band image.
            # Compare against the first band only, in ascending y order
            first_band = reference.isel(band=0)
            first_band = first_band.drop_vars(["band", "spatial_ref"])
            first_band = first_band.sortby("y")
            xr.testing.assert_equal(actual, first_band)
133+
134+
135+
def test_clib_read_data_fails():
    """
    Check that Session.read_data raises GMTCLibError when the read fails.
    """
    missing_file = "not-exsits.txt"
    with Session() as lib, pytest.raises(GMTCLibError):
        lib.read_data(missing_file, kind="dataset")

0 commit comments

Comments
 (0)