Skip to content

Commit 8eb055d

Browse files
JamieJQuinnweiji14Meghan Jonesmichaelgrundseisman
authored
Wrap nearneighbor (#1379)
Wrap the nearneighbor function, which grids table data using a "Nearest neighbor" algorithm. Official GMT documentation is at https://docs.generic-mapping-tools.org/6.2/nearneighbor.html. Aliased empty (E), spacing (I), sectors (N), search_radius (S). * Add nearneighbor to API index * Add a figure similar to that found in GMT's nearneighbor documentation * Ignore flake8 error using noqa W505 * Shorten line length to under 100 using the dev image * Merge two tests using pytest parametrize Check that numpy.array and xarray.Dataset inputs work. Co-authored-by: Wei Ji <[email protected]> Co-authored-by: Meghan Jones <[email protected]> Co-authored-by: Michael Grund <[email protected]> Co-authored-by: Dongdong Tian <[email protected]>
1 parent ef108d9 commit 8eb055d

File tree

5 files changed

+237
-0
lines changed

5 files changed

+237
-0
lines changed

doc/api/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ Operations on tabular data:
8282
blockmean
8383
blockmedian
8484
blockmode
85+
nearneighbor
8586
surface
8687

8788
Operations on grids:

pygmt/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
grdtrack,
4949
info,
5050
makecpt,
51+
nearneighbor,
5152
sphdistance,
5253
surface,
5354
which,

pygmt/src/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from pygmt.src.logo import logo
3333
from pygmt.src.makecpt import makecpt
3434
from pygmt.src.meca import meca
35+
from pygmt.src.nearneighbor import nearneighbor
3536
from pygmt.src.plot import plot
3637
from pygmt.src.plot3d import plot3d
3738
from pygmt.src.rose import rose

pygmt/src/nearneighbor.py

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
"""
2+
nearneighbor - Grid table data using a "Nearest neighbor" algorithm
3+
"""
4+
5+
from pygmt.clib import Session
6+
from pygmt.helpers import (
7+
GMTTempFile,
8+
build_arg_string,
9+
fmt_docstring,
10+
kwargs_to_strings,
11+
use_alias,
12+
)
13+
from pygmt.io import load_dataarray
14+
15+
16+
@fmt_docstring
@use_alias(
    E="empty",
    G="outgrid",
    I="spacing",
    N="sectors",
    R="region",
    S="search_radius",
    V="verbose",
    a="aspatial",
    b="binary",
    d="nodata",
    e="find",
    f="coltypes",
    g="gap",
    h="header",
    i="incols",
    r="registration",
    w="wrap",
)
@kwargs_to_strings(R="sequence", i="sequence_comma")
def nearneighbor(data=None, x=None, y=None, z=None, **kwargs):
    r"""
    Grid table data using a "Nearest neighbor" algorithm

    **nearneighbor** reads arbitrarily located (*x,y,z*\ [,\ *w*]) triples
    [quadruplets] and uses a nearest neighbor algorithm to assign a weighted
    average value to each node that has one or more data points within a search
    radius centered on the node with adequate coverage across a subset of the
    chosen sectors. The node value is computed as a weighted mean of the
    nearest point from each sector inside the search radius. The weighting
    function and the averaging used are given by:

    .. math::
        w(r_i) = \frac{{w_i}}{{1 + d(r_i) ^ 2}},
        \quad d(r) = \frac {{3r}}{{R}},
        \quad \bar{{z}} = \frac{{\sum_i^n w(r_i) z_i}}{{\sum_i^n w(r_i)}}

    where :math:`n` is the number of data points that satisfy the selection
    criteria and :math:`r_i` is the distance from the node to the *i*'th data
    point. If no data weights are supplied then :math:`w_i = 1`.

    .. figure:: https://docs.generic-mapping-tools.org/dev/_images/GMT_nearneighbor.png # noqa: W505
       :width: 300 px
       :align: center

       Search geometry includes the search radius (R) which limits the points
       considered and the number of sectors (here 4), which restricts how
       points inside the search radius contribute to the value at the node.
       Only the closest point in each sector (red circles) contributes to the
       weighted estimate.

    Takes a matrix, xyz triples, or a file name as input.

    Must provide either ``data`` or ``x``, ``y``, and ``z``.

    Full option list at :gmt-docs:`nearneighbor.html`

    {aliases}

    Parameters
    ----------
    data : str or {table-like}
        Pass in (x, y, z) or (longitude, latitude, elevation) values by
        providing a file name to an ASCII data table, a 2D
        {table-classes}.
    x/y/z : 1d arrays
        Arrays of x and y coordinates and values z of the data points.

    {I}

    {R}

    search_radius : str
        Sets the search radius that determines which data points are considered
        close to a node.

    outgrid : str
        Optional. The file name for the output netcdf file with extension .nc
        to store the grid in. If omitted or ``None``, the grid is written to
        a temporary file and returned as an :class:`xarray.DataArray`.

    empty : str
        Optional. Set the value assigned to empty nodes. Defaults to NaN.

    sectors : str
        *sectors*\ [**+m**\ *min_sectors*]\|\ **n**.
        Optional. The circular search area centered on each node is divided
        into *sectors* sectors. Average values will only be computed if there
        is *at least* one value inside each of at least *min_sectors* of the
        sectors for a given node. Nodes that fail this test are assigned the
        value NaN (but see ``empty``). If **+m** is omitted then *min_sectors*
        is set to be at least 50% of *sectors* (i.e., rounded up to next
        integer) [Default is a quadrant search with 100% coverage, i.e.,
        *sectors* = *min_sectors* = 4]. Note that only the nearest value per
        sector enters into the averaging; the more distant points are ignored.
        Alternatively, use ``sectors="n"`` to call GDAL's nearest neighbor
        algorithm instead.

    {V}
    {a}
    {b}
    {d}
    {e}
    {f}
    {g}
    {h}
    {i}
    {r}
    {w}

    Returns
    -------
    ret: xarray.DataArray or None
        Return type depends on whether the ``outgrid`` parameter is set:

        - :class:`xarray.DataArray`: if ``outgrid`` is not set
        - None if ``outgrid`` is set (grid output will be stored in file set by
          ``outgrid``)
    """
    with GMTTempFile(suffix=".nc") as tmpfile:
        with Session() as lib:
            # Choose how data will be passed into the module
            table_context = lib.virtualfile_from_data(
                check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
            )
            with table_context as infile:
                # Treat a missing ``outgrid`` and an explicit ``outgrid=None``
                # the same way: a bare key-membership test would accept the
                # explicit None as "set" and never substitute the temporary
                # grid file.
                outgrid = kwargs.get("G")
                if outgrid is None:
                    outgrid = tmpfile.name
                    kwargs["G"] = outgrid
                arg_str = " ".join([infile, build_arg_string(kwargs)])
                lib.call_module(module="nearneighbor", args=arg_str)

        # Load the grid while the temporary file still exists, i.e. before
        # leaving the GMTTempFile context.
        return load_dataarray(outgrid) if outgrid == tmpfile.name else None

pygmt/tests/test_nearneighbor.py

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""
2+
Tests for nearneighbor.
3+
"""
4+
import os
5+
6+
import numpy as np
7+
import numpy.testing as npt
8+
import pytest
9+
import xarray as xr
10+
from pygmt import nearneighbor
11+
from pygmt.datasets import load_sample_bathymetry
12+
from pygmt.exceptions import GMTInvalidInput
13+
from pygmt.helpers import GMTTempFile, data_kind
14+
15+
16+
@pytest.fixture(scope="module", name="ship_data")
def fixture_ship_data():
    """
    Provide the sample bathymetry table to tests as ``ship_data``.

    The fixture is module-scoped, so the dataset is loaded once and shared by
    every test in this module.
    """
    bathymetry_table = load_sample_bathymetry()
    return bathymetry_table
22+
23+
24+
@pytest.mark.parametrize("array_func", [np.array, xr.Dataset])
def test_nearneighbor_input_data(array_func, ship_data):
    """
    Grid the ship data after converting it to a numpy.array or an
    xarray.Dataset and passing it through the ``data`` parameter.
    """
    table = array_func(ship_data)
    grid = nearneighbor(
        data=table,
        spacing="5m",
        region=[245, 255, 20, 30],
        search_radius="10m",
    )

    assert isinstance(grid, xr.DataArray)
    # GMT metadata exposed through the accessor
    assert grid.gmt.registration == 0  # Gridline registration
    assert grid.gmt.gtype == 1  # Geographic type
    # Grid dimensions and overall content
    assert grid.shape == (121, 121)
    npt.assert_allclose(grid.mean(), -2378.2385)
38+
39+
40+
def test_nearneighbor_input_xyz(ship_data):
    """
    Grid the ship data when the longitude, latitude and bathymetry columns
    are supplied separately through the ``x``, ``y`` and ``z`` parameters.
    """
    lon = ship_data.longitude
    lat = ship_data.latitude
    depth = ship_data.bathymetry

    grid = nearneighbor(
        x=lon,
        y=lat,
        z=depth,
        spacing="5m",
        region=[245, 255, 20, 30],
        search_radius="10m",
    )

    assert isinstance(grid, xr.DataArray)
    assert grid.shape == (121, 121)
    npt.assert_allclose(grid.mean(), -2378.2385)
55+
56+
57+
def test_nearneighbor_wrong_kind_of_input(ship_data):
    """
    Check that nearneighbor rejects input classified as a grid instead of a
    file, matrix or set of vectors.
    """
    # Converting the pandas.Series to an xarray.DataArray makes the input
    # register as a "grid" kind.
    grid_like = ship_data.bathymetry.to_xarray()
    assert data_kind(grid_like) == "grid"

    with pytest.raises(GMTInvalidInput):
        nearneighbor(
            data=grid_like,
            spacing="5m",
            region=[245, 255, 20, 30],
            search_radius="10m",
        )
67+
68+
69+
def test_nearneighbor_with_outgrid_param(ship_data):
    """
    Check that setting ``outgrid`` writes the grid to that file and makes
    nearneighbor return None.
    """
    with GMTTempFile() as tmpfile:
        grid_path = tmpfile.name
        result = nearneighbor(
            data=ship_data,
            spacing="5m",
            region=[245, 255, 20, 30],
            outgrid=grid_path,
            search_radius="10m",
        )
        # Nothing is returned when the grid goes to a user-specified file
        assert result is None
        # The requested output file must have been created
        assert os.path.exists(grid_path)
        # The file must be a readable netCDF grid with the expected content
        with xr.open_dataarray(grid_path) as grid:
            assert isinstance(grid, xr.DataArray)
            assert grid.shape == (121, 121)
            npt.assert_allclose(grid.mean(), -2378.2385)

0 commit comments

Comments
 (0)