forked from poplarShift/python-data-science-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
geometry.py
117 lines (97 loc) · 3.34 KB
/
geometry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from scipy.spatial import cKDTree
import numpy as np
from shapely.geometry import Point, Polygon
from geopandas import GeoDataFrame
# need to sort out geometry vs. attr vs dict entry/column,
# and dataset vs dataframe!!
def _build_tree(d1, d2, x, y):
n1 = np.array(list(zip(d1[x], d1[y])) )
n2 = np.array(list(zip(d2[x], d2[y])) )
btree = cKDTree(n2)
return btree, n1, n2
def nearest(d1, d2, x='lon', y='lat'):
"""
For each point in d1, find nearest point in d2.
Parameters
----------
d1, d2 : pandas dataframes
Returns
-------
dist : distances to each nearest
idx : indices of each nearest
License
-------
GNU-GPLv3, (C) A. R.
(https://github.com/poplarShift/python-data-science-utils)
"""
btree, n1, _ = _build_tree(d1, d2, x, y)
return btree.query(n1, k=1)
def points_within(d1, d2, radius, x='lon', y='lat'):
"""
For each point in d1, find all points of d2 within given radius
Parameters
----------
d1, d2 : pandas dataframes
Returns
-------
idx : indices
License
-------
GNU-GPLv3, (C) A. R.
(https://github.com/poplarShift/python-data-science-utils)
"""
n1 = np.array(list(zip(d1.geometry.x, d1.geometry.y)) )
n2 = np.array(list(zip(d2.geometry.x, d2.geometry.y)) )
btree = cKDTree(n2)
idx = btree.query_ball_point(n1, r=radius)
return idx
def nearest_with_time_constraint(d1, d2, x, y, dist_tol=.1, t='date', t_tol=1):
"""
For each point in d1, find nearest point in d2,
and return a boolean index that is True iff their distance is less
than dist_tol and they are not further apart in time than t_tol days.
Confused yet?
Parameters
----------
d1, d2 : pandas dataframes
dist_tol : float, units given by x, y
t_tol : floats, allowed time difference in days
x, y : Names of x and y columns
Returns
-------
nearest :
within_tol : bool array, True of nearest neighbour within dist_tol and t_tol
License
-------
GNU-GPLv3, (C) A. R.
(https://github.com/poplarShift/python-data-science-utils)
"""
n1 = np.array(list(zip(d1[x], d1[y])) )
n2 = np.array(list(zip(d2[x], d2[y])) )
tree_spatial = cKDTree(n2)
distances, nearest = tree_spatial.query(n1)
# for each point in d1, find all points in d2 within ttol days
t1_epoch_days = (d1[t].values[:, np.newaxis] - np.datetime64('1900-01-01'))/np.timedelta64(1, 'D')
t2_epoch_days = (d2[t].values[:, np.newaxis] - np.datetime64('1900-01-01'))/np.timedelta64(1, 'D')
tree_tmp = cKDTree(t2_epoch_days)
within_t_tol = tree_tmp.query_ball_point(t1_epoch_days, r=t_tol) # array of lists of pot. candidates
within_tol = [True if idx in candidates and dist<=dist_tol else False
for idx, dist, candidates
in zip(nearest, distances, within_t_tol)]
return nearest, within_tol
def smoothen(g):
"""
Insert new points along Polygon boundary.
License
-------
GNU-GPLv3, (C) A. R.
(https://github.com/poplarShift/python-data-science-utils)
"""
newcoords = []
for c in g.exterior.coords.xy:
newc = []
for c1, c2 in zip(c[:-1], c[1:]):
newc += list(np.linspace(c1, c2, 50)[:-1])
newcoords.append(newc)
newcoords = [(x,y) for x, y in zip(*newcoords)]
return Polygon(newcoords)