-
Notifications
You must be signed in to change notification settings - Fork 0
/
stack.py
135 lines (115 loc) · 4.48 KB
/
stack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from operator import index
import numpy as np
import pandas as pd
import math
import time
import os
import itertools
# Module-level accumulators. `clusters` is filled by the filtering stage
# further down the script; `latarray` / `longarray` are not referenced by any
# other visible statement but are kept so existing names stay defined.
clusters = []
latarray = np.array([])
longarray = np.array([])

# Wall-clock reference points: one for the whole run, one reset per stage.
start_time = time.time()
temp_time = time.time()

# Grid resolution: spacing of candidate locations, expressed in metres and
# converted to degrees with fixed per-metre factors.
# NOTE(review): the longitude factor depends on latitude; this constant
# presumably matches the area covered by pokestops.csv - confirm.
meters = 10
LatDegPerMeter = 0.000008999
LatDeg = meters * LatDegPerMeter
LongDegPerMeter = 0.000010844
LongDeg = meters * LongDegPerMeter

# The CSV is read without a header row; its two columns are labelled
# latitude and longitude, in that order.
pokestops = pd.read_csv("pokestops.csv", header=None)
pokestops.columns = ["Lat", "Long"]

# Bounding box of all pokestops.
LocLatMin = pokestops["Lat"].min()
LocLatMax = pokestops["Lat"].max()
LocLongMin = pokestops["Long"].min()
LocLongMax = pokestops["Long"].max()

# Number of grid steps along each axis. Clamp to at least one step: when all
# stops share a latitude or longitude (e.g. a single pokestop) the extent is
# zero, and an unclamped ceil() would give an empty grid and therefore an
# empty result for the whole run.
latiterations = max(1, math.ceil((LocLatMax - LocLatMin) / LatDeg))
longiterations = max(1, math.ceil((LocLongMax - LocLongMin) / LongDeg))

print("Find all coordinates for the locations df")
print("Total Loops Required: " + str(latiterations * longiterations * len(pokestops)))

# Candidate locations: every (lat, long) combination on the regular grid that
# starts at the south-west corner of the bounding box.
grid_lats = np.arange(latiterations) * LatDeg + LocLatMin
grid_longs = np.arange(longiterations) * LongDeg + LocLongMin
locations = pd.DataFrame(
    itertools.product(grid_lats, grid_longs),
    columns=["Lat", "Long"],
)
print("%s seconds" % (time.time() - temp_time))
print("")

# Third column; later stages overwrite it with the number of nearby stops.
locations["Count"] = 0
def haversine_np_matrix(lon1, lat1, lon2, lat2):
    """Return the pairwise haversine distances between two sets of points.

    All coordinates are given in decimal degrees. The first point set
    (``lon1``/``lat1``) is spread along axis 1 of the result and the second
    point set (``lon2``/``lat2``) along axis 0, so for 1-D inputs of lengths
    ``n`` and ``m`` the result has shape ``(m, n)`` and ``result[i, j]`` is
    the distance between point ``i`` of the second set and point ``j`` of
    the first set.

    Args:
        lon1: Longitudes of the first point set (1-D array-like).
        lat1: Latitudes of the first point set (1-D array-like).
        lon2: Longitudes of the second point set (1-D array-like).
        lat2: Latitudes of the second point set (1-D array-like).

    Returns:
        A NumPy array of distances computed with a sphere radius of 6367,
        i.e. in kilometres.
    """
    lon1, lat1, lon2, lat2 = (
        np.radians(np.asarray(values)) for values in (lon1, lat1, lon2, lat2)
    )

    # Lay the first set out as a row and the second as a column so that the
    # subtractions below broadcast to the full pairwise matrix.
    lon1 = np.expand_dims(lon1, axis=0)
    lat1 = np.expand_dims(lat1, axis=0)
    lon2 = np.expand_dims(lon2, axis=1)
    lat2 = np.expand_dims(lat2, axis=1)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp it back into range.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    km = 6367 * c
    return km
# Radius, in kilometres, within which a pokestop counts as "nearby" a
# candidate location (0.07 km = 70 m).
MAX_DISTANCE_KM = 0.07


def _print_elapsed(since):
    """Print the seconds elapsed since *since*, then a blank separator line."""
    print("%s seconds" % (time.time() - since))
    print("")


print("Calculating Distance Table")
temp_time = time.time()
# One row per candidate location, one column per pokestop.
distances = haversine_np_matrix(
    pokestops["Long"], pokestops["Lat"], locations["Long"], locations["Lat"]
)
_print_elapsed(temp_time)

print("Creating Distance Table")
temp_time = time.time()
# After the join, columns 0-2 are Lat / Long / Count and every column from
# position 3 onwards holds the distances to one pokestop.
locations = locations.join(pd.DataFrame(distances))
_print_elapsed(temp_time)

# The message previously said "< 70m", but the step blanks out every distance
# that is GREATER than 70 m so that only nearby stops remain.
print("Removing Distances > 70m")
temp_time = time.time()
# np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
locations[locations.iloc[:, 3:] > MAX_DISTANCE_KM] = np.nan
_print_elapsed(temp_time)

print("Counting number of Nearby locations")
temp_time = time.time()
# count() ignores NaN, so this is the number of stops within range per row.
locations["Count"] = locations.iloc[:, 3:].count(axis="columns")
_print_elapsed(temp_time)

print("Sorting by location count")
temp_time = time.time()
locations.sort_values("Count", ascending=False, inplace=True)
_print_elapsed(temp_time)

print("Saving Top 20 Locations")
temp_time = time.time()
top20 = locations.iloc[0:20, 0:3]
# Mode "w" truncates any report left by a previous run, so no explicit
# delete-then-append is needed.
with open("Top 20 Locations.txt", "w") as f:
    f.write(top20.to_string(header=True, index=True))
_print_elapsed(temp_time)

print("Removing all 0 distance locations from Database")
temp_time = time.time()
# Candidate locations with no stop in range can never be selected.
locations.drop(locations.index[locations["Count"] == 0], inplace=True)
_print_elapsed(temp_time)

print("Running the filtering algorithm")
temp_time = time.time()
# Greedy selection: repeatedly take the candidate with the most remaining
# nearby stops, record it, then remove the stops it covers so they are not
# counted again for later candidates.
selected_points = []
while len(locations) > 0:
    # Recount, because stop columns were removed on the previous pass.
    locations["Count"] = locations.iloc[:, 3:].count(axis="columns")
    # Discard candidates that no longer have any stop in range.
    locations.drop(locations.index[locations["Count"] == 0], inplace=True)
    locations.sort_values("Count", ascending=False, inplace=True)
    if len(locations) == 0:
        break
    best = locations.iloc[0]
    # Keep the Lat / Long of the best candidate as one (lat, long) pair.
    selected_points.append(best.iloc[0:2].to_numpy())
    # Stop columns with a non-NaN distance in the best row are now covered.
    covered = locations.columns[3:][best.iloc[3:].notna().to_numpy()]
    # Remove the chosen candidate and its covered stops in one step; drop()
    # returns a fresh frame, so later in-place edits do not touch a slice.
    locations = locations.drop(index=locations.index[0], columns=covered)
_print_elapsed(temp_time)

print("Saving Locations to Array")
temp_time = time.time()
# Always a 2-column array, even when nothing was selected (shape (0, 2)).
clusters = np.array(selected_points, dtype=float).reshape(-1, 2)
_print_elapsed(temp_time)

print("Writing final Clustered Spawnpoints to file")
temp_time = time.time()
# savetxt opens the target for writing and so replaces any existing file.
np.savetxt("Clustered_Spawnpoints.txt", clusters, delimiter=',', fmt='%1.8f')
_print_elapsed(temp_time)

print("Total Time Taken: %s Seconds" % (time.time() - start_time))