flask_app.py
import requests
import json
from bs4 import BeautifulSoup
from geopy.distance import geodesic
from flask import Flask, render_template, request

APP = Flask(__name__)
API = "https://en.wikipedia.org/w/api.php"
SES = requests.Session()


# Main index function
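# The front end is assumed to POST a JSON body shaped roughly like
#   {"latitude": 40.7, "longitude": -74.0, "ne": [lng, lat], "sw": [lng, lat]}
# (illustrative values; the ne/sw corner pairs are reversed below before being passed to
# geodesic, so half the corner-to-corner distance becomes the search radius in meters).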
@APP.route('/', methods=['GET', 'POST'])
def index():
    # Get locations on screen and export as geojson
    if request.method == "POST":
        data = request.get_json()
        # Fetch from Wikipedia api based on screen location/size
        results = getNearbyPlaces(data['latitude'], data['longitude'],
                                  geodesic(reversed(data['ne']), reversed(data['sw'])).meters / 2)
        # Convert to geojson
        results = jsonToGeojson(results)
        with open('/home/ashpil/mysite/static/locations.geojson', 'w') as outfile:
            json.dump(results, outfile)
        print("got new geojson")
        return results
    return render_template('map.html')


def getNearbyPlaces(lat, lng, screensize):
    # Wikipedia API parameters
    params = {
        "action": "query",
        "format": "json",
        "colimit": "40",
        "generator": "search",
        "prop": "coordinates",
        "gsrsearch": "nearcoord:" + str(round(screensize)) + "m," + str(lat) + "," + str(lng),
        "gsrlimit": "40"
    }
    # I know there is a dedicated Wikipedia geosearch API, but it only searches within a
    # radius of at most 10 km, which is unhelpful at the scale I'm working on. This is a
    # neat workaround that allows searching for articles in an area of any size.
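    # For example (illustrative values only): with lat=40.7, lng=-74.0 and a screensize of
    # about 250000 m, the search string built above becomes "nearcoord:250000m,40.7,-74.0",
    # and the generator returns up to 40 matching pages with their coordinates attached.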
    res = SES.get(url=API, params=params)
    data = res.json()
    # Strip some of the junk, return
    places = data["query"]["pages"]
    return places


# Takes the JSON received from the Wikipedia API and converts it to a valid GeoJSON
# FeatureCollection, keeping only the information that we are interested in.
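# Sketch of the conversion (field names taken from the code below, example values made up):
#   input entry:  ("12345", {"pageid": 12345, "title": "Example", "coordinates": [{"lat": 1.0, "lon": 2.0}]})
#   output:       {"type": "Feature", "properties": {"title": "Example", "id": 12345},
#                  "geometry": {"type": "Point", "coordinates": [2.0, 1.0]}}
# Entries without a "coordinates" key raise KeyError and are skipped.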
def jsonToGeojson(json):
    geojson = {
        'type': 'FeatureCollection',
        'features': []
    }
    for entry in json.items():
        try:
            feature = {
                'type': 'Feature',
                'properties': {},
                'geometry': {
                    'type': 'Point',
                    'coordinates': []
                }
            }
            feature['geometry']['coordinates'] = [entry[1]["coordinates"][0]["lon"],
                                                  entry[1]["coordinates"][0]["lat"]]
            feature['properties']['title'] = entry[1]['title']
            feature['properties']['id'] = entry[1]['pageid']
            geojson['features'].append(feature)
        except KeyError:
            continue
    return geojson


# Fetches the raw HTML Wikipedia description of an article by page id and strips out the data we don't want.
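# The front end is assumed to POST a form field article_id=<pageid>, presumably the same
# pageid that jsonToGeojson stores in each feature's properties. With rvparse the API returns
# the revision content already parsed to HTML, and rvsection=0 restricts it to the lead section.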
@APP.route('/_get_description', methods=['GET', 'POST'])
def getDescription():
    if request.method == "POST":
        article = request.form['article_id']
        params = {
            "action": "query",
            "prop": "revisions",
            "format": "json",
            "pageids": article,
            "rvprop": "content",
            "rvparse": True,
            "rvlimit": 1,
            "rvsection": 0
        }
        res = SES.get(url=API, params=params)
        data = res.json()
        data = data["query"]["pages"][str(article)]["revisions"][0]["*"]
        # Uses the BeautifulSoup html parser to remove unneeded elements
        soup = BeautifulSoup(data, "html.parser")
        # Gets the first non-infobox paragraph with text in it
        for x in soup.find_all('table', 'infobox geography vcard'):
            x.extract()
        for x in soup.find_all('span', id="coordinates"):
            x.extract()
        for x in soup.find_all('sup'):
            x.extract()
        paragraphs = soup.find_all("p")
        for paragraph in paragraphs:
            if len(paragraph.get_text(strip=True)) != 0:
                data = paragraph
                break
        data = str(data)
        # Replaces local links with links that would actually work
        data = data.replace("/wiki/", "//en.wikipedia.org/wiki/")
        # Makes those links open in a new tab
        data = data.replace("<a ", '<a target="_blank" ')
        # Return the cleaned-up lead paragraph HTML
        return data


if __name__ == '__main__':
    APP.run(debug=True)
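
# Quick local test once the app is running (a sketch; assumes the default Flask port and the
# request payloads described above):
#   curl -X POST -H "Content-Type: application/json" \
#        -d '{"latitude": 40.7, "longitude": -74.0, "ne": [-73.9, 40.8], "sw": [-74.1, 40.6]}' \
#        http://localhost:5000/
#   curl -X POST -d "article_id=<pageid>" http://localhost:5000/_get_description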