-
Notifications
You must be signed in to change notification settings - Fork 0
/
geocode.py
78 lines (61 loc) · 2.36 KB
/
geocode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# geocode.py
# (c) 2020 CincoNoveSeis Jornalismo Ltda.
#
# This program is licensed under the GNU General Public License, version 3.
# See the LICENSE file for details.
import googlemaps
import csv
import pprint
import re
import json
from unidecode import unidecode
from sys import argv
from pathlib import Path
# Read the Google Maps API key from a local file (surrounding whitespace
# removed) and build the client used for every geocoding request below.
api_key = Path('gapi-key.txt').read_text().strip()
gmaps = googlemaps.Client(key=api_key)
# Accumulates one result record per geocoded address.
oup = []

# Load every row of the CSV named by the first command-line argument into
# memory as a plain dict keyed by column name.
# newline='' hands newline processing to the csv module, as its documentation
# requires; without it, line breaks inside quoted fields can be mangled.
with open(argv[1], newline='') as f:
    addrs = [dict(row) for row in csv.DictReader(f, skipinitialspace=True)]
# Patterns used while cleaning addresses; compiled once, outside the loop.
# Matches everything before the first comma that is followed by whitespace
# and a character other than a double quote, a digit, "S" or "N" -- so a
# trailing ", 123" is kept (and presumably ", S/N" too -- TODO confirm).
_STREET_PART = re.compile(r'.*?(?=,\s+[^"\dSN])')
# Phone number preceded by a two-digit area code, optionally parenthesised.
_PHONE_WITH_AREA = re.compile(r'\(?\d{2}\)?\d{4}\d?-?\d{4}')
# Bare 8- or 9-digit phone number with an optional hyphen.
_PHONE_BARE = re.compile(r'\d{4}\d?-?\d{4}')

# Clean each address, geocode it, and append the outcome to `oup`.
# Processing stops at the first error so that whatever was collected so far
# can still be written out by the code that follows the loop.
for addr in addrs:
    try:
        # Work on an ASCII-folded, upper-cased copy of the raw address.
        endr_orig = unidecode(addr['endereco']).upper()

        # Collapse runs of whitespace, then drop noise that is not part of
        # the street address: the "ZONA URBANA" marker, anything from the
        # first "(", "AO LADO" or " - " onwards.
        endr = ' '.join(endr_orig.split())
        endr = endr.replace("ZONA URBANA", "")
        endr = endr.split('(')[0].strip()
        endr = endr.split('AO LADO')[0].strip()
        endr = endr.split(' - ')[0].strip()

        # Keep only the leading street part when a trailing segment follows.
        m = _STREET_PART.search(endr)
        if m is not None:
            endr = m.group(0).strip()

        # Remove phone labels and numbers. 'TELEFONE ' must go first:
        # stripping 'FONE ' first would turn it into a stray 'TELE'.
        endr = endr.replace('TELEFONE ', '').replace('FONE ', '')
        endr = _PHONE_WITH_AREA.sub('', endr)
        endr = _PHONE_BARE.sub('', endr)

        # Strip a trailing neighbourhood name so it is not duplicated when it
        # is appended below. The name is escaped because it is data, not a
        # regular expression.
        bairro = unidecode(addr['bairro']).upper()
        endr = re.sub(re.escape(bairro) + '$', '', endr)

        cidade = unidecode(addr['cidade']).upper()
        endr = ' '.join([endr, bairro, cidade, addr['uf']])
        print(endr)

        # NOTE(review): the Geocoding API's `locality` filter usually denotes
        # a city and `administrative_area` a state/region; here they receive
        # the neighbourhood and the city -- confirm this mapping is intended.
        components = {
            'locality': bairro,
            'administrative_area': cidade,
        }
        # Pass `components` by keyword: the positional slot after `address`
        # is not guaranteed to be `components` across googlemaps versions.
        result = gmaps.geocode(endr, components=components)

        oup.append({
            'ID': addr['ID'],
            'endr_orig': endr_orig,
            'endr': endr,
            'bairro': addr['bairro'],
            'uf': addr['uf'],
            'cidade': addr['cidade'],
            'data': result,
        })
    except Exception as e:
        # Deliberately broad: any failure (missing column, API error, ...)
        # ends the run while keeping the results gathered so far.
        print('ERROR! STOPPED!')
        print(e)
        break
# Write the collected results twice, both in the current working directory:
# once to a fixed-name backup file and once to a file named after the input
# CSV's stem. Both files are opened with `with` so they are flushed and
# closed deterministically.
with open('backup.json', 'w') as backup:
    json.dump(oup, backup)

with open(Path(argv[1]).stem + '.json', 'w') as f:
    json.dump(oup, f)