Skip to content

Port tiger_vs_json to Python 3 and rm all control chars #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions tiger_versus_json.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#! /usr/bin/env python2
#! /usr/bin/env python3

import json
import string
import sys
import re

#This data is of absolutely awful quality, so longitude and latitude are sometimes reversed.
#Adjust this appropriately if your data is not in the north-western quadrant of the Earth (northern latitude, western longitude).
Expand Down Expand Up @@ -32,6 +33,9 @@ def data_is_fucked(coords):
elif file_tokens[-1]=="county":
default_county = file_tokens[0].replace('_',' ').upper()

# All control characters (ASCII 0 - 31 and 127).
CONTROL_CHARS_RE = re.compile(r'[\x00-\x1F\x7F]')

for line in map(str.rstrip,open(sys.argv[1]).readlines()):
try:
address_object = json.loads(line)
Expand All @@ -42,7 +46,7 @@ def data_is_fucked(coords):
#Some basic sanity checks since the data isn't sane
if 'properties' not in address_object or not address_object['properties'] or 'geometry' not in address_object or not address_object['geometry'] or 'coordinates' not in address_object['geometry'] or not address_object['geometry']['coordinates']:
continue

properties = address_object['properties']
if 'number' not in properties or properties['number']=="" or 'street' not in properties or properties['street']=="":
continue
Expand All @@ -61,9 +65,10 @@ def data_is_fucked(coords):
if data_is_fucked(coords):
sys.stderr.write("Skipping fucked-beyond-repair JSON coordinates of "+repr(coords)+'\n')
continue

row[7] = '('+repr(coords[0])+' '+repr(coords[1])
row = map(string.capwords,row)
row = [string.capwords(x) for x in row]
row[5] = row[5].upper()
row = map(lambda x: x.replace(';','#'),row)
print "".join(map(lambda x: x if x >= ' ' and x <= '~' else "",';'.join(row).encode('ascii','ignore')))
row = [x.replace(';', '#') for x in row]
row = [CONTROL_CHARS_RE.sub('', x) for x in row]
print(";".join(row))