Skip to content

Commit

Permalink
adding scripts to build api joins table
Browse files Browse the repository at this point in the history
  • Loading branch information
jspeis committed Sep 7, 2016
1 parent 7aef753 commit d575d03
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
24 changes: 24 additions & 0 deletions scripts/bidirectional_geo_crosswalk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build an undirected ("bidirectional") geography crosswalk.
#
# Reads the parent/child pairs from crosswalk_geo_containment.csv, collapses
# (a, b) and (b, a) into a single connection, and writes the unique
# connections to eggs.csv with the header geo_a,geo_b.
import csv
import sys

import pandas as pd

df = pd.read_csv("crosswalk_geo_containment.csv")

# A frozenset ignores element order, so a pair seen in both directions is
# stored only once.  A row whose parent and child are the same geography
# yields a one-element frozenset.
my_connections = set()
for row in df.itertuples():
    my_connections.add(frozenset([row.parent_geoid, row.child_geoid]))

# The csv module wants a binary file on Python 2 but a text file opened with
# newline='' on Python 3; using 'wb' on Python 3 raises TypeError on write.
if sys.version_info[0] >= 3:
    csvfile = open('eggs.csv', 'w', newline='')
else:
    csvfile = open('eggs.csv', 'wb')

with csvfile:
    spamwriter = csv.writer(csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    spamwriter.writerow(["geo_a", "geo_b"])
    # NOTE: set iteration order is unspecified, so row order in the output
    # file is not guaranteed (the same was true of the Python 2 dict).
    for connection in my_connections:
        my_row = list(connection)
        if len(my_row) == 1:
            # Self-connection: emit the geography in both columns.
            my_row = 2 * my_row
        spamwriter.writerow(my_row)


96 changes: 96 additions & 0 deletions scripts/crosswalk_geo_containment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Schema-qualified name of the table that every generated statement targets.
table_name = 'attrs.crosswalk_geo_containment'

# Emit the DDL first so the INSERT statements printed later have a target.
# print(...) with one parenthesised argument produces the same output under
# Python 2's print statement and Python 3's print function; the bare
# statement form is a SyntaxError on Python 3.
print('''
CREATE TABLE {} (
child_geoid varchar(40),
parent_geoid varchar(40),
percent_covered real,
area_covered real
);\n'''.format(table_name))


# Geography-level pairs to intersect, grouped by child level.  Each entry is
# (child level, parent levels); the codes are the keys of name_map.
_parents_by_child = (
    ("160", ("040", "050", "310", "795")),
    ("140", ("160",)),
    ("050", ("040", "160", "310", "795")),
    ("310", ("040", "050", "795", "160")),
    ("795", ("040", "050", "160", "310")),
)

# Flattened list of (child, parent) pairs, in the order the INSERT
# statements are generated.
containments = [
    (child_level, parent_level)
    for child_level, parent_levels in _parents_by_child
    for parent_level in parent_levels
]

# Example: ("160", "050") pairs places (160) with the counties (050) they overlap.

# Geography level code -> name of the tiger2013 table holding that layer.
# The code is also the prefix of the generated geoids (e.g. '04000US...').
name_map = dict([
    ("040", "state"),
    ("050", "county"),
    ("140", "tract"),
    ("160", "place"),
    ("310", "cbsa"),
    ("795", "puma"),
])

def gen_sql(child_level, parent_level):
    """Return the INSERT statement that intersects two geography layers.

    child_level and parent_level are level codes that must be keys of
    name_map (a KeyError is raised otherwise).  The statement joins the two
    tiger2013 tables on ST_Intersects and, for every overlapping pair,
    records the child's and parent's geoids, the percentage of the child's
    area that lies inside the parent, and the intersected area itself.
    Rows are written to the module-level table_name.
    """
    # Only the PUMA layer (795) keeps its identifier in a column named
    # geoid10; every other layer uses geoid.
    gid_column_overrides = {"795": "geoid10"}
    child_table = name_map[child_level]
    parent_table = name_map[parent_level]
    template = '''INSERT INTO {4} (
SELECT
'{5}00US' || {0}.{2} AS child_geoid,
'{6}00US' || {1}.{3} AS parent_geoid,
ST_Area(ST_Intersection({0}.geom,{1}.geom))/ST_Area({0}.geom)*100 as percent_covered,
ST_Area(ST_Intersection({0}.geom,{1}.geom)) as area_covered
FROM tiger2013.{0}
JOIN tiger2013.{1} ON ST_Intersects({0}.geom, {1}.geom)
WHERE ST_Area(ST_Intersection({0}.geom,{1}.geom))/ST_Area({0}.geom) > 0
);'''
    return template.format(
        child_table,
        parent_table,
        gid_column_overrides.get(child_level, "geoid"),
        gid_column_overrides.get(parent_level, "geoid"),
        table_name,
        child_level,
        parent_level,
    )



# extra rows for geo crosswalk joins: state-to-nation links (states_sql) and identity rows (ident_sql)
def states_sql(table_name):
    """Return SQL that links every state to the nation ('01000US').

    The statement reads the distinct parent geoids starting with '040'
    that are already in table_name and inserts each one as a child of
    '01000US' with percent_covered 100 and a NULL area_covered.
    """
    template = '''INSERT INTO {0} (
select distinct parent_geoid as child_geoid, '01000US' parent_geoid,
100 as percent_covered, NULL::real as area_covered
FROM {0}
WHERE parent_geoid LIKE '040%'
);
'''
    return template.format(table_name)

def ident_sql(table_name):
    """Return SQL that adds identity (self-to-self) rows to table_name.

    Two statements are produced: one pairs every distinct child_geoid
    already in the table with itself, the other adds the identity row for
    the nation ('01000US').  Coverage columns are NULL on all of these rows.
    """
    # Every geography that already appears as a child is paired with itself.
    self_rows = '''INSERT INTO {0} (
select distinct child_geoid, child_geoid as parent_geoid,
NULL::real as percent_covered, NULL::real as area_covered
FROM {0}
);
'''
    # The nation's identity row is listed explicitly rather than derived.
    nation_row = '''INSERT INTO {0} (child_geoid, parent_geoid, percent_covered, area_covered) VALUES ('01000US', '01000US', NULL, NULL);
'''
    return (self_rows + nation_row).format(table_name)

# One INSERT per (child, parent) level pair.  The tuples in containments are
# ordered child first, matching gen_sql(child_level, parent_level).
for child, parent in containments:
    print(gen_sql(child, parent))

# Order matters: the state -> nation rows must exist before the identity
# rows are generated, because ident_sql only looks at existing child_geoids.
print(states_sql(table_name))
print(ident_sql(table_name))

# Index names cannot contain the schema dot, so it is replaced by an underscore.
table_name_no_dot = table_name.replace(".", "_")
print("CREATE INDEX {}_idx_child_geoid ON {} (child_geoid);".format(table_name_no_dot, table_name))
print("CREATE INDEX {}_idx_parent_geoid ON {} (parent_geoid);".format(table_name_no_dot, table_name))

0 comments on commit d575d03

Please sign in to comment.