From d575d031bdd92db1437a28e2a6a30360ca1f66a3 Mon Sep 17 00:00:00 2001
From: jspeis
Date: Wed, 7 Sep 2016 16:12:57 -0400
Subject: [PATCH] adding scripts to build api joins table

---
Note: both scripts were tidied by hand after export, so the blob ids on
the "index" lines below still refer to the originally committed contents.

 scripts/bidirectional_geo_crosswalk.py | 24 +++++++
 scripts/crosswalk_geo_containment.py   | 96 ++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)
 create mode 100644 scripts/bidirectional_geo_crosswalk.py
 create mode 100644 scripts/crosswalk_geo_containment.py

diff --git a/scripts/bidirectional_geo_crosswalk.py b/scripts/bidirectional_geo_crosswalk.py
new file mode 100644
index 0000000..3345a34
--- /dev/null
+++ b/scripts/bidirectional_geo_crosswalk.py
@@ -0,0 +1,24 @@
+import csv
+
+import pandas as pd
+
+df = pd.read_csv("crosswalk_geo_containment.csv")
+
+# Every unordered {parent, child} pair is kept once; the frozenset of a
+# self-pair (parent == child) collapses to a single element.
+my_connections = set()
+for row in df.itertuples():
+    my_connections.add(frozenset([row.parent_geoid, row.child_geoid]))
+
+# binary mode, as the Python 2 csv module expects for its output file
+with open('eggs.csv', 'wb') as csvfile:
+    spamwriter = csv.writer(csvfile, delimiter=',',
+                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
+    spamwriter.writerow(["geo_a", "geo_b"])
+    for pair in my_connections:
+        my_row = list(pair)
+        if len(my_row) == 1:
+            # self-pair: write the same geoid in both columns
+            my_row = 2*my_row
+        spamwriter.writerow(my_row)
+
diff --git a/scripts/crosswalk_geo_containment.py b/scripts/crosswalk_geo_containment.py
new file mode 100644
index 0000000..c26b12c
--- /dev/null
+++ b/scripts/crosswalk_geo_containment.py
@@ -0,0 +1,96 @@
+table_name = 'attrs.crosswalk_geo_containment'
+print '''
+CREATE TABLE {} (
+    child_geoid varchar(40),
+    parent_geoid varchar(40),
+    percent_covered real,
+    area_covered real
+);\n'''.format(table_name)
+
+# (child_level, parent_level) pairs of summary-level codes to intersect
+containments = [
+    ("160", "040"),
+    ("160", "050"),
+    ("160", "310"),
+    ("160", "795"),
+
+    ("140", "160"),
+
+    ("050", "040"),
+    ("050", "160"),
+    ("050", "310"),
+    ("050", "795"),
+
+    ("310", "040"),
+    ("310", "050"),
+    ("310", "795"),
+    ("310", "160"),
+
+    ("795", "040"),
+    ("795", "050"),
+    ("795", "160"),
+    ("795", "310"),
+]
+
+# summary-level code -> table name inside the tiger2013 schema
+name_map = {
+    "160": "place",
+    "050": "county",
+    "310": "cbsa",
+    "795": "puma",
+    "140": "tract",
+    "040": "state",
+}
+
+def gen_sql(child_level, parent_level):
+    """Return SQL inserting each child/parent pair whose geometries overlap."""
+    # the puma (795) table names its id column geoid10 instead of geoid
+    child_gid_name = "geoid" if child_level != "795" else "geoid10"
+    parent_gid_name = "geoid" if parent_level != "795" else "geoid10"
+    child_name = name_map[child_level]
+    parent_name = name_map[parent_level]
+    sql = '''INSERT INTO {4} (
+        SELECT
+        '{5}00US' || {0}.{2} AS child_geoid,
+        '{6}00US' || {1}.{3} AS parent_geoid,
+        ST_Area(ST_Intersection({0}.geom,{1}.geom))/ST_Area({0}.geom)*100 as percent_covered,
+        ST_Area(ST_Intersection({0}.geom,{1}.geom)) as area_covered
+        FROM tiger2013.{0}
+        JOIN tiger2013.{1} ON ST_Intersects({0}.geom, {1}.geom)
+        WHERE ST_Area(ST_Intersection({0}.geom,{1}.geom))/ST_Area({0}.geom) > 0
+    );'''.format(child_name, parent_name, child_gid_name, parent_gid_name, table_name, child_level, parent_level)
+    return sql
+
+
+def states_sql(table_name):
+    """Return SQL linking each 040 parent_geoid already in the table to '01000US'."""
+    sql = '''INSERT INTO {0} (
+        select distinct parent_geoid as child_geoid, '01000US' parent_geoid,
+        100 as percent_covered, NULL::real as area_covered
+        FROM {0}
+        WHERE parent_geoid LIKE '040%'
+    );
+    '''
+    return sql.format(table_name)
+
+def ident_sql(table_name):
+    """Return SQL adding a self-to-self row for every child_geoid and for '01000US'."""
+    sql = '''INSERT INTO {0} (
+        select distinct child_geoid, child_geoid as parent_geoid,
+        NULL::real as percent_covered, NULL::real as area_covered
+        FROM {0}
+    );
+    INSERT INTO {0} (child_geoid, parent_geoid, percent_covered, area_covered) VALUES ('01000US', '01000US', NULL, NULL);
+    '''
+    return sql.format(table_name)
+
+for child, parent in containments:
+    print gen_sql(child, parent)
+
+print states_sql(table_name)
+print ident_sql(table_name)
+
+# an index name cannot carry the schema prefix, so swap the dot for "_"
+table_name_no_dot = table_name.replace(".", "_")
+print "CREATE INDEX {}_idx_child_geoid ON {} (child_geoid);".format(table_name_no_dot, table_name)
+print "CREATE INDEX {}_idx_parent_geoid ON {} (parent_geoid);".format(table_name_no_dot, table_name)