From efe67761bfc6209d69520524553dd5d2405c3512 Mon Sep 17 00:00:00 2001
From: rfl-urbaniak
Date: Thu, 18 Jan 2024 16:55:43 +0100
Subject: [PATCH 001/142] updated readme
---
README.md | 52 ++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 44 insertions(+), 8 deletions(-)
diff --git a/README.md b/README.md
index 2a5b125c..f6644d15 100644
--- a/README.md
+++ b/README.md
@@ -3,14 +3,10 @@
-## Evaluating Policy Transfer via Similarity Analysis and Causal Inference
-```
-python -m venv venv
-source venv/bin/activate
-pip install -r requirements.txt
-pip install -e .
-cd tests && python -m pytest
-```
+# Evaluating Policy Transfer via Similarity Analysis and Causal Inference
+
+
+## Getting started
Welcome to the repository for [polis](http://polis.basis.ai/), developed by the [Basis Research Institute](https://www.basis.ai/) for [The Opportunity Project (TOP)](https://opportunity.census.gov/) 2023 in collaboration with the U.S. Department of Commerce. The primary goal of this project is to enhance access to data for local policymakers, facilitating more informed decision-making.
@@ -18,6 +14,43 @@ Welcome to the repository for [polis](http://polis.basis.ai/), developed by the
This is the backend repository for more advanced users. For a more pleasant frontend experience and more information, please use the [app](http://polis.basis.ai/).
+Installation
+------------
+
+**Basic Setup:**
+
+```sh
+
+ git clone git@github.com:BasisResearch/cities.git
+ cd cities
+ git checkout main
+ pip install .
+```
+
+The above will install the minimal version that's ported to [polis.basis.ai](http://polis.basis.ai).
+
+**Dev Setup:**
+
+To install dev dependencies, needed to run models, train models and run all the tests, run the following command:
+
+```sh
+pip install -e .[dev]
+```
+
+For details on which packages each installation option provides, see `setup.py`.
+
+
+**Contributing:**
+
+Before submitting a pull request, please autoformat your code and ensure that the unit tests pass locally:
+
+```sh
+make lint # linting
+make format # runs black and isort, including on notebooks in the docs/ folder
+make tests # linting, unit and notebook tests
+```
+
+
### The repository is structured as follows:
```
@@ -36,9 +69,12 @@ This is the backend repository for more advanced users. For a more pleasant fron
└── tests
```
+**WARNING: during the beta testing, the most recent version lives on the `staging-county-data` branch, and so do the most recent versions of the notebooks. Please switch to the branch before inspecting the notebooks.
If you're interested in downloading the data or exploring advanced features beyond the frontend, check out the `guides` folder in the `docs` directory. There, you'll find:
- `data_sources.ipynb` for information on data sources,
+- `similarity-conceptual.ipynb` for a conceptual account of how similarity comparison works,
+- `counterfactual-explained.ipynb` for a rough explanation of how our causal model works,
- `similarity_demo.ipynb` demonstrating the use of the `DataGrabber` class for easy data acces, and of our `FipsQuery` class, which is the key tool in the similarity-focused part of the project,
- `causal_insights_demo.ipynb` for an overview of how the `CausalInsight` class can be used to explore the influence of a range of intervention variables thanks to causal inference tools we employed. [WIP]
From fc50736525cbeba62c85d9766299141465046871 Mon Sep 17 00:00:00 2001
From: Emily
Date: Thu, 18 Jan 2024 11:30:01 -0500
Subject: [PATCH 002/142] adding mailing list blurb from polis.basis.ai
---
README.md | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index f6644d15..4dfa19a9 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
## Getting started
-Welcome to the repository for [polis](http://polis.basis.ai/), developed by the [Basis Research Institute](https://www.basis.ai/) for [The Opportunity Project (TOP)](https://opportunity.census.gov/) 2023 in collaboration with the U.S. Department of Commerce. The primary goal of this project is to enhance access to data for local policymakers, facilitating more informed decision-making.
+Welcome to the repository for [polis](http://polis.basis.ai/), developed by [Basis Research Institute](https://www.basis.ai/) for [The Opportunity Project (TOP)](https://opportunity.census.gov/) 2023 in collaboration with the U.S. Department of Commerce. The primary goal of this project is to enhance access to data for local policymakers, facilitating more informed decision-making.
This is the backend repository for more advanced users. For a more pleasant frontend experience and more information, please use the [app](http://polis.basis.ai/).
@@ -69,7 +69,7 @@ make tests # linting, unit and notebook tests
└── tests
```
-**WARNING: during the beta testing, the most recent version lives on the `staging-county-data` branch, and so do the most recent versions of the notebooks. Please switch to the branch before inspecting the notebooks.
+**WARNING:** During beta testing, the most recent version lives on the `staging-county-data` git branch, and so do the most recent versions of the notebooks. Please switch to this branch before inspecting the notebooks.
If you're interested in downloading the data or exploring advanced features beyond the frontend, check out the `guides` folder in the `docs` directory. There, you'll find:
- `data_sources.ipynb` for information on data sources,
@@ -78,5 +78,15 @@ If you're interested in downloading the data or exploring advanced features beyo
- `similarity_demo.ipynb` demonstrating the use of the `DataGrabber` class for easy data acces, and of our `FipsQuery` class, which is the key tool in the similarity-focused part of the project,
- `causal_insights_demo.ipynb` for an overview of how the `CausalInsight` class can be used to explore the influence of a range of intervention variables thanks to causal inference tools we employed. [WIP]
-Feel free to dive into these resources to gain deeper insights into the capabilities of the Polis project, or to reach out if you have any comments or suggestions.
+## Interested? We'd love to hear from you.
+
+[polis](http://polis.basis.ai/) is a research tool under very active development, and we are eager to hear feedback from users in the policymaking and public administration spaces to accelerate its benefit.
+
+If you have feature requests, recommendations for new data sources, or tips for resolving missing-data issues, if you find bugs in the tool (they certainly exist!), or if you have anything else to share, please do not hesitate to contact us at polis@basis.ai.
+
+To stay up to date on our latest features, you can subscribe to our [mailing list](https://dashboard.mailerlite.com/forms/102625/110535550672308121/share). In the near term, we will send out a notice about our upcoming batch of improvements (including performance speedups, support for mobile, and more comprehensive tutorials), as well as an interest form for users who would like to work closely with us on case studies to make the tool most useful in their work.
+
+Lastly, we emphasize that this website is still in beta testing, and hence all predictions should be taken with a grain of salt.
+
+Acknowledgments: polis was built by Basis, a non-profit AI research organization dedicated to creating automated reasoning technology that helps solve society's most intractable problems. To learn more about us, visit https://basis.ai.
From 26865db21309e4e8e7adabf33de3223c919b6c65 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 2 Aug 2024 09:57:54 -0400
Subject: [PATCH 003/142] add code for loading raw & processed parcel data into
db
---
etl/load_parcels.py | 42 +++++++++++++++++++++++++++++++++++++++++
etl/load_raw_parcels.py | 18 ++++++++++++++++++
etl/schema.sql | 22 +++++++++++++++++++++
3 files changed, 82 insertions(+)
create mode 100644 etl/load_parcels.py
create mode 100644 etl/load_raw_parcels.py
create mode 100644 etl/schema.sql
diff --git a/etl/load_parcels.py b/etl/load_parcels.py
new file mode 100644
index 00000000..9ce370cd
--- /dev/null
+++ b/etl/load_parcels.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+import psycopg2
+
+PARCEL_YEARS = range(2002, 2018)
+COUNTY_ID = "053"
+
+conn = psycopg2.connect(database="cities")
+cur = conn.cursor()
+
+
+with open("etl/schema.sql", "r") as f:
+ cur.execute(f.read())
+conn.commit()
+
+# select distinct geometry from all parcel tables
+distinct_geom = " union ".join(
+ f"select geom from parcel_raw_{year} where city = 'MINNEAPOLIS'"
+ for year in PARCEL_YEARS
+)
+parcel_geom_load = f"insert into parcel_geom (parcel_geom_data) {distinct_geom};"
+print("Executing:", parcel_geom_load)
+cur.execute(parcel_geom_load)
+conn.commit()
+
+# insert parcel data into parcel table
+parcel_data = " union all ".join(
+ f"""
+ select replace(pin, '{COUNTY_ID}-', ''), {year}, emv_land, emv_bldg, emv_total, nullif(year_built, 0), sale_date, sale_value, parcel_geom_id
+ from parcel_raw_{year}, parcel_geom
+ where parcel_raw_{year}.geom = parcel_geom.parcel_geom_data
+ and city = 'MINNEAPOLIS'
+ """
+ for year in PARCEL_YEARS
+)
+parcel_load = f"""
+insert into parcel (parcel_id, parcel_year, parcel_emv_land, parcel_emv_building, parcel_emv_total, parcel_year_built, parcel_sale_date, parcel_sale_value, parcel_geom_id)
+ {parcel_data}
+ """
+print("Executing:", parcel_load)
+cur.execute(parcel_load)
+conn.commit()
diff --git a/etl/load_raw_parcels.py b/etl/load_raw_parcels.py
new file mode 100644
index 00000000..efbba10c
--- /dev/null
+++ b/etl/load_raw_parcels.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+
+import glob
+import os
+
+SRID = 26915 # UTM Zone 15N
+
+for parcel_shape_dir in glob.glob(
+ "zoning/data/raw/property_values/shp_plan_regional_parcels_*/"
+):
+ year = int(parcel_shape_dir.split("/")[-2].split("_")[-1])
+ print(f"Loading parcels for year {year} from {parcel_shape_dir}")
+
+ os.system(
+ f"""
+ shp2pgsql -s {SRID} -I -d {parcel_shape_dir}Parcels{year}Hennepin.shp parcel_raw_{year} | pv -l | psql --quiet cities
+ """,
+ )
diff --git a/etl/schema.sql b/etl/schema.sql
new file mode 100644
index 00000000..48199913
--- /dev/null
+++ b/etl/schema.sql
@@ -0,0 +1,22 @@
+create extension if not exists postgis;
+
+drop table if exists parcel_geom cascade;
+create table parcel_geom (
+ parcel_geom_id serial primary key
+ , parcel_geom_data geometry
+);
+create index parcel_geom_data_idx on parcel_geom using gist(parcel_geom_data);
+
+drop table if exists parcel;
+create table parcel (
+ parcel_pk serial primary key
+ , parcel_id text
+ , parcel_year int not null
+ , parcel_emv_land numeric -- Estimated Market Value, land
+ , parcel_emv_building numeric -- Estimated Market Value, building
+ , parcel_emv_total numeric -- Estimated Market Value, total (may be more than sum of land and building)
+ , parcel_year_built int
+ , parcel_sale_date date
+ , parcel_sale_value numeric
+ , parcel_geom_id int references parcel_geom(parcel_geom_id)
+);
From 94923022e1e7846988ee9b1cfebe39ab5ba65d68 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 2 Aug 2024 13:20:54 -0400
Subject: [PATCH 004/142] add code to load raw zip code data
---
etl/load_raw_zip_codes.py | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
create mode 100644 etl/load_raw_zip_codes.py
diff --git a/etl/load_raw_zip_codes.py b/etl/load_raw_zip_codes.py
new file mode 100644
index 00000000..3317f5d9
--- /dev/null
+++ b/etl/load_raw_zip_codes.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+import glob
+import os
+
+SRID = 26915 # UTM Zone 15N
+
+
+os.system(
+ f"""
+ shp2pgsql -s {SRID} -I -d zoning/data/raw/base/shp_society_census2000tiger_zcta/Census2000TigerZipCodeTabAreas.shp zip_raw_2000 | pv -l | psql --quiet cities
+ """,
+)
+
+os.system(
+ f"""
+ shp2pgsql -s {SRID} -I -d zoning/data/raw/base/shp_bdry_zip_code_tabulation_areas/zip_code_tabulation_areas.shp zip_raw_2020 | pv -l | psql --quiet cities
+ """,
+)
From e4d7eb21e67a0956d3141e98d49efc4cc403cab1 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 09:29:19 -0400
Subject: [PATCH 005/142] merge code for loading raw shapes and raw zip codes
---
etl/db.py | 2 +
etl/load_raw_parcels.py | 18 --------
etl/load_raw_shapes.py | 88 +++++++++++++++++++++++++++++++++++++++
etl/load_raw_zip_codes.py | 19 ---------
4 files changed, 90 insertions(+), 37 deletions(-)
create mode 100644 etl/db.py
delete mode 100644 etl/load_raw_parcels.py
create mode 100644 etl/load_raw_shapes.py
delete mode 100644 etl/load_raw_zip_codes.py
diff --git a/etl/db.py b/etl/db.py
new file mode 100644
index 00000000..acaa0053
--- /dev/null
+++ b/etl/db.py
@@ -0,0 +1,2 @@
+HOST = "34.123.100.76"  # NOTE(review): database host is hard-coded in source; consider reading it from the environment
+USER = "postgres"  # database role the ETL scripts connect as
diff --git a/etl/load_raw_parcels.py b/etl/load_raw_parcels.py
deleted file mode 100644
index efbba10c..00000000
--- a/etl/load_raw_parcels.py
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env python
-
-import glob
-import os
-
-SRID = 26915 # UTM Zone 15N
-
-for parcel_shape_dir in glob.glob(
- "zoning/data/raw/property_values/shp_plan_regional_parcels_*/"
-):
- year = int(parcel_shape_dir.split("/")[-2].split("_")[-1])
- print(f"Loading parcels for year {year} from {parcel_shape_dir}")
-
- os.system(
- f"""
- shp2pgsql -s {SRID} -I -d {parcel_shape_dir}Parcels{year}Hennepin.shp parcel_raw_{year} | pv -l | psql --quiet cities
- """,
- )
diff --git a/etl/load_raw_shapes.py b/etl/load_raw_shapes.py
new file mode 100644
index 00000000..3f6a09b1
--- /dev/null
+++ b/etl/load_raw_shapes.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+import glob
+import subprocess
+import logging
+import os
+
+from db import HOST, USER
+
+log = logging.getLogger(__name__)
+
+BASE_DIR = "zoning/data/raw"
+OGR2OGR_OPTS = [
+ "--config",
+ "PG_USE_COPY", # use postgres specific copy
+ "-progress",
+ "-lco",
+ "PRECISION=NO", # disable use of numeric types (required when shapefiles mis-specify numeric precision)
+ "-overwrite", # overwrite existing tables
+ "-lco",
+ "GEOMETRY_NAME=geom", # name of geometry column
+ "-nlt",
+ "PROMOTE_TO_MULTI", # promote all POLYGONs to MULTIPOLYGONs
+]
+DB_OPTS = [f"Pg:dbname=cities host={HOST} user={USER} port=5432"]
+
+# (shapefile, table_name) pairs. shapefiles are relative to BASE_DIR
+REL_SHAPES = [
+ (
+ "base/shp_society_census2000tiger_zcta/Census2000TigerZipCodeTabAreas.shp",
+ "zip_raw_2000",
+ ),
+ (
+ "base/shp_bdry_zip_code_tabulation_areas/zip_code_tabulation_areas.shp",
+ "zip_raw_2020",
+ ),
+ (
+ "base/hennepin_county_census_tracts_2018/cb_2018_27_tract_500k.shp",
+ "census_tract_raw_2018",
+ ),
+ (
+ "base/hennepin_county_census_block_groups_2018/cb_2018_27_bg_500k.shp",
+ "census_block_group_raw_2018",
+ ),
+ (
+ "base/hennepin_county_census_tracts_2023/cb_2023_27_tract_500k.shp",
+ "census_tract_raw_2023",
+ ),
+ (
+ "base/hennepin_county_census_block_groups_2023/cb_2023_27_bg_500k.shp",
+ "census_block_group_raw_2023",
+ ),
+ (
+ "commercial_permits/shp_struc_non_res_construction/NonresidentialConstruction.shp",
+ "commercial_permits_raw",
+ ),
+ (
+ "residential_permits/shp_econ_residential_building_permts/ResidentialPermits.shp",
+ "residential_permits_raw",
+ ),
+]
+
+
+def main():  # load every shapefile in REL_SHAPES, plus any yearly parcel shapefiles found on disk, via ogr2ogr
+ # prefix each listed shapefile with BASE_DIR (BASE_DIR is itself relative, so paths stay relative to the working directory)
+ abs_shapes = [(os.path.join(BASE_DIR, shape), table) for shape, table in REL_SHAPES]
+
+ for parcel_shape_dir in glob.glob(
+ os.path.join(BASE_DIR, "property_values/shp_plan_regional_parcels_*/")
+ ):
+ year = int(parcel_shape_dir.split("/")[-2].split("_")[-1])  # pattern ends in "/", so [-2] is the directory name; its last "_" field is parsed as the year
+ shape = os.path.join(parcel_shape_dir, f"Parcels{year}Hennepin.shp")
+ table = f"parcel_raw_{year}"
+ abs_shapes.append((shape, table))
+
+ log.info("Loading raw shape files: %s", abs_shapes)
+ for shape, table in abs_shapes:
+ if not os.path.exists(shape):
+ log.warn("Skipping %s because it does not exist", shape)
+ continue
+
+ subprocess.check_call(  # raises CalledProcessError on a non-zero exit, which stops the remaining loads
+ ["ogr2ogr"] + OGR2OGR_OPTS + ["-nln", table] + DB_OPTS + [shape]
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/etl/load_raw_zip_codes.py b/etl/load_raw_zip_codes.py
deleted file mode 100644
index 3317f5d9..00000000
--- a/etl/load_raw_zip_codes.py
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-
-import glob
-import os
-
-SRID = 26915 # UTM Zone 15N
-
-
-os.system(
- f"""
- shp2pgsql -s {SRID} -I -d zoning/data/raw/base/shp_society_census2000tiger_zcta/Census2000TigerZipCodeTabAreas.shp zip_raw_2000 | pv -l | psql --quiet cities
- """,
-)
-
-os.system(
- f"""
- shp2pgsql -s {SRID} -I -d zoning/data/raw/base/shp_bdry_zip_code_tabulation_areas/zip_code_tabulation_areas.shp zip_raw_2020 | pv -l | psql --quiet cities
- """,
-)
From d1cae373e23c7d09463c6a313d294286908be284 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 11:00:24 -0400
Subject: [PATCH 006/142] use correct srid when creating joined parcel table
---
etl/load_parcels.py | 16 +++++++++-------
etl/schema.sql | 4 +++-
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/etl/load_parcels.py b/etl/load_parcels.py
index 9ce370cd..40229899 100644
--- a/etl/load_parcels.py
+++ b/etl/load_parcels.py
@@ -2,20 +2,21 @@
import psycopg2
-PARCEL_YEARS = range(2002, 2018)
+from db import HOST, USER
+
+PARCEL_YEARS = range(2002, 2024)
COUNTY_ID = "053"
-conn = psycopg2.connect(database="cities")
+conn = psycopg2.connect(host=HOST, user=USER, database="cities")
cur = conn.cursor()
-
with open("etl/schema.sql", "r") as f:
cur.execute(f.read())
conn.commit()
# select distinct geometry from all parcel tables
distinct_geom = " union ".join(
- f"select geom from parcel_raw_{year} where city = 'MINNEAPOLIS'"
+ f"select geom from parcel_raw_{year} where upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'"
for year in PARCEL_YEARS
)
parcel_geom_load = f"insert into parcel_geom (parcel_geom_data) {distinct_geom};"
@@ -26,13 +27,14 @@
# insert parcel data into parcel table
parcel_data = " union all ".join(
f"""
- select replace(pin, '{COUNTY_ID}-', ''), {year}, emv_land, emv_bldg, emv_total, nullif(year_built, 0), sale_date, sale_value, parcel_geom_id
+ select replace(pin, '{COUNTY_ID}-', ''), {year}, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom_id
from parcel_raw_{year}, parcel_geom
where parcel_raw_{year}.geom = parcel_geom.parcel_geom_data
- and city = 'MINNEAPOLIS'
+ and upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'
"""
- for year in PARCEL_YEARS
+ for year in range(2002, 2018)
)
+
parcel_load = f"""
insert into parcel (parcel_id, parcel_year, parcel_emv_land, parcel_emv_building, parcel_emv_total, parcel_year_built, parcel_sale_date, parcel_sale_value, parcel_geom_id)
{parcel_data}
diff --git a/etl/schema.sql b/etl/schema.sql
index 48199913..8f27cea4 100644
--- a/etl/schema.sql
+++ b/etl/schema.sql
@@ -3,7 +3,7 @@ create extension if not exists postgis;
drop table if exists parcel_geom cascade;
create table parcel_geom (
parcel_geom_id serial primary key
- , parcel_geom_data geometry
+ , parcel_geom_data geometry(MultiPolygon, 26915) not null
);
create index parcel_geom_data_idx on parcel_geom using gist(parcel_geom_data);
@@ -12,11 +12,13 @@ create table parcel (
parcel_pk serial primary key
, parcel_id text
, parcel_year int not null
+
, parcel_emv_land numeric -- Estimated Market Value, land
, parcel_emv_building numeric -- Estimated Market Value, building
, parcel_emv_total numeric -- Estimated Market Value, total (may be more than sum of land and building)
, parcel_year_built int
, parcel_sale_date date
, parcel_sale_value numeric
+
, parcel_geom_id int references parcel_geom(parcel_geom_id)
);
From 7cae0db02290f25b58a3f899e06ad36a0820254a Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 11:06:01 -0400
Subject: [PATCH 007/142] make comments visible in db
---
etl/schema.sql | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/etl/schema.sql b/etl/schema.sql
index 8f27cea4..3543c813 100644
--- a/etl/schema.sql
+++ b/etl/schema.sql
@@ -13,12 +13,16 @@ create table parcel (
, parcel_id text
, parcel_year int not null
- , parcel_emv_land numeric -- Estimated Market Value, land
- , parcel_emv_building numeric -- Estimated Market Value, building
- , parcel_emv_total numeric -- Estimated Market Value, total (may be more than sum of land and building)
+ , parcel_emv_land numeric
+ , parcel_emv_building numeric
+ , parcel_emv_total numeric
, parcel_year_built int
, parcel_sale_date date
, parcel_sale_value numeric
, parcel_geom_id int references parcel_geom(parcel_geom_id)
);
+
+comment on column parcel.parcel_emv_land is 'Estimated Market Value, land';
+comment on column parcel.parcel_emv_building is 'Estimated Market Value, buildings';
+comment on column parcel.parcel_emv_total is 'Estimated Market Value, total (may be more than sum of land and building)';
From 936ce9f45d0701f16c31eb541249963a90256aec Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 11:16:09 -0400
Subject: [PATCH 008/142] enable logging
---
etl/load_raw_shapes.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/etl/load_raw_shapes.py b/etl/load_raw_shapes.py
index 3f6a09b1..41119a47 100644
--- a/etl/load_raw_shapes.py
+++ b/etl/load_raw_shapes.py
@@ -73,9 +73,10 @@ def main():
table = f"parcel_raw_{year}"
abs_shapes.append((shape, table))
- log.info("Loading raw shape files: %s", abs_shapes)
for shape, table in abs_shapes:
- if not os.path.exists(shape):
+ if os.path.exists(shape):
+ log.info("Loading %s into %s", shape, table)
+ else:
log.warn("Skipping %s because it does not exist", shape)
continue
@@ -85,4 +86,5 @@ def main():
if __name__ == "__main__":
+ logging.basicConfig(level=logging.INFO)
main()
From ad5dff511b0a923da3266c110646dd7db42ede93 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 12:10:39 -0400
Subject: [PATCH 009/142] remove table prefixes from field names
---
etl/load_parcels.py | 8 ++++----
etl/schema.sql | 33 +++++++++++++++++----------------
2 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/etl/load_parcels.py b/etl/load_parcels.py
index 40229899..6128f9ba 100644
--- a/etl/load_parcels.py
+++ b/etl/load_parcels.py
@@ -19,7 +19,7 @@
f"select geom from parcel_raw_{year} where upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'"
for year in PARCEL_YEARS
)
-parcel_geom_load = f"insert into parcel_geom (parcel_geom_data) {distinct_geom};"
+parcel_geom_load = f"insert into parcel_geom (geom) {distinct_geom};"
print("Executing:", parcel_geom_load)
cur.execute(parcel_geom_load)
conn.commit()
@@ -27,16 +27,16 @@
# insert parcel data into parcel table
parcel_data = " union all ".join(
f"""
- select replace(pin, '{COUNTY_ID}-', ''), {year}, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom_id
+ select replace(pin, '{COUNTY_ID}-', ''), {year}, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom.id
from parcel_raw_{year}, parcel_geom
- where parcel_raw_{year}.geom = parcel_geom.parcel_geom_data
+ where parcel_raw_{year}.geom = parcel_geom.geom
and upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'
"""
for year in range(2002, 2018)
)
parcel_load = f"""
-insert into parcel (parcel_id, parcel_year, parcel_emv_land, parcel_emv_building, parcel_emv_total, parcel_year_built, parcel_sale_date, parcel_sale_value, parcel_geom_id)
+insert into parcel (pid, year, emv_land, emv_building, emv_total, year_built, sale_date, sale_value, geom_id)
{parcel_data}
"""
print("Executing:", parcel_load)
diff --git a/etl/schema.sql b/etl/schema.sql
index 3543c813..4083f332 100644
--- a/etl/schema.sql
+++ b/etl/schema.sql
@@ -2,27 +2,28 @@ create extension if not exists postgis;
drop table if exists parcel_geom cascade;
create table parcel_geom (
- parcel_geom_id serial primary key
- , parcel_geom_data geometry(MultiPolygon, 26915) not null
+ id serial primary key
+ , geom geometry(MultiPolygon, 26915) not null
);
-create index parcel_geom_data_idx on parcel_geom using gist(parcel_geom_data);
+create index parcel_geom_idx on parcel_geom using gist(geom);
drop table if exists parcel;
create table parcel (
- parcel_pk serial primary key
- , parcel_id text
- , parcel_year int not null
+ id serial primary key
+ , pid text not null
+ , year int not null
- , parcel_emv_land numeric
- , parcel_emv_building numeric
- , parcel_emv_total numeric
- , parcel_year_built int
- , parcel_sale_date date
- , parcel_sale_value numeric
+ , emv_land numeric
+ , emv_building numeric
+ , emv_total numeric
+ , year_built int
+ , sale_date date
+ , sale_value numeric
- , parcel_geom_id int references parcel_geom(parcel_geom_id)
+ , geom_id int references parcel_geom(id)
);
-comment on column parcel.parcel_emv_land is 'Estimated Market Value, land';
-comment on column parcel.parcel_emv_building is 'Estimated Market Value, buildings';
-comment on column parcel.parcel_emv_total is 'Estimated Market Value, total (may be more than sum of land and building)';
+comment on column parcel.pid is 'Municipal parcel ID';
+comment on column parcel.emv_land is 'Estimated Market Value, land';
+comment on column parcel.emv_building is 'Estimated Market Value, buildings';
+comment on column parcel.emv_total is 'Estimated Market Value, total (may be more than sum of land and building)';
From e90d83362977b2e142a76c5649ea6d0c96fcce04 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 12:10:51 -0400
Subject: [PATCH 010/142] load zip data into shared table
---
etl/load_zip.py | 7 +++++++
etl/zip_schema.sql | 8 ++++++++
2 files changed, 15 insertions(+)
create mode 100644 etl/load_zip.py
create mode 100644 etl/zip_schema.sql
diff --git a/etl/load_zip.py b/etl/load_zip.py
new file mode 100644
index 00000000..14e819d3
--- /dev/null
+++ b/etl/load_zip.py
@@ -0,0 +1,7 @@
+zip_load = """
+select zcta5ce20, 2020, geom from zip_raw_2020
+union select zcta, 2000, geom from zip_raw_2000
+"""
+print("Executing:", zip_load)
+cur.execute(zip_load)
+conn.commit()
diff --git a/etl/zip_schema.sql b/etl/zip_schema.sql
new file mode 100644
index 00000000..6521a281
--- /dev/null
+++ b/etl/zip_schema.sql
@@ -0,0 +1,8 @@
+drop table if exists zip_code;
+create table zip_code (
+ id serial primary key
+ , zip_code text not null
+ , year int not null
+ , geom geometry(MultiPolygon, 4269) not null
+);
+create index zip_code_geom_idx on zip_code using gist(geom);
From c2c3aa666d2c0b2c19b9522898865b57860f08ba Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 12:14:37 -0400
Subject: [PATCH 011/142] actually load zip data
---
etl/load_zip.py | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/etl/load_zip.py b/etl/load_zip.py
index 14e819d3..8cf966db 100644
--- a/etl/load_zip.py
+++ b/etl/load_zip.py
@@ -1,6 +1,21 @@
+import psycopg2
+
+from db import HOST, USER
+
+PARCEL_YEARS = range(2002, 2024)  # NOTE(review): not referenced anywhere in this script; looks copied from load_parcels.py
+COUNTY_ID = "053"  # NOTE(review): likewise unreferenced in this script
+
+conn = psycopg2.connect(host=HOST, user=USER, database="cities")  # not closed explicitly in the visible script
+cur = conn.cursor()
+
+with open("etl/zip_schema.sql", "r") as f:  # path is relative to the working directory
+ cur.execute(f.read())  # runs the whole schema file: drops and recreates zip_code and its index
+conn.commit()
+
zip_load = """
+insert into zip_code(zip_code, year, geom)
select zcta5ce20, 2020, geom from zip_raw_2020
-union select zcta, 2000, geom from zip_raw_2000
+union select zcta, 2000, ST_Transform(geom, 4269) from zip_raw_2000
"""
print("Executing:", zip_load)
cur.execute(zip_load)
From 2d8e43cb3438bf4ebe2797fe52f013b93cfbf94a Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 14:09:10 -0400
Subject: [PATCH 012/142] switch from dates to validity ranges
---
etl/load_parcels.py | 4 ++--
etl/load_zip.py | 6 +++---
etl/schema.sql | 3 ++-
etl/zip_schema.sql | 2 +-
4 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/etl/load_parcels.py b/etl/load_parcels.py
index 6128f9ba..0c7c292e 100644
--- a/etl/load_parcels.py
+++ b/etl/load_parcels.py
@@ -27,7 +27,7 @@
# insert parcel data into parcel table
parcel_data = " union all ".join(
f"""
- select replace(pin, '{COUNTY_ID}-', ''), {year}, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom.id
+ select replace(pin, '{COUNTY_ID}-', ''), '[{year}-01-01,{year+1}-01-01)'::daterange, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom.id
from parcel_raw_{year}, parcel_geom
where parcel_raw_{year}.geom = parcel_geom.geom
and upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'
@@ -36,7 +36,7 @@
)
parcel_load = f"""
-insert into parcel (pid, year, emv_land, emv_building, emv_total, year_built, sale_date, sale_value, geom_id)
+insert into parcel (pid, valid, emv_land, emv_building, emv_total, year_built, sale_date, sale_value, geom_id)
{parcel_data}
"""
print("Executing:", parcel_load)
diff --git a/etl/load_zip.py b/etl/load_zip.py
index 8cf966db..02a27cbc 100644
--- a/etl/load_zip.py
+++ b/etl/load_zip.py
@@ -13,9 +13,9 @@
conn.commit()
zip_load = """
-insert into zip_code(zip_code, year, geom)
-select zcta5ce20, 2020, geom from zip_raw_2020
-union select zcta, 2000, ST_Transform(geom, 4269) from zip_raw_2000
+insert into zip_code(zip_code, valid, geom)
+select zcta5ce20, '[2020-01-01,)'::daterange, geom from zip_raw_2020
+union select zcta, '[2000-01-01,2020-01-01)'::daterange, ST_Transform(geom, 4269) from zip_raw_2000
"""
print("Executing:", zip_load)
cur.execute(zip_load)
diff --git a/etl/schema.sql b/etl/schema.sql
index 4083f332..11517b64 100644
--- a/etl/schema.sql
+++ b/etl/schema.sql
@@ -11,7 +11,7 @@ drop table if exists parcel;
create table parcel (
id serial primary key
, pid text not null
- , year int not null
+ , valid daterange not null
, emv_land numeric
, emv_building numeric
@@ -23,6 +23,7 @@ create table parcel (
, geom_id int references parcel_geom(id)
);
+comment on column parcel.valid is 'Dates for which this parcel is valid';
comment on column parcel.pid is 'Municipal parcel ID';
comment on column parcel.emv_land is 'Estimated Market Value, land';
comment on column parcel.emv_building is 'Estimated Market Value, buildings';
diff --git a/etl/zip_schema.sql b/etl/zip_schema.sql
index 6521a281..4a15a2fa 100644
--- a/etl/zip_schema.sql
+++ b/etl/zip_schema.sql
@@ -2,7 +2,7 @@ drop table if exists zip_code;
create table zip_code (
id serial primary key
, zip_code text not null
- , year int not null
+ , valid daterange not null
, geom geometry(MultiPolygon, 4269) not null
);
create index zip_code_geom_idx on zip_code using gist(geom);
From ccb3b62e75d048ea5713b4df466adeb763edd9c1 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 5 Aug 2024 16:19:51 -0400
Subject: [PATCH 013/142] create parcel to zip mapping
---
etl/parcel_to_zip.sql | 54 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
create mode 100644 etl/parcel_to_zip.sql
diff --git a/etl/parcel_to_zip.sql b/etl/parcel_to_zip.sql
new file mode 100644
index 00000000..b0efa96b
--- /dev/null
+++ b/etl/parcel_to_zip.sql
@@ -0,0 +1,54 @@
+drop table if exists parcel_zip;
+create table parcel_zip (
+ parcel_id int references parcel(id)
+ , zip_code_id int references zip_code(id)
+ , valid daterange not null
+);
+
+with
+parcel_with_geom as (
+ select parcel.id, geom_id, valid, ST_Transform(geom, 4269) as geom
+ from parcel
+ join parcel_geom on geom_id = parcel_geom.id
+),
+parcel_in_zip as ( -- easy case: one parcel in one zip code
+ select parcel.id as parcel_id,
+ zip_code.id as zip_code_id,
+ parcel.valid * zip_code.valid as valid
+ from parcel_with_geom as parcel
+ join zip_code on ST_Within(parcel.geom, zip_code.geom) and parcel.valid && zip_code.valid
+),
+parcel_not_within_zip as ( -- parcels that are not fully within any zip code
+ select *
+ from parcel_with_geom
+ where not exists (select parcel_id from parcel_in_zip where parcel_id = id)
+),
+parcel_largest_overlap as ( -- parcels that overlap multiple zip codes map to the one with the largest overlap
+ select distinct on (parcel.id)
+ parcel.id as parcel_id,
+ zip_code.id as zip_code_id,
+ parcel.valid * zip_code.valid as valid
+ from parcel_not_within_zip as parcel
+ join zip_code on ST_Intersects(parcel.geom, zip_code.geom) and parcel.valid && zip_code.valid
+ order by parcel_id, ST_Area(ST_Intersection(parcel.geom, zip_code.geom)) desc
+),
+parcel_no_overlap as ( -- parcels that do not overlap any zip code
+ select *
+ from parcel_not_within_zip
+ where not exists (select parcel_id from parcel_largest_overlap where parcel_id = id)
+),
+parcel_closest as ( -- parcels that overlap no zip codes map to the closest one
+ select distinct on (parcel.id)
+ parcel.id as parcel_id,
+ zip_code.id as zip_code_id,
+ parcel.valid * zip_code.valid as valid
+ from parcel_no_overlap as parcel
+ join zip_code on parcel.valid && zip_code.valid
+ order by parcel_id, ST_Distance(parcel.geom, zip_code.geom)
+)
+insert into parcel_zip
+select * from parcel_in_zip
+union all
+select * from parcel_largest_overlap
+union all
+select * from parcel_closest;
From 57e7609fda1dc58983e4af9cf6f37ab8db0ad7fb Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 6 Aug 2024 10:15:47 -0400
Subject: [PATCH 014/142] track why a parcel was assigned to a zip code
---
etl/parcel_to_zip.sql | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/etl/parcel_to_zip.sql b/etl/parcel_to_zip.sql
index b0efa96b..a1edac26 100644
--- a/etl/parcel_to_zip.sql
+++ b/etl/parcel_to_zip.sql
@@ -1,8 +1,12 @@
+drop type if exists parcel_zip_type;
+create type parcel_zip_type as enum ('within', 'most_overlap', 'closest');
+
drop table if exists parcel_zip;
create table parcel_zip (
parcel_id int references parcel(id)
, zip_code_id int references zip_code(id)
, valid daterange not null
+ , type parcel_zip_type not null
);
with
@@ -47,8 +51,8 @@ parcel_closest as ( -- parcels that overlap no zip codes map to the closest one
order by parcel_id, ST_Distance(parcel.geom, zip_code.geom)
)
insert into parcel_zip
-select * from parcel_in_zip
+select *, 'within'::parcel_zip_type from parcel_in_zip
union all
-select * from parcel_largest_overlap
+select *, 'most_overlap'::parcel_zip_type from parcel_largest_overlap
union all
-select * from parcel_closest;
+select *, 'closest'::parcel_zip_type from parcel_closest;
From cf4b4a0bfb98f9fbd5f747198af1dc724e5edca3 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 6 Aug 2024 11:34:19 -0400
Subject: [PATCH 015/142] reformat sql
---
.pg_format | 2 +
etl/parcel_to_zip.sql | 162 +++++++++++++++++++++++++++++-------------
etl/schema.sql | 36 ++++++----
etl/zip_schema.sql | 13 ++--
scripts/clean.sh | 4 +-
5 files changed, 145 insertions(+), 72 deletions(-)
create mode 100644 .pg_format
diff --git a/.pg_format b/.pg_format
new file mode 100644
index 00000000..2a3c25bb
--- /dev/null
+++ b/.pg_format
@@ -0,0 +1,2 @@
+keyword-case=1
+comma=start
\ No newline at end of file
diff --git a/etl/parcel_to_zip.sql b/etl/parcel_to_zip.sql
index a1edac26..669e48a1 100644
--- a/etl/parcel_to_zip.sql
+++ b/etl/parcel_to_zip.sql
@@ -1,58 +1,118 @@
drop type if exists parcel_zip_type;
-create type parcel_zip_type as enum ('within', 'most_overlap', 'closest');
+
+create type parcel_zip_type as enum (
+ 'within'
+ , 'most_overlap'
+ , 'closest'
+);
drop table if exists parcel_zip;
+
create table parcel_zip (
- parcel_id int references parcel(id)
- , zip_code_id int references zip_code(id)
- , valid daterange not null
- , type parcel_zip_type not null
+ parcel_id int references parcel (id)
+ , zip_code_id int references zip_code (id)
+ , valid daterange not null
+ , type parcel_zip_type not null
);
-with
-parcel_with_geom as (
- select parcel.id, geom_id, valid, ST_Transform(geom, 4269) as geom
- from parcel
- join parcel_geom on geom_id = parcel_geom.id
-),
-parcel_in_zip as ( -- easy case: one parcel in one zip code
- select parcel.id as parcel_id,
- zip_code.id as zip_code_id,
- parcel.valid * zip_code.valid as valid
- from parcel_with_geom as parcel
- join zip_code on ST_Within(parcel.geom, zip_code.geom) and parcel.valid && zip_code.valid
-),
-parcel_not_within_zip as ( -- parcels that are not fully within any zip code
- select *
- from parcel_with_geom
- where not exists (select parcel_id from parcel_in_zip where parcel_id = id)
-),
-parcel_largest_overlap as ( -- parcels that overlap multiple zip codes map to the one with the largest overlap
- select distinct on (parcel.id)
- parcel.id as parcel_id,
- zip_code.id as zip_code_id,
- parcel.valid * zip_code.valid as valid
- from parcel_not_within_zip as parcel
- join zip_code on ST_Intersects(parcel.geom, zip_code.geom) and parcel.valid && zip_code.valid
- order by parcel_id, ST_Area(ST_Intersection(parcel.geom, zip_code.geom)) desc
-),
-parcel_no_overlap as ( -- parcels that do not overlap any zip code
- select *
- from parcel_not_within_zip
- where not exists (select parcel_id from parcel_largest_overlap where parcel_id = id)
-),
-parcel_closest as ( -- parcels that overlap no zip codes map to the closest one
- select distinct on (parcel.id)
- parcel.id as parcel_id,
- zip_code.id as zip_code_id,
- parcel.valid * zip_code.valid as valid
- from parcel_no_overlap as parcel
- join zip_code on parcel.valid && zip_code.valid
- order by parcel_id, ST_Distance(parcel.geom, zip_code.geom)
+with parcel_with_geom as (
+ select
+ parcel.id
+ , geom_id
+ , valid
+ , ST_Transform (geom
+ , 4269) as geom
+ from
+ parcel
+ join parcel_geom on geom_id = parcel_geom.id
+)
+, parcel_in_zip as (
+ -- easy case: one parcel in one zip code
+ select
+ parcel.id as parcel_id
+ , zip_code.id as zip_code_id
+ , parcel.valid * zip_code.valid as valid
+ from
+ parcel_with_geom as parcel
+ join zip_code on ST_Within (parcel.geom
+ , zip_code.geom)
+ and parcel.valid && zip_code.valid
)
-insert into parcel_zip
-select *, 'within'::parcel_zip_type from parcel_in_zip
-union all
-select *, 'most_overlap'::parcel_zip_type from parcel_largest_overlap
-union all
-select *, 'closest'::parcel_zip_type from parcel_closest;
+, parcel_not_within_zip as (
+ -- parcels that are not fully within any zip code
+ select
+ *
+ from
+ parcel_with_geom
+ where
+ not exists (
+ select
+ parcel_id
+ from
+ parcel_in_zip
+ where
+ parcel_id = id)
+)
+, parcel_largest_overlap as (
+ -- parcels that overlap multiple zip codes map to the one with the largest overlap
+ select distinct on (parcel.id)
+ parcel.id as parcel_id
+ , zip_code.id as zip_code_id
+ , parcel.valid * zip_code.valid as valid
+ from
+ parcel_not_within_zip as parcel
+ join zip_code on ST_Intersects (parcel.geom
+ , zip_code.geom)
+ and parcel.valid && zip_code.valid
+ order by
+ parcel_id
+ , ST_Area (ST_Intersection (parcel.geom
+ , zip_code.geom)) desc
+)
+, parcel_no_overlap as (
+ -- parcels that do not overlap any zip code
+ select
+ *
+ from
+ parcel_not_within_zip
+ where
+ not exists (
+ select
+ parcel_id
+ from
+ parcel_largest_overlap
+ where
+ parcel_id = id)
+)
+, parcel_closest as (
+ -- parcels that overlap no zip codes map to the closest one
+ select distinct on (parcel.id)
+ parcel.id as parcel_id
+ , zip_code.id as zip_code_id
+ , parcel.valid * zip_code.valid as valid
+ from
+ parcel_no_overlap as parcel
+ join zip_code on parcel.valid && zip_code.valid
+ order by
+ parcel_id
+ , ST_Distance (parcel.geom
+ , zip_code.geom))
+ insert into parcel_zip
+ select
+ *
+ , 'within'::parcel_zip_type
+ from
+ parcel_in_zip
+ union all
+ select
+ *
+ , 'most_overlap'::parcel_zip_type
+ from
+ parcel_largest_overlap
+ union all
+ select
+ *
+ , 'closest'::parcel_zip_type
+ from
+ parcel_closest;
+
diff --git a/etl/schema.sql b/etl/schema.sql
index 11517b64..e1ff8d38 100644
--- a/etl/schema.sql
+++ b/etl/schema.sql
@@ -1,30 +1,36 @@
create extension if not exists postgis;
drop table if exists parcel_geom cascade;
+
create table parcel_geom (
- id serial primary key
- , geom geometry(MultiPolygon, 26915) not null
+ id serial primary key
+ , geom geometry(MultiPolygon , 26915) not null
);
-create index parcel_geom_idx on parcel_geom using gist(geom);
+
+create index parcel_geom_idx on parcel_geom using gist (geom);
drop table if exists parcel;
+
create table parcel (
- id serial primary key
- , pid text not null
- , valid daterange not null
-
- , emv_land numeric
- , emv_building numeric
- , emv_total numeric
- , year_built int
- , sale_date date
- , sale_value numeric
-
- , geom_id int references parcel_geom(id)
+ id serial primary key
+ , pid text not null
+ , valid daterange not null
+ , emv_land numeric
+ , emv_building numeric
+ , emv_total numeric
+ , year_built int
+ , sale_date date
+ , sale_value numeric
+ , geom_id int references parcel_geom (id)
);
comment on column parcel.valid is 'Dates for which this parcel is valid';
+
comment on column parcel.pid is 'Municipal parcel ID';
+
comment on column parcel.emv_land is 'Estimated Market Value, land';
+
comment on column parcel.emv_building is 'Estimated Market Value, buildings';
+
comment on column parcel.emv_total is 'Estimated Market Value, total (may be more than sum of land and building)';
+
diff --git a/etl/zip_schema.sql b/etl/zip_schema.sql
index 4a15a2fa..d1a93ab7 100644
--- a/etl/zip_schema.sql
+++ b/etl/zip_schema.sql
@@ -1,8 +1,11 @@
drop table if exists zip_code;
+
create table zip_code (
- id serial primary key
- , zip_code text not null
- , valid daterange not null
- , geom geometry(MultiPolygon, 4269) not null
+ id serial primary key
+ , zip_code text not null
+ , valid daterange not null
+ , geom geometry(MultiPolygon , 4269) not null
);
-create index zip_code_geom_idx on zip_code using gist(geom);
+
+create index zip_code_geom_idx on zip_code using gist (geom);
+
diff --git a/scripts/clean.sh b/scripts/clean.sh
index 30ffad25..2bd06083 100755
--- a/scripts/clean.sh
+++ b/scripts/clean.sh
@@ -6,5 +6,7 @@ black cities/ tests/
autoflake --remove-all-unused-imports --in-place --recursive ./cities ./tests
nbqa black docs/guides/
-nbqa autoflake --remove-all-unused-imports --recursive --in-place docs/guides/
+nbqa autoflake --remove-all-unused-imports --recursive --in-place docs/guides/
nbqa isort -in-place docs/guides/
+
+pg_format -c .pg_format -i etl/*.sql
From 4fb0199f8423c0dd7c32048a6301edbb5fa3bb82 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 6 Aug 2024 11:34:37 -0400
Subject: [PATCH 016/142] process raw census data
---
etl/census_schema.sql | 87 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 87 insertions(+)
create mode 100644 etl/census_schema.sql
diff --git a/etl/census_schema.sql b/etl/census_schema.sql
new file mode 100644
index 00000000..b0716773
--- /dev/null
+++ b/etl/census_schema.sql
@@ -0,0 +1,87 @@
+drop table if exists census_tract cascade;
+
+create table census_tract (
+ id serial primary key
+ , statefp text not null
+ , countyfp text not null
+ , tractce text not null
+ , geoidfq text not null
+ , valid daterange not null
+ , geom geometry(MultiPolygon , 4269) not null
+);
+
+create index census_tract_geom_idx on census_tract using gist (geom);
+
+insert into census_tract (statefp , countyfp , tractce , geoidfq , valid , geom)
+select
+ statefp
+ , countyfp
+ , tractce
+ , affgeoid
+ , '[2010-01-01,2020-01-01)'::daterange
+ , geom
+from
+ cb_2018_27_tract_500k
+union all
+select
+ statefp
+ , countyfp
+ , tractce
+ , geoidfq
+ , '[2020-01-01,2030-01-01)'::daterange
+ , geom
+from
+ cb_2023_27_tract_500k;
+
+drop table if exists census_block_group cascade;
+
+create table census_block_group (
+ id serial primary key
+ , statefp text not null
+ , countyfp text not null
+ , tractce text not null
+ , blkgrpce text not null
+ , geoidfq text not null
+ , tract_id int references census_tract (id)
+ , valid daterange not null
+ , geom geometry(MultiPolygon , 4269) not null
+);
+
+create index census_block_group_geom_idx on census_block_group using gist (geom);
+
+insert into census_block (statefp , countyfp , tractce , blkgrpce , geoidfq , tract_id , valid , geom)
+select
+ statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , bg.geoidfq
+ , census_tract.id
+ , bg.valid
+ , bg.geom
+from (
+ select
+ statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , affgeoid as geoidfq
+ , '[2010-01-01,2020-01-01)'::daterange as valid
+ , geom
+ from
+ cb_2018_27_bg_500k
+ union all
+ select
+ statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , geoidfq
+ , '[2020-01-01,2030-01-01)'::daterange as valid
+ , geom
+ from
+ cb_2023_27_bg_500k) as bg
+ join census_tract using (statefp , countyfp , tractce)
+where
+ census_tract.valid && bg.valid;
+
From 70a1a34963356205b962c0201c2d6d97f47ef357 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 6 Aug 2024 12:10:56 -0400
Subject: [PATCH 017/142] add census schema and mapping from parcels to block
groups
---
etl/census_schema.sql | 13 +++--
etl/parcel_to_bg.sql | 117 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 125 insertions(+), 5 deletions(-)
create mode 100644 etl/parcel_to_bg.sql
diff --git a/etl/census_schema.sql b/etl/census_schema.sql
index b0716773..521bd20b 100644
--- a/etl/census_schema.sql
+++ b/etl/census_schema.sql
@@ -12,6 +12,8 @@ create table census_tract (
create index census_tract_geom_idx on census_tract using gist (geom);
+create index census_tract_valid_idx on census_tract using gist (valid);
+
insert into census_tract (statefp , countyfp , tractce , geoidfq , valid , geom)
select
statefp
@@ -33,9 +35,9 @@ select
from
cb_2023_27_tract_500k;
-drop table if exists census_block_group cascade;
+drop table if exists census_bg cascade;
-create table census_block_group (
+create table census_bg (
id serial primary key
, statefp text not null
, countyfp text not null
@@ -47,9 +49,11 @@ create table census_block_group (
, geom geometry(MultiPolygon , 4269) not null
);
-create index census_block_group_geom_idx on census_block_group using gist (geom);
+create index census_bg_geom_idx on census_bg using gist (geom);
+
+create index census_bg_valid_idx on census_bg using gist (valid);
-insert into census_block (statefp , countyfp , tractce , blkgrpce , geoidfq , tract_id , valid , geom)
+insert into census_bg (statefp , countyfp , tractce , blkgrpce , geoidfq , tract_id , valid , geom)
select
statefp
, countyfp
@@ -84,4 +88,3 @@ from (
join census_tract using (statefp , countyfp , tractce)
where
census_tract.valid && bg.valid;
-
diff --git a/etl/parcel_to_bg.sql b/etl/parcel_to_bg.sql
new file mode 100644
index 00000000..ebee0dde
--- /dev/null
+++ b/etl/parcel_to_bg.sql
@@ -0,0 +1,117 @@
+drop type if exists parcel_census_bg_type cascade;
+
+create type parcel_census_bg_type as enum (
+ 'within'
+ , 'most_overlap'
+ , 'closest'
+);
+
+drop table if exists parcel_census_bg;
+
+create table parcel_census_bg (
+ parcel_id int references parcel (id)
+ , census_bg_id int references census_bg (id)
+ , valid daterange not null
+ , type parcel_census_bg_type not null
+);
+
+with parcel_with_geom as (
+ select
+ parcel.id
+ , geom_id
+ , valid
+ , ST_Transform (geom
+ , 4269) as geom
+ from
+ parcel
+ join parcel_geom on geom_id = parcel_geom.id
+)
+, parcel_within as (
+ -- easy case: one parcel in one bg
+ select
+ parcel.id as parcel_id
+ , census_bg.id as census_bg_id
+ , parcel.valid * census_bg.valid as valid
+ from
+ parcel_with_geom as parcel
+ join census_bg on ST_Within (parcel.geom
+ , census_bg.geom)
+ and parcel.valid && census_bg.valid
+)
+, parcel_not_within as (
+ -- parcels that are not fully within any bg
+ select
+ *
+ from
+ parcel_with_geom
+ where
+ not exists (
+ select
+ parcel_id
+ from
+ parcel_within
+ where
+ parcel_id = id)
+)
+, parcel_largest_overlap as (
+ -- parcels that overlap multiple bgs map to the one with the largest overlap
+ select distinct on (parcel.id)
+ parcel.id as parcel_id
+ , census_bg.id as census_bg_id
+ , parcel.valid * census_bg.valid as valid
+ from
+ parcel_not_within as parcel
+ join census_bg on ST_Intersects (parcel.geom
+ , census_bg.geom)
+ and parcel.valid && census_bg.valid
+ order by
+ parcel_id
+ , ST_Area (ST_Intersection (parcel.geom
+ , census_bg.geom)) desc
+)
+, parcel_no_overlap as (
+ -- parcels that do not overlap any bg
+ select
+ *
+ from
+ parcel_not_within
+ where
+ not exists (
+ select
+ parcel_id
+ from
+ parcel_largest_overlap
+ where
+ parcel_id = id)
+)
+, parcel_closest as (
+ -- parcels that overlap no bgs map to the closest one
+ select distinct on (parcel.id)
+ parcel.id as parcel_id
+ , census_bg.id as census_bg_id
+ , parcel.valid * census_bg.valid as valid
+ from
+ parcel_no_overlap as parcel
+ join census_bg on parcel.valid && census_bg.valid
+ order by
+ parcel_id
+ , ST_Distance (parcel.geom
+ , census_bg.geom))
+ insert into parcel_census_bg
+ select
+ *
+ , 'within'::parcel_census_bg_type
+ from
+ parcel_within
+ union all
+ select
+ *
+ , 'most_overlap'::parcel_census_bg_type
+ from
+ parcel_largest_overlap
+ union all
+ select
+ *
+ , 'closest'::parcel_census_bg_type
+ from
+ parcel_closest;
From 648f22c68aec63000b847bc28186eea713059403 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 6 Aug 2024 14:46:05 -0400
Subject: [PATCH 018/142] fix parcel validities
---
etl/load_parcels.py | 4 ++--
etl/schema.sql | 3 +--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/etl/load_parcels.py b/etl/load_parcels.py
index 0c7c292e..6604281f 100644
--- a/etl/load_parcels.py
+++ b/etl/load_parcels.py
@@ -27,12 +27,12 @@
# insert parcel data into parcel table
parcel_data = " union all ".join(
f"""
- select replace(pin, '{COUNTY_ID}-', ''), '[{year}-01-01,{year+1}-01-01)'::daterange, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom.id
+ select replace(pin, '{COUNTY_ID}-', ''), '[{year-1}-01-01,{year}-01-01)'::daterange, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom.id
from parcel_raw_{year}, parcel_geom
where parcel_raw_{year}.geom = parcel_geom.geom
and upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'
"""
- for year in range(2002, 2018)
+ for year in PARCEL_YEARS
)
parcel_load = f"""
diff --git a/etl/schema.sql b/etl/schema.sql
index e1ff8d38..1029c451 100644
--- a/etl/schema.sql
+++ b/etl/schema.sql
@@ -9,7 +9,7 @@ create table parcel_geom (
create index parcel_geom_idx on parcel_geom using gist (geom);
-drop table if exists parcel;
+drop table if exists parcel cascade;
create table parcel (
id serial primary key
@@ -33,4 +33,3 @@ comment on column parcel.emv_land is 'Estimated Market Value, land';
comment on column parcel.emv_building is 'Estimated Market Value, buildings';
comment on column parcel.emv_total is 'Estimated Market Value, total (may be more than sum of land and building)';
-
From 56a0b9881dfa35b6c07dcb6ac91717988a5b1836 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 6 Aug 2024 17:44:06 -0400
Subject: [PATCH 019/142] add code to process permits and match them to parcels
---
etl/permit_schema.sql | 290 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 290 insertions(+)
create mode 100644 etl/permit_schema.sql
diff --git a/etl/permit_schema.sql b/etl/permit_schema.sql
new file mode 100644
index 00000000..e1dc5df2
--- /dev/null
+++ b/etl/permit_schema.sql
@@ -0,0 +1,290 @@
+drop table if exists residential_permit cascade;
+
+create table residential_permit (
+ id serial primary key,
+ ctu_id text,
+ coctu_id text,
+ year int,
+ tenure text,
+ housing_ty text,
+ res_permit text,
+ address text,
+ zip_code text,
+ name text,
+ buildings int,
+ units int,
+ age_restri int,
+ memory_car int,
+ assisted int,
+ com_off_re boolean,
+ sqf numeric,
+ public_fun boolean,
+ permit_val numeric,
+ community_ text,
+ notes text,
+ pin text,
+ geom geometry (multipoint, 26915)
+);
+
+create index residential_permit_geom_idx on residential_permit using gist (
+ geom
+);
+
+insert into residential_permit (
+ ctu_id,
+ coctu_id,
+ year,
+ tenure,
+ housing_ty,
+ res_permit,
+ address,
+ zip_code,
+ name,
+ buildings,
+ units,
+ age_restri,
+ memory_car,
+ assisted,
+ com_off_re,
+ sqf,
+ public_fun,
+ permit_val,
+ community_,
+ notes,
+ pin,
+ geom
+)
+select
+ ctu_id,
+ coctu_id,
+ year::int,
+ tenure,
+ housing_ty,
+ res_permit,
+ address,
+ zip_code,
+ name,
+ buildings,
+ units,
+ age_restri,
+ memory_car,
+ assisted,
+ com_off_re = 'Y',
+ sqf,
+ public_fun = 'Y',
+ permit_val,
+ community_,
+ notes,
+ pin,
+ geom
+from
+ residential_permits_raw
+where
+ co_code = '053'
+ and lower(ctu_name) = 'minneapolis';
+
+drop table if exists residential_permit_parcel;
+
+create table residential_permit_parcel (
+ permit_id int references residential_permit (id),
+ parcel_id int references parcel (id),
+ type_ region_tag_type
+);
+
+with within as (
+ select
+ residential_permit.id as permit_id,
+ parcel.id as parcel_id
+ from
+ parcel_with_geom as parcel
+ join residential_permit on st_within(
+ residential_permit.geom,
+ parcel.geom
+ )
+ and to_date(
+ year::text,
+ 'YYYY'
+ ) <@ parcel.valid
+),
+not_within as (
+ select
+ id,
+ year,
+ geom
+ from
+ residential_permit
+ where
+ not exists (
+ select permit_id
+ from
+ within
+ where
+ permit_id = id
+ )
+),
+closest as (
+ select distinct on (permit.id)
+ permit.id as permit_id,
+ parcel.id as parcel_id
+ from
+ not_within as permit
+ join parcel_with_geom as parcel
+ on st_dwithin(permit.geom, parcel.geom, 100.0) and to_date(
+ year::text,
+ 'YYYY'
+ ) <@ parcel.valid
+ order by
+ permit_id,
+ st_distance(
+ permit.geom,
+ parcel.geom
+ )
+)
+insert into residential_permit_parcel select
+ permit_id,
+ parcel_id,
+ 'within'::region_tag_type
+from
+ within
+union all
+select
+ permit_id,
+ parcel_id,
+ 'closest'::region_tag_type
+from
+ closest;
+
+drop table if exists commercial_permit cascade;
+
+create table commercial_permit (
+ id serial primary key,
+ ctu_id text,
+ coctu_id text,
+ year int,
+ nonres_gro text,
+ nonres_sub text,
+ nonres_typ text,
+ bldg_name text,
+ bldg_desc text,
+ permit_typ text,
+ permit_val numeric,
+ sqf int,
+ address text,
+ zip_code text,
+ pin text,
+ geom geometry (multipoint, 26915)
+);
+
+create index commercial_permit_geom_idx on commercial_permit using gist (
+ geom
+);
+
+insert into commercial_permit (
+ ctu_id,
+ coctu_id,
+ year,
+ nonres_gro,
+ nonres_sub,
+ nonres_typ,
+ bldg_name,
+ bldg_desc,
+ permit_typ,
+ permit_val,
+ sqf,
+ address,
+ zip_code,
+ pin,
+ geom
+)
+select
+ ctu_id,
+ coctu_id,
+ year::int,
+ nonres_gro,
+ nonres_sub,
+ nonres_typ,
+ bldg_name,
+ bldg_desc,
+ permit_typ,
+ permit_val,
+ sqf,
+ address,
+ zip_code,
+ pin,
+ geom
+from
+ commercial_permits_raw
+where
+ co_code = '053'
+ and lower(ctu_name) = 'minneapolis';
+
+drop table if exists commercial_permit_parcel;
+
+create table commercial_permit_parcel (
+ permit_id int references commercial_permit (id),
+ parcel_id int references parcel (id),
+ type_ region_tag_type
+);
+
+with within as (
+ select
+ commercial_permit.id as permit_id,
+ parcel.id as parcel_id
+ from
+ parcel_with_geom as parcel
+ join commercial_permit on st_within(
+ commercial_permit.geom,
+ parcel.geom
+ )
+ and to_date(
+ year::text,
+ 'YYYY'
+ ) <@ parcel.valid
+),
+not_within as (
+ select
+ id,
+ year,
+ geom
+ from
+ commercial_permit
+ where
+ not exists (
+ select permit_id
+ from
+ within
+ where
+ permit_id = id
+ )
+),
+closest as (
+ select distinct on (permit.id)
+ permit.id as permit_id,
+ parcel.id as parcel_id
+ from
+ not_within as permit
+ join parcel_with_geom as parcel
+ on st_dwithin(permit.geom, parcel.geom, 100.0) and to_date(
+ year::text,
+ 'YYYY'
+ ) <@ parcel.valid
+ order by
+ permit_id,
+ st_distance(
+ permit.geom,
+ parcel.geom
+ )
+)
+insert into commercial_permit_parcel select
+ permit_id,
+ parcel_id,
+ 'within'::region_tag_type
+from
+ within
+union all
+select
+ permit_id,
+ parcel_id,
+ 'closest'::region_tag_type
+from
+ closest;
From 4f9de18ca80eaf4bb621a77613900d01a8a51778 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 7 Aug 2024 09:51:41 -0400
Subject: [PATCH 020/142] create a property values view
---
etl/property_values.sql | 11 +++++++++++
1 file changed, 11 insertions(+)
create mode 100644 etl/property_values.sql
diff --git a/etl/property_values.sql b/etl/property_values.sql
new file mode 100644
index 00000000..3e163f9c
--- /dev/null
+++ b/etl/property_values.sql
@@ -0,0 +1,11 @@
+drop view if exists property_values;
+
+create view property_values as (
+ select
+ id
+ , pid
+ , emv_total as value_
+ , valid
+ from
+ parcel);
+
From dba7a29758849c19bc66bfc21c122a58d22b72ea Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 7 Aug 2024 13:44:05 -0400
Subject: [PATCH 021/142] add code to create real_estate_transactions table
---
etl/real_estate_transactions.sql | 72 ++++++++++++++++++++++++++++++++
1 file changed, 72 insertions(+)
create mode 100644 etl/real_estate_transactions.sql
diff --git a/etl/real_estate_transactions.sql b/etl/real_estate_transactions.sql
new file mode 100644
index 00000000..e02ffee8
--- /dev/null
+++ b/etl/real_estate_transactions.sql
@@ -0,0 +1,72 @@
+drop table if exists real_estate_transactions_scraped;
+
+create table real_estate_transactions_scraped (
+ parcel_id text
+ , address text
+ , sale_date date
+ , sale_price numeric
+ , building_area numeric
+ , beds numeric
+ , baths numeric
+ , stories numeric
+ , year_built numeric
+ , neighborhood text
+ , property_type text
+);
+
+\copy real_estate_transactions_scraped from 'zoning/data/processed/real_estate_transactions/real_estate_transactions.csv' with csv header delimiter ',';
+drop table if exists real_estate_transactions_raw;
+
+create table real_estate_transactions_raw (
+ sale_id int
+ , ecrv text
+ , sale_date date
+ , excluded_from_ratio_study text
+ , pin text
+ , num_parcels_in_sale int
+ , formatted_address text
+ , land_sale text
+ , community_cd int
+ , community_desc text
+ , nbhd_cd int
+ , nbhd_desc text
+ , ward int
+ , proptype_cd text
+ , proptype_desc text
+ , grantee1 text
+ , grantee2 text
+ , grantor1 text
+ , grantor2 text
+ , adj_sale_price int
+ , gross_sale_price int
+ , downpayment int
+ , x numeric
+ , y numeric
+ , fid int
+);
+
+\copy real_estate_transactions_raw from 'zoning/data/raw/real_estate_transactions/Property_Sales_2019_to_2023.csv' with csv header delimiter ',';
+drop table if exists real_estate_transactions;
+
+create table real_estate_transactions (
+ id serial primary key
+ , parcel_id int references parcel (id)
+ , address text
+ , sale_date date
+ , sale_price numeric
+ , neighborhood text
+ , property_type text
+);
+
+insert into real_estate_transactions (parcel_id , address , sale_date , sale_price , neighborhood , property_type)
+select
+ parcel.id
+ , address
+ , scraped.sale_date
+ , sale_price
+ , neighborhood
+ , property_type
+from
+ real_estate_transactions_scraped as scraped
+ join parcel on pid = parcel_id
+ and scraped.sale_date <@ valid;
From 6c2d43d1844a55bb639bfce186c310bab0c44df9 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 7 Aug 2024 17:48:13 -0400
Subject: [PATCH 022/142] add code to load acs demographic data
---
etl/acs.sql | 27 ++++++
etl/acs_schema.sql | 50 ++++++++++++
etl/load_acs_raw.py | 195 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 272 insertions(+)
create mode 100644 etl/acs.sql
create mode 100644 etl/acs_schema.sql
create mode 100644 etl/load_acs_raw.py
diff --git a/etl/acs.sql b/etl/acs.sql
new file mode 100644
index 00000000..4026036e
--- /dev/null
+++ b/etl/acs.sql
@@ -0,0 +1,27 @@
+insert into acs_tract
+select
+ id
+ , year_
+ , name_
+ , value_
+from
+ acs_tract_raw as t1
+ join census_tract as t2 on t1.statefp = t2.statefp
+ and t1.countyfp = t2.countyfp
+ and t1.tractce = t2.tractce
+ and to_date(t1.year_::text , 'YYYY') <@ t2.valid;
+
+insert into acs_bg
+select
+ id
+ , year_
+ , name_
+ , value_
+from
+ acs_bg_raw as t1
+ join census_bg as t2 on t1.statefp = t2.statefp
+ and t1.countyfp = t2.countyfp
+ and t1.tractce = t2.tractce
+ and t1.blkgrpce = t2.blkgrpce
+ and to_date(t1.year_::text , 'YYYY') <@ t2.valid;
+
diff --git a/etl/acs_schema.sql b/etl/acs_schema.sql
new file mode 100644
index 00000000..8acb6088
--- /dev/null
+++ b/etl/acs_schema.sql
@@ -0,0 +1,50 @@
+drop table if exists acs_variable cascade;
+
+create table acs_variable (
+ name_ text primary key
+ , description text not null
+);
+
+drop table if exists acs_tract_raw cascade;
+
+create table acs_tract_raw (
+ statefp text
+ , countyfp text
+ , tractce text
+ , year_ int
+ , name_ text
+ , value_ numeric
+);
+
+drop table if exists acs_bg_raw cascade;
+
+create table acs_bg_raw (
+ statefp text
+ , countyfp text
+ , tractce text
+ , blkgrpce text
+ , year_ int
+ , name_ text
+ , value_ numeric
+);
+
+drop table if exists acs_tract cascade;
+
+create table acs_tract (
+ id int references census_tract (id)
+ , year_ int not null
+ , name_ text references acs_variable (name_)
+ , value_ numeric
+ , primary key (id , year_ , name_)
+);
+
+drop table if exists acs_bg cascade;
+
+create table acs_bg (
+ id int references census_bg (id)
+ , year_ int not null
+ , name_ text references acs_variable (name_)
+ , value_ numeric
+ , primary key (id , year_ , name_)
+);
+
diff --git a/etl/load_acs_raw.py b/etl/load_acs_raw.py
new file mode 100644
index 00000000..bc1264be
--- /dev/null
+++ b/etl/load_acs_raw.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python
+
+import logging
+import os
+import psycopg2
+
+from db import HOST, USER
+
+log = logging.getLogger(__name__)
+
+# Years for which a CSV is looked up: 2013 through 2022 (the end of range() is exclusive).
+YEAR_RANGE = range(2013, 2023)
+# Maps each ACS variable code to a descriptive name. main() stores the pair in
+# acs_variable and also uses the descriptive name as the directory that holds
+# the per-year CSV files, so renaming a value here requires renaming the
+# matching raw-data directory as well.
+ACS_CODES = {
+ "B03002_003E": "population_white_non_hispanic",
+ "B03002_004E": "population_black_non_hispanic",
+ # NOTE(review): in the Census B03002 table, _005E is listed as American
+ # Indian and Alaska Native alone, _006E as Asian alone and _007E as Native
+ # Hawaiian and Other Pacific Islander alone. The three labels below look
+ # rotated relative to that listing -- confirm against the Census variable
+ # list and against how the raw files were downloaded.
+ "B03002_005E": "population_asian_non_hispanic",
+ "B03002_006E": "population_native_hawaiian_or_pacific_islander_non_hispanic",
+ "B03002_007E": "population_american_indian_or_alaska_native_non_hispanic",
+ "B03002_008E": "population_other_non_hispanic",
+ "B03002_009E": "population_multiple_races_non_hispanic",
+ "B03002_010E": "population_multiple_races_and_other_non_hispanic",
+ "B07204_001E": "geographic_mobility_total_responses",
+ "B07204_002E": "geographic_mobility_same_house_1_year_ago",
+ "B07204_004E": "geographic_mobility_different_house_1_year_ago_same_city",
+ "B07204_005E": "geographic_mobility_different_house_1_year_ago_same_county",
+ "B07204_006E": "geographic_mobility_different_house_1_year_ago_same_state",
+ "B07204_007E": "geographic_mobility_different_house_1_year_ago_same_country",
+ "B07204_016E": "geographic_mobility_different_house_1_year_ago_abroad",
+ "B01003_001E": "population",
+ "B02001_002E": "white",
+ "B02001_003E": "black",
+ "B02001_004E": "american_indian_or_alaska_native",
+ "B02001_005E": "asian",
+ "B02001_006E": "native_hawaiian_or_pacific_islander",
+ "B03001_003E": "population_hispanic_or_latino",
+ "B02001_007E": "other_race",
+ "B02001_008E": "multiple_races",
+ "B02001_009E": "multiple_races_and_other_race",
+ "B02001_010E": "two_or_more_races_excluding_other",
+ "B02015_002E": "east_asian_chinese",
+ "B02015_003E": "east_asian_hmong",
+ "B02015_004E": "east_asian_japanese",
+ "B02015_005E": "east_asian_korean",
+ "B02015_006E": "east_asian_mongolian",
+ "B02015_007E": "east_asian_okinawan",
+ "B02015_008E": "east_asian_taiwanese",
+ "B02015_009E": "east_asian_other",
+ "B02015_010E": "southeast_asian_burmese",
+ "B02015_011E": "southeast_asian_cambodian",
+ "B02015_012E": "southeast_asian_filipino",
+ "B02015_013E": "southeast_asian_indonesian",
+ "B02015_014E": "southeast_asian_laotian",
+ "B02015_015E": "southeast_asian_malaysian",
+ "B02015_016E": "southeast_asian_mien",
+ "B02015_017E": "southeast_asian_singaporean",
+ "B02015_018E": "southeast_asian_thai",
+ "B02015_019E": "southeast_asian_viet",
+ "B02015_020E": "southeast_asian_other",
+ "B02015_021E": "south_asian_asian_indian",
+ "B02015_022E": "south_asian_bangladeshi",
+ "B02015_023E": "south_asian_bhutanese",
+ "B02015_024E": "south_asian_nepalese",
+ "B02015_025E": "south_asian_pakistani",
+ "B02015_026E": "south_asian_sikh",
+ "B02015_027E": "south_asian_sri_lankan",
+ "B02015_028E": "south_asian_other",
+ "B02015_029E": "central_asian_kazakh",
+ "B02015_030E": "central_asian_uzbek",
+ "B02015_031E": "central_asian_other",
+ "B02015_032E": "other_asian_specified",
+ "B02015_033E": "other_asian_not_specified",
+ "B19013_001E": "median_household_income",
+ "B19013A_001E": "median_household_income_white",
+ "B19013H_001E": "median_household_income_white_non_hispanic",
+ "B19013I_001E": "median_household_income_hispanic",
+ "B19013B_001E": "median_household_income_black",
+ "B19013C_001E": "median_household_income_american_indian_or_alaska_native",
+ "B19013D_001E": "median_household_income_asian",
+ "B19013E_001E": "median_household_income_native_hawaiian_or_pacific_islander",
+ "B19013F_001E": "median_household_income_other_race",
+ "B19013G_001E": "median_household_income_multiple_races",
+ "B19019_002E": "median_household_income_1_person_households",
+ "B19019_003E": "median_household_income_2_person_households",
+ "B19019_004E": "median_household_income_3_person_households",
+ "B19019_005E": "median_household_income_4_person_households",
+ "B19019_006E": "median_household_income_5_person_households",
+ "B19019_007E": "median_household_income_6_person_households",
+ "B19019_008E": "median_household_income_7_or_more_person_households",
+ "B01002_001E": "median_age",
+ "B01002_002E": "median_age_male",
+ "B01002_003E": "median_age_female",
+ "B25031_001E": "median_gross_rent",
+ "B25031_002E": "median_gross_rent_0_bedrooms",
+ "B25031_003E": "median_gross_rent_1_bedrooms",
+ "B25031_004E": "median_gross_rent_2_bedrooms",
+ "B25031_005E": "median_gross_rent_3_bedrooms",
+ "B25031_006E": "median_gross_rent_4_bedrooms",
+ "B25031_007E": "median_gross_rent_5_bedrooms",
+ "B25032_001E": "total_housing_units",
+ "B25032_002E": "total_owner_occupied_housing_units",
+ "B25032_013E": "total_renter_occupied_housing_units",
+ # NOTE(review): B25070_001E appears to be the universe count of the
+ # "gross rent as a percentage of household income" table, while the median
+ # is published as B25071_001E -- confirm which variable is intended.
+ "B25070_001E": "median_gross_rent_as_percentage_of_household_income",
+}
+
+
+def main():
+ """Rebuild the ACS tables and load the raw tract and block-group CSVs.
+
+ Executes etl/acs_schema.sql (which drops and recreates the ACS tables),
+ inserts one acs_variable row per ACS_CODES entry, then copies every CSV
+ found for each (variable, year) pair into acs_tract_raw and acs_bg_raw.
+ A missing file is logged and skipped rather than treated as an error.
+ Paths are relative, so the script must be run from the repository root.
+ """
+ conn = psycopg2.connect(host=HOST, user=USER, database="cities")
+ cur = conn.cursor()
+
+ with open("etl/acs_schema.sql", "r") as f:
+ cur.execute(f.read())
+
+ for code, desc in ACS_CODES.items():
+ cur.execute("insert into acs_variable values (%s, %s)", (code, desc))
+ conn.commit()
+
+ # The temp table mirrors the CSV columns; year and variable code are not in
+ # the file and are supplied as query parameters on insert.
+ cur.execute("drop table if exists acs_tract_temp")
+ cur.execute(
+ "create temp table acs_tract_temp (statefp text, countyfp text, tractce text, value numeric)"
+ )
+
+ for code in ACS_CODES.keys():
+ desc = ACS_CODES[code]
+ for year in YEAR_RANGE:
+ log.info(f"Loading {desc} for {year}")
+ filename = f"zoning/data/raw/demographics/tracts/{desc}/{year}.csv"
+ if not os.path.isfile(filename):
+ # Use the module logger, like every other message in this function.
+ log.info(f"File {filename} does not exist")
+ continue
+
+ cur.execute("truncate acs_tract_temp")
+
+ with open(filename, "r") as f:
+ cur.copy_expert("copy acs_tract_temp from stdin with csv header", f)
+
+ cur.execute(
+ "insert into acs_tract_raw select statefp, countyfp, tractce, %s, %s, value from acs_tract_temp",
+ (year, code),
+ )
+ # cur.execute(
+ # """
+ # insert into acs_tract
+ # select t2.id, year, name_, value
+ # from (select statefp, countyfp, tractce, %s as year, %s as name_, value from acs_tract_temp) as t1
+ # join census_tract as t2
+ # on to_date(year::varchar, 'YYYY') <@ valid
+ # and t1.statefp = t2.statefp
+ # and t1.countyfp = t2.countyfp
+ # and t1.tractce = t2.tractce
+ # """,
+ # (year, code),
+ # )
+ conn.commit()
+
+ # Block groups follow the same procedure with an extra blkgrpce column.
+ cur.execute("drop table if exists acs_bg_temp")
+ cur.execute(
+ "create temp table acs_bg_temp (statefp text, countyfp text, tractce text, blkgrpce text, value numeric)"
+ )
+
+ for code in ACS_CODES.keys():
+ desc = ACS_CODES[code]
+ for year in YEAR_RANGE:
+ log.info(f"Loading {desc} for {year}")
+ filename = f"zoning/data/raw/demographics/block_groups/{desc}/{year}.csv"
+ if not os.path.isfile(filename):
+ # Use the module logger, like every other message in this function.
+ log.info(f"File {filename} does not exist")
+ continue
+
+ cur.execute("truncate acs_bg_temp")
+
+ with open(filename, "r") as f:
+ cur.copy_expert("copy acs_bg_temp from stdin with csv header", f)
+ cur.execute(
+ "insert into acs_bg_raw select statefp, countyfp, tractce, blkgrpce, %s, %s, value from acs_bg_temp",
+ (year, code),
+ )
+
+ # cur.execute(
+ # """
+ # insert into acs_bg
+ # select t2.id, year, name_, value
+ # from (select statefp, countyfp, tractce, blkgrpce, %s as year, %s as name_, value from acs_bg_temp) as t1
+ # join census_bg as t2
+ # on to_date(year::varchar, 'YYYY') <@ valid
+ # and t1.statefp = t2.statefp
+ # and t1.countyfp = t2.countyfp
+ # and t1.tractce = t2.tractce
+ # """,
+ # (year, code),
+ # )
+ conn.commit()
+
+
+if __name__ == "__main__":
+ logging.basicConfig(level=logging.INFO)
+ main()
From fef6573828885c43a729185cc6e010616120d1ca Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 7 Aug 2024 17:56:09 -0400
Subject: [PATCH 023/142] remove commented code
---
etl/load_acs_raw.py | 27 ---------------------------
1 file changed, 27 deletions(-)
diff --git a/etl/load_acs_raw.py b/etl/load_acs_raw.py
index bc1264be..4ed52239 100644
--- a/etl/load_acs_raw.py
+++ b/etl/load_acs_raw.py
@@ -136,19 +136,6 @@ def main():
"insert into acs_tract_raw select statefp, countyfp, tractce, %s, %s, value from acs_tract_temp",
(year, code),
)
- # cur.execute(
- # """
- # insert into acs_tract
- # select t2.id, year, name_, value
- # from (select statefp, countyfp, tractce, %s as year, %s as name_, value from acs_tract_temp) as t1
- # join census_tract as t2
- # on to_date(year::varchar, 'YYYY') <@ valid
- # and t1.statefp = t2.statefp
- # and t1.countyfp = t2.countyfp
- # and t1.tractce = t2.tractce
- # """,
- # (year, code),
- # )
conn.commit()
cur.execute("drop table if exists acs_bg_temp")
@@ -173,20 +160,6 @@ def main():
"insert into acs_bg_raw select statefp, countyfp, tractce, blkgrpce, %s, %s, value from acs_bg_temp",
(year, code),
)
-
- # cur.execute(
- # """
- # insert into acs_bg
- # select t2.id, year, name_, value
- # from (select statefp, countyfp, tractce, blkgrpce, %s as year, %s as name_, value from acs_bg_temp) as t1
- # join census_bg as t2
- # on to_date(year::varchar, 'YYYY') <@ valid
- # and t1.statefp = t2.statefp
- # and t1.countyfp = t2.countyfp
- # and t1.tractce = t2.tractce
- # """,
- # (year, code),
- # )
conn.commit()
From 6a05387a16b823a8722107f269534ea1b45f1e31 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 8 Aug 2024 10:03:55 -0400
Subject: [PATCH 024/142] add code to load fair market rents
---
etl/fair_market_rents.sql | 63 +++++++++++++++++++++
etl/fair_market_rents_schema.sql | 12 ++++
etl/load_fair_market_rents_raw.py | 94 +++++++++++++++++++++++++++++++
3 files changed, 169 insertions(+)
create mode 100644 etl/fair_market_rents.sql
create mode 100644 etl/fair_market_rents_schema.sql
create mode 100644 etl/load_fair_market_rents_raw.py
diff --git a/etl/fair_market_rents.sql b/etl/fair_market_rents.sql
new file mode 100644
index 00000000..d2eb3137
--- /dev/null
+++ b/etl/fair_market_rents.sql
@@ -0,0 +1,63 @@
+-- Long-format fair market rents: one row per (zip code, bedroom count, year).
+-- The table is dropped and rebuilt from fair_market_rents_raw on every run.
+drop table if exists fair_market_rents cascade;
+
+create table fair_market_rents (
+ zip_id int references zip_code (id)
+ , rent numeric
+ , num_bedrooms int
+ , year_ int
+);
+
+-- fmr_zip resolves each raw row to the zip_code row with the same code whose
+-- "valid" range contains the date built from the row's year; raw rows without
+-- a match are dropped by the inner join. The five selects below then unpivot
+-- rent_br0..rent_br4 into rows tagged with the bedroom count 0..4.
+-- "union" (rather than "union all") also removes exact duplicate rows.
+insert into fair_market_rents (zip_id , rent , num_bedrooms , year_)
+with fmr_zip as (
+ select
+ zip_code.id as zip_id
+ , rent_br0
+ , rent_br1
+ , rent_br2
+ , rent_br3
+ , rent_br4
+ , year_
+ from
+ fair_market_rents_raw
+ join zip_code on zip_code.zip_code = fair_market_rents_raw.zip
+ and zip_code.valid @> to_date(year_::text , 'YYYY'))
+ select
+ zip_id
+ , rent_br0
+ , 0
+ , year_
+ from
+ fmr_zip
+ union
+ select
+ zip_id
+ , rent_br1
+ , 1
+ , year_
+ from
+ fmr_zip
+ union
+ select
+ zip_id
+ , rent_br2
+ , 2
+ , year_
+ from
+ fmr_zip
+ union
+ select
+ zip_id
+ , rent_br3
+ , 3
+ , year_
+ from
+ fmr_zip
+ union
+ select
+ zip_id
+ , rent_br4
+ , 4
+ , year_
+ from
+ fmr_zip;
+
diff --git a/etl/fair_market_rents_schema.sql b/etl/fair_market_rents_schema.sql
new file mode 100644
index 00000000..4fd2ac52
--- /dev/null
+++ b/etl/fair_market_rents_schema.sql
@@ -0,0 +1,12 @@
+-- Wide-format staging table for fair market rents: one row per zip code and
+-- year with a rent column for each of 0 to 4 bedrooms. The zip code is kept
+-- as text and the table has no constraints.
+drop table if exists fair_market_rents_raw cascade;
+
+create table fair_market_rents_raw (
+ zip text
+ , rent_br0 numeric
+ , rent_br1 numeric
+ , rent_br2 numeric
+ , rent_br3 numeric
+ , rent_br4 numeric
+ , year_ int
+);
+
diff --git a/etl/load_fair_market_rents_raw.py b/etl/load_fair_market_rents_raw.py
new file mode 100644
index 00000000..565c8a05
--- /dev/null
+++ b/etl/load_fair_market_rents_raw.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+
+import logging
+import os
+import glob
+from io import StringIO
+
+import psycopg2
+import pandas as pd
+
+from db import HOST, USER
+
+log = logging.getLogger(__name__)
+
+RAW_DATA_DIRECTORY = "zoning/data/raw/demographics/zip_codes/fair_market_rents"
+
+
+def preprocess_csv_to_df(filename):
+    """Read one fair-market-rent CSV and normalise it for loading.
+
+    Returns a ``(year, df)`` tuple. ``year`` is the text after the last
+    underscore in ``filename`` with ".csv" removed. ``df`` contains the
+    columns zip_code and rent_br0..rent_br4, all read as strings, with "$"
+    and "," stripped from the rent values. A KeyError is raised by pandas
+    if any of those six columns cannot be derived from the file's headers.
+    """
+    year = filename.rsplit("_", 1)[-1].replace(".csv", "")
+    raw = pd.read_csv(filename, dtype=str, na_values={})
+
+    def normalised_name(col):
+        # Headers are spelled differently from file to file; map each known
+        # spelling onto the common name, or return None to leave it alone.
+        if "zip" in col.lower() or col == "zcta":
+            return "zip_code"
+        # Columns whose name mentions 90 or 110 are never treated as rents.
+        excluded = "90" in col or "110" in col
+        if "BR" in col and not excluded:
+            # The bedroom count is the character just before "br".
+            return "rent_br" + col.lower().split("br")[0][-1]
+        if "area_rent_br" in col:
+            return "rent_br" + col[-1]
+        if "safmr" in col and not excluded:
+            return "rent_br" + col.split("_")[-1][0]
+        return None
+
+    renames = {}
+    for col in raw.columns:
+        target = normalised_name(col)
+        if target is not None:
+            renames[col] = target
+
+    keep = ["zip_code"] + ["rent_br%d" % n for n in range(5)]
+    df = raw.rename(columns=renames)[keep]
+
+    rent_columns = [name for name in df.columns if "rent_" in name]
+    for col in rent_columns:
+        df[col] = [cell.replace("$", "").replace(",", "") for cell in df[col]]
+
+    return (year, df)
+
+
+def copy_from_stringio(cur, df, table):
+    """Bulk-load ``df`` into ``table`` through an in-memory CSV buffer.
+
+    The frame is serialised without index or header row, and the resulting
+    text is handed to ``cur.copy_from`` with "," as the column separator.
+    """
+    payload = StringIO(df.to_csv(index=False, header=False))
+    cur.copy_from(payload, table, sep=",")
+
+
+def main():
+ """Rebuild fair_market_rents_raw from the CSV files in RAW_DATA_DIRECTORY.
+
+ Executes etl/fair_market_rents_schema.sql, then for every *.csv file
+ normalises it with preprocess_csv_to_df, copies it into a temp table and
+ appends it to fair_market_rents_raw together with the year taken from
+ the file name. Paths are relative to the current working directory.
+ """
+ conn = psycopg2.connect(host=HOST, user=USER, database="cities")
+ cur = conn.cursor()
+
+ with open("etl/fair_market_rents_schema.sql", "r") as f:
+ cur.execute(f.read())
+
+ # The temp table has the six columns produced by preprocess_csv_to_df, in
+ # the same order; the year is not part of the file and is added on insert.
+ cur.execute("drop table if exists fmr_temp")
+ cur.execute(
+ """
+ create temp table fmr_temp (
+ zip text
+ , rent_br0 numeric
+ , rent_br1 numeric
+ , rent_br2 numeric
+ , rent_br3 numeric
+ , rent_br4 numeric)
+ """
+ )
+
+ for filename in glob.glob(f"{RAW_DATA_DIRECTORY}/*.csv"):
+ (year, df) = preprocess_csv_to_df(filename)
+ # Reuse the temp table for each file instead of recreating it.
+ cur.execute("truncate fmr_temp")
+ copy_from_stringio(cur, df, "fmr_temp")
+
+ # "select *" relies on fmr_temp's column order matching the first six
+ # columns of fair_market_rents_raw; the year fills the seventh.
+ cur.execute(
+ "insert into fair_market_rents_raw select *, %s as year from fmr_temp",
+ (year,),
+ )
+ conn.commit()
+
+
+if __name__ == "__main__":
+ logging.basicConfig(level=logging.INFO)
+ main()
From dc6acb76d1d7045d7f734fa125cc237d2719650e Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 8 Aug 2024 10:53:54 -0400
Subject: [PATCH 025/142] add code to load usps migration data
---
etl/load_usps_migration_raw.py | 64 ++++++++++++
etl/usps_migration.sql | 157 ++++++++++++++++++++++++++++++
etl/usps_migration_raw_schema.sql | 22 +++++
3 files changed, 243 insertions(+)
create mode 100644 etl/load_usps_migration_raw.py
create mode 100644 etl/usps_migration.sql
create mode 100644 etl/usps_migration_raw_schema.sql
diff --git a/etl/load_usps_migration_raw.py b/etl/load_usps_migration_raw.py
new file mode 100644
index 00000000..c05f8e0b
--- /dev/null
+++ b/etl/load_usps_migration_raw.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+import glob
+import logging
+import psycopg2
+
+from db import HOST, USER
+
+log = logging.getLogger(__name__)
+
+
+RAW_DATA_DIRECTORY = "zoning/data/raw/demographics/zip_codes/usps_migration"
+
+
+def main():
+ """Rebuild usps_migration_raw from the CSV files in RAW_DATA_DIRECTORY.
+
+ Executes etl/usps_migration_raw_schema.sql, then copies every *.csv file
+ into a temp table and appends its rows to usps_migration_raw together
+ with a year derived from the file name. Paths are relative to the
+ current working directory.
+ """
+ conn = psycopg2.connect(host=HOST, user=USER, database="cities")
+ cur = conn.cursor()
+
+ with open("etl/usps_migration_raw_schema.sql", "r") as f:
+ cur.execute(f.read())
+
+ # The temp table has the same columns as usps_migration_raw except for the
+ # trailing year_, which is not in the CSV and is added on insert.
+ cur.execute("drop table if exists m_temp")
+ cur.execute(
+ """
+ create temp table m_temp (
+ yyyymm text
+ , zip_code text
+ , city text
+ , state text
+ , total_from_zip numeric
+ , total_from_zip_business numeric
+ , total_from_zip_family numeric
+ , total_from_zip_individual numeric
+ , total_from_zip_perm numeric
+ , total_from_zip_temp numeric
+ , total_to_zip numeric
+ , total_to_zip_business numeric
+ , total_to_zip_family numeric
+ , total_to_zip_individual numeric
+ , total_to_zip_perm numeric
+ , total_to_zip_temp numeric
+ )
+ """
+ )
+
+ for filename in glob.glob(f"{RAW_DATA_DIRECTORY}/*.csv"):
+ log.info(f"Loading {filename}")
+ # Year = file name up to its first ".", with every "Y" removed.
+ year = filename.split("/")[-1].split(".")[0].replace("Y", "")
+
+ cur.execute("truncate m_temp")
+
+ with open(filename, "r") as f:
+ cur.copy_expert("copy m_temp from stdin with csv header", f)
+
+ # "select *" relies on m_temp's column order matching usps_migration_raw.
+ cur.execute(
+ "insert into usps_migration_raw select *, %s from m_temp",
+ (year,),
+ )
+ conn.commit()
+
+
+if __name__ == "__main__":
+ logging.basicConfig(level=logging.INFO)
+ main()
diff --git a/etl/usps_migration.sql b/etl/usps_migration.sql
new file mode 100644
index 00000000..0f6394de
--- /dev/null
+++ b/etl/usps_migration.sql
@@ -0,0 +1,157 @@
+-- Direction label stored on each usps_migration row: 'in' or 'out'.
+-- The type is dropped (cascade) and recreated on every run.
+drop type if exists usps_migration_flow_direction cascade;
+
+create type usps_migration_flow_direction as enum (
+ 'in'
+ , 'out'
+);
+
+-- Category label stored on each usps_migration row. The labels mirror the
+-- suffixes of the total_from_zip_* / total_to_zip_* raw columns, with 'total'
+-- for the unsuffixed column. An enum is a type, so it is removed with
+-- "drop type" before being recreated.
+drop type if exists usps_migration_flow_type cascade;
+
+create type usps_migration_flow_type as enum (
+ 'total'
+ , 'business'
+ , 'family'
+ , 'individual'
+ , 'perm'
+ , 'temp'
+);
+
+-- Long-format migration flows: one row per (month, zip code, direction,
+-- flow type). The check constraint only accepts dates on the first day of
+-- a month.
+drop table if exists usps_migration cascade;
+
+create table usps_migration (
+ date_ date not null check (extract(day from date_) = 1) -- granularity is year-month
+ , zip_id int references zip_code (id)
+ , direction usps_migration_flow_direction not null
+ , type_ usps_migration_flow_type not null
+ , flow numeric
+ , primary key (date_ , zip_id , direction , type_)
+);
+
+-- Unpivot usps_migration_raw into usps_migration.
+--   process_date: parses yyyymm into a date.
+--   add_zip_id:   resolves the zip code (with any '=' characters removed) to
+--                 the zip_code row whose "valid" range contains the date
+--                 built from year_; rows without a match are dropped.
+-- The twelve selects then emit one row per raw measure column, tagged with a
+-- direction and a flow type.
+-- NOTE(review): the total_from_zip* columns are labelled 'in' and the
+-- total_to_zip* columns 'out'. If "from zip" counts moves leaving the zip
+-- code, these labels are inverted -- confirm against the USPS data dictionary.
+-- explain insert into usps_migration (date_, zip_id, direction, type_, flow)
+insert into usps_migration with process_date as (
+ select
+ to_date(yyyymm
+ , 'YYYYMM') as date_
+ , *
+ from
+ usps_migration_raw
+)
+, add_zip_id as (
+ select
+ zip_code.id as zip_id
+ , mr.*
+ from
+ process_date as mr
+ join zip_code on zip_code.zip_code = replace(mr.zip_code
+ , '='
+ , '')
+ and zip_code.valid @> to_date(year_::text
+ , 'YYYY'))
+ select
+ date_
+ , zip_id
+ , 'in'::usps_migration_flow_direction
+ , 'total'::usps_migration_flow_type
+ , total_from_zip
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'in'::usps_migration_flow_direction
+ , 'business'::usps_migration_flow_type
+ , total_from_zip_business
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'in'::usps_migration_flow_direction
+ , 'family'::usps_migration_flow_type
+ , total_from_zip_family
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'in'::usps_migration_flow_direction
+ , 'individual'::usps_migration_flow_type
+ , total_from_zip_individual
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'in'::usps_migration_flow_direction
+ , 'perm'::usps_migration_flow_type
+ , total_from_zip_perm
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'in'::usps_migration_flow_direction
+ , 'temp'::usps_migration_flow_type
+ , total_from_zip_temp
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'out'::usps_migration_flow_direction
+ , 'total'::usps_migration_flow_type
+ , total_to_zip
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'out'::usps_migration_flow_direction
+ , 'business'::usps_migration_flow_type
+ , total_to_zip_business
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'out'::usps_migration_flow_direction
+ , 'family'::usps_migration_flow_type
+ , total_to_zip_family
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'out'::usps_migration_flow_direction
+ , 'individual'::usps_migration_flow_type
+ , total_to_zip_individual
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'out'::usps_migration_flow_direction
+ , 'perm'::usps_migration_flow_type
+ , total_to_zip_perm
+ from
+ add_zip_id
+ union all
+ select
+ date_
+ , zip_id
+ , 'out'::usps_migration_flow_direction
+ , 'temp'::usps_migration_flow_type
+ , total_to_zip_temp
+ from
+ add_zip_id;
+
diff --git a/etl/usps_migration_raw_schema.sql b/etl/usps_migration_raw_schema.sql
new file mode 100644
index 00000000..50a823ff
--- /dev/null
+++ b/etl/usps_migration_raw_schema.sql
@@ -0,0 +1,22 @@
+-- Wide-format staging table for the USPS migration CSVs: one row per
+-- (yyyymm, zip code) with a "from zip" and a "to zip" count for each flow
+-- category. year_ is not a CSV column; the loader appends it per file.
+-- The table has no constraints.
+drop table if exists usps_migration_raw cascade;
+
+create table usps_migration_raw (
+ yyyymm text
+ , zip_code text
+ , city text
+ , state text
+ , total_from_zip numeric
+ , total_from_zip_business numeric
+ , total_from_zip_family numeric
+ , total_from_zip_individual numeric
+ , total_from_zip_perm numeric
+ , total_from_zip_temp numeric
+ , total_to_zip numeric
+ , total_to_zip_business numeric
+ , total_to_zip_family numeric
+ , total_to_zip_individual numeric
+ , total_to_zip_perm numeric
+ , total_to_zip_temp numeric
+ , year_ int
+);
+
From f868a53f379f17a6155cef58ed49fa06ae38612a Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 8 Aug 2024 10:59:45 -0400
Subject: [PATCH 026/142] remove comment
---
etl/usps_migration.sql | 2 --
1 file changed, 2 deletions(-)
diff --git a/etl/usps_migration.sql b/etl/usps_migration.sql
index 0f6394de..df30498c 100644
--- a/etl/usps_migration.sql
+++ b/etl/usps_migration.sql
@@ -27,7 +27,6 @@ create table usps_migration (
, primary key (date_ , zip_id , direction , type_)
);
--- explain insert into usps_migration (date_, zip_id, direction, type_, flow)
insert into usps_migration with process_date as (
select
to_date(yyyymm
@@ -154,4 +153,3 @@ insert into usps_migration with process_date as (
, total_to_zip_temp
from
add_zip_id;
-
From 04b83cdc17cc1409b27c293474ff4175afdf7237 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 8 Aug 2024 11:18:43 -0400
Subject: [PATCH 027/142] rename schema.sql to parcel_schema.sql
---
etl/load_parcels.py | 2 +-
etl/{schema.sql => parcel_schema.sql} | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
rename etl/{schema.sql => parcel_schema.sql} (99%)
diff --git a/etl/load_parcels.py b/etl/load_parcels.py
index 6604281f..ff2d5c67 100644
--- a/etl/load_parcels.py
+++ b/etl/load_parcels.py
@@ -10,7 +10,7 @@
conn = psycopg2.connect(host=HOST, user=USER, database="cities")
cur = conn.cursor()
-with open("etl/schema.sql", "r") as f:
+with open("etl/parcel_schema.sql", "r") as f:
cur.execute(f.read())
conn.commit()
diff --git a/etl/schema.sql b/etl/parcel_schema.sql
similarity index 99%
rename from etl/schema.sql
rename to etl/parcel_schema.sql
index 1029c451..24bf8523 100644
--- a/etl/schema.sql
+++ b/etl/parcel_schema.sql
@@ -33,3 +33,4 @@ comment on column parcel.emv_land is 'Estimated Market Value, land';
comment on column parcel.emv_building is 'Estimated Market Value, buildings';
comment on column parcel.emv_total is 'Estimated Market Value, total (may be more than sum of land and building)';
+
From d3fdee842d4cc52f0e95c4647be8e817ffd4fa55 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 8 Aug 2024 11:20:07 -0400
Subject: [PATCH 028/142] clean up load_parcels.py
---
etl/load_parcels.py | 80 +++++++++++++++++++++++++--------------------
1 file changed, 45 insertions(+), 35 deletions(-)
diff --git a/etl/load_parcels.py b/etl/load_parcels.py
index ff2d5c67..7a114b34 100644
--- a/etl/load_parcels.py
+++ b/etl/load_parcels.py
@@ -1,44 +1,54 @@
#!/usr/bin/env python
+import logging
import psycopg2
from db import HOST, USER
+log = logging.getLogger(__name__)
+
PARCEL_YEARS = range(2002, 2024)
COUNTY_ID = "053"
-conn = psycopg2.connect(host=HOST, user=USER, database="cities")
-cur = conn.cursor()
-
-with open("etl/parcel_schema.sql", "r") as f:
- cur.execute(f.read())
-conn.commit()
-
-# select distinct geometry from all parcel tables
-distinct_geom = " union ".join(
- f"select geom from parcel_raw_{year} where upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'"
- for year in PARCEL_YEARS
-)
-parcel_geom_load = f"insert into parcel_geom (geom) {distinct_geom};"
-print("Executing:", parcel_geom_load)
-cur.execute(parcel_geom_load)
-conn.commit()
-
-# insert parcel data into parcel table
-parcel_data = " union all ".join(
- f"""
- select replace(pin, '{COUNTY_ID}-', ''), '[{year-1}-01-01,{year}-01-01)'::daterange, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom.id
- from parcel_raw_{year}, parcel_geom
- where parcel_raw_{year}.geom = parcel_geom.geom
- and upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'
- """
- for year in PARCEL_YEARS
-)
-
-parcel_load = f"""
-insert into parcel (pid, valid, emv_land, emv_building, emv_total, year_built, sale_date, sale_value, geom_id)
- {parcel_data}
- """
-print("Executing:", parcel_load)
-cur.execute(parcel_load)
-conn.commit()
+
+def main():
+ """Rebuild the parcel tables from the per-year parcel_raw_<year> tables.
+
+ Executes etl/parcel_schema.sql, loads the distinct Minneapolis parcel
+ geometries of all PARCEL_YEARS into parcel_geom, then loads one parcel
+ row per raw Minneapolis row, linked to its geometry. Both statements
+ are assembled as text from the module constants and logged before they
+ are executed.
+ """
+ conn = psycopg2.connect(host=HOST, user=USER, database="cities")
+ cur = conn.cursor()
+
+ with open("etl/parcel_schema.sql", "r") as f:
+ cur.execute(f.read())
+ conn.commit()
+
+ # The city name is read from "city" for years before 2018 and from
+ # "ctu_name" from 2018 on. Plain "union" removes duplicate geometries.
+ # select distinct geometry from all parcel tables
+ distinct_geom = " union ".join(
+ f"select geom from parcel_raw_{year} where upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'"
+ for year in PARCEL_YEARS
+ )
+ parcel_geom_load = f"insert into parcel_geom (geom) {distinct_geom};"
+ log.info("Executing: %s", parcel_geom_load)
+ cur.execute(parcel_geom_load)
+ conn.commit()
+
+ # Per year: strip the "<COUNTY_ID>-" prefix from pin, mark the row valid for
+ # [Jan 1 of the previous year, Jan 1 of the table's year), turn zero values
+ # into NULL with nullif, and attach the id of the equal geometry.
+ # insert parcel data into parcel table
+ parcel_data = " union all ".join(
+ f"""
+ select replace(pin, '{COUNTY_ID}-', ''), '[{year-1}-01-01,{year}-01-01)'::daterange, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom.id
+ from parcel_raw_{year}, parcel_geom
+ where parcel_raw_{year}.geom = parcel_geom.geom
+ and upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'
+ """
+ for year in PARCEL_YEARS
+ )
+
+ parcel_load = f"""
+ insert into parcel (pid, valid, emv_land, emv_building, emv_total, year_built, sale_date, sale_value, geom_id)
+ {parcel_data}
+ """
+ log.info("Executing: %s", parcel_load)
+ cur.execute(parcel_load)
+ conn.commit()
+
+
+if __name__ == "__main__":
+ logging.basicConfig(level=logging.INFO)
+ main()
From 14a297b08919db2ef14a331a4ef8d04af41ccb8b Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 8 Aug 2024 11:53:49 -0400
Subject: [PATCH 029/142] reformat
---
etl/census_schema.sql | 1 +
etl/parcel_to_bg.sql | 1 +
etl/permit_schema.sql | 384 +++++++++++++------------------
etl/real_estate_transactions.sql | 1 +
etl/usps_migration.sql | 1 +
etl/zip_schema.sql | 16 +-
6 files changed, 184 insertions(+), 220 deletions(-)
diff --git a/etl/census_schema.sql b/etl/census_schema.sql
index 521bd20b..b90b5a90 100644
--- a/etl/census_schema.sql
+++ b/etl/census_schema.sql
@@ -88,3 +88,4 @@ from (
join census_tract using (statefp , countyfp , tractce)
where
census_tract.valid && bg.valid;
+
diff --git a/etl/parcel_to_bg.sql b/etl/parcel_to_bg.sql
index ebee0dde..5dbc7ac9 100644
--- a/etl/parcel_to_bg.sql
+++ b/etl/parcel_to_bg.sql
@@ -115,3 +115,4 @@ with parcel_with_geom as (
, 'closest'::parcel_census_bg_type
from
parcel_closest;
+
diff --git a/etl/permit_schema.sql b/etl/permit_schema.sql
index e1dc5df2..c5c7e5e9 100644
--- a/etl/permit_schema.sql
+++ b/etl/permit_schema.sql
@@ -1,82 +1,57 @@
drop table if exists residential_permit cascade;
create table residential_permit (
- id serial primary key,
- ctu_id text,
- coctu_id text,
- year int,
- tenure text,
- housing_ty text,
- res_permit text,
- address text,
- zip_code text,
- name text,
- buildings int,
- units int,
- age_restri int,
- memory_car int,
- assisted int,
- com_off_re boolean,
- sqf numeric,
- public_fun boolean,
- permit_val numeric,
- community_ text,
- notes text,
- pin text,
- geom geometry (multipoint, 26915)
+ id serial primary key
+ , ctu_id text
+ , coctu_id text
+ , year int
+ , tenure text
+ , housing_ty text
+ , res_permit text
+ , address text
+ , zip_code text
+ , name text
+ , buildings int
+ , units int
+ , age_restri int
+ , memory_car int
+ , assisted int
+ , com_off_re boolean
+ , sqf numeric
+ , public_fun boolean
+ , permit_val numeric
+ , community_ text
+ , notes text
+ , pin text
+ , geom geometry(multipoint , 26915)
);
-create index residential_permit_geom_idx on residential_permit using gist (
- geom
-);
+create index residential_permit_geom_idx on residential_permit using gist (geom);
-insert into residential_permit (
- ctu_id,
- coctu_id,
- year,
- tenure,
- housing_ty,
- res_permit,
- address,
- zip_code,
- name,
- buildings,
- units,
- age_restri,
- memory_car,
- assisted,
- com_off_re,
- sqf,
- public_fun,
- permit_val,
- community_,
- notes,
- pin,
- geom
-)
+insert into residential_permit (ctu_id , coctu_id , year , tenure , housing_ty , res_permit , address , zip_code , name , buildings , units , age_restri , memory_car , assisted , com_off_re , sqf , public_fun , permit_val , community_ , notes , pin , geom)
select
- ctu_id,
- coctu_id,
- year::int,
- tenure,
- housing_ty,
- res_permit,
- address,
- zip_code,
- name,
- buildings,
- units,
- age_restri,
- memory_car,
- assisted,
- com_off_re = 'Y',
- sqf,
- public_fun = 'Y',
- permit_val,
- community_,
- notes,
- pin,
- geom
+ ctu_id
+ , coctu_id
+ , year::int
+ , tenure
+ , housing_ty
+ , res_permit
+ , address
+ , zip_code
+ , name
+ , buildings
+ , units
+ , age_restri
+ , memory_car
+ , assisted
+ , com_off_re = 'Y'
+ , sqf
+ , public_fun = 'Y'
+ , permit_val
+ , community_
+ , notes
+ , pin
+ , geom
from
residential_permits_raw
where
@@ -86,132 +61,108 @@ where
drop table if exists residential_permit_parcel;
create table residential_permit_parcel (
- permit_id int references residential_permit (id),
- parcel_id int references parcel (id),
- type_ region_tag_type
+ permit_id int references residential_permit (id)
+ , parcel_id int references parcel (id)
+ , type_ region_tag_type
);
with within as (
select
- residential_permit.id as permit_id,
- parcel.id as parcel_id
+ residential_permit.id as permit_id
+ , parcel.id as parcel_id
from
parcel_with_geom as parcel
- join residential_permit on st_within(
- residential_permit.geom,
- parcel.geom
- )
- and to_date(
- year::text,
- 'YYYY'
- ) <@ parcel.valid
-),
-not_within as (
+ join residential_permit on st_within (residential_permit.geom
+ , parcel.geom)
+ and to_date(year::text
+ , 'YYYY') <@ parcel.valid
+)
+, not_within as (
select
- id,
- year,
- geom
+ id
+ , year
+ , geom
from
residential_permit
where
not exists (
- select permit_id
+ select
+ permit_id
from
within
where
- permit_id = id
- )
-),
-closest as (
+ permit_id = id)
+)
+, closest as (
select distinct on (permit.id)
- permit.id as permit_id,
- parcel.id as parcel_id
+ permit.id as permit_id
+ , parcel.id as parcel_id
from
not_within as permit
- join parcel_with_geom as parcel
- on st_dwithin(permit.geom, parcel.geom, 100.0) and to_date(
- year::text,
- 'YYYY'
- ) <@ parcel.valid
- order by
- permit_id,
- st_distance(
- permit.geom,
- parcel.geom
- )
-)
-insert into residential_permit_parcel select
- permit_id,
- parcel_id,
- 'within'::region_tag_type
-from
- within
-union all
-select
- permit_id,
- parcel_id,
- 'closest'::region_tag_type
+ join parcel_with_geom as parcel on st_dwithin (permit.geom
+ , parcel.geom
+ , 100.0)
+ and to_date(year::text
+ , 'YYYY') <@ parcel.valid
+ order by
+ permit_id
+ , st_distance (permit.geom
+ , parcel.geom))
+ insert into residential_permit_parcel
+ select
+ permit_id
+ , parcel_id
+ , 'within'::region_tag_type
+ from
+ within
+ union all
+ select
+ permit_id
+ , parcel_id
+ , 'closest'::region_tag_type
from
closest;
drop table if exists commercial_permit cascade;
create table commercial_permit (
- id serial primary key,
- ctu_id text,
- coctu_id text,
- year int,
- nonres_gro text,
- nonres_sub text,
- nonres_typ text,
- bldg_name text,
- bldg_desc text,
- permit_typ text,
- permit_val numeric,
- sqf int,
- address text,
- zip_code text,
- pin text,
- geom geometry (multipoint, 26915)
+ id serial primary key
+ , ctu_id text
+ , coctu_id text
+ , year int
+ , nonres_gro text
+ , nonres_sub text
+ , nonres_typ text
+ , bldg_name text
+ , bldg_desc text
+ , permit_typ text
+ , permit_val numeric
+ , sqf int
+ , address text
+ , zip_code text
+ , pin text
+ , geom geometry(multipoint , 26915)
);
-create index commercial_permit_geom_idx on commercial_permit using gist (
- geom
-);
+create index commercial_permit_geom_idx on commercial_permit using gist (geom);
-insert into commercial_permit (
- ctu_id,
- coctu_id,
- year,
- nonres_gro,
- nonres_sub,
- nonres_typ,
- bldg_name,
- bldg_desc,
- permit_typ,
- permit_val,
- sqf,
- address,
- zip_code,
- pin,
- geom
-)
+insert into commercial_permit (ctu_id , coctu_id , year , nonres_gro , nonres_sub , nonres_typ , bldg_name , bldg_desc , permit_typ , permit_val , sqf , address , zip_code , pin , geom)
select
- ctu_id,
- coctu_id,
- year::int,
- nonres_gro,
- nonres_sub,
- nonres_typ,
- bldg_name,
- bldg_desc,
- permit_typ,
- permit_val,
- sqf,
- address,
- zip_code,
- pin,
- geom
+ ctu_id
+ , coctu_id
+ , year::int
+ , nonres_gro
+ , nonres_sub
+ , nonres_typ
+ , bldg_name
+ , bldg_desc
+ , permit_typ
+ , permit_val
+ , sqf
+ , address
+ , zip_code
+ , pin
+ , geom
from
commercial_permits_raw
where
@@ -221,70 +172,65 @@ where
drop table if exists commercial_permit_parcel;
create table commercial_permit_parcel (
- permit_id int references commercial_permit (id),
- parcel_id int references parcel (id),
- type_ region_tag_type
+ permit_id int references commercial_permit (id)
+ , parcel_id int references parcel (id)
+ , type_ region_tag_type
);
with within as (
select
- commercial_permit.id as permit_id,
- parcel.id as parcel_id
+ commercial_permit.id as permit_id
+ , parcel.id as parcel_id
from
parcel_with_geom as parcel
- join commercial_permit on st_within(
- commercial_permit.geom,
- parcel.geom
- )
- and to_date(
- year::text,
- 'YYYY'
- ) <@ parcel.valid
-),
-not_within as (
+ join commercial_permit on st_within (commercial_permit.geom
+ , parcel.geom)
+ and to_date(year::text
+ , 'YYYY') <@ parcel.valid
+)
+, not_within as (
select
- id,
- year,
- geom
+ id
+ , year
+ , geom
from
commercial_permit
where
not exists (
- select permit_id
+ select
+ permit_id
from
within
where
- permit_id = id
- )
-),
-closest as (
+ permit_id = id)
+)
+, closest as (
select distinct on (permit.id)
- permit.id as permit_id,
- parcel.id as parcel_id
+ permit.id as permit_id
+ , parcel.id as parcel_id
from
not_within as permit
- join parcel_with_geom as parcel
- on st_dwithin(permit.geom, parcel.geom, 100.0) and to_date(
- year::text,
- 'YYYY'
- ) <@ parcel.valid
- order by
- permit_id,
- st_distance(
- permit.geom,
- parcel.geom
- )
-)
-insert into commercial_permit_parcel select
- permit_id,
- parcel_id,
- 'within'::region_tag_type
-from
- within
-union all
-select
- permit_id,
- parcel_id,
- 'closest'::region_tag_type
+ join parcel_with_geom as parcel on st_dwithin (permit.geom
+ , parcel.geom
+ , 100.0)
+ and to_date(year::text
+ , 'YYYY') <@ parcel.valid
+ order by
+ permit_id
+ , st_distance (permit.geom
+ , parcel.geom))
+ insert into commercial_permit_parcel
+ select
+ permit_id
+ , parcel_id
+ , 'within'::region_tag_type
+ from
+ within
+ union all
+ select
+ permit_id
+ , parcel_id
+ , 'closest'::region_tag_type
from
closest;
+
diff --git a/etl/real_estate_transactions.sql b/etl/real_estate_transactions.sql
index e02ffee8..6980b5ed 100644
--- a/etl/real_estate_transactions.sql
+++ b/etl/real_estate_transactions.sql
@@ -70,3 +70,4 @@ from
real_estate_transactions_scraped as scraped
join parcel on pid = parcel_id
and scraped.sale_date <@ valid;
+
diff --git a/etl/usps_migration.sql b/etl/usps_migration.sql
index df30498c..9a123bb4 100644
--- a/etl/usps_migration.sql
+++ b/etl/usps_migration.sql
@@ -153,3 +153,4 @@ insert into usps_migration with process_date as (
, total_to_zip_temp
from
add_zip_id;
+
diff --git a/etl/zip_schema.sql b/etl/zip_schema.sql
index d1a93ab7..1d9ab6c8 100644
--- a/etl/zip_schema.sql
+++ b/etl/zip_schema.sql
@@ -1,4 +1,4 @@
-drop table if exists zip_code;
+drop table if exists zip_code cascade;
create table zip_code (
id serial primary key
@@ -9,3 +9,17 @@ create table zip_code (
create index zip_code_geom_idx on zip_code using gist (geom);
+insert into zip_code (zip_code , valid , geom)
+select -- 2020 ZCTAs: open-ended validity starting 2020-01-01
+ zcta5ce20
+ , '[2020-01-01,)'::daterange
+ , geom
+from
+ zip_raw_2020
+union
+select -- 2000 ZCTAs: valid from 2000-01-01 up to (not including) 2020-01-01
+ zcta
+ , '[2000-01-01,2020-01-01)'::daterange
+ , ST_Transform (geom , 4269) -- reprojected to SRID 4269; presumably the SRID of zip_raw_2020 (TODO confirm)
+from
+ zip_raw_2000;
From 520efed06a2b0f42e72b8a1254fb6283e8d9724f Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 9 Aug 2024 14:13:25 -0400
Subject: [PATCH 030/142] add segregation index
---
etl/segregation.sql | 92 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
create mode 100644 etl/segregation.sql
diff --git a/etl/segregation.sql b/etl/segregation.sql
new file mode 100644
index 00000000..ba25e911
--- /dev/null
+++ b/etl/segregation.sql
@@ -0,0 +1,92 @@
+create or replace view categories as select * from ( -- population groups, named by ACS variable description, that enter the segregation index
+ values
+ ('population_white_non_hispanic'),
+ ('population_black_non_hispanic'),
+ ('population_hispanic_or_latino'),
+ ('population_asian_non_hispanic'),
+ ('population_native_hawaiian_or_pacific_islander_non_hispanic'),
+ ('population_american_indian_or_alaska_native_non_hispanic'),
+ ('population_multiple_races_non_hispanic'),
+ ('population_other_non_hispanic')
+) as t (description);
+
+drop type if exists reference_distribution cascade; -- cascade also drops objects that depend on the type
+create type reference_distribution as enum ( -- labels stored in the dist column of the segregation table
+ 'uniform'
+ , 'annual_city'
+ , 'average_city'
+);
+
+
+-- Segregation index for each tract for each year, computed for each reference
+-- distribution.
+--
+-- The segregation index is the KL-divergence between the distribution of
+-- population in a tract and a reference distribution. For example, a tract that
+-- has many more white people than the average for the city will have a high
+-- segregation index for the 'average_city' distribution.
+
+drop table if exists segregation;
+
+create table segregation as (
+with
+ pop_tyc as
+ ( -- Population by tract, year, and category
+ select id, year_, description, value_
+ from acs_tract
+ join acs_variable using (name_)
+ join categories using (description)
+ ),
+ pop_ty as
+ ( -- Population by tract and year (note: using 'population' variable instead of aggregating categories)
+ select id, year_, value_
+ from acs_tract join acs_variable using (name_)
+ where description = 'population'
+ ),
+ pop_yc as
+ ( -- Population by year and category
+ select year_, description, sum(value_) as value_
+ from pop_tyc group by year_, description
+ ),
+ pop_y as
+ ( -- Population by year
+ select year_, sum(value_) as value_ from pop_ty group by year_
+ ),
+ dist_yc as
+ ( -- Distribution of population by year and category
+ select description, c.year_,
+ case t.value_ when 0 then 0 else c.value_ / t.value_ end as value_ -- share is 0 when the yearly total is 0
+ from pop_yc as c join pop_y as t using (year_)
+ ),
+ dist_tyc as
+ ( -- Distribution of population by tract, year, and category
+ select id, year_, description,
+ case t.value_ when 0 then 0 else p.value_ / t.value_ end as value_ -- share is 0 when the tract total is 0
+ from pop_tyc as p join pop_ty as t using (year_, id)
+ ),
+ uniform_dist as
+ ( -- Uniform distribution across categories
+ with n_cat as (select count(*) as n_cat from categories)
+ select description, 1.0 / n_cat as value_
+ from categories, n_cat
+ ),
+ average_dist as
+ ( -- Average of the annual citywide distributions
+ select description, avg(value_) as value_
+ from dist_yc
+ group by description
+ )
+select id, year_, dist, sum(case when p = 0 or q = 0 then 0 else p * ln(p / q) end) as segregation_index -- KL terms with p = 0 or q = 0 contribute 0
+ from
+ ( -- one row per tract, year, category and reference distribution: p = tract share, q = reference share
+ select id, year_, 'uniform'::reference_distribution as dist, dist_tyc.value_ as p, uniform_dist.value_ as q
+ from dist_tyc join uniform_dist using (description)
+ union all
+ select id, year_, 'annual_city'::reference_distribution as dist, dist_tyc.value_ as p, dist_yc.value_ as q
+ from dist_tyc join dist_yc using (year_, description)
+ union all
+ select id, year_, 'average_city'::reference_distribution as dist, dist_tyc.value_ as p, average_dist.value_ as q
+ from dist_tyc join average_dist using (description)
+ ) as divergence_terms -- a FROM subquery must be aliased before PostgreSQL 16
+ group by id, year_, dist
+);
From 12c528971ab8a23d7d3f3670bcb94a5ce4846d61 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 13 Aug 2024 17:44:58 -0400
Subject: [PATCH 031/142] add dbt version of transformations
---
dbt/.gitignore | 3 +
dbt/README.md | 15 +++++
dbt/analyses/.gitkeep | 0
dbt/dbt_project.yml | 28 ++++++++
dbt/macros/.gitkeep | 0
dbt/macros/tag_regions.sql | 67 +++++++++++++++++++
dbt/models/acs_block_group.sql | 23 +++++++
dbt/models/acs_tract.sql | 23 +++++++
dbt/models/census_block_groups.sql | 53 +++++++++++++++
dbt/models/census_tracts.sql | 26 +++++++
dbt/models/commercial_permits.sql | 13 ++++
dbt/models/commercial_permits_base.sql | 18 +++++
dbt/models/commercial_permits_to_parcels.sql | 21 ++++++
dbt/models/fair_market_rents.sql | 32 +++++++++
dbt/models/parcels.sql | 21 ++++++
dbt/models/parcels_base.sql | 33 +++++++++
dbt/models/parcels_to_census_block_groups.sql | 21 ++++++
dbt/models/parcels_to_zip_codes.sql | 21 ++++++
dbt/models/residential_permits.sql | 13 ++++
dbt/models/residential_permits_base.sql | 25 +++++++
dbt/models/residential_permits_to_parcels.sql | 21 ++++++
dbt/models/usps_migration.sql | 44 ++++++++++++
dbt/models/zip_codes.sql | 20 ++++++
dbt/package-lock.yml | 4 ++
dbt/packages.yml | 3 +
dbt/seeds/.gitkeep | 0
dbt/seeds/region_tag_type.csv | 4 ++
dbt/snapshots/.gitkeep | 0
dbt/tests/.gitkeep | 0
29 files changed, 552 insertions(+)
create mode 100644 dbt/.gitignore
create mode 100644 dbt/README.md
create mode 100644 dbt/analyses/.gitkeep
create mode 100644 dbt/dbt_project.yml
create mode 100644 dbt/macros/.gitkeep
create mode 100644 dbt/macros/tag_regions.sql
create mode 100644 dbt/models/acs_block_group.sql
create mode 100644 dbt/models/acs_tract.sql
create mode 100644 dbt/models/census_block_groups.sql
create mode 100644 dbt/models/census_tracts.sql
create mode 100644 dbt/models/commercial_permits.sql
create mode 100644 dbt/models/commercial_permits_base.sql
create mode 100644 dbt/models/commercial_permits_to_parcels.sql
create mode 100644 dbt/models/fair_market_rents.sql
create mode 100644 dbt/models/parcels.sql
create mode 100644 dbt/models/parcels_base.sql
create mode 100644 dbt/models/parcels_to_census_block_groups.sql
create mode 100644 dbt/models/parcels_to_zip_codes.sql
create mode 100644 dbt/models/residential_permits.sql
create mode 100644 dbt/models/residential_permits_base.sql
create mode 100644 dbt/models/residential_permits_to_parcels.sql
create mode 100644 dbt/models/usps_migration.sql
create mode 100644 dbt/models/zip_codes.sql
create mode 100644 dbt/package-lock.yml
create mode 100644 dbt/packages.yml
create mode 100644 dbt/seeds/.gitkeep
create mode 100644 dbt/seeds/region_tag_type.csv
create mode 100644 dbt/snapshots/.gitkeep
create mode 100644 dbt/tests/.gitkeep
diff --git a/dbt/.gitignore b/dbt/.gitignore
new file mode 100644
index 00000000..23e952a5
--- /dev/null
+++ b/dbt/.gitignore
@@ -0,0 +1,3 @@
+target/
+dbt_packages/
+logs/
\ No newline at end of file
diff --git a/dbt/README.md b/dbt/README.md
new file mode 100644
index 00000000..7874ac84
--- /dev/null
+++ b/dbt/README.md
@@ -0,0 +1,15 @@
+Welcome to your new dbt project!
+
+### Using the starter project
+
+Try running the following commands:
+- dbt run
+- dbt test
+
+
+### Resources:
+- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
+- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
+- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
+- Find [dbt events](https://events.getdbt.com) near you
+- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
diff --git a/dbt/analyses/.gitkeep b/dbt/analyses/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/dbt/dbt_project.yml b/dbt/dbt_project.yml
new file mode 100644
index 00000000..e4b65a64
--- /dev/null
+++ b/dbt/dbt_project.yml
@@ -0,0 +1,28 @@
+
+# Name your project! Project names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: 'cities'
+version: '1.0.0'
+
+# This setting configures which "profile" dbt uses for this project.
+profile: 'cities'
+
+# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that models in this project can be
+# found in the "models/" directory. You probably won't need to change these!
+model-paths: ["models"]
+analysis-paths: ["analyses"]
+test-paths: ["tests"]
+seed-paths: ["seeds"]
+macro-paths: ["macros"]
+snapshot-paths: ["snapshots"]
+
+clean-targets: # directories to be removed by `dbt clean`
+ - "target"
+ - "dbt_packages"
+
+
+vars:
+ # years for which we have census tract/block group data
+ census_years: [2010, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]
diff --git a/dbt/macros/.gitkeep b/dbt/macros/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/dbt/macros/tag_regions.sql b/dbt/macros/tag_regions.sql
new file mode 100644
index 00000000..a9c64bae
--- /dev/null
+++ b/dbt/macros/tag_regions.sql
@@ -0,0 +1,67 @@
+-- Tag regions with their containing/most intersecting/closest parent regions.
+-- child_table: table with the child regions
+-- parent_table: table with the parent regions
+-- max_distance: maximum distance to consider a region as a parent, in the units of the spatial reference system of geom (meters only when that SRS is metric)
+{% macro tag_regions(child_table, parent_table, max_distance=100) %}
+( -- rendered as a parenthesised subquery; both relations are read through their id, valid and geom columns
+with child as (
+ select * from {{child_table}}
+)
+, parent as (
+ select * from {{parent_table}}
+)
+, within as ( -- tier 1: child geometry lies entirely inside the parent geometry
+ select child.id as child_id
+ , parent.id as parent_id
+ , child.valid * parent.valid as valid -- range intersection of the two validity periods
+ from
+ child
+ inner join parent
+ on ST_Within (child.geom, parent.geom)
+ and child.valid && parent.valid -- validity periods must overlap
+)
+, not_within as ( -- children that received no tier-1 match
+ select * from child
+ where not exists (select child_id from within where child_id = id)
+)
+, largest_overlap as ( -- tier 2: for each remaining child, the single parent with the largest intersection area
+ select distinct on (child.id)
+ child.id as child_id
+ , parent.id as parent_id
+ , child.valid * parent.valid as valid
+ from
+ not_within as child
+ inner join parent
+ on ST_Intersects (child.geom, parent.geom)
+ and child.valid && parent.valid
+ order by
+ child_id,
+ ST_Area (ST_Intersection (child.geom, parent.geom)) desc
+)
+, no_overlap as ( -- children matched by neither tier 1 nor tier 2
+ select * from not_within
+ where not exists (
+ select child_id from largest_overlap where child_id = id
+ )
+)
+, closest as ( -- tier 3: for each remaining child, the nearest parent no farther than max_distance
+ select distinct on (child.id)
+ child.id as child_id
+ , parent.id as parent_id
+ , child.valid * parent.valid as valid
+ from
+ no_overlap as child
+ inner join parent
+ on child.valid && parent.valid
+ and ST_DWithin (child.geom, parent.geom, {{max_distance}})
+ order by
+ child_id,
+ ST_Distance (child.geom, parent.geom)
+)
+select *, 'within' as type_ from within -- type_ records which tier produced the match
+union all
+select *, 'most_overlap' as type_ from largest_overlap
+union all
+select *, 'closest' as type_ from closest
+)
+{% endmacro %}
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
new file mode 100644
index 00000000..382d69d5
--- /dev/null
+++ b/dbt/models/acs_block_group.sql
@@ -0,0 +1,23 @@
+with
+census_block_groups as ( -- block-group keys and validity ranges from the census_block_groups model
+ select
+ census_block_group_id
+ , statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , valid
+ from
+ {{ ref('census_block_groups') }}
+)
+
+select -- raw ACS block-group values keyed by census_block_group_id
+ census_block_group_id
+ , year_
+ , name_
+ , value_
+from
+ acs_bg_raw
+ inner join census_block_groups using (statefp, countyfp, tractce, blkgrpce)
+where
+ to_date(acs_bg_raw.year_::text , 'YYYY') <@ census_block_groups.valid -- keep the block-group row whose validity range contains the ACS year
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
new file mode 100644
index 00000000..f71b7088
--- /dev/null
+++ b/dbt/models/acs_tract.sql
@@ -0,0 +1,23 @@
+with
+census_tracts as ( -- tract keys and validity ranges from the census_tracts model
+ select
+ census_tract_id
+ , statefp
+ , countyfp
+ , tractce
+ , valid
+
+ from {{ ref("census_tracts") }}
+)
+
+select -- raw ACS tract values keyed by census_tract_id
+ census_tract_id
+ , acs_tract_raw.year_
+ , acs_tract_raw.name_
+ , acs_tract_raw.value_
+from
+ acs_tract_raw
+ inner join census_tracts
+ using (statefp, countyfp, tractce)
+ where
+ to_date(acs_tract_raw.year_::text , 'YYYY') <@ census_tracts.valid -- keep the tract row whose validity range contains the ACS year
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
new file mode 100644
index 00000000..766a2c7c
--- /dev/null
+++ b/dbt/models/census_block_groups.sql
@@ -0,0 +1,53 @@
+with
+census_tracts as (
+ select
+ census_tract_id
+ , statefp
+ , countyfp
+ , tractce
+ , valid
+ from {{ ref("census_tracts") }}
+),
+census_block_groups as (
+ {% for year_ in var('census_years') %}
+ select
+ {{ 'statefp' if year_ >= 2013 else 'state' }} as statefp
+ , {{ 'countyfp' if year_ >= 2013 else 'county' }} as countyfp
+ , {{ 'tractce' if year_ >= 2013 else 'tract' }} as tractce
+ , {{ 'blkgrpce' if year_ >= 2013 else 'blkgrp' }} as blkgrpce
+ , {{ 'geoidfq' if year_ >= 2023 else
+ 'affgeoid' if year_ >= 2013 else
+ 'geo_id' }} as geoidfq
+ , '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
+ , geom
+ from
+ minneapolis.cb_{{ year_ }}_27_bg_500k
+ {% if not loop.last %}union all{% endif %}
+ {% endfor %}
+),
+census_block_groups_with_tracts as (
+ select
+ census_block_groups.statefp
+ , census_block_groups.countyfp
+ , census_block_groups.tractce
+ , census_block_groups.blkgrpce
+ , census_block_groups.geoidfq
+ , census_tracts.census_tract_id
+ , (census_block_groups.valid * census_tracts.valid) as valid
+ , census_block_groups.geom
+ from census_block_groups
+ inner join census_tracts using (statefp , countyfp , tractce)
+ where
+ census_tracts.valid && census_block_groups.valid
+)
+select
+ {{ dbt_utils.generate_surrogate_key(['geoidfq', 'valid']) }} as census_block_group_id
+ , statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , geoidfq
+ , census_tract_id
+ , valid
+ , geom
+from census_block_groups_with_tracts
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
new file mode 100644
index 00000000..c6c40cff
--- /dev/null
+++ b/dbt/models/census_tracts.sql
@@ -0,0 +1,26 @@
+with census_tracts as (
+{% for year_ in var('census_years') %}
+select
+ {{ 'statefp' if year_ >= 2013 else 'state' }} as statefp
+ , {{ 'countyfp' if year_ >= 2013 else 'county' }} as countyfp
+ , {{ 'tractce' if year_ >= 2013 else 'tract' }} as tractce
+ , {{ 'geoidfq' if year_ >= 2023 else
+ 'affgeoid' if year_ >= 2013 else
+ 'geo_id' }} as geoidfq
+ , '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
+ , geom
+from
+ minneapolis.cb_{{ year_ }}_27_tract_500k
+{% if not loop.last %}union all{% endif %}
+{% endfor %}
+)
+select
+ {{ dbt_utils.generate_surrogate_key(['geoidfq', 'valid']) }} as census_tract_id
+ , statefp
+ , countyfp
+ , tractce
+ , geoidfq
+ , valid
+ , geom
+from
+ census_tracts
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
new file mode 100644
index 00000000..d3eb1f74
--- /dev/null
+++ b/dbt/models/commercial_permits.sql
@@ -0,0 +1,13 @@
+with
+commercial_permits_to_parcels as ( -- permit -> parcel assignments
+ select
+ commercial_permit_id
+ , parcel_id
+ from {{ ref("commercial_permits_to_parcels") }}
+)
+select -- every commercial_permits_base column plus the assigned parcel_id
+ {{ dbt_utils.star(ref('commercial_permits_base')) }}
+ , parcel_id
+from
+ {{ ref('commercial_permits_base') }}
+ left join commercial_permits_to_parcels using (commercial_permit_id) -- left join keeps unmatched permits (parcel_id is null); NOTE(review): a permit tagged to several parcels would yield several rows - confirm
diff --git a/dbt/models/commercial_permits_base.sql b/dbt/models/commercial_permits_base.sql
new file mode 100644
index 00000000..100bdc32
--- /dev/null
+++ b/dbt/models/commercial_permits_base.sql
@@ -0,0 +1,18 @@
+select -- commercial permits from commercial_permits_raw with columns renamed
+ sde_id as commercial_permit_id
+ , year::int as year_
+ , nonres_gro as group_
+ , nonres_sub as subgroup
+ , nonres_typ as type_category
+ , bldg_name as building_name
+ , bldg_desc as building_description
+ , permit_typ as permit_type
+ , permit_val as permit_value
+ , sqf as square_feet
+ , address
+ , geom
+ from
+ commercial_permits_raw
+ where
+ co_code = '053' -- same county code that parcels_base strips from parcel pins
+ and lower(ctu_name) = 'minneapolis' -- case-insensitive match on the city name
diff --git a/dbt/models/commercial_permits_to_parcels.sql b/dbt/models/commercial_permits_to_parcels.sql
new file mode 100644
index 00000000..7c31e7ca
--- /dev/null
+++ b/dbt/models/commercial_permits_to_parcels.sql
@@ -0,0 +1,21 @@
+with
+commercial_permits as ( -- child regions in the shape tag_regions reads: id, valid, geom
+ select
+ commercial_permit_id as id
+ , daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid -- inclusive bounds: the single day January 1 of year_
+ , geom
+ from {{ ref("commercial_permits_base") }}
+)
+, parcels as ( -- parent regions in the shape tag_regions reads: id, valid, geom
+ select
+ parcel_id as id
+ , valid
+ , geom
+ from {{ ref("parcels") }}
+)
+select -- one row per permit/parcel match
+ child_id as commercial_permit_id
+ , parent_id as parcel_id
+ , valid
+ , type_
+from {{ tag_regions("commercial_permits", "parcels") }} as tagged -- a FROM subquery must be aliased before PostgreSQL 16
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
new file mode 100644
index 00000000..605f040c
--- /dev/null
+++ b/dbt/models/fair_market_rents.sql
@@ -0,0 +1,32 @@
+{% set num_bedrooms = range(0, 5) %} {# bedroom counts 0 through 4, one per rent_brN column #}
+
+with
+zip_codes as ( -- zip code keys and validity ranges from the zip_codes model
+ select
+ zip_code_id
+ , zip_code
+ , valid
+ from {{ ref('zip_codes') }}
+)
+, fmr_zip as ( -- raw rents (wide: one column per bedroom count) keyed by zip_code_id
+ select
+ zip_codes.zip_code_id
+ {% for bedroom in num_bedrooms %}
+ , fair_market_rents_raw.rent_br{{ bedroom }}
+ {% endfor %}
+ , fair_market_rents_raw.year_
+ from
+ fair_market_rents_raw
+ inner join zip_codes
+ on zip_codes.zip_code = fair_market_rents_raw.zip
+ and zip_codes.valid @> to_date(year_::text , 'YYYY') -- use the zip code row whose validity range contains the rent year
+)
+{% for bedroom in num_bedrooms %}
+select -- unpivot: one output row per zip code, year and bedroom count
+ zip_code_id
+ , rent_br{{ bedroom }} as rent
+ , {{ bedroom }} as num_bedrooms
+ , year_
+from fmr_zip
+{% if not loop.last %} union all {% endif %}
+{% endfor %}
diff --git a/dbt/models/parcels.sql b/dbt/models/parcels.sql
new file mode 100644
index 00000000..f3482927
--- /dev/null
+++ b/dbt/models/parcels.sql
@@ -0,0 +1,21 @@
+with
+parcels_to_zip_codes as ( -- parcel -> zip code assignments
+ select
+ parcel_id
+ , zip_code_id
+ from {{ref('parcels_to_zip_codes')}}
+),
+parcels_to_census_block_groups as ( -- parcel -> census block group assignments
+ select
+ parcel_id
+ , census_block_group_id
+ from {{ref('parcels_to_census_block_groups')}}
+)
+select -- every parcels_base column plus the two region keys
+ {{ dbt_utils.star(ref('parcels_base')) }}
+ , zip_code_id
+ , census_block_group_id
+from
+ {{ ref('parcels_base') }}
+ left join parcels_to_zip_codes using (parcel_id) -- left joins keep parcels without a match (key is null)
+ left join parcels_to_census_block_groups using (parcel_id)
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
new file mode 100644
index 00000000..f1cdbf36
--- /dev/null
+++ b/dbt/models/parcels_base.sql
@@ -0,0 +1,33 @@
+{% set years = range(2002, 2024) %} {# yearly parcel tables 2002 through 2023 #}
+{% set city = 'MINNEAPOLIS' %}
+{% set county_id = '053' %}
+
+with parcels as ( -- union of the per-year Hennepin parcel tables, filtered to the city
+ {% for year_ in years %}
+ select
+ replace(pin, '{{ county_id }}-', '') as pin, -- strip the county prefix from the pin
+ '[{{ year_ - 1 }}-01-01,{{ year_ }}-01-01)'::daterange as valid, -- NOTE(review): the table for year N is stamped valid for calendar year N-1 - confirm intended
+ nullif(emv_land, 0) as emv_land, -- zeros are stored as null
+ nullif(emv_bldg, 0) as emv_bldg,
+ nullif(emv_total, 0) as emv_total,
+ nullif(year_built, 0) as year_built,
+ sale_date,
+ nullif(sale_value, 0) as sale_value,
+ geom
+ from minneapolis.parcels{{ year_ }}hennepin
+ where upper({{ "city" if year_ < 2018 else "ctu_name" }}) = '{{ city }}' -- the city column is read as ctu_name from the 2018 table onward
+ {% if not loop.last %}union all{% endif %}
+ {% endfor %}
+)
+select -- parcel_id is a surrogate key over (pin, valid)
+ {{ dbt_utils.generate_surrogate_key(['pin', 'valid']) }} as parcel_id
+ , pin
+ , valid
+ , emv_land
+ , emv_bldg
+ , emv_total
+ , year_built
+ , sale_date
+ , sale_value
+ , geom
+from parcels
diff --git a/dbt/models/parcels_to_census_block_groups.sql b/dbt/models/parcels_to_census_block_groups.sql
new file mode 100644
index 00000000..2f2bd0f8
--- /dev/null
+++ b/dbt/models/parcels_to_census_block_groups.sql
@@ -0,0 +1,21 @@
+with
+parcels as ( -- child regions in the shape tag_regions reads: id, valid, geom
+ select
+ parcel_id as id
+ , valid
+ , ST_Transform(geom, 4269) as geom -- NOTE(review): in SRID 4269 the macro default max_distance of 100 is measured in that SRS's units (degrees), not meters - confirm
+ from {{ ref("parcels_base") }}
+),
+census_block_groups as ( -- parent regions in the shape tag_regions reads: id, valid, geom
+ select
+ census_block_group_id as id
+ , valid
+ , geom
+ from {{ ref("census_block_groups") }}
+)
+select -- one row per parcel/block-group match
+ child_id as parcel_id
+ , parent_id as census_block_group_id
+ , valid
+ , type_
+from {{ tag_regions("parcels", "census_block_groups") }} as tagged -- a FROM subquery must be aliased before PostgreSQL 16
diff --git a/dbt/models/parcels_to_zip_codes.sql b/dbt/models/parcels_to_zip_codes.sql
new file mode 100644
index 00000000..aac320c0
--- /dev/null
+++ b/dbt/models/parcels_to_zip_codes.sql
@@ -0,0 +1,21 @@
+with
+parcels as ( -- child regions in the shape tag_regions reads: id, valid, geom
+ select
+ parcel_id as id
+ , valid
+ , ST_Transform(geom, 4269) as geom -- NOTE(review): in SRID 4269 the macro default max_distance of 100 is measured in that SRS's units (degrees), not meters - confirm
+ from {{ ref("parcels_base") }}
+),
+zip_codes as ( -- parent regions in the shape tag_regions reads: id, valid, geom
+ select
+ zip_code_id as id
+ , valid
+ , geom
+ from {{ ref("zip_codes") }}
+)
+select -- one row per parcel/zip-code match
+ child_id as parcel_id
+ , parent_id as zip_code_id
+ , valid
+ , type_
+from {{ tag_regions("parcels", "zip_codes") }} as tagged -- a FROM subquery must be aliased before PostgreSQL 16
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
new file mode 100644
index 00000000..869e41d9
--- /dev/null
+++ b/dbt/models/residential_permits.sql
@@ -0,0 +1,13 @@
+with
+residential_permits_to_parcels as ( -- permit -> parcel assignments
+ select
+ residential_permit_id
+ , parcel_id
+ from {{ ref("residential_permits_to_parcels") }}
+)
+select -- every residential_permits_base column plus the assigned parcel_id
+ {{ dbt_utils.star(ref('residential_permits_base')) }}
+ , parcel_id
+from
+ {{ ref('residential_permits_base') }}
+ left join residential_permits_to_parcels using (residential_permit_id) -- left join keeps unmatched permits (parcel_id is null); NOTE(review): a permit tagged to several parcels would yield several rows - confirm
diff --git a/dbt/models/residential_permits_base.sql b/dbt/models/residential_permits_base.sql
new file mode 100644
index 00000000..a5bf8e0b
--- /dev/null
+++ b/dbt/models/residential_permits_base.sql
@@ -0,0 +1,25 @@
+select -- residential permits from residential_permits_raw with columns renamed
+ sde_id as residential_permit_id
+ , year::int as year_
+ , tenure
+ , housing_ty as housing_type
+ , res_permit as permit_type
+ , address
+ , name as name_
+ , buildings as num_buildings
+ , units as num_units
+ , age_restri as num_age_restricted_units
+ , memory_car as num_memory_care_units
+ , assisted as num_assisted_living_units
+ , com_off_re = 'Y' as is_commercial_and_residential -- boolean: true when the raw flag equals 'Y'
+ , sqf as square_feet
+ , public_fun = 'Y' as is_public_funded -- boolean: true when the raw flag equals 'Y'
+ , permit_val as permit_value
+ , community_ as community_designation
+ , notes
+ , geom
+from
+ residential_permits_raw
+where
+ co_code = '053' -- same county code that parcels_base strips from parcel pins
+ and lower(ctu_name) = 'minneapolis' -- case-insensitive match on the city name
diff --git a/dbt/models/residential_permits_to_parcels.sql b/dbt/models/residential_permits_to_parcels.sql
new file mode 100644
index 00000000..2c90dc32
--- /dev/null
+++ b/dbt/models/residential_permits_to_parcels.sql
@@ -0,0 +1,21 @@
+with
+residential_permits as ( -- child regions in the shape tag_regions reads: id, valid, geom
+ select
+ residential_permit_id as id
+ , daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid -- inclusive bounds: the single day January 1 of year_
+ , geom
+ from {{ ref("residential_permits_base") }}
+)
+, parcels as ( -- parent regions in the shape tag_regions reads: id, valid, geom
+ select
+ parcel_id as id
+ , valid
+ , geom
+ from {{ ref("parcels") }}
+)
+select -- one row per permit/parcel match
+ child_id as residential_permit_id
+ , parent_id as parcel_id
+ , valid
+ , type_
+from {{ tag_regions("residential_permits", "parcels") }} as tagged -- a FROM subquery must be aliased before PostgreSQL 16
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
new file mode 100644
index 00000000..031446ab
--- /dev/null
+++ b/dbt/models/usps_migration.sql
@@ -0,0 +1,44 @@
+{% set usps_migration_flow_types = ['business', 'family', 'individual', 'perm', 'temp'] %}
+{% set usps_migration_flow_directions = ['from', 'to'] %}
+
+with process_date as ( -- raw rows with yyyymm parsed into a date
+ select to_date(yyyymm, 'YYYYMM') as date_, *
+ from usps_migration_raw
+)
+, zip_codes as ( -- zip code keys and validity ranges from the zip_codes model
+ select
+ zip_code_id
+ , zip_code
+ , valid
+ from
+ {{ ref('zip_codes') }}
+)
+, add_zip_id as ( -- attach the zip_code_id whose validity range contains the row date
+ select zip_code_id, process_date.*
+ from
+ process_date
+ inner join zip_codes
+ on zip_codes.zip_code = replace(process_date.zip_code, '=', '') -- raw zip codes are compared with any = characters removed
+ and process_date.date_ <@ zip_codes.valid
+)
+{% for flow_direction in usps_migration_flow_directions %}
+ select -- unpivot: the overall total column for this direction
+ date_
+ , zip_code_id
+ , '{{ flow_direction }}' as flow_direction
+ , 'total' as flow_type
+ , total_{{ flow_direction }}_zip as flow_value
+ from add_zip_id
+ union all
+ {% for flow_type in usps_migration_flow_types %}
+ select -- unpivot: the per-type total column for this direction
+ date_
+ , zip_code_id
+ , '{{ flow_direction }}' as flow_direction
+ , '{{ flow_type }}' as flow_type
+ , total_{{ flow_direction }}_zip_{{ flow_type }} as flow_value
+ from add_zip_id
+ {% if not loop.last %} union all {% endif %}
+ {% endfor %}
+{% if not loop.last %} union all {% endif %}
+{% endfor %}
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
new file mode 100644
index 00000000..048f82f9
--- /dev/null
+++ b/dbt/models/zip_codes.sql
@@ -0,0 +1,20 @@
+with
+zip_codes as ( -- 2020 and 2000 ZCTA layers stacked with non-overlapping validity ranges
+select
+ zcta5ce20 as zip_code,
+ '[2020-01-01,)'::daterange as valid, -- open-ended from 2020-01-01
+ geom
+from zip_raw_2020
+union all
+select
+ zcta as zip_code,
+ '[2000-01-01,2020-01-01)'::daterange as valid, -- 2000-01-01 up to, not including, 2020-01-01
+ ST_Transform(geom, 4269) as geom -- reprojected to SRID 4269; presumably the SRID of zip_raw_2020 (TODO confirm)
+from zip_raw_2000
+)
+select -- zip_code_id is a surrogate key over (zip_code, valid)
+ {{ dbt_utils.generate_surrogate_key(['zip_code', 'valid']) }} as zip_code_id
+ , zip_code
+ , valid
+ , geom
+from zip_codes
diff --git a/dbt/package-lock.yml b/dbt/package-lock.yml
new file mode 100644
index 00000000..feb1453d
--- /dev/null
+++ b/dbt/package-lock.yml
@@ -0,0 +1,4 @@
+packages:
+ - git: https://github.com/dbt-labs/dbt-utils.git
+ revision: 85ade29c3e69bed3a13812c716c19eea9a0551c4
+sha1_hash: c4c136ad4314bafcbe374c3b08b8711f1da046b7
diff --git a/dbt/packages.yml b/dbt/packages.yml
new file mode 100644
index 00000000..4f2aa773
--- /dev/null
+++ b/dbt/packages.yml
@@ -0,0 +1,3 @@
+packages:
+ - git: "https://github.com/dbt-labs/dbt-utils.git"
+ revision: 1.2.0
diff --git a/dbt/seeds/.gitkeep b/dbt/seeds/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/dbt/seeds/region_tag_type.csv b/dbt/seeds/region_tag_type.csv
new file mode 100644
index 00000000..a85bccfd
--- /dev/null
+++ b/dbt/seeds/region_tag_type.csv
@@ -0,0 +1,4 @@
+type_
+within
+most_overlap
+closest
diff --git a/dbt/snapshots/.gitkeep b/dbt/snapshots/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/dbt/tests/.gitkeep b/dbt/tests/.gitkeep
new file mode 100644
index 00000000..e69de29b
From 6b43cc1a90dfd10b4b29e762c8acc9e6ee28fc98 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 10:20:28 -0400
Subject: [PATCH 032/142] remove unused seed
---
dbt/seeds/region_tag_type.csv | 4 ----
1 file changed, 4 deletions(-)
delete mode 100644 dbt/seeds/region_tag_type.csv
diff --git a/dbt/seeds/region_tag_type.csv b/dbt/seeds/region_tag_type.csv
deleted file mode 100644
index a85bccfd..00000000
--- a/dbt/seeds/region_tag_type.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-type_
-within
-most_overlap
-closest
From 0eaf473eab17974cef59f8c974ffd90ee08d5fb4 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 10:20:39 -0400
Subject: [PATCH 033/142] add segregation index model
---
dbt/macros/safe_divide.sql | 3 +
dbt/models/segregation_indexes.sql | 110 ++++++++++++++++++++++++++++
dbt/seeds/population_categories.csv | 9 +++
3 files changed, 122 insertions(+)
create mode 100644 dbt/macros/safe_divide.sql
create mode 100644 dbt/models/segregation_indexes.sql
create mode 100644 dbt/seeds/population_categories.csv
diff --git a/dbt/macros/safe_divide.sql b/dbt/macros/safe_divide.sql
new file mode 100644
index 00000000..7d1d5723
--- /dev/null
+++ b/dbt/macros/safe_divide.sql
@@ -0,0 +1,3 @@
+{% macro safe_divide(num, dem) %} {# renders num / dem, or 0 when dem equals 0; both arguments are parenthesised so compound expressions keep their precedence #}
+ (case when ({{ dem }}) = 0 then 0 else ({{ num }}) / ({{ dem }}) end)
+{% endmacro %}
diff --git a/dbt/models/segregation_indexes.sql b/dbt/models/segregation_indexes.sql
new file mode 100644
index 00000000..2d2c3cac
--- /dev/null
+++ b/dbt/models/segregation_indexes.sql
@@ -0,0 +1,110 @@
+-- Segregation index for each tract for each year, computed for each reference
+-- distribution.
+--
+-- The segregation index is the KL-divergence between the distribution of
+-- population in a tract and a reference distribution. For example, a tract that
+-- has many more white people than the average for the city will have a high
+-- segregation index for the 'average_city' distribution.
+with
+ categories as ( -- population groups that enter the index, from the population_categories seed
+ select category from {{ ref("population_categories") }}
+ )
+ , acs_tract as (
+ select
+ census_tract_id
+ , year_
+ , name_
+ , value_
+ from {{ ref("acs_tract") }}
+ )
+ , pop_tyc as
+ ( -- Population by tract, year, and category
+ select acs_tract.census_tract_id, acs_tract.year_, categories.category, acs_tract.value_
+ from acs_tract
+ join acs_variable using (name_)
+ join categories on categories.category = acs_variable.description
+ ),
+ pop_ty as
+ ( -- Population by tract and year (note: using 'population' variable instead of aggregating categories)
+ select census_tract_id, year_, value_
+ from acs_tract join acs_variable using (name_)
+ where acs_variable.description = 'population'
+ ),
+ pop_yc as
+ ( -- Population by year and category
+ select year_, category, sum(value_) as value_
+ from pop_tyc
+ group by year_, category
+ ),
+ pop_y as
+ ( -- Population by year
+ select year_, sum(value_) as value_
+ from pop_ty
+ group by year_
+ ),
+ dist_yc as
+ ( -- Distribution of population by year and category
+ select
+ pop_yc.year_,
+ pop_yc.category,
+ {{ safe_divide('pop_yc.value_', 'pop_y.value_') }} as value_
+ from pop_yc
+ inner join pop_y using (year_)
+ ),
+ dist_tyc as
+ ( -- Distribution of population by tract, year, and category
+ select
+ pop_tyc.census_tract_id,
+ pop_tyc.year_,
+ pop_tyc.category,
+ {{ safe_divide('pop_tyc.value_', 'pop_ty.value_') }} as value_
+ from pop_tyc
+ inner join pop_ty using (year_, census_tract_id)
+ ),
+ uniform_dist as
+ ( -- Uniform distribution across categories
+ with n_cat as (select count(*) as n_cat from categories)
+ select category, 1.0 / n_cat as value_
+ from categories, n_cat
+ ),
+ average_dist as
+ ( -- Average of the annual citywide distributions
+ select category, avg(value_) as value_
+ from dist_yc
+ group by category
+ )
+select
+ census_tract_id,
+ year_,
+ dist as distribution,
+ sum(case when p = 0 or q = 0 then 0 else p * ln(p / q) end) as segregation_index -- KL terms with p = 0 or q = 0 contribute 0
+from
+ ( -- one row per tract, year, category and reference distribution: p = tract share, q = reference share
+ select
+ dist_tyc.census_tract_id,
+ dist_tyc.year_,
+ dist_tyc.value_ as p,
+ uniform_dist.value_ as q,
+ 'uniform' as dist
+ from dist_tyc
+ inner join uniform_dist using (category)
+ union all
+ select
+ dist_tyc.census_tract_id,
+ dist_tyc.year_,
+ dist_tyc.value_ as p,
+ dist_yc.value_ as q,
+ 'annual_city' as dist
+ from dist_tyc
+ inner join dist_yc using (year_, category)
+ union all
+ select
+ dist_tyc.census_tract_id,
+ dist_tyc.year_,
+ dist_tyc.value_ as p,
+ average_dist.value_ as q,
+ 'average_city' as dist
+ from dist_tyc
+ inner join average_dist using (category)
+ ) as divergence_terms -- a FROM subquery must be aliased before PostgreSQL 16
+group by census_tract_id, year_, dist
diff --git a/dbt/seeds/population_categories.csv b/dbt/seeds/population_categories.csv
new file mode 100644
index 00000000..79e93b14
--- /dev/null
+++ b/dbt/seeds/population_categories.csv
@@ -0,0 +1,9 @@
+category
+population_white_non_hispanic
+population_black_non_hispanic
+population_hispanic_or_latino
+population_asian_non_hispanic
+population_native_hawaiian_or_pacific_islander_non_hispanic
+population_american_indian_or_alaska_native_non_hispanic
+population_multiple_races_non_hispanic
+population_other_non_hispanic
From bcd84152182d2fc84ed754dde6042a9b3022fe6d Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 15:00:56 -0400
Subject: [PATCH 034/142] use 2010 census tract/bg data for all years before
2013
---
dbt/models/census_block_groups.sql | 27 +++++++++++++++++----------
dbt/models/census_tracts.sql | 20 +++++++++++++-------
2 files changed, 30 insertions(+), 17 deletions(-)
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
index 766a2c7c..cad69186 100644
--- a/dbt/models/census_block_groups.sql
+++ b/dbt/models/census_block_groups.sql
@@ -11,17 +11,24 @@ census_tracts as (
census_block_groups as (
{% for year_ in var('census_years') %}
select
- {{ 'statefp' if year_ >= 2013 else 'state' }} as statefp
- , {{ 'countyfp' if year_ >= 2013 else 'county' }} as countyfp
- , {{ 'tractce' if year_ >= 2013 else 'tract' }} as tractce
- , {{ 'blkgrpce' if year_ >= 2013 else 'blkgrp' }} as blkgrpce
- , {{ 'geoidfq' if year_ >= 2023 else
- 'affgeoid' if year_ >= 2013 else
- 'geo_id' }} as geoidfq
- , '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
- , geom
+ {% if year_ == 2010 %}
+ state as statefp -- the 2010 table uses different column names; rename them to match the other years
+ , county as countyfp
+ , tract as tractce
+ , blkgrp as blkgrpce
+ , geo_id as geoidfq
+ , '[,2013-01-01)'::daterange as valid -- use 2010 data for all years before 2013
+ {% else %}
+ statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq -- tables from 2023 on name this column geoidfq; the other non-2010 tables use affgeoid
+ , '[{{ year_ }}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid -- every non-2010 table is valid for its own calendar year only
+ {% endif %}
+ , geom
from
- minneapolis.cb_{{ year_ }}_27_bg_500k
+ minneapolis.cb_{{ year_ }}_27_bg_500k
{% if not loop.last %}union all{% endif %}
{% endfor %}
),
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index c6c40cff..5d48b46b 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -1,13 +1,19 @@
with census_tracts as (
-{% for year_ in var('census_years') %}
+ {% for year_ in var('census_years') %}
select
- {{ 'statefp' if year_ >= 2013 else 'state' }} as statefp
- , {{ 'countyfp' if year_ >= 2013 else 'county' }} as countyfp
- , {{ 'tractce' if year_ >= 2013 else 'tract' }} as tractce
- , {{ 'geoidfq' if year_ >= 2023 else
- 'affgeoid' if year_ >= 2013 else
- 'geo_id' }} as geoidfq
+ {% if year_ == 2010 %}
+ state as statefp
+ , county as countyfp
+ , tract as tractce
+ , geo_id as geoidfq
+ , '[,2013-01-01)'::daterange as valid
+ {% else %}
+ statefp
+ , countyfp
+ , tractce
+ , {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq
, '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
+{% endif %}
, geom
from
minneapolis.cb_{{ year_ }}_27_tract_500k
From f7e78b7a5c2d6efd13b11eb8667e55660ed874eb Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 15:01:31 -0400
Subject: [PATCH 035/142] split zip codes into three models and aggregate
regions to avoid duplicates
---
dbt/models/zip_codes.sql | 8 ++++----
dbt/models/zip_codes_2000.sql | 6 ++++++
dbt/models/zip_codes_2020.sql | 4 ++++
3 files changed, 14 insertions(+), 4 deletions(-)
create mode 100644 dbt/models/zip_codes_2000.sql
create mode 100644 dbt/models/zip_codes_2020.sql
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 048f82f9..4e75d2a7 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -1,16 +1,16 @@
with
zip_codes as (
select
- zcta5ce20 as zip_code,
+ zip_code,
'[2020-01-01,)'::daterange as valid,
geom
-from zip_raw_2020
+from {{ ref('zip_codes_2020') }}
union all
select
- zcta as zip_code,
+ zip_code,
'[2000-01-01,2020-01-01)'::daterange as valid,
ST_Transform(geom, 4269) as geom
-from zip_raw_2000
+from {{ ref('zip_codes_2000') }}
)
select
{{ dbt_utils.generate_surrogate_key(['zip_code', 'valid']) }} as zip_code_id
diff --git a/dbt/models/zip_codes_2000.sql b/dbt/models/zip_codes_2000.sql
new file mode 100644
index 00000000..fd9219ee
--- /dev/null
+++ b/dbt/models/zip_codes_2000.sql
@@ -0,0 +1,6 @@
+select
+ zcta as zip_code,
+ ST_Union(geom) as geom
+from
+ zip_raw_2000
+group by zcta
diff --git a/dbt/models/zip_codes_2020.sql b/dbt/models/zip_codes_2020.sql
new file mode 100644
index 00000000..2bbc29e7
--- /dev/null
+++ b/dbt/models/zip_codes_2020.sql
@@ -0,0 +1,4 @@
+select
+ zcta5ce20 as zip_code,
+ geom
+from zip_raw_2020
From 43675a22a79cbcb50ecef9e4599cf1c689eb6bc9 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 15:02:26 -0400
Subject: [PATCH 036/142] switch to package syntax
---
dbt/package-lock.yml | 6 +++---
dbt/packages.yml | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/dbt/package-lock.yml b/dbt/package-lock.yml
index feb1453d..5e486a0d 100644
--- a/dbt/package-lock.yml
+++ b/dbt/package-lock.yml
@@ -1,4 +1,4 @@
packages:
- - git: https://github.com/dbt-labs/dbt-utils.git
- revision: 85ade29c3e69bed3a13812c716c19eea9a0551c4
-sha1_hash: c4c136ad4314bafcbe374c3b08b8711f1da046b7
+ - package: dbt-labs/dbt_utils
+ version: 1.2.0
+sha1_hash: d4f259856543b0ef301e0b3b0bbc94ccb6b12a54
diff --git a/dbt/packages.yml b/dbt/packages.yml
index 4f2aa773..b9609fcb 100644
--- a/dbt/packages.yml
+++ b/dbt/packages.yml
@@ -1,3 +1,3 @@
packages:
- - git: "https://github.com/dbt-labs/dbt-utils.git"
- revision: 1.2.0
+ - package: dbt-labs/dbt_utils
+ version: 1.2.0
From 31512934911c3a7bd3598f6adc96d47b75ca3fd9 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 15:03:27 -0400
Subject: [PATCH 037/142] improve performance of region tagging by adding
indexes
---
dbt/macros/tag_regions.sql | 6 ++++--
dbt/models/census_block_groups.sql | 11 +++++++++++
dbt/models/parcels_base.sql | 13 ++++++++++++-
dbt/models/parcels_to_census_block_groups.sql | 10 ++++++++++
dbt/models/parcels_to_zip_codes.sql | 6 ++++++
dbt/models/zip_codes.sql | 10 ++++++++++
6 files changed, 53 insertions(+), 3 deletions(-)
diff --git a/dbt/macros/tag_regions.sql b/dbt/macros/tag_regions.sql
index a9c64bae..ae76c040 100644
--- a/dbt/macros/tag_regions.sql
+++ b/dbt/macros/tag_regions.sql
@@ -4,10 +4,12 @@
-- max_distance: maximum distance to consider a region as a parent (meters)
{% macro tag_regions(child_table, parent_table, max_distance=100) %}
(
-with child as (
+-- "not materialized" asks Postgres to inline these CTEs into the queries that
+-- read them, so indexes on the underlying child and parent tables can be used
+with child as not materialized (
select * from {{child_table}}
)
-, parent as (
+, parent as not materialized (
select * from {{parent_table}}
)
, within as (
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
index cad69186..20f3a089 100644
--- a/dbt/models/census_block_groups.sql
+++ b/dbt/models/census_block_groups.sql
@@ -1,3 +1,14 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_block_group_id'], 'unique': true},
+ {'columns': ['geom'], 'type': 'gist'},
+ {'columns': ['valid', 'geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
with
census_tracts as (
select
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index f1cdbf36..904cd6bf 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -1,3 +1,13 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['parcel_id'], 'unique': true},
+ {'columns': ['geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
{% set years = range(2002, 2024) %}
{% set city = 'MINNEAPOLIS' %}
{% set county_id = '053' %}
@@ -5,6 +15,7 @@
with parcels as (
{% for year_ in years %}
select
+ ogc_fid,
replace(pin, '{{ county_id }}-', '') as pin,
'[{{ year_ - 1 }}-01-01,{{ year_ }}-01-01)'::daterange as valid,
nullif(emv_land, 0) as emv_land,
@@ -20,7 +31,7 @@ with parcels as (
{% endfor %}
)
select
- {{ dbt_utils.generate_surrogate_key(['pin', 'valid']) }} as parcel_id
+ {{ dbt_utils.generate_surrogate_key(['ogc_fid', 'valid']) }} as parcel_id
, pin
, valid
, emv_land
diff --git a/dbt/models/parcels_to_census_block_groups.sql b/dbt/models/parcels_to_census_block_groups.sql
index 2f2bd0f8..9215c72d 100644
--- a/dbt/models/parcels_to_census_block_groups.sql
+++ b/dbt/models/parcels_to_census_block_groups.sql
@@ -1,3 +1,13 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['parcel_id']},
+ {'columns': ['census_block_group_id']}
+ ]
+ )
+}}
+
with
parcels as (
select
diff --git a/dbt/models/parcels_to_zip_codes.sql b/dbt/models/parcels_to_zip_codes.sql
index aac320c0..f97afcad 100644
--- a/dbt/models/parcels_to_zip_codes.sql
+++ b/dbt/models/parcels_to_zip_codes.sql
@@ -1,3 +1,9 @@
+{{
+ config(
+ materialized='table'
+ )
+}}
+
with
parcels as (
select
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 4e75d2a7..48180e1d 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -1,3 +1,13 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['zip_code_id'], 'unique': true},
+ {'columns': ['geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
with
zip_codes as (
select
From 50fc6f28f7befd5a2b3bb343117232f28ed7010d Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 15:04:05 -0400
Subject: [PATCH 038/142] add data tests
---
dbt/models/schema.yml | 181 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 181 insertions(+)
create mode 100644 dbt/models/schema.yml
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
new file mode 100644
index 00000000..eb57b33f
--- /dev/null
+++ b/dbt/models/schema.yml
@@ -0,0 +1,181 @@
+models:
+ - name: census_tracts
+ columns:
+ - name: census_tract_id
+ data_tests:
+ - unique
+ - not_null
+
+ - name: census_block_groups
+ columns:
+ - name: census_block_group_id
+ data_tests:
+ - unique
+ - not_null
+ - name: census_tract_id
+ data_tests:
+ - relationships:
+ to: ref('census_tracts')
+ field: census_tract_id
+
+ - name: acs_tract
+ data_tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - census_tract_id
+ - year_
+ - name_
+ columns:
+ - name: census_tract_id
+ data_tests:
+ - relationships:
+ to: ref('census_tracts')
+ field: census_tract_id
+
+ - name: acs_block_group
+ data_tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - census_block_group_id
+ - year_
+ - name_
+ columns:
+ - name: census_block_group_id
+ data_tests:
+ - relationships:
+ to: ref('census_block_groups')
+ field: census_block_group_id
+
+ - name: segregation_indexes
+ data_tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - census_tract_id
+ - year_
+ - distribution
+ columns:
+ - name: census_tract_id
+ data_tests:
+ - relationships:
+ to: ref('census_tracts')
+ field: census_tract_id
+
+ - name: parcels
+ columns:
+ - name: parcel_id
+ data_tests:
+ - unique
+ - not_null
+ - name: zip_code_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('zip_codes')
+ field: zip_code_id
+ - name: census_block_group_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('census_block_groups')
+ field: census_block_group_id
+
+ - name: parcels_to_census_block_groups
+ data_tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - parcel_id
+ - census_block_group_id
+ columns:
+ - name: parcel_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('parcels')
+ field: parcel_id
+ - name: census_block_group_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('census_block_groups')
+ field: census_block_group_id
+
+ - name: parcels_to_zip_codes
+ data_tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - parcel_id
+ - zip_code_id
+ columns:
+ - name: parcel_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('parcels')
+ field: parcel_id
+ - name: zip_code_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('zip_codes')
+ field: zip_code_id
+
+ - name: zip_codes_2000
+ columns:
+ - name: zip_code
+ data_tests:
+ - not_null
+ - unique
+
+ - name: zip_codes_2020
+ columns:
+ - name: zip_code
+ data_tests:
+ - not_null
+ - unique
+
+ - name: zip_codes
+ columns:
+ - name: zip_code_id
+ data_tests:
+ - not_null
+ - unique
+
+ - name: usps_migration
+ data_tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - parcel_id
+ - zip_code_id
+ columns:
+ - name: zip_code_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('zip_codes')
+ field: zip_code_id
+
+ - name: commercial_permits
+ columns:
+ - name: commercial_permit_id
+ data_tests:
+ - not_null
+ - unique
+ - name: parcel_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('parcels')
+ field: parcel_id
+
+ - name: residential_permits
+ columns:
+ - name: residential_permit_id
+ data_tests:
+ - not_null
+ - unique
+ - name: parcel_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('parcels')
+ field: parcel_id
From ef8024bf084c98df35ebdc0445ae4303ac542d5b Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 15:14:57 -0400
Subject: [PATCH 039/142] fix tests
---
dbt/models/schema.yml | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index eb57b33f..6ef336f6 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -144,8 +144,10 @@ models:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- - parcel_id
+ - date_
- zip_code_id
+ - flow_direction
+ - flow_type
columns:
- name: zip_code_id
data_tests:
From 46de3ffba1fe68c002e14b83f26b564f388c5b3e Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 15:37:02 -0400
Subject: [PATCH 040/142] add sources when available
---
dbt/models/census_block_groups.sql | 2 +-
dbt/models/census_tracts.sql | 2 +-
dbt/models/city_boundary.sql | 4 ++
dbt/models/neighborhoods.sql | 6 +++
dbt/models/parcels_base.sql | 2 +-
dbt/models/schema.yml | 69 ++++++++++++++++++++++++++++++
dbt/models/wards.sql | 5 +++
7 files changed, 87 insertions(+), 3 deletions(-)
create mode 100644 dbt/models/city_boundary.sql
create mode 100644 dbt/models/neighborhoods.sql
create mode 100644 dbt/models/wards.sql
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
index 20f3a089..6c0c31ce 100644
--- a/dbt/models/census_block_groups.sql
+++ b/dbt/models/census_block_groups.sql
@@ -39,7 +39,7 @@ census_block_groups as (
{% endif %}
, geom
from
- minneapolis.cb_{{ year_ }}_27_bg_500k
+ {{ source('minneapolis', 'cb_' ~ year_ ~ '_27_bg_500k') }}
{% if not loop.last %}union all{% endif %}
{% endfor %}
),
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 5d48b46b..05a79469 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -16,7 +16,7 @@ select
{% endif %}
, geom
from
- minneapolis.cb_{{ year_ }}_27_tract_500k
+ {{ source('minneapolis', 'cb_' ~ year_ ~ '_27_tract_500k') }}
{% if not loop.last %}union all{% endif %}
{% endfor %}
)
diff --git a/dbt/models/city_boundary.sql b/dbt/models/city_boundary.sql
new file mode 100644
index 00000000..fe44dbe0
--- /dev/null
+++ b/dbt/models/city_boundary.sql
@@ -0,0 +1,4 @@
+select
+ geom
+from
+ {{ source('minneapolis', 'minneapolis_city_boundary') }}
diff --git a/dbt/models/neighborhoods.sql b/dbt/models/neighborhoods.sql
new file mode 100644
index 00000000..9cc596bb
--- /dev/null
+++ b/dbt/models/neighborhoods.sql
@@ -0,0 +1,6 @@
+select
+ bdnum as neighborhood_id
+ , bdname as name_
+ , geom
+from
+ {{ source('minneapolis', 'minneapolis_neighborhoods') }}
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index 904cd6bf..f8a6b1f8 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -25,7 +25,7 @@ with parcels as (
sale_date,
nullif(sale_value, 0) as sale_value,
geom
- from minneapolis.parcels{{ year_ }}hennepin
+ from {{ source('minneapolis', 'parcels' ~ year_ ~ 'hennepin') }}
where upper({{ "city" if year_ < 2018 else "ctu_name" }}) = '{{ city }}'
{% if not loop.last %}union all{% endif %}
{% endfor %}
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 6ef336f6..a6b20449 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -1,3 +1,58 @@
+sources:
+ - name: minneapolis
+ database: cities
+ schema: minneapolis
+ tables:
+ - name: parcels2002hennepin
+ - name: parcels2003hennepin
+ - name: parcels2004hennepin
+ - name: parcels2005hennepin
+ - name: parcels2006hennepin
+ - name: parcels2007hennepin
+ - name: parcels2008hennepin
+ - name: parcels2009hennepin
+ - name: parcels2010hennepin
+ - name: parcels2011hennepin
+ - name: parcels2012hennepin
+ - name: parcels2013hennepin
+ - name: parcels2014hennepin
+ - name: parcels2015hennepin
+ - name: parcels2016hennepin
+ - name: parcels2017hennepin
+ - name: parcels2018hennepin
+ - name: parcels2019hennepin
+ - name: parcels2020hennepin
+ - name: parcels2021hennepin
+ - name: parcels2022hennepin
+ - name: parcels2023hennepin
+ - name: cb_2010_27_bg_500k
+ - name: cb_2010_27_tract_500k
+ - name: cb_2013_27_bg_500k
+ - name: cb_2013_27_tract_500k
+ - name: cb_2014_27_bg_500k
+ - name: cb_2014_27_tract_500k
+ - name: cb_2015_27_bg_500k
+ - name: cb_2015_27_tract_500k
+ - name: cb_2016_27_bg_500k
+ - name: cb_2016_27_tract_500k
+ - name: cb_2017_27_bg_500k
+ - name: cb_2017_27_tract_500k
+ - name: cb_2018_27_bg_500k
+ - name: cb_2018_27_tract_500k
+ - name: cb_2019_27_bg_500k
+ - name: cb_2019_27_tract_500k
+ - name: cb_2020_27_bg_500k
+ - name: cb_2020_27_tract_500k
+ - name: cb_2021_27_bg_500k
+ - name: cb_2021_27_tract_500k
+ - name: cb_2022_27_bg_500k
+ - name: cb_2022_27_tract_500k
+ - name: cb_2023_27_bg_500k
+ - name: cb_2023_27_tract_500k
+ - name: minneapolis_city_boundary
+ - name: minneapolis_neighborhoods
+ - name: minneapolis_wards
+
models:
- name: census_tracts
columns:
@@ -181,3 +236,17 @@ models:
- relationships:
to: ref('parcels')
field: parcel_id
+
+ - name: neighborhoods
+ columns:
+ - name: neighborhood_id
+ data_tests:
+ - not_null
+ - unique
+
+ - name: wards
+ columns:
+ - name: ward_id
+ data_tests:
+ - not_null
+ - unique
diff --git a/dbt/models/wards.sql b/dbt/models/wards.sql
new file mode 100644
index 00000000..67f67211
--- /dev/null
+++ b/dbt/models/wards.sql
@@ -0,0 +1,5 @@
+select
+ bdnum as ward_id
+ , geom
+from
+ {{ source('minneapolis', 'minneapolis_wards') }}
From ed11162721847d05435866462a3d2008541b0de6 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 15:47:10 -0400
Subject: [PATCH 041/142] add remaining sources referencing old data loads
---
dbt/models/acs_block_group.sql | 15 ++++++-
dbt/models/acs_tract.sql | 2 +-
dbt/models/commercial_permits_base.sql | 2 +-
dbt/models/fair_market_rents.sql | 2 +-
dbt/models/residential_permits_base.sql | 2 +-
dbt/models/schema.yml | 56 +++++++++++++++----------
dbt/models/usps_migration.sql | 2 +-
dbt/models/zip_codes_2000.sql | 2 +-
dbt/models/zip_codes_2020.sql | 2 +-
9 files changed, 54 insertions(+), 31 deletions(-)
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
index 382d69d5..24151720 100644
--- a/dbt/models/acs_block_group.sql
+++ b/dbt/models/acs_block_group.sql
@@ -10,14 +10,25 @@ census_block_groups as (
from
{{ ref('census_block_groups') }}
)
-
+, acs_bg as (
+ select
+ statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , year_
+ , name_
+ , value_
+ from
+ {{ source('minneapolis_old', 'acs_bg_raw') }}
+)
select
census_block_group_id
, year_
, name_
, value_
from
- acs_bg_raw
+ acs_bg
inner join census_block_groups using (statefp, countyfp, tractce, blkgrpce)
where
to_date(acs_bg_raw.year_::text , 'YYYY') <@ census_block_groups.valid
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
index f71b7088..fc47e66d 100644
--- a/dbt/models/acs_tract.sql
+++ b/dbt/models/acs_tract.sql
@@ -16,7 +16,7 @@ select
, acs_tract_raw.name_
, acs_tract_raw.value_
from
- acs_tract_raw
+ {{ source('minneapolis_old', 'acs_tract_raw') }}
inner join census_tracts
using (statefp, countyfp, tractce)
where
diff --git a/dbt/models/commercial_permits_base.sql b/dbt/models/commercial_permits_base.sql
index 100bdc32..246a8c03 100644
--- a/dbt/models/commercial_permits_base.sql
+++ b/dbt/models/commercial_permits_base.sql
@@ -12,7 +12,7 @@ select
, address
, geom
from
- commercial_permits_raw
+ {{ source('minneapolis_old', 'commercial_permits_raw') }}
where
co_code = '053'
and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index 605f040c..e42fff62 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -16,7 +16,7 @@ zip_codes as (
{% endfor %}
, fair_market_rents_raw.year_
from
- fair_market_rents_raw
+ {{ source('minneapolis_old', 'fair_market_rents_raw') }}
inner join zip_codes
on zip_codes.zip_code = fair_market_rents_raw.zip
and zip_codes.valid @> to_date(year_::text , 'YYYY')
diff --git a/dbt/models/residential_permits_base.sql b/dbt/models/residential_permits_base.sql
index a5bf8e0b..455a8dde 100644
--- a/dbt/models/residential_permits_base.sql
+++ b/dbt/models/residential_permits_base.sql
@@ -19,7 +19,7 @@ select
, notes
, geom
from
- residential_permits_raw
+ {{ source('minneapolis_old', 'residential_permits_raw') }}
where
co_code = '053'
and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index a6b20449..02746939 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -1,30 +1,20 @@
sources:
+ - name: minneapolis_old
+ database: cities
+ schema: public
+ tables:
+ - name: acs_bg_raw
+ - name: acs_tract_raw
+ - name: commercial_permits_raw
+ - name: fair_market_rents_raw
+ - name: residential_permits_raw
+ - name: usps_migration
+ - name: zip_raw_2000
+ - name: zip_raw_2020
- name: minneapolis
database: cities
schema: minneapolis
tables:
- - name: parcels2002hennepin
- - name: parcels2003hennepin
- - name: parcels2004hennepin
- - name: parcels2005hennepin
- - name: parcels2006hennepin
- - name: parcels2007hennepin
- - name: parcels2008hennepin
- - name: parcels2009hennepin
- - name: parcels2010hennepin
- - name: parcels2011hennepin
- - name: parcels2012hennepin
- - name: parcels2013hennepin
- - name: parcels2014hennepin
- - name: parcels2015hennepin
- - name: parcels2016hennepin
- - name: parcels2017hennepin
- - name: parcels2018hennepin
- - name: parcels2019hennepin
- - name: parcels2020hennepin
- - name: parcels2021hennepin
- - name: parcels2022hennepin
- - name: parcels2023hennepin
- name: cb_2010_27_bg_500k
- name: cb_2010_27_tract_500k
- name: cb_2013_27_bg_500k
@@ -52,6 +42,28 @@ sources:
- name: minneapolis_city_boundary
- name: minneapolis_neighborhoods
- name: minneapolis_wards
+ - name: parcels2002hennepin
+ - name: parcels2003hennepin
+ - name: parcels2004hennepin
+ - name: parcels2005hennepin
+ - name: parcels2006hennepin
+ - name: parcels2007hennepin
+ - name: parcels2008hennepin
+ - name: parcels2009hennepin
+ - name: parcels2010hennepin
+ - name: parcels2011hennepin
+ - name: parcels2012hennepin
+ - name: parcels2013hennepin
+ - name: parcels2014hennepin
+ - name: parcels2015hennepin
+ - name: parcels2016hennepin
+ - name: parcels2017hennepin
+ - name: parcels2018hennepin
+ - name: parcels2019hennepin
+ - name: parcels2020hennepin
+ - name: parcels2021hennepin
+ - name: parcels2022hennepin
+ - name: parcels2023hennepin
models:
- name: census_tracts
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index 031446ab..3a140eac 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -3,7 +3,7 @@
with process_date as (
select to_date(yyyymm, 'YYYYMM') as date_, *
- from usps_migration_raw
+ from {{ source('minneapolis_old', 'usps_migration') }}
)
, zip_codes as (
select
diff --git a/dbt/models/zip_codes_2000.sql b/dbt/models/zip_codes_2000.sql
index fd9219ee..d6b18b05 100644
--- a/dbt/models/zip_codes_2000.sql
+++ b/dbt/models/zip_codes_2000.sql
@@ -2,5 +2,5 @@ select
zcta as zip_code,
ST_Union(geom) as geom
from
- zip_raw_2000
+ {{ source('minneapolis_old', 'zip_raw_2000') }}
group by zcta
diff --git a/dbt/models/zip_codes_2020.sql b/dbt/models/zip_codes_2020.sql
index 2bbc29e7..038ac2c9 100644
--- a/dbt/models/zip_codes_2020.sql
+++ b/dbt/models/zip_codes_2020.sql
@@ -1,4 +1,4 @@
select
zcta5ce20 as zip_code,
geom
-from zip_raw_2020
+from {{ source('minneapolis_old', 'zip_raw_2020') }}
From 9d6863449a39aec29669f691c734ee96ffc6ad7d Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 16:35:14 -0400
Subject: [PATCH 042/142] create model for parking data
---
dbt/models/parking_base.sql | 30 ++++++++++++++++++++++++++++++
dbt/models/schema.yml | 1 +
2 files changed, 31 insertions(+)
create mode 100644 dbt/models/parking_base.sql
diff --git a/dbt/models/parking_base.sql b/dbt/models/parking_base.sql
new file mode 100644
index 00000000..6f4e6cdb
--- /dev/null
+++ b/dbt/models/parking_base.sql
@@ -0,0 +1,30 @@
+with
+ parking_raw as ( -- raw columns of the parking source table, selected under their original names
+ select
+ ogc_fid
+ , "date"
+ , "project na" -- quoted names contain spaces and are all 10 characters long; presumably truncated by the source file format (TODO confirm)
+ , address
+ , neighborho
+ , ward
+ , "downtown y"
+ , "housing un"
+ , "car parkin"
+ , "bike parki"
+ , "year"
+ , geom
+ from {{ source('minneapolis', 'parking_parcels') }}
+ )
+select
+ ogc_fid as parking_id -- the source row id doubles as the model's key
+ , to_date("year" || '-' || "date", 'YYYY-DD-Mon') as date_ -- "date" is parsed as day plus month abbreviation (DD-Mon), with "year" prepended
+ , "project na" as project_name
+ , address
+ , neighborho as neighborhood
+ , ward
+ , "downtown y" = 'Y' as is_downtown -- boolean: true only when the flag is exactly 'Y' (null stays null)
+ , "housing un" as num_housing_units
+ , "car parkin" as num_car_parking_spaces
+ , "bike parki" as num_bike_parking_spaces
+ , geom
+from parking_raw
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 02746939..7b11f865 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -64,6 +64,7 @@ sources:
- name: parcels2021hennepin
- name: parcels2022hennepin
- name: parcels2023hennepin
+ - name: parking_parcels
models:
- name: census_tracts
From b76e6b775970036194a2993921a76fd09ac0cebd Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 17:11:45 -0400
Subject: [PATCH 043/142] fix source
---
dbt/models/schema.yml | 2 +-
dbt/models/usps_migration.sql | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 7b11f865..fb829015 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -8,7 +8,7 @@ sources:
- name: commercial_permits_raw
- name: fair_market_rents_raw
- name: residential_permits_raw
- - name: usps_migration
+ - name: usps_migration_raw
- name: zip_raw_2000
- name: zip_raw_2020
- name: minneapolis
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index 3a140eac..4fa46045 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -3,7 +3,7 @@
with process_date as (
select to_date(yyyymm, 'YYYYMM') as date_, *
- from {{ source('minneapolis_old', 'usps_migration') }}
+ from {{ source('minneapolis_old', 'usps_migration_raw') }}
)
, zip_codes as (
select
From c450b7d5c8e79e58ec2ccbfd95dceebfc0543aa9 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 17:12:12 -0400
Subject: [PATCH 044/142] add parking to parcel mapping
---
dbt/models/{parking_base.sql => parking.sql} | 0
dbt/models/parking_to_parcels.sql | 31 ++++++++++++++++++++
2 files changed, 31 insertions(+)
rename dbt/models/{parking_base.sql => parking.sql} (100%)
create mode 100644 dbt/models/parking_to_parcels.sql
diff --git a/dbt/models/parking_base.sql b/dbt/models/parking.sql
similarity index 100%
rename from dbt/models/parking_base.sql
rename to dbt/models/parking.sql
diff --git a/dbt/models/parking_to_parcels.sql b/dbt/models/parking_to_parcels.sql
new file mode 100644
index 00000000..994ef232
--- /dev/null
+++ b/dbt/models/parking_to_parcels.sql
@@ -0,0 +1,31 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['parking_id']},
+ {'columns': ['parcel_id']}
+ ]
+ )
+}}
+
+with
+ parking as ( -- child rows handed to tag_regions; exposes id, valid, geom (presumably the columns the macro expects)
+ select
+ parking_id as id
+ , daterange(date_, date_, '[]') as valid
+ , ST_Transform(geom, 26915) as geom
+ from {{ ref('parking_base') }}
+ )
+ , parcels as (
+ select
+ parcel_id as id -- parent rows use the same id / valid / geom column names as the parking CTE
+ , valid
+ , geom
+ from {{ ref('parcels_base') }}
+ )
+select -- rename the macro's generic child/parent ids back to domain names
+ child_id as parking_id -- child = first argument of tag_regions (the parking CTE)
+ , parent_id as parcel_id -- parent = second argument of tag_regions (the parcels CTE)
+ , valid
+ , type_ -- NOTE(review): presumably records how the pair was matched; confirm in the tag_regions macro
+from {{ tag_regions("parking", "parcels") }}
From 516c56a59139715f2a76b99896770c61f28da1b2 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 17:12:30 -0400
Subject: [PATCH 045/142] add indexes to improve perf
---
dbt/models/parcels_to_census_block_groups.sql | 2 +-
dbt/models/parcels_to_zip_codes.sql | 6 +++++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/dbt/models/parcels_to_census_block_groups.sql b/dbt/models/parcels_to_census_block_groups.sql
index 9215c72d..07caa1fb 100644
--- a/dbt/models/parcels_to_census_block_groups.sql
+++ b/dbt/models/parcels_to_census_block_groups.sql
@@ -2,7 +2,7 @@
config(
materialized='table',
indexes = [
- {'columns': ['parcel_id']},
+ {'columns': ['parcel_id'], 'unique': true},
{'columns': ['census_block_group_id']}
]
)
diff --git a/dbt/models/parcels_to_zip_codes.sql b/dbt/models/parcels_to_zip_codes.sql
index f97afcad..2519888a 100644
--- a/dbt/models/parcels_to_zip_codes.sql
+++ b/dbt/models/parcels_to_zip_codes.sql
@@ -1,6 +1,10 @@
{{
config(
- materialized='table'
+ materialized='table',
+ indexes = [
+ {'columns': ['parcel_id'], 'unique': true},
+ {'columns': ['zip_code_id']}
+ ]
)
}}
From 310a6da78ff4ce9ddad4dde4915bba7d995577a5 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 17:12:40 -0400
Subject: [PATCH 046/142] fix source
---
dbt/models/acs_block_group.sql | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
index 24151720..2e85556e 100644
--- a/dbt/models/acs_block_group.sql
+++ b/dbt/models/acs_block_group.sql
@@ -23,12 +23,12 @@ census_block_groups as (
{{ source('minneapolis_old', 'acs_bg_raw') }}
)
select
- census_block_group_id
- , year_
- , name_
- , value_
+ census_block_groups.census_block_group_id
+ , acs_bg.year_
+ , acs_bg.name_
+ , acs_bg.value_
from
acs_bg
inner join census_block_groups using (statefp, countyfp, tractce, blkgrpce)
where
- to_date(acs_bg_raw.year_::text , 'YYYY') <@ census_block_groups.valid
+ to_date(acs_bg.year_::text , 'YYYY') <@ census_block_groups.valid
From e0f9760a00a5b5ccef38026a4422ff597e98eb8e Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 17:33:13 -0400
Subject: [PATCH 047/142] more perf tuning
---
dbt/models/commercial_permits.sql | 39 +++++++++++-----
dbt/models/commercial_permits_base.sql | 18 --------
dbt/models/commercial_permits_to_parcels.sql | 14 +++++-
dbt/models/parking_to_parcels.sql | 2 +-
dbt/models/residential_permits.sql | 44 ++++++++++++++-----
dbt/models/residential_permits_base.sql | 25 -----------
dbt/models/residential_permits_to_parcels.sql | 14 +++++-
dbt/models/schema.yml | 24 ++++++++--
8 files changed, 106 insertions(+), 74 deletions(-)
delete mode 100644 dbt/models/commercial_permits_base.sql
delete mode 100644 dbt/models/residential_permits_base.sql
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
index d3eb1f74..d3adfbe0 100644
--- a/dbt/models/commercial_permits.sql
+++ b/dbt/models/commercial_permits.sql
@@ -1,13 +1,28 @@
-with
-commercial_permits_to_parcels as (
- select
- commercial_permit_id
- , parcel_id
- from {{ ref("commercial_permits_to_parcels") }}
-)
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['commercial_permit_id'], 'unique': true},
+ {'columns': ['geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
select
- {{ dbt_utils.star(ref('commercial_permits_base')) }}
- , parcel_id
-from
- {{ ref('commercial_permits_base') }}
- left join commercial_permits_to_parcels using (commercial_permit_id)
+ sde_id as commercial_permit_id
+ , year::int as year_
+ , nonres_gro as group_
+ , nonres_sub as subgroup
+ , nonres_typ as type_category
+ , bldg_name as building_name
+ , bldg_desc as building_description
+ , permit_typ as permit_type
+ , permit_val as permit_value
+ , sqf as square_feet
+ , address
+ , geom
+ from
+ {{ source('minneapolis_old', 'commercial_permits_raw') }}
+ where
+ co_code = '053'
+ and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/commercial_permits_base.sql b/dbt/models/commercial_permits_base.sql
deleted file mode 100644
index 246a8c03..00000000
--- a/dbt/models/commercial_permits_base.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-select
- sde_id as commercial_permit_id
- , year::int as year_
- , nonres_gro as group_
- , nonres_sub as subgroup
- , nonres_typ as type_category
- , bldg_name as building_name
- , bldg_desc as building_description
- , permit_typ as permit_type
- , permit_val as permit_value
- , sqf as square_feet
- , address
- , geom
- from
- {{ source('minneapolis_old', 'commercial_permits_raw') }}
- where
- co_code = '053'
- and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/commercial_permits_to_parcels.sql b/dbt/models/commercial_permits_to_parcels.sql
index 7c31e7ca..de1df444 100644
--- a/dbt/models/commercial_permits_to_parcels.sql
+++ b/dbt/models/commercial_permits_to_parcels.sql
@@ -1,17 +1,27 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['commercial_permit_id']},
+ {'columns': ['parcel_id']}
+ ]
+ )
+}}
+
with
commercial_permits as (
select
commercial_permit_id as id
, daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid
, geom
- from {{ ref("commercial_permits_base") }}
+ from {{ ref("commercial_permits") }}
)
, parcels as (
select
parcel_id as id
, valid
, geom
- from {{ ref("parcels") }}
+ from {{ ref("parcels_base") }}
)
select
child_id as commercial_permit_id
diff --git a/dbt/models/parking_to_parcels.sql b/dbt/models/parking_to_parcels.sql
index 994ef232..21c20edc 100644
--- a/dbt/models/parking_to_parcels.sql
+++ b/dbt/models/parking_to_parcels.sql
@@ -14,7 +14,7 @@ with
parking_id as id
, daterange(date_, date_, '[]') as valid
, ST_Transform(geom, 26915) as geom
- from {{ ref('parking_base') }}
+ from {{ ref('parking') }}
)
, parcels as (
select
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index 869e41d9..3a4841bc 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -1,13 +1,35 @@
-with
-residential_permits_to_parcels as (
- select
- residential_permit_id
- , parcel_id
- from {{ ref("residential_permits_to_parcels") }}
-)
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['residential_permit_id'], 'unique': true},
+ {'columns': ['geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
select
- {{ dbt_utils.star(ref('residential_permits_base')) }}
- , parcel_id
+ sde_id as residential_permit_id
+ , year::int as year_
+ , tenure
+ , housing_ty as housing_type
+ , res_permit as permit_type
+ , address
+ , name as name_
+ , buildings as num_buildings
+ , units as num_units
+ , age_restri as num_age_restricted_units
+ , memory_car as num_memory_care_units
+ , assisted as num_assisted_living_units
+ , com_off_re = 'Y' as is_commercial_and_residential
+ , sqf as square_feet
+ , public_fun = 'Y' as is_public_funded
+ , permit_val as permit_value
+ , community_ as community_designation
+ , notes
+ , geom
from
- {{ ref('residential_permits_base') }}
- left join residential_permits_to_parcels using (residential_permit_id)
+ {{ source('minneapolis_old', 'residential_permits_raw') }}
+where
+ co_code = '053'
+ and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/residential_permits_base.sql b/dbt/models/residential_permits_base.sql
deleted file mode 100644
index 455a8dde..00000000
--- a/dbt/models/residential_permits_base.sql
+++ /dev/null
@@ -1,25 +0,0 @@
-select
- sde_id as residential_permit_id
- , year::int as year_
- , tenure
- , housing_ty as housing_type
- , res_permit as permit_type
- , address
- , name as name_
- , buildings as num_buildings
- , units as num_units
- , age_restri as num_age_restricted_units
- , memory_car as num_memory_care_units
- , assisted as num_assisted_living_units
- , com_off_re = 'Y' as is_commercial_and_residential
- , sqf as square_feet
- , public_fun = 'Y' as is_public_funded
- , permit_val as permit_value
- , community_ as community_designation
- , notes
- , geom
-from
- {{ source('minneapolis_old', 'residential_permits_raw') }}
-where
- co_code = '053'
- and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/residential_permits_to_parcels.sql b/dbt/models/residential_permits_to_parcels.sql
index 2c90dc32..7f9ea59c 100644
--- a/dbt/models/residential_permits_to_parcels.sql
+++ b/dbt/models/residential_permits_to_parcels.sql
@@ -1,17 +1,27 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['residential_permit_id']},
+ {'columns': ['parcel_id']}
+ ]
+ )
+}}
+
with
residential_permits as (
select
residential_permit_id as id
, daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid
, geom
- from {{ ref("residential_permits_base") }}
+ from {{ ref("residential_permits") }}
)
, parcels as (
select
parcel_id as id
, valid
, geom
- from {{ ref("parcels") }}
+ from {{ ref("parcels_base") }}
)
select
child_id as residential_permit_id
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index fb829015..7957c54b 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -230,6 +230,22 @@ models:
data_tests:
- not_null
- unique
+
+ - name: residential_permits
+ columns:
+ - name: residential_permit_id
+ data_tests:
+ - not_null
+ - unique
+
+ - name: residential_permits_to_parcels
+ columns:
+ - name: residential_permit_id
+ data_tests:
+ - not_null
+ - relationships:
+ to: ref('residential_permits')
+ field: residential_permit_id
- name: parcel_id
data_tests:
- not_null
@@ -237,12 +253,14 @@ models:
to: ref('parcels')
field: parcel_id
- - name: residential_permits
+ - name: commercial_permits_to_parcels
columns:
- - name: residential_permit_id
+ - name: commercial_permit_id
data_tests:
- not_null
- - unique
+ - relationships:
+ to: ref('commercial_permits')
+ field: commercial_permit_id
- name: parcel_id
data_tests:
- not_null
From 3ae366e008fec9ea30009d70846548f9da7e8f2e Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 14 Aug 2024 18:29:01 -0400
Subject: [PATCH 048/142] add postgrest config and example schema
---
api/postgrest.conf | 107 +++++++++++++++++++++++++++++++++++++++++++++
api/schema.sql | 33 ++++++++++++++
2 files changed, 140 insertions(+)
create mode 100644 api/postgrest.conf
create mode 100644 api/schema.sql
diff --git a/api/postgrest.conf b/api/postgrest.conf
new file mode 100644
index 00000000..097fba01
--- /dev/null
+++ b/api/postgrest.conf
@@ -0,0 +1,107 @@
+## Admin server used for checks. It's disabled by default unless a port is specified.
+# admin-server-port = 3001
+
+## The database role to use when no client authentication is provided
+db-anon-role = "web_anon"
+
+## Notification channel for reloading the schema cache
+db-channel = "pgrst"
+
+## Enable or disable the notification channel
+db-channel-enabled = true
+
+## Enable in-database configuration
+db-config = true
+
+## Function for in-database configuration
+## db-pre-config = "postgrest.pre_config"
+
+## Extra schemas to add to the search_path of every request
+db-extra-search-path = "public"
+
+## Limit rows in response
+# db-max-rows = 1000
+
+## Allow getting the EXPLAIN plan through the `Accept: application/vnd.pgrst.plan` header
+# db-plan-enabled = false
+
+## Number of open connections in the pool
+db-pool = 10
+
+## Time in seconds to wait to acquire a slot from the connection pool
+# db-pool-acquisition-timeout = 10
+
+## Time in seconds after which to recycle pool connections
+# db-pool-max-lifetime = 1800
+
+## Time in seconds after which to recycle unused pool connections
+# db-pool-max-idletime = 30
+
+## Allow automatic database connection retrying
+# db-pool-automatic-recovery = true
+
+## Stored proc to exec immediately after auth
+# db-pre-request = "stored_proc_name"
+
+## Enable or disable prepared statements. Disabling is only necessary when behind a connection pooler.
+## When disabled, statements will be parametrized but won't be prepared.
+db-prepared-statements = true
+
+## The name of the database schema to expose to REST clients
+db-schemas = "api"
+
+## How to terminate database transactions
+## Possible values are:
+## commit (default)
+## Transaction is always committed, this cannot be overridden
+## commit-allow-override
+## Transaction is committed, but can be overridden with Prefer tx=rollback header
+## rollback
+## Transaction is always rolled back, this cannot be overridden
+## rollback-allow-override
+## Transaction is rolled back, but can be overridden with Prefer tx=commit header
+db-tx-end = "commit"
+
+## The standard connection URI format, documented at
+## https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING
+db-uri = "postgresql://postgres@34.123.100.76:5432/cities"
+
+# jwt-aud = "your_audience_claim"
+
+## Jspath to the role claim key
+jwt-role-claim-key = ".role"
+
+## Choose a secret, JSON Web Key (or set) to enable JWT auth
+## (use "@filename" to load from separate file)
+# jwt-secret = "secret_with_at_least_32_characters"
+jwt-secret-is-base64 = false
+
+## Enables the JWT cache and sets its max lifetime; 0 disables caching
+# jwt-cache-max-lifetime = 0
+
+## Logging level, the admitted values are: crit, error, warn, info and debug.
+log-level = "error"
+
+## Determine if the OpenAPI output should follow or ignore role privileges or be disabled entirely.
+## Admitted values: follow-privileges, ignore-privileges, disabled
+openapi-mode = "follow-privileges"
+
+## Base url for the OpenAPI output
+openapi-server-proxy-uri = ""
+
+## Configurable CORS origins
+# server-cors-allowed-origins = ""
+
+server-host = "!4"
+server-port = 3000
+
+## Allow getting the request-response timing information through the `Server-Timing` header
+server-timing-enabled = false
+
+## Unix socket location
+## if specified it takes precedence over server-port
+# server-unix-socket = "/tmp/pgrst.sock"
+
+## Unix socket file mode
+## When none is provided, 660 is applied by default
+# server-unix-socket-mode = "660"
diff --git a/api/schema.sql b/api/schema.sql
new file mode 100644
index 00000000..7167ade7
--- /dev/null
+++ b/api/schema.sql
@@ -0,0 +1,33 @@
+drop schema if exists api cascade;
+
+create schema api;
+
+create view api.parcels as (
+ select * from dbt.parcels
+);
+
+create view api.census_tracts as (
+ select * from dbt.census_tracts
+);
+
+create view api.census_block_groups as (
+ select * from dbt.census_block_groups
+);
+
+create view api.zip_codes as (
+ select * from dbt.zip_codes
+);
+
+create view api.emv_in_downtown_west as (
+ select dbt.parcels.pin, dbt.parcels.emv_land
+ from dbt.parcels
+ inner join dbt.neighborhoods
+ on st_intersects(st_transform(dbt.parcels.geom, 3857), dbt.neighborhoods.geom)
+ where dbt.neighborhoods.name_ = 'Downtown West'
+);
+
+drop role if exists web_anon;
+create role web_anon nologin;
+grant usage on schema api to web_anon;
+grant select on all tables in schema api to web_anon;
+grant web_anon to postgres;
From 2ae47247e7c8c83ad20582e482541709811d949a Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 16 Aug 2024 12:50:45 -0400
Subject: [PATCH 049/142] import new sources and begin converting
---
dbt/models/census_block_groups.sql | 2 +-
dbt/models/census_tracts.sql | 2 +-
dbt/models/city_boundary.sql | 2 +-
dbt/models/commercial_permits.sql | 2 +-
dbt/models/fair_market_rents.sql | 21 +++--
dbt/models/neighborhoods.sql | 2 +-
dbt/models/parcels_base.sql | 2 +-
dbt/models/residential_permits.sql | 2 +-
dbt/models/schema.yml | 133 ++++++++++++++++++-----------
dbt/models/wards.sql | 2 +-
dbt/package-lock.yml | 4 +-
dbt/packages.yml | 2 +
12 files changed, 110 insertions(+), 66 deletions(-)
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
index 6c0c31ce..a52c2f47 100644
--- a/dbt/models/census_block_groups.sql
+++ b/dbt/models/census_block_groups.sql
@@ -39,7 +39,7 @@ census_block_groups as (
{% endif %}
, geom
from
- {{ source('minneapolis', 'cb_' ~ year_ ~ '_27_bg_500k') }}
+ {{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_bg_500k') }}
{% if not loop.last %}union all{% endif %}
{% endfor %}
),
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 05a79469..634e18ac 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -16,7 +16,7 @@ select
{% endif %}
, geom
from
- {{ source('minneapolis', 'cb_' ~ year_ ~ '_27_tract_500k') }}
+ {{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_tract_500k') }}
{% if not loop.last %}union all{% endif %}
{% endfor %}
)
diff --git a/dbt/models/city_boundary.sql b/dbt/models/city_boundary.sql
index fe44dbe0..b34a22ec 100644
--- a/dbt/models/city_boundary.sql
+++ b/dbt/models/city_boundary.sql
@@ -1,4 +1,4 @@
select
geom
from
- {{ source('minneapolis', 'minneapolis_city_boundary') }}
+ {{ source('minneapolis', 'city_boundary_minneapolis') }}
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
index d3adfbe0..349b6c8e 100644
--- a/dbt/models/commercial_permits.sql
+++ b/dbt/models/commercial_permits.sql
@@ -22,7 +22,7 @@ select
, address
, geom
from
- {{ source('minneapolis_old', 'commercial_permits_raw') }}
+ {{ source('minneapolis', 'commercial_permits_nonresidentialconstruction') }}
where
co_code = '053'
and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index e42fff62..9927b36f 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -8,24 +8,35 @@ zip_codes as (
, valid
from {{ ref('zip_codes') }}
)
+, fair_market_rents as (
+ select
+ zip_code
+ , rent_br0
+ , rent_br1
+ , rent_br2
+ , rent_br3
+ , rent_br4
+ , year_
+ from {{ ref('fair_market_rents_union') }}
+)
, fmr_zip as (
select
zip_codes.zip_code_id
{% for bedroom in num_bedrooms %}
- , fair_market_rents_raw.rent_br{{ bedroom }}
+ , fair_market_rents.rent_br{{ bedroom }}
{% endfor %}
- , fair_market_rents_raw.year_
+ , fair_market_rents.year_
from
- {{ source('minneapolis_old', 'fair_market_rents_raw') }}
+ fair_market_rents
inner join zip_codes
- on zip_codes.zip_code = fair_market_rents_raw.zip
+ on zip_codes.zip_code = fair_market_rents.zip_code
and zip_codes.valid @> to_date(year_::text , 'YYYY')
)
{% for bedroom in num_bedrooms %}
select
zip_code_id
, rent_br{{ bedroom }} as rent
- , 0 as num_bedrooms
+ , {{ bedroom }} as num_bedrooms
, year_
from fmr_zip
{% if not loop.last %} union all {% endif %}
diff --git a/dbt/models/neighborhoods.sql b/dbt/models/neighborhoods.sql
index 9cc596bb..b031cf08 100644
--- a/dbt/models/neighborhoods.sql
+++ b/dbt/models/neighborhoods.sql
@@ -3,4 +3,4 @@ select
, bdname as name_
, geom
from
- {{ source('minneapolis', 'minneapolis_neighborhoods') }}
+ {{ source('minneapolis', 'neighborhoods_minneapolis') }}
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index f8a6b1f8..3671a586 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -25,7 +25,7 @@ with parcels as (
sale_date,
nullif(sale_value, 0) as sale_value,
geom
- from {{ source('minneapolis', 'parcels' ~ year_ ~ 'hennepin') }}
+ from {{ source('minneapolis', 'parcels_shp_plan_regonal_' ~ year_ ~ '_parcels' ~ year_ ~ 'hennepin') }}
where upper({{ "city" if year_ < 2018 else "ctu_name" }}) = '{{ city }}'
{% if not loop.last %}union all{% endif %}
{% endfor %}
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index 3a4841bc..95ee9a9d 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -29,7 +29,7 @@ select
, notes
, geom
from
- {{ source('minneapolis_old', 'residential_permits_raw') }}
+ {{ source('minneapolis', 'residential_permits_residentialpermits') }}
where
co_code = '053'
and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 7957c54b..3e1e334b 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -5,9 +5,6 @@ sources:
tables:
- name: acs_bg_raw
- name: acs_tract_raw
- - name: commercial_permits_raw
- - name: fair_market_rents_raw
- - name: residential_permits_raw
- name: usps_migration_raw
- name: zip_raw_2000
- name: zip_raw_2020
@@ -15,55 +12,87 @@ sources:
database: cities
schema: minneapolis
tables:
- - name: cb_2010_27_bg_500k
- - name: cb_2010_27_tract_500k
- - name: cb_2013_27_bg_500k
- - name: cb_2013_27_tract_500k
- - name: cb_2014_27_bg_500k
- - name: cb_2014_27_tract_500k
- - name: cb_2015_27_bg_500k
- - name: cb_2015_27_tract_500k
- - name: cb_2016_27_bg_500k
- - name: cb_2016_27_tract_500k
- - name: cb_2017_27_bg_500k
- - name: cb_2017_27_tract_500k
- - name: cb_2018_27_bg_500k
- - name: cb_2018_27_tract_500k
- - name: cb_2019_27_bg_500k
- - name: cb_2019_27_tract_500k
- - name: cb_2020_27_bg_500k
- - name: cb_2020_27_tract_500k
- - name: cb_2021_27_bg_500k
- - name: cb_2021_27_tract_500k
- - name: cb_2022_27_bg_500k
- - name: cb_2022_27_tract_500k
- - name: cb_2023_27_bg_500k
- - name: cb_2023_27_tract_500k
- - name: minneapolis_city_boundary
- - name: minneapolis_neighborhoods
- - name: minneapolis_wards
- - name: parcels2002hennepin
- - name: parcels2003hennepin
- - name: parcels2004hennepin
- - name: parcels2005hennepin
- - name: parcels2006hennepin
- - name: parcels2007hennepin
- - name: parcels2008hennepin
- - name: parcels2009hennepin
- - name: parcels2010hennepin
- - name: parcels2011hennepin
- - name: parcels2012hennepin
- - name: parcels2013hennepin
- - name: parcels2014hennepin
- - name: parcels2015hennepin
- - name: parcels2016hennepin
- - name: parcels2017hennepin
- - name: parcels2018hennepin
- - name: parcels2019hennepin
- - name: parcels2020hennepin
- - name: parcels2021hennepin
- - name: parcels2022hennepin
- - name: parcels2023hennepin
+ - name: residential_permits_residentialpermits
+ - name: commercial_permits_nonresidentialconstruction
+ - name: high_frequency_transit_2015_freq_350_ft_buffer
+ - name: high_frequency_transit_2015_freq_lines
+ - name: high_frequency_transit_2015_freq_quarter_and_half_mile_buffer
+ - name: high_frequency_transit_2015_freq_rail_stops
+ - name: high_frequency_transit_2016_freq_350_ft_buffer
+ - name: high_frequency_transit_2016_freq_lines
+ - name: high_frequency_transit_2016_freq_quarter_and_half_mile_buffer
+ - name: fair_market_rents_2012
+ - name: fair_market_rents_2013
+ - name: fair_market_rents_2014
+ - name: fair_market_rents_2015
+ - name: fair_market_rents_2016
+ - name: fair_market_rents_2017
+ - name: fair_market_rents_2018
+ - name: fair_market_rents_2019
+ - name: fair_market_rents_2020
+ - name: fair_market_rents_2021
+ - name: fair_market_rents_2022
+ - name: fair_market_rents_2023
+ - name: fair_market_rents_2024
+ - name: downtown
+ - name: university
+ - name: usps_y2018
+ - name: usps_y2019
+ - name: usps_y2020
+ - name: usps_y2021
+ - name: usps_y2022
+ - name: usps_y2023
+ - name: zip_codes_tl_2020_us_zcta510
+ - name: zip_codes_tl_2020_us_zcta520
+ - name: census_cb_2010_27_bg_500k
+ - name: census_cb_2010_27_tract_500k
+ - name: census_cb_2013_27_bg_500k
+ - name: census_cb_2013_27_tract_500k
+ - name: census_cb_2014_27_bg_500k
+ - name: census_cb_2014_27_tract_500k
+ - name: census_cb_2015_27_bg_500k
+ - name: census_cb_2015_27_tract_500k
+ - name: census_cb_2016_27_bg_500k
+ - name: census_cb_2016_27_tract_500k
+ - name: census_cb_2017_27_bg_500k
+ - name: census_cb_2017_27_tract_500k
+ - name: census_cb_2018_27_bg_500k
+ - name: census_cb_2018_27_tract_500k
+ - name: census_cb_2019_27_bg_500k
+ - name: census_cb_2019_27_tract_500k
+ - name: census_cb_2020_27_bg_500k
+ - name: census_cb_2020_27_tract_500k
+ - name: census_cb_2021_27_bg_500k
+ - name: census_cb_2021_27_tract_500k
+ - name: census_cb_2022_27_bg_500k
+ - name: census_cb_2022_27_tract_500k
+ - name: census_cb_2023_27_bg_500k
+ - name: census_cb_2023_27_tract_500k
+ - name: city_boundary_minneapolis
+ - name: neighborhoods_minneapolis
+ - name: wards_minneapolis
+ - name: parcels_shp_plan_regonal_2002_parcels2002hennepin
+ - name: parcels_shp_plan_regonal_2003_parcels2003hennepin
+ - name: parcels_shp_plan_regonal_2004_parcels2004hennepin
+ - name: parcels_shp_plan_regonal_2005_parcels2005hennepin
+ - name: parcels_shp_plan_regonal_2006_parcels2006hennepin
+ - name: parcels_shp_plan_regonal_2007_parcels2007hennepin
+ - name: parcels_shp_plan_regonal_2008_parcels2008hennepin
+ - name: parcels_shp_plan_regonal_2009_parcels2009hennepin
+ - name: parcels_shp_plan_regonal_2010_parcels2010hennepin
+ - name: parcels_shp_plan_regonal_2011_parcels2011hennepin
+ - name: parcels_shp_plan_regonal_2012_parcels2012hennepin
+ - name: parcels_shp_plan_regonal_2013_parcels2013hennepin
+ - name: parcels_shp_plan_regonal_2014_parcels2014hennepin
+ - name: parcels_shp_plan_regonal_2015_parcels2015hennepin
+ - name: parcels_shp_plan_regonal_2016_parcels2016hennepin
+ - name: parcels_shp_plan_regonal_2017_parcels2017hennepin
+ - name: parcels_shp_plan_regonal_2018_parcels2018hennepin
+ - name: parcels_shp_plan_regonal_2019_parcels2019hennepin
+ - name: parcels_shp_plan_regonal_2020_parcels2020hennepin
+ - name: parcels_shp_plan_regonal_2021_parcels2021hennepin
+ - name: parcels_shp_plan_regonal_2022_parcels2022hennepin
+ - name: parcels_shp_plan_regonal_2023_parcels2023hennepin
- name: parking_parcels
models:
diff --git a/dbt/models/wards.sql b/dbt/models/wards.sql
index 67f67211..d809d3ad 100644
--- a/dbt/models/wards.sql
+++ b/dbt/models/wards.sql
@@ -2,4 +2,4 @@ select
bdnum as ward_id
, geom
from
- {{ source('minneapolis', 'minneapolis_wards') }}
+ {{ source('minneapolis', 'wards_minneapolis') }}
diff --git a/dbt/package-lock.yml b/dbt/package-lock.yml
index 5e486a0d..5231cc02 100644
--- a/dbt/package-lock.yml
+++ b/dbt/package-lock.yml
@@ -1,4 +1,6 @@
packages:
- package: dbt-labs/dbt_utils
version: 1.2.0
-sha1_hash: d4f259856543b0ef301e0b3b0bbc94ccb6b12a54
+ - package: dbt-labs/codegen
+ version: 0.12.1
+sha1_hash: 37aba29ba147b9afff74716d974b60c54b7f1a1d
diff --git a/dbt/packages.yml b/dbt/packages.yml
index b9609fcb..27ef0473 100644
--- a/dbt/packages.yml
+++ b/dbt/packages.yml
@@ -1,3 +1,5 @@
packages:
- package: dbt-labs/dbt_utils
version: 1.2.0
+ - package: dbt-labs/codegen
+ version: 0.12.1
From a71845046f006cc7bd7126151560b648df933d21 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 16 Aug 2024 12:58:05 -0400
Subject: [PATCH 050/142] more conversion
---
dbt/models/fair_market_rents_union.sql | 15 +++++++++++++++
dbt/models/schema.yml | 1 -
dbt/models/usps_migration.sql | 4 ++--
dbt/models/usps_migration_union.sql | 23 +++++++++++++++++++++++
4 files changed, 40 insertions(+), 3 deletions(-)
create mode 100644 dbt/models/fair_market_rents_union.sql
create mode 100644 dbt/models/usps_migration_union.sql
diff --git a/dbt/models/fair_market_rents_union.sql b/dbt/models/fair_market_rents_union.sql
new file mode 100644
index 00000000..696d0a34
--- /dev/null
+++ b/dbt/models/fair_market_rents_union.sql
@@ -0,0 +1,15 @@
+{% set years = range(2012, 2025) %}
+
+{% for year_ in years %}
+select
+ zip_code
+ , rent_br0
+ , rent_br1
+ , rent_br2
+ , rent_br3
+ , rent_br4
+ , year as year_
+from
+ {{ source('minneapolis', 'fair_market_rents_' ~ year_) }}
+{% if not loop.last %} union all {% endif %}
+{% endfor %}
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 3e1e334b..a1f5753f 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -5,7 +5,6 @@ sources:
tables:
- name: acs_bg_raw
- name: acs_tract_raw
- - name: usps_migration_raw
- name: zip_raw_2000
- name: zip_raw_2020
- name: minneapolis
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index 4fa46045..6a5954e3 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -2,8 +2,8 @@
{% set usps_migration_flow_directions = ['from', 'to'] %}
with process_date as (
- select to_date(yyyymm, 'YYYYMM') as date_, *
- from {{ source('minneapolis_old', 'usps_migration_raw') }}
+ select to_date(yyyy_mm, 'YYYYMM') as date_, *
+ from {{ ref('usps_migration_union') }}
)
, zip_codes as (
select
diff --git a/dbt/models/usps_migration_union.sql b/dbt/models/usps_migration_union.sql
new file mode 100644
index 00000000..e1e790e7
--- /dev/null
+++ b/dbt/models/usps_migration_union.sql
@@ -0,0 +1,23 @@
+{% set years = range(2018, 2024) %}
+
+{% for year_ in years %}
+ select
+ "YYYYMM" as yyyy_mm
+ , "ZIPCODE" as zip_code
+ , "CITY" as city
+ , "STATE" as state_
+ , "TOTAL_FROM_ZIP" as total_from_zip
+ , "TOTAL_BUSINESS" as total_from_zip_business
+ , "TOTAL_FAMILY" as total_from_zip_family
+ , "TOTAL_INDIVIDUAL" as total_from_zip_individual
+ , "TOTAL_PERM" as total_from_zip_perm
+ , "TOTAL_TEMP" as total_from_zip_temp
+ , "TOTAL_TO_ZIP" as total_to_zip
+ , "TOTAL_BUSINESS_dup" as total_to_zip_business
+ , "TOTAL_FAMILY_dup" as total_to_zip_family
+ , "TOTAL_INDIVIDUAL_dup" as total_to_zip_individual
+ , "TOTAL_PERM_dup" as total_to_zip_perm
+ , "TOTAL_TEMP_dup" as total_to_zip_temp
+ from {{ source('minneapolis', 'usps_y' ~ year_) }}
+{% if not loop.last %} union all {% endif %}
+{% endfor %}
From 15827e31e22f749c47af634b81c395b46635c120 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 16 Aug 2024 13:38:10 -0400
Subject: [PATCH 051/142] more conversion
---
dbt/models/acs_block_group.sql | 9 ++++
dbt/models/acs_tract.sql | 10 +++-
dbt/models/all_zip_codes.sql | 20 +++++++
dbt/models/all_zip_codes_2010.sql | 5 ++
dbt/models/all_zip_codes_2020.sql | 4 ++
dbt/models/parcels_base.sql | 2 +-
dbt/models/schema.yml | 13 +++--
dbt/models/usps_migration.sql | 9 ++++
dbt/models/zip_codes.sql | 35 ++++++------
dbt/models/zip_codes_2000.sql | 6 ---
dbt/models/zip_codes_2020.sql | 4 --
dbt/seeds/acs_variables.csv | 90 +++++++++++++++++++++++++++++++
12 files changed, 172 insertions(+), 35 deletions(-)
create mode 100644 dbt/models/all_zip_codes.sql
create mode 100644 dbt/models/all_zip_codes_2010.sql
create mode 100644 dbt/models/all_zip_codes_2020.sql
delete mode 100644 dbt/models/zip_codes_2000.sql
delete mode 100644 dbt/models/zip_codes_2020.sql
create mode 100644 dbt/seeds/acs_variables.csv
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
index 2e85556e..98545ebb 100644
--- a/dbt/models/acs_block_group.sql
+++ b/dbt/models/acs_block_group.sql
@@ -1,3 +1,12 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_block_group_id', 'year_', 'name_'], 'unique': true},
+ ]
+ )
+}}
+
with
census_block_groups as (
select
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
index fc47e66d..52c9517c 100644
--- a/dbt/models/acs_tract.sql
+++ b/dbt/models/acs_tract.sql
@@ -1,3 +1,12 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_tract_id', 'year_', 'name_'], 'unique': true},
+ ]
+ )
+}}
+
with
census_tracts as (
select
@@ -9,7 +18,6 @@ census_tracts as (
from {{ ref("census_tracts") }}
)
-
select
census_tract_id
, acs_tract_raw.year_
diff --git a/dbt/models/all_zip_codes.sql b/dbt/models/all_zip_codes.sql
new file mode 100644
index 00000000..ac438099
--- /dev/null
+++ b/dbt/models/all_zip_codes.sql
@@ -0,0 +1,20 @@
+with
+zip_codes as (
+select
+ zip_code,
+ '[2020-01-01,)'::daterange as valid,
+ geom
+from {{ ref('all_zip_codes_2020') }}
+union all
+select
+ zip_code,
+ '[,2020-01-01)'::daterange as valid,
+ geom
+from {{ ref('all_zip_codes_2010') }}
+)
+select
+ {{ dbt_utils.generate_surrogate_key(['zip_code', 'valid']) }} as zip_code_id
+ , zip_code
+ , valid
+ , geom
+from zip_codes
diff --git a/dbt/models/all_zip_codes_2010.sql b/dbt/models/all_zip_codes_2010.sql
new file mode 100644
index 00000000..8cdafd23
--- /dev/null
+++ b/dbt/models/all_zip_codes_2010.sql
@@ -0,0 +1,5 @@
+select
+ zcta5ce10 as zip_code,
+ geom
+from
+ {{ source('minneapolis', 'zip_codes_tl_2020_us_zcta510') }}
diff --git a/dbt/models/all_zip_codes_2020.sql b/dbt/models/all_zip_codes_2020.sql
new file mode 100644
index 00000000..aee015ae
--- /dev/null
+++ b/dbt/models/all_zip_codes_2020.sql
@@ -0,0 +1,4 @@
+select
+ zcta5ce20 as zip_code,
+ geom
+from {{ source('minneapolis', 'zip_codes_tl_2020_us_zcta520') }}
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index 3671a586..8fb2f2dd 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -3,7 +3,7 @@
materialized='table',
indexes = [
{'columns': ['parcel_id'], 'unique': true},
- {'columns': ['geom'], 'type': 'gist'}
+ {'columns': ['valid', 'geom'], 'type': 'gist'}
]
)
}}
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index a1f5753f..f78e1251 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -5,8 +5,6 @@ sources:
tables:
- name: acs_bg_raw
- name: acs_tract_raw
- - name: zip_raw_2000
- - name: zip_raw_2020
- name: minneapolis
database: cities
schema: minneapolis
@@ -215,20 +213,27 @@ models:
to: ref('zip_codes')
field: zip_code_id
- - name: zip_codes_2000
+ - name: all_zip_codes_2010
columns:
- name: zip_code
data_tests:
- not_null
- unique
- - name: zip_codes_2020
+ - name: all_zip_codes_2020
columns:
- name: zip_code
data_tests:
- not_null
- unique
+ - name: all_zip_codes
+ columns:
+ - name: zip_code_id
+ data_tests:
+ - not_null
+ - unique
+
- name: zip_codes
columns:
- name: zip_code_id
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index 6a5954e3..7550f0d0 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -1,3 +1,12 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['date_', 'zip_code_id', 'flow_direction', 'flow_type'], 'unique': true},
+ ]
+ )
+}}
+
{% set usps_migration_flow_types = ['business', 'family', 'individual', 'perm', 'temp'] %}
{% set usps_migration_flow_directions = ['from', 'to'] %}
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 48180e1d..17eac722 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -3,28 +3,25 @@
materialized='table',
indexes = [
{'columns': ['zip_code_id'], 'unique': true},
- {'columns': ['geom'], 'type': 'gist'}
+ {'columns': ['valid', 'geom'], 'type': 'gist'}
]
)
}}
-with
-zip_codes as (
-select
- zip_code,
- '[2020-01-01,)'::daterange as valid,
- geom
-from {{ ref('zip_codes_2020') }}
-union all
-select
- zip_code,
- '[2000-01-01,2020-01-01)'::daterange as valid,
- ST_Transform(geom, 4269) as geom
-from {{ ref('zip_codes_2000') }}
+with city_boundary as (
+ select
+ st_transform(geom, 4269) as geom
+ from
+ {{ ref('city_boundary') }}
)
select
- {{ dbt_utils.generate_surrogate_key(['zip_code', 'valid']) }} as zip_code_id
- , zip_code
- , valid
- , geom
-from zip_codes
+ all_zip_codes.zip_code_id
+ , all_zip_codes.zip_code
+ , all_zip_codes.valid
+ , all_zip_codes.geom
+from
+ {{ ref('all_zip_codes') }} as all_zip_codes,
+ city_boundary
+where
+ st_intersects(all_zip_codes.geom, city_boundary.geom)
+ and st_area(st_intersection(all_zip_codes.geom, city_boundary.geom)) / st_area(all_zip_codes.geom) > 0.2
diff --git a/dbt/models/zip_codes_2000.sql b/dbt/models/zip_codes_2000.sql
deleted file mode 100644
index d6b18b05..00000000
--- a/dbt/models/zip_codes_2000.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-select
- zcta as zip_code,
- ST_Union(geom) as geom
-from
- {{ source('minneapolis_old', 'zip_raw_2000') }}
-group by zcta
diff --git a/dbt/models/zip_codes_2020.sql b/dbt/models/zip_codes_2020.sql
deleted file mode 100644
index 038ac2c9..00000000
--- a/dbt/models/zip_codes_2020.sql
+++ /dev/null
@@ -1,4 +0,0 @@
-select
- zcta5ce20 as zip_code,
- geom
-from {{ source('minneapolis_old', 'zip_raw_2020') }}
diff --git a/dbt/seeds/acs_variables.csv b/dbt/seeds/acs_variables.csv
new file mode 100644
index 00000000..8cdeba7c
--- /dev/null
+++ b/dbt/seeds/acs_variables.csv
@@ -0,0 +1,90 @@
+variable,description
+B03002_003E,population_white_non_hispanic
+B03002_004E,population_black_non_hispanic
+B03002_005E,population_american_indian_or_alaska_native_non_hispanic
+B03002_006E,population_asian_non_hispanic
+B03002_007E,population_native_hawaiian_or_pacific_islander_non_hispanic
+B03002_008E,population_other_non_hispanic
+B03002_009E,population_multiple_races_non_hispanic
+B03002_010E,population_multiple_races_and_other_non_hispanic
+B07204_001E,geographic_mobility_total_responses
+B07204_002E,geographic_mobility_same_house_1_year_ago
+B07204_004E,geographic_mobility_different_house_1_year_ago_same_city
+B07204_005E,geographic_mobility_different_house_1_year_ago_same_county
+B07204_006E,geographic_mobility_different_house_1_year_ago_same_state
+B07204_007E,geographic_mobility_different_house_1_year_ago_same_country
+B07204_016E,geographic_mobility_different_house_1_year_ago_abroad
+B01003_001E,population
+B02001_002E,white
+B02001_003E,black
+B02001_004E,american_indian_or_alaska_native
+B02001_005E,asian
+B02001_006E,native_hawaiian_or_pacific_islander
+B03001_003E,population_hispanic_or_latino
+B02001_007E,other_race
+B02001_008E,multiple_races
+B02001_009E,multiple_races_and_other_race
+B02001_010E,two_or_more_races_excluding_other
+B02015_002E,east_asian_chinese
+B02015_003E,east_asian_hmong
+B02015_004E,east_asian_japanese
+B02015_005E,east_asian_korean
+B02015_006E,east_asian_mongolian
+B02015_007E,east_asian_okinawan
+B02015_008E,east_asian_taiwanese
+B02015_009E,east_asian_other
+B02015_010E,southeast_asian_burmese
+B02015_011E,southeast_asian_cambodian
+B02015_012E,southeast_asian_filipino
+B02015_013E,southeast_asian_indonesian
+B02015_014E,southeast_asian_laotian
+B02015_015E,southeast_asian_malaysian
+B02015_016E,southeast_asian_mien
+B02015_017E,southeast_asian_singaporean
+B02015_018E,southeast_asian_thai
+B02015_019E,southeast_asian_viet
+B02015_020E,southeast_asian_other
+B02015_021E,south_asian_asian_indian
+B02015_022E,south_asian_bangladeshi
+B02015_023E,south_asian_bhutanese
+B02015_024E,south_asian_nepalese
+B02015_025E,south_asian_pakistani
+B02015_026E,south_asian_sikh
+B02015_027E,south_asian_sri_lankan
+B02015_028E,south_asian_other
+B02015_029E,central_asian_kazakh
+B02015_030E,central_asian_uzbek
+B02015_031E,central_asian_other
+B02015_032E,other_asian_specified
+B02015_033E,other_asian_not_specified
+B19013_001E,median_household_income
+B19013A_001E,median_household_income_white
+B19013H_001E,median_household_income_white_non_hispanic
+B19013I_001E,median_household_income_hispanic
+B19013B_001E,median_household_income_black
+B19013C_001E,median_household_income_american_indian_or_alaska_native
+B19013D_001E,median_household_income_asian
+B19013E_001E,median_household_income_native_hawaiian_or_pacific_islander
+B19013F_001E,median_household_income_other_race
+B19013G_001E,median_household_income_multiple_races
+B19019_002E,median_household_income_1_person_households
+B19019_003E,median_household_income_2_person_households
+B19019_004E,median_household_income_3_person_households
+B19019_005E,median_household_income_4_person_households
+B19019_006E,median_household_income_5_person_households
+B19019_007E,median_household_income_6_person_households
+B19019_008E,median_household_income_7_or_more_person_households
+B01002_001E,median_age
+B01002_002E,median_age_male
+B01002_003E,median_age_female
+B25031_001E,median_gross_rent
+B25031_002E,median_gross_rent_0_bedrooms
+B25031_003E,median_gross_rent_1_bedrooms
+B25031_004E,median_gross_rent_2_bedrooms
+B25031_005E,median_gross_rent_3_bedrooms
+B25031_006E,median_gross_rent_4_bedrooms
+B25031_007E,median_gross_rent_5_bedrooms
+B25032_001E,total_housing_units
+B25032_002E,total_owner_occupied_housing_units
+B25032_013E,total_renter_occupied_housing_units
+B25070_001E,median_gross_rent_as_percentage_of_household_income
From 367ceb2e5d2c2cb29f541b3e4937ef817b01811c Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 16 Aug 2024 15:12:19 -0400
Subject: [PATCH 052/142] add acs tract wide table
---
dbt/models/acs_tract.sql | 20 +++++++---
dbt/models/acs_tract_clean.sql | 20 ++++++++++
dbt/models/acs_tract_wide.sql | 73 ++++++++++++++++++++++++++++++++++
3 files changed, 108 insertions(+), 5 deletions(-)
create mode 100644 dbt/models/acs_tract_clean.sql
create mode 100644 dbt/models/acs_tract_wide.sql
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
index 52c9517c..7482f43a 100644
--- a/dbt/models/acs_tract.sql
+++ b/dbt/models/acs_tract.sql
@@ -18,14 +18,24 @@ census_tracts as (
from {{ ref("census_tracts") }}
)
+, acs_tract as (
+ select
+ statefp
+ , countyfp
+ , tractce
+ , year_
+ , name_
+ , value_
+ from {{ ref('acs_tract_clean') }}
+)
select
census_tract_id
- , acs_tract_raw.year_
- , acs_tract_raw.name_
- , acs_tract_raw.value_
+ , acs_tract.year_
+ , acs_tract.name_
+ , acs_tract.value_
from
- {{ source('minneapolis_old', 'acs_tract_raw') }}
+ acs_tract
inner join census_tracts
using (statefp, countyfp, tractce)
where
- to_date(acs_tract_raw.year_::text , 'YYYY') <@ census_tracts.valid
+ to_date(acs_tract.year_::text , 'YYYY') <@ census_tracts.valid
diff --git a/dbt/models/acs_tract_clean.sql b/dbt/models/acs_tract_clean.sql
new file mode 100644
index 00000000..bd5638a8
--- /dev/null
+++ b/dbt/models/acs_tract_clean.sql
@@ -0,0 +1,20 @@
+with
+acs_tract_raw as (
+ select
+ statefp
+ , countyfp
+ , tractce
+ , year_
+ , name_
+ , value_
+ from {{ source('minneapolis_old', 'acs_tract_raw') }}
+)
+select
+ statefp
+ , countyfp
+ , tractce
+ , year_
+ , name_
+ , case when value_ < 0 then null else value_ end as value_
+from
+ acs_tract_raw
diff --git a/dbt/models/acs_tract_wide.sql b/dbt/models/acs_tract_wide.sql
new file mode 100644
index 00000000..f83da90f
--- /dev/null
+++ b/dbt/models/acs_tract_wide.sql
@@ -0,0 +1,73 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['geoidfq', 'description']}
+ ]
+ )
+}}
+
+{% set years = range(2013, 2023) %}
+
+with acs_tract as (
+ select
+ census_tract_id
+ , year_
+ , name_
+ , value_
+ from {{ ref('acs_tract') }}
+)
+
+, census_tracts as (
+ select
+ census_tract_id
+ , geoidfq
+ from {{ ref("census_tracts") }}
+)
+
+, acs_variables as (
+ select
+ "variable"
+ , description
+ from {{ ref("acs_variables") }}
+)
+
+, acs_tract_extended as (
+ select
+ acs_tract.census_tract_id
+ , census_tracts.geoidfq
+ , acs_tract.year_
+ , acs_tract.name_
+ , acs_tract.value_
+ from
+ acs_tract
+ inner join census_tracts using (census_tract_id)
+)
+
+, distinct_tracts_and_variables as (
+ select distinct
+ geoidfq
+ , name_
+ from acs_tract_extended
+)
+
+select
+ distinct_tracts_and_variables.geoidfq
+ , acs_variables.description
+{% for year_ in years %}
+ , "{{ year_ }}"
+{% endfor %}
+from
+distinct_tracts_and_variables
+inner join acs_variables
+ on distinct_tracts_and_variables.name_ = acs_variables.variable
+{% for year_ in years %}
+left join
+(select
+ geoidfq
+ , name_
+ , value_ as "{{ year_}}"
+from acs_tract_extended
+where year_ = {{ year_ }})
+using (geoidfq, name_)
+{% endfor %}
From 9ea31f2c62db926663c3a64b8b7070fe953926e2 Mon Sep 17 00:00:00 2001
From: Michelangelo Naim
Date: Fri, 16 Aug 2024 17:25:04 -0400
Subject: [PATCH 053/142] adding file to load from bucket to db
---
load_data_server/load_server.py | 365 ++++++++++++++++++++++++++++++++
1 file changed, 365 insertions(+)
create mode 100644 load_data_server/load_server.py
diff --git a/load_data_server/load_server.py b/load_data_server/load_server.py
new file mode 100644
index 00000000..cbf82881
--- /dev/null
+++ b/load_data_server/load_server.py
@@ -0,0 +1,365 @@
+import os
+import re
+from dotenv import load_dotenv
+import subprocess
+import psycopg2
+from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
+import time
+import logging
+from google.cloud import storage
+import argparse
+from tqdm import tqdm
+
+# Load environment variables (python-dotenv reads them from a .env file)
+load_dotenv()
+
+# Set up logging: INFO and above, each message prefixed with timestamp and level
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+# Google Cloud project and bucket names, read from the environment
+PROJECT_NAME = os.getenv('GOOGLE_CLOUD_PROJECT')
+BUCKET_NAME = os.getenv('GOOGLE_CLOUD_BUCKET')
+
+# Bucket folder prefixes that main() processes when the whole bucket is not requested
+FOLDERS = [
+ 'fair_market_rents',
+]
+
+# Target PostgreSQL schema and connection settings, read from the environment
+SCHEMA = os.getenv('SCHEMA')
+HOST = os.getenv('HOST')
+DATABASE = os.getenv('DATABASE')
+USERNAME = os.getenv('USERNAME')
+PASSWORD = os.getenv('PASSWORD')
+# Command-line options prepended to every ogr2ogr invocation in this file
+OGR2OGR_OPTS = [
+ "--config", "PG_USE_COPY", "YES",
+ "-progress",
+ "-lco", "PRECISION=NO",
+ "-overwrite",
+ "-lco", "GEOMETRY_NAME=geom",
+ "-nlt", "PROMOTE_TO_MULTI",
+]
+DB_OPTS = [f"PG:dbname={DATABASE} host={HOST} user={USERNAME} password={PASSWORD} port=5432"]
+# Retry policy used by the database connection and by the ogr2ogr uploads
+MAX_RETRIES = 3
+RETRY_DELAY = 5 # seconds
+
+def get_db_connection():
+    """Open an autocommit psycopg2 connection, retrying on OperationalError.
+
+    Up to MAX_RETRIES attempts are made, RETRY_DELAY seconds apart; the error
+    from the final attempt is logged and re-raised.
+    """
+    for attempt in range(1, MAX_RETRIES + 1):
+        try:
+            connection = psycopg2.connect(
+                host=HOST, database=DATABASE, user=USERNAME, password=PASSWORD
+            )
+            connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
+            return connection
+        except psycopg2.OperationalError as e:
+            if attempt == MAX_RETRIES:
+                logging.error(f"Failed to connect to the database after {MAX_RETRIES} attempts: {e}")
+                raise
+            logging.warning(f"Connection attempt {attempt} failed. Retrying in {RETRY_DELAY} seconds...")
+            time.sleep(RETRY_DELAY)
+
+def create_schema_if_not_exists(conn):
+    """Ensure the SCHEMA schema exists, creating it when information_schema does not list it."""
+    lookup_sql = "SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = %s);"
+    with conn.cursor() as cursor:
+        cursor.execute(lookup_sql, (SCHEMA,))
+        already_there = cursor.fetchone()[0]
+        if already_there:
+            logging.info(f"Schema '{SCHEMA}' already exists.")
+            return
+        cursor.execute(f"CREATE SCHEMA {SCHEMA};")
+        logging.info(f"Schema '{SCHEMA}' created.")
+
+def generate_table_name(blob_name):
+    """Generate a PostgreSQL-friendly table name from a bucket blob name.
+
+    The extension is dropped, folder components are joined with '_', non-word
+    characters become '_', repeated words are dropped (case-insensitively,
+    first one wins), and the result is prefixed with 'f_' when it starts with
+    a digit, cut to 63 characters and lower-cased.
+
+    Raises:
+        ValueError: if no usable character is left of blob_name.
+    """
+    stem = os.path.splitext(blob_name)[0]
+    # Sanitise everything that is not a letter, digit or underscore (not only
+    # '-' and '.'), so spaces or brackets cannot leak into SQL or ogr2ogr names.
+    table_name = re.sub(r'\W', '_', '_'.join(part for part in stem.split('/') if part))
+
+    seen = set()
+    unique_words = []
+    for word in table_name.split('_'):
+        if word.lower() not in seen:
+            seen.add(word.lower())
+            unique_words.append(word)
+    table_name = re.sub('_+', '_', '_'.join(unique_words))
+
+    if not table_name:
+        raise ValueError(f"Cannot derive a table name from blob name {blob_name!r}")
+    if table_name[0].isdigit():
+        table_name = 'f_' + table_name
+    return table_name[:63].rstrip('_').lower()
+
+def table_exists(conn, table_name):
+    """Return whether information_schema.tables lists table_name inside SCHEMA."""
+    with conn.cursor() as cursor:
+        cursor.execute("""
+            SELECT EXISTS (
+                SELECT FROM information_schema.tables
+                WHERE table_schema = %s AND table_name = %s
+            );
+        """, (SCHEMA, table_name))
+        return cursor.fetchone()[0]
+
+def drop_table_if_exists(conn, table_name):
+    """Drop SCHEMA.table_name, together with dependent objects (CASCADE), when it exists."""
+    with conn.cursor() as cursor:
+        cursor.execute(f"DROP TABLE IF EXISTS {SCHEMA}.{table_name} CASCADE;")
+
+def load_into_server(conn, file_path, file_type):
+    """Load a local 'shp' or 'geojson' file into SCHEMA with ogr2ogr, dropping any existing table first.
+
+    The table is named after the file's base name; returns True on success, otherwise False.
+    """
+    layer = os.path.splitext(os.path.basename(file_path))[0]
+    full_table_name = f"{SCHEMA}.{layer}"
+    if table_exists(conn, layer):
+        drop_table_if_exists(conn, layer)
+    if file_type == 'shp':
+        upload_command = ["ogr2ogr"] + OGR2OGR_OPTS + ["-nln", full_table_name] + DB_OPTS + [file_path]
+    elif file_type == 'geojson':
+        upload_command = ["ogr2ogr"] + OGR2OGR_OPTS + ["-f", "PostgreSQL"] + DB_OPTS + [file_path, "-nln", full_table_name]
+    else:
+        logging.error(f"Unsupported file type: {file_type}")
+        return False
+    for attempt in range(1, MAX_RETRIES + 1):
+        try:
+            subprocess.check_call(upload_command)
+        except subprocess.CalledProcessError as e:
+            if attempt == MAX_RETRIES:
+                logging.error(f"Failed to process {file_path} after {MAX_RETRIES} attempts: {e}")
+                return False
+            logging.warning(f"Attempt {attempt} failed for {file_path}. Retrying in {RETRY_DELAY} seconds...")
+            time.sleep(RETRY_DELAY)
+        else:
+            logging.info(f"Successfully loaded {file_path} into {full_table_name}")
+            return True
+
+def group_shapefile_components(blobs):
+    """Map each extension-less blob name to its .shp/.shx/.dbf/.prj component blobs."""
+    component_exts = ('.shp', '.shx', '.dbf', '.prj')
+    groups = {}
+    for candidate in blobs:
+        stem, suffix = os.path.splitext(candidate.name)
+        if suffix.lower() not in component_exts:
+            continue
+        groups.setdefault(stem, []).append(candidate)
+    return groups
+
+def process_geojson(conn, blob):
+    """Download a GeoJSON blob and load it into SCHEMA with ogr2ogr; return True only once it is loaded."""
+    table_name = generate_table_name(blob.name)
+    if table_exists(conn, table_name):
+        return False  # Table already exists, skip processing
+
+    full_table_name = f"{SCHEMA}.{table_name}"
+    file_path = os.path.join('/tmp', os.path.basename(blob.name))
+    upload_command = ["ogr2ogr"] + OGR2OGR_OPTS + ["-f", "PostgreSQL"] + DB_OPTS + [file_path, "-nln", full_table_name]
+    try:
+        blob.download_to_filename(file_path)
+        for attempt in range(MAX_RETRIES):
+            try:
+                subprocess.check_call(upload_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+                return True
+            except subprocess.CalledProcessError:
+                if attempt < MAX_RETRIES - 1:
+                    time.sleep(RETRY_DELAY)
+        logging.error(f"Failed to load {blob.name} into {full_table_name} after {MAX_RETRIES} attempts")
+        return False
+    finally:
+        # Always remove the temp file, even when the download or ogr2ogr raises
+        if os.path.exists(file_path):
+            os.remove(file_path)
+
+def process_shapefile(conn, component_blobs):
+    """Download the components of one shapefile and load it into SCHEMA with ogr2ogr.
+
+    component_blobs must contain the '.shp' blob; its name determines the table
+    name, and every component is downloaded under that name into one temporary
+    directory. Returns True once loaded, False when the table already exists
+    or every ogr2ogr attempt fails.
+    """
+    shp_blob = next(blob for blob in component_blobs if blob.name.endswith('.shp'))
+    table_name = generate_table_name(shp_blob.name)
+    if table_exists(conn, table_name):
+        return False  # Table already exists, skip processing
+
+    full_table_name = f"{SCHEMA}.{table_name}"
+    temp_dir = os.path.join('/tmp', table_name)
+    shp_path = os.path.join(temp_dir, f"{table_name}.shp")
+    upload_command = ["ogr2ogr"] + OGR2OGR_OPTS + ["-nln", full_table_name] + DB_OPTS + [shp_path]
+
+    os.makedirs(temp_dir, exist_ok=True)
+    try:
+        for blob in component_blobs:
+            file_ext = os.path.splitext(blob.name)[1]
+            blob.download_to_filename(os.path.join(temp_dir, f"{table_name}{file_ext}"))
+
+        for attempt in range(MAX_RETRIES):
+            try:
+                subprocess.check_call(upload_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+                return True
+            except subprocess.CalledProcessError:
+                if attempt < MAX_RETRIES - 1:
+                    time.sleep(RETRY_DELAY)
+        logging.error(f"Failed to load {shp_blob.name} into {full_table_name} after {MAX_RETRIES} attempts")
+        return False
+    finally:
+        # Always clear the temp directory, even when a download or ogr2ogr raises
+        for file in os.listdir(temp_dir):
+            os.remove(os.path.join(temp_dir, file))
+        os.rmdir(temp_dir)
+
+def load_csv_into_server(conn, file_path, full_table_name):
+    """Create full_table_name (one TEXT column per header field) and COPY the CSV file into it.
+
+    Returns True on success; on error drops the table it created and returns False.
+    """
+    import csv  # function-scope import: needed only for header parsing
+    created = False
+    try:
+        with open(file_path, 'r') as f, conn.cursor() as cursor:
+            header = next(csv.reader(f, skipinitialspace=True), None)  # copes with quoted fields containing commas
+            if not header:
+                raise ValueError("CSV file has no header row")
+            columns = []
+            for position, col in enumerate(header, start=1):
+                base = re.sub(r'[^a-zA-Z0-9_]', '_', col.strip()) or f"column_{position}"
+                candidates = [base, f"{base}_dup"] + [f"{base}_dup{i}" for i in range(2, len(header) + 1)]
+                columns.append(next(c for c in candidates if c not in columns))  # first name still free
+            cursor.execute(f"""CREATE TABLE {full_table_name} ({','.join(f'"{c}" TEXT' for c in columns)});""")
+            created = True
+            f.seek(0)  # rewind: COPY ... HEADER skips the header line itself
+            cursor.copy_expert(f"COPY {full_table_name} FROM STDIN WITH CSV HEADER", f)
+        conn.commit()
+        return True
+    except Exception as e:
+        print(f"Error loading CSV into {full_table_name}: {e}")
+        conn.rollback()
+        if created:  # under autocommit the CREATE TABLE outlives a failed COPY and would block retries
+            with conn.cursor() as cursor:
+                cursor.execute(f"DROP TABLE IF EXISTS {full_table_name};")
+            conn.commit()
+        return False
+
+def process_csv(conn, blob):
+    """Load one CSV blob from the bucket into its own table in SCHEMA.
+
+    Returns False without downloading when the table already exists;
+    otherwise returns whatever load_csv_into_server reports.
+    """
+    table_name = generate_table_name(blob.name)
+    full_table_name = f"{SCHEMA}.{table_name}"
+
+    # Skip blobs whose table is already present
+    if table_exists(conn, table_name):
+        return False  # Table already exists, skip processing
+
+    # Stage the blob in a local scratch file
+    local_path = os.path.join('/tmp', f"temp_{table_name}.csv")
+    blob.download_to_filename(local_path)
+
+    try:
+        return load_csv_into_server(conn, local_path, full_table_name)
+    finally:
+        # The scratch file is removed whatever the outcome of the load
+        if os.path.exists(local_path):
+            os.remove(local_path)
+
+def count_processable_files(blobs):
+    """Return how many blobs are .geojson or .csv files or the .shp member of a shapefile group."""
+    shapefile_groups = group_shapefile_components(blobs)
+    total = 0
+    # Each shapefile is counted once, through its .shp component
+    for blob in blobs:
+        name = blob.name
+        if name.endswith(('.geojson', '.csv')):
+            total += 1
+        elif name.endswith('.shp') and os.path.splitext(name)[0] in shapefile_groups:
+            total += 1
+    return total
+
+def process_file(conn, blob, shapefile_groups, processed_shapefiles):
+    """Route one blob to the loader for its extension and return that loader's result (False otherwise)."""
+    name = blob.name
+    if name.endswith('.geojson'):
+        return process_geojson(conn, blob)
+    if name.endswith('.csv'):
+        return process_csv(conn, blob)
+    base_name = os.path.splitext(name)[0]
+    if not name.endswith('.shp') or base_name not in shapefile_groups or base_name in processed_shapefiles:
+        return False
+    success = process_shapefile(conn, shapefile_groups[base_name])
+    if success:
+        processed_shapefiles.add(base_name)
+    return success
+
+def download_and_process_files(bucket, conn, folder_prefix=''):
+    """Load every supported file under folder_prefix in the GCS bucket, showing a progress bar.
+
+    The bar total is only reduced for counted (.geojson/.csv/.shp) files that are skipped or fail.
+    """
+    blobs = list(bucket.list_blobs(prefix=folder_prefix))
+    shapefile_groups = group_shapefile_components(blobs)
+    processed_shapefiles = set()
+    with tqdm(total=count_processable_files(blobs), desc="Processing files", unit="file") as pbar:
+        for blob in blobs:
+            if blob.name.endswith('/'):  # This is a folder
+                continue
+            if process_file(conn, blob, shapefile_groups, processed_shapefiles):
+                pbar.update(1)
+            elif blob.name.endswith(('.geojson', '.csv', '.shp')):
+                # Blobs that were never counted (.shx/.dbf/.prj, other types) must not shrink the total
+                pbar.total -= 1
+                pbar.refresh()
+
+def main(process_entire_bucket=False):
+    """Load bucket files into PostgreSQL: the whole bucket if requested, otherwise each entry of FOLDERS.
+
+    Any exception is caught and printed; an opened database connection is closed in every case.
+    """
+    conn = None
+    try:
+        bucket = storage.Client(project=PROJECT_NAME).bucket(BUCKET_NAME)
+        # Database connection, with the target schema in place
+        conn = get_db_connection()
+        create_schema_if_not_exists(conn)
+
+        if not process_entire_bucket:
+            for folder in FOLDERS:
+                print(f"Processing folder: {folder}")
+                download_and_process_files(bucket, conn, folder)
+        else:
+            print("Processing entire bucket")
+            download_and_process_files(bucket, conn)
+
+        print("Processing completed successfully.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if conn:
+            conn.close()
+
+if __name__ == "__main__":
+    # Command-line entry point: --full-bucket switches from FOLDERS to the whole bucket
+    cli = argparse.ArgumentParser(description="Process files from Google Cloud Storage bucket")
+    cli.add_argument('--full-bucket', action='store_true', help='Process the entire bucket instead of specific folders')
+    options = cli.parse_args()
+    main(process_entire_bucket=options.full_bucket)
\ No newline at end of file
From 889d8cec21deeb809f91eb8cd3d7deaf27eea6a0 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 16 Aug 2024 18:12:46 -0400
Subject: [PATCH 054/142] filter tracts in wide table using city boundary
---
api/postgrest.conf | 4 +--
api/schema.sql | 25 +++----------------
dbt/models/acs_tract_wide.sql | 15 ++++++++---
dbt/models/census_tracts_in_city_boundary.sql | 18 +++++++++++++
4 files changed, 34 insertions(+), 28 deletions(-)
create mode 100644 dbt/models/census_tracts_in_city_boundary.sql
diff --git a/api/postgrest.conf b/api/postgrest.conf
index 097fba01..ddb71965 100644
--- a/api/postgrest.conf
+++ b/api/postgrest.conf
@@ -93,10 +93,10 @@ openapi-server-proxy-uri = ""
# server-cors-allowed-origins = ""
server-host = "!4"
-server-port = 3000
+server-port = 3001
## Allow getting the request-response timing information through the `Server-Timing` header
-server-timing-enabled = false
+server-timing-enabled = true
## Unix socket location
## if specified it takes precedence over server-port
diff --git a/api/schema.sql b/api/schema.sql
index 7167ade7..8578cdbf 100644
--- a/api/schema.sql
+++ b/api/schema.sql
@@ -2,28 +2,9 @@ drop schema if exists api cascade;
create schema api;
-create view api.parcels as (
- select * from dbt.parcels
-);
-
-create view api.census_tracts as (
- select * from dbt.census_tracts
-);
-
-create view api.census_block_groups as (
- select * from dbt.census_block_groups
-);
-
-create view api.zip_codes as (
- select * from dbt.zip_codes
-);
-
-create view api.emv_in_downtown_west as (
- select dbt.parcels.pin, dbt.parcels.emv_land
- from dbt.parcels
- inner join dbt.neighborhoods
- on st_intersects(st_transform(dbt.parcels.geom, 3857), dbt.neighborhoods.geom)
- where dbt.neighborhoods.name_ = 'Downtown West'
+create view api.acs_tract_wide as (
+ select * from dbt.acs_tract_wide
+ order by random()
);
drop role if exists web_anon;
diff --git a/dbt/models/acs_tract_wide.sql b/dbt/models/acs_tract_wide.sql
index f83da90f..434d2d9e 100644
--- a/dbt/models/acs_tract_wide.sql
+++ b/dbt/models/acs_tract_wide.sql
@@ -2,7 +2,7 @@
config(
materialized='table',
indexes = [
- {'columns': ['geoidfq', 'description']}
+ {'columns': ['description']}
]
)
}}
@@ -18,11 +18,18 @@ with acs_tract as (
from {{ ref('acs_tract') }}
)
+, census_tracts_in_city_boundary as (
+ select
+ census_tract_id
+ from {{ ref("census_tracts_in_city_boundary") }}
+)
+
, census_tracts as (
select
census_tract_id
- , geoidfq
+ , substring(geoidfq from 10) as geoidfq
from {{ ref("census_tracts") }}
+ where census_tract_id in (select census_tract_id from census_tracts_in_city_boundary)
)
, acs_variables as (
@@ -52,8 +59,8 @@ with acs_tract as (
)
select
- distinct_tracts_and_variables.geoidfq
- , acs_variables.description
+ acs_variables.description
+ , distinct_tracts_and_variables.geoidfq as tract_id
{% for year_ in years %}
, "{{ year_ }}"
{% endfor %}
diff --git a/dbt/models/census_tracts_in_city_boundary.sql b/dbt/models/census_tracts_in_city_boundary.sql
new file mode 100644
index 00000000..6f6febbe
--- /dev/null
+++ b/dbt/models/census_tracts_in_city_boundary.sql
@@ -0,0 +1,18 @@
+with census_tracts as (
+ select
+ census_tract_id
+ , geom
+ from {{ ref('census_tracts') }}
+)
+, city_boundary as (
+ select
+ st_transform(geom, 4269) as geom
+ from {{ ref('city_boundary') }}
+)
+select
+ census_tracts.census_tract_id
+from
+ census_tracts
+ , city_boundary
+where st_intersects(census_tracts.geom, city_boundary.geom)
+ and st_area(st_intersection(census_tracts.geom, city_boundary.geom)) / st_area(census_tracts.geom) > 0.2
From ed6c7e6e7ae07d2c4c8486f1fea02f647fe2c19d Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 19 Aug 2024 16:26:15 -0400
Subject: [PATCH 055/142] convert all geometry to standardized srid
---
dbt/dbt_project.yml | 1 +
dbt/models/all_zip_codes_2010.sql | 2 +-
dbt/models/all_zip_codes_2020.sql | 2 +-
dbt/models/census_block_groups.sql | 2 +-
dbt/models/census_tracts.sql | 2 +-
dbt/models/census_tracts_in_city_boundary.sql | 2 +-
dbt/models/city_boundary.sql | 2 +-
dbt/models/commercial_permits.sql | 2 +-
dbt/models/neighborhoods.sql | 2 +-
dbt/models/parcels_base.sql | 2 +-
dbt/models/parcels_to_census_block_groups.sql | 2 +-
dbt/models/parcels_to_zip_codes.sql | 2 +-
dbt/models/parking.sql | 2 +-
dbt/models/parking_to_parcels.sql | 2 +-
dbt/models/residential_permits.sql | 2 +-
dbt/models/zip_codes.sql | 2 +-
16 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/dbt/dbt_project.yml b/dbt/dbt_project.yml
index e4b65a64..34355ccf 100644
--- a/dbt/dbt_project.yml
+++ b/dbt/dbt_project.yml
@@ -24,5 +24,6 @@ clean-targets: # directories to be removed by `dbt clean`
vars:
+ srid: 26915 # use UTM zone 15N for all geometric data. note, this must have meters as the unit of measure
# years for which we have census tract/block group data
census_years: [2010, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]
diff --git a/dbt/models/all_zip_codes_2010.sql b/dbt/models/all_zip_codes_2010.sql
index 8cdafd23..e6f2c5c5 100644
--- a/dbt/models/all_zip_codes_2010.sql
+++ b/dbt/models/all_zip_codes_2010.sql
@@ -1,5 +1,5 @@
select
zcta5ce10 as zip_code,
- geom
+ st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'zip_codes_tl_2020_us_zcta510') }}
diff --git a/dbt/models/all_zip_codes_2020.sql b/dbt/models/all_zip_codes_2020.sql
index aee015ae..9a9a77b0 100644
--- a/dbt/models/all_zip_codes_2020.sql
+++ b/dbt/models/all_zip_codes_2020.sql
@@ -1,4 +1,4 @@
select
zcta5ce20 as zip_code,
- geom
+ st_transform(geom, {{ var("srid") }}) as geom
from {{ source('minneapolis', 'zip_codes_tl_2020_us_zcta520') }}
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
index a52c2f47..d3d8ac72 100644
--- a/dbt/models/census_block_groups.sql
+++ b/dbt/models/census_block_groups.sql
@@ -37,7 +37,7 @@ census_block_groups as (
, {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq
, '[{{ year_ }}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
{% endif %}
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_bg_500k') }}
{% if not loop.last %}union all{% endif %}
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 634e18ac..1119140c 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -14,7 +14,7 @@ select
, {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq
, '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
{% endif %}
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_tract_500k') }}
{% if not loop.last %}union all{% endif %}
diff --git a/dbt/models/census_tracts_in_city_boundary.sql b/dbt/models/census_tracts_in_city_boundary.sql
index 6f6febbe..51e1d4e2 100644
--- a/dbt/models/census_tracts_in_city_boundary.sql
+++ b/dbt/models/census_tracts_in_city_boundary.sql
@@ -6,7 +6,7 @@ with census_tracts as (
)
, city_boundary as (
select
- st_transform(geom, 4269) as geom
+ geom
from {{ ref('city_boundary') }}
)
select
diff --git a/dbt/models/city_boundary.sql b/dbt/models/city_boundary.sql
index b34a22ec..1b7fc755 100644
--- a/dbt/models/city_boundary.sql
+++ b/dbt/models/city_boundary.sql
@@ -1,4 +1,4 @@
select
- geom
+ st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'city_boundary_minneapolis') }}
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
index 349b6c8e..b51cb23d 100644
--- a/dbt/models/commercial_permits.sql
+++ b/dbt/models/commercial_permits.sql
@@ -20,7 +20,7 @@ select
, permit_val as permit_value
, sqf as square_feet
, address
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'commercial_permits_nonresidentialconstruction') }}
where
diff --git a/dbt/models/neighborhoods.sql b/dbt/models/neighborhoods.sql
index b031cf08..bd3da714 100644
--- a/dbt/models/neighborhoods.sql
+++ b/dbt/models/neighborhoods.sql
@@ -1,6 +1,6 @@
select
bdnum as neighborhood_id
, bdname as name_
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'neighborhoods_minneapolis') }}
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index 8fb2f2dd..6fb778f1 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -40,5 +40,5 @@ select
, year_built
, sale_date
, sale_value
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from parcels
diff --git a/dbt/models/parcels_to_census_block_groups.sql b/dbt/models/parcels_to_census_block_groups.sql
index 07caa1fb..bb6cc212 100644
--- a/dbt/models/parcels_to_census_block_groups.sql
+++ b/dbt/models/parcels_to_census_block_groups.sql
@@ -13,7 +13,7 @@ parcels as (
select
parcel_id as id
, valid
- , ST_Transform(geom, 4269) as geom
+ , geom
from {{ ref("parcels_base") }}
),
census_block_groups as (
diff --git a/dbt/models/parcels_to_zip_codes.sql b/dbt/models/parcels_to_zip_codes.sql
index 2519888a..6a045300 100644
--- a/dbt/models/parcels_to_zip_codes.sql
+++ b/dbt/models/parcels_to_zip_codes.sql
@@ -13,7 +13,7 @@ parcels as (
select
parcel_id as id
, valid
- , ST_Transform(geom, 4269) as geom
+ , geom
from {{ ref("parcels_base") }}
),
zip_codes as (
diff --git a/dbt/models/parking.sql b/dbt/models/parking.sql
index 6f4e6cdb..cd0b874e 100644
--- a/dbt/models/parking.sql
+++ b/dbt/models/parking.sql
@@ -26,5 +26,5 @@ select
, "housing un" as num_housing_units
, "car parkin" as num_car_parking_spaces
, "bike parki" as num_bike_parking_spaces
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from parking_raw
diff --git a/dbt/models/parking_to_parcels.sql b/dbt/models/parking_to_parcels.sql
index 21c20edc..7eb1c755 100644
--- a/dbt/models/parking_to_parcels.sql
+++ b/dbt/models/parking_to_parcels.sql
@@ -13,7 +13,7 @@ with
select
parking_id as id
, daterange(date_, date_, '[]') as valid
- , ST_Transform(geom, 26915) as geom
+ , geom
from {{ ref('parking') }}
)
, parcels as (
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index 95ee9a9d..35a68113 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -27,7 +27,7 @@ select
, permit_val as permit_value
, community_ as community_designation
, notes
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'residential_permits_residentialpermits') }}
where
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 17eac722..346b6b82 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -10,7 +10,7 @@
with city_boundary as (
select
- st_transform(geom, 4269) as geom
+ geom
from
{{ ref('city_boundary') }}
)
From b26754c0a47e5c63439b1133a17718b4019c5744 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 19 Aug 2024 16:26:44 -0400
Subject: [PATCH 056/142] include all zip codes that intersect the city
boundary
---
dbt/models/zip_codes.sql | 1 -
1 file changed, 1 deletion(-)
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 346b6b82..77d9ddd3 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -24,4 +24,3 @@ from
city_boundary
where
st_intersects(all_zip_codes.geom, city_boundary.geom)
- and st_area(st_intersection(all_zip_codes.geom, city_boundary.geom)) / st_area(all_zip_codes.geom) > 0.2
From 8159ba108d15680a5bf23d074d5f62fb4b49cbdc Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 19 Aug 2024 16:26:44 -0400
Subject: [PATCH 057/142] include all zip codes that intersect the city
boundary
---
dbt/models/zip_codes.sql | 1 -
1 file changed, 1 deletion(-)
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 17eac722..6958629e 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -24,4 +24,3 @@ from
city_boundary
where
st_intersects(all_zip_codes.geom, city_boundary.geom)
- and st_area(st_intersection(all_zip_codes.geom, city_boundary.geom)) / st_area(all_zip_codes.geom) > 0.2
From 8c96c52e8acf8820c31f402e380c047fdaff0ce2 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 19 Aug 2024 16:26:15 -0400
Subject: [PATCH 058/142] convert all geometry to standardized srid
---
dbt/dbt_project.yml | 1 +
dbt/models/all_zip_codes_2010.sql | 2 +-
dbt/models/all_zip_codes_2020.sql | 2 +-
dbt/models/census_block_groups.sql | 2 +-
dbt/models/census_tracts.sql | 2 +-
dbt/models/city_boundary.sql | 2 +-
dbt/models/commercial_permits.sql | 2 +-
dbt/models/neighborhoods.sql | 2 +-
dbt/models/parcels_base.sql | 2 +-
dbt/models/parcels_to_census_block_groups.sql | 2 +-
dbt/models/parcels_to_zip_codes.sql | 2 +-
dbt/models/parking.sql | 2 +-
dbt/models/parking_to_parcels.sql | 2 +-
dbt/models/residential_permits.sql | 2 +-
dbt/models/zip_codes.sql | 2 +-
15 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/dbt/dbt_project.yml b/dbt/dbt_project.yml
index e4b65a64..34355ccf 100644
--- a/dbt/dbt_project.yml
+++ b/dbt/dbt_project.yml
@@ -24,5 +24,6 @@ clean-targets: # directories to be removed by `dbt clean`
vars:
+ srid: 26915 # use UTM zone 15N for all geometric data. note, this must have meters as the unit of measure
# years for which we have census tract/block group data
census_years: [2010, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]
diff --git a/dbt/models/all_zip_codes_2010.sql b/dbt/models/all_zip_codes_2010.sql
index 8cdafd23..e6f2c5c5 100644
--- a/dbt/models/all_zip_codes_2010.sql
+++ b/dbt/models/all_zip_codes_2010.sql
@@ -1,5 +1,5 @@
select
zcta5ce10 as zip_code,
- geom
+ st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'zip_codes_tl_2020_us_zcta510') }}
diff --git a/dbt/models/all_zip_codes_2020.sql b/dbt/models/all_zip_codes_2020.sql
index aee015ae..9a9a77b0 100644
--- a/dbt/models/all_zip_codes_2020.sql
+++ b/dbt/models/all_zip_codes_2020.sql
@@ -1,4 +1,4 @@
select
zcta5ce20 as zip_code,
- geom
+ st_transform(geom, {{ var("srid") }}) as geom
from {{ source('minneapolis', 'zip_codes_tl_2020_us_zcta520') }}
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
index a52c2f47..d3d8ac72 100644
--- a/dbt/models/census_block_groups.sql
+++ b/dbt/models/census_block_groups.sql
@@ -37,7 +37,7 @@ census_block_groups as (
, {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq
, '[{{ year_ }}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
{% endif %}
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_bg_500k') }}
{% if not loop.last %}union all{% endif %}
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 634e18ac..1119140c 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -14,7 +14,7 @@ select
, {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq
, '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
{% endif %}
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_tract_500k') }}
{% if not loop.last %}union all{% endif %}
diff --git a/dbt/models/city_boundary.sql b/dbt/models/city_boundary.sql
index b34a22ec..1b7fc755 100644
--- a/dbt/models/city_boundary.sql
+++ b/dbt/models/city_boundary.sql
@@ -1,4 +1,4 @@
select
- geom
+ st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'city_boundary_minneapolis') }}
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
index 349b6c8e..b51cb23d 100644
--- a/dbt/models/commercial_permits.sql
+++ b/dbt/models/commercial_permits.sql
@@ -20,7 +20,7 @@ select
, permit_val as permit_value
, sqf as square_feet
, address
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'commercial_permits_nonresidentialconstruction') }}
where
diff --git a/dbt/models/neighborhoods.sql b/dbt/models/neighborhoods.sql
index b031cf08..bd3da714 100644
--- a/dbt/models/neighborhoods.sql
+++ b/dbt/models/neighborhoods.sql
@@ -1,6 +1,6 @@
select
bdnum as neighborhood_id
, bdname as name_
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'neighborhoods_minneapolis') }}
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index 8fb2f2dd..6fb778f1 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -40,5 +40,5 @@ select
, year_built
, sale_date
, sale_value
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from parcels
diff --git a/dbt/models/parcels_to_census_block_groups.sql b/dbt/models/parcels_to_census_block_groups.sql
index 07caa1fb..bb6cc212 100644
--- a/dbt/models/parcels_to_census_block_groups.sql
+++ b/dbt/models/parcels_to_census_block_groups.sql
@@ -13,7 +13,7 @@ parcels as (
select
parcel_id as id
, valid
- , ST_Transform(geom, 4269) as geom
+ , geom
from {{ ref("parcels_base") }}
),
census_block_groups as (
diff --git a/dbt/models/parcels_to_zip_codes.sql b/dbt/models/parcels_to_zip_codes.sql
index 2519888a..6a045300 100644
--- a/dbt/models/parcels_to_zip_codes.sql
+++ b/dbt/models/parcels_to_zip_codes.sql
@@ -13,7 +13,7 @@ parcels as (
select
parcel_id as id
, valid
- , ST_Transform(geom, 4269) as geom
+ , geom
from {{ ref("parcels_base") }}
),
zip_codes as (
diff --git a/dbt/models/parking.sql b/dbt/models/parking.sql
index 6f4e6cdb..cd0b874e 100644
--- a/dbt/models/parking.sql
+++ b/dbt/models/parking.sql
@@ -26,5 +26,5 @@ select
, "housing un" as num_housing_units
, "car parkin" as num_car_parking_spaces
, "bike parki" as num_bike_parking_spaces
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from parking_raw
diff --git a/dbt/models/parking_to_parcels.sql b/dbt/models/parking_to_parcels.sql
index 21c20edc..7eb1c755 100644
--- a/dbt/models/parking_to_parcels.sql
+++ b/dbt/models/parking_to_parcels.sql
@@ -13,7 +13,7 @@ with
select
parking_id as id
, daterange(date_, date_, '[]') as valid
- , ST_Transform(geom, 26915) as geom
+ , geom
from {{ ref('parking') }}
)
, parcels as (
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index 95ee9a9d..35a68113 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -27,7 +27,7 @@ select
, permit_val as permit_value
, community_ as community_designation
, notes
- , geom
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'residential_permits_residentialpermits') }}
where
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 6958629e..77d9ddd3 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -10,7 +10,7 @@
with city_boundary as (
select
- st_transform(geom, 4269) as geom
+ geom
from
{{ ref('city_boundary') }}
)
From 98437a0930e3238bea55773f13b2e092dbec57c8 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 19 Aug 2024 16:31:48 -0400
Subject: [PATCH 059/142] add downtown, university, and transit lines
---
dbt/models/downtown.sql | 4 +++
dbt/models/high_frequency_transit_lines.sql | 13 ++++++++++
.../high_frequency_transit_lines_union.sql | 25 +++++++++++++++++++
dbt/models/university.sql | 4 +++
4 files changed, 46 insertions(+)
create mode 100644 dbt/models/downtown.sql
create mode 100644 dbt/models/high_frequency_transit_lines.sql
create mode 100644 dbt/models/high_frequency_transit_lines_union.sql
create mode 100644 dbt/models/university.sql
diff --git a/dbt/models/downtown.sql b/dbt/models/downtown.sql
new file mode 100644
index 00000000..5514d39e
--- /dev/null
+++ b/dbt/models/downtown.sql
@@ -0,0 +1,4 @@
+select
+ st_transform(geom, {{ var("srid") }}) as geom
+from
+ {{ source('minneapolis', 'downtown') }}
diff --git a/dbt/models/high_frequency_transit_lines.sql b/dbt/models/high_frequency_transit_lines.sql
new file mode 100644
index 00000000..459e7400
--- /dev/null
+++ b/dbt/models/high_frequency_transit_lines.sql
@@ -0,0 +1,13 @@
+with lines as (
+ select
+ line_id
+ , year_
+ , geom
+ from {{ ref('high_frequency_transit_lines_union') }}
+)
+select
+ {{ dbt_utils.generate_surrogate_key(['line_id', 'year_']) }} as line_id
+ , year_
+ , geom
+from
+ lines
diff --git a/dbt/models/high_frequency_transit_lines_union.sql b/dbt/models/high_frequency_transit_lines_union.sql
new file mode 100644
index 00000000..8f4eedd2
--- /dev/null
+++ b/dbt/models/high_frequency_transit_lines_union.sql
@@ -0,0 +1,25 @@
+with lines_2015 as (
+ select
+ ogc_fid as line_id,
+ st_transform(geom, {{ var("srid") }}) as geom
+ from
+ {{ source('minneapolis', 'high_frequency_transit_2015_freq_lines') }}
+)
+, lines_2016 as (
+ select
+ ogc_fid as line_id,
+ st_transform(geom, {{ var("srid") }}) as geom
+ from
+ {{ source('minneapolis', 'high_frequency_transit_2016_freq_lines') }}
+)
+select
+ 2015 as year_,
+ line_id,
+ geom
+from lines_2015
+union all
+select
+ 2016 as year_,
+ line_id,
+ geom
+from lines_2016
diff --git a/dbt/models/university.sql b/dbt/models/university.sql
new file mode 100644
index 00000000..6ae78ad1
--- /dev/null
+++ b/dbt/models/university.sql
@@ -0,0 +1,4 @@
+select
+ st_transform(geom, {{ var("srid") }}) as geom
+from
+ {{ source('minneapolis', 'university') }}
From bc1217d3951b6a2e1c9924c4ff7bdf088f4549ef Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 19 Aug 2024 18:00:52 -0400
Subject: [PATCH 060/142] add missing primary keys to allow for qgis viz
---
dbt/models/city_boundary.sql | 3 ++-
dbt/models/downtown.sql | 3 ++-
dbt/models/high_frequency_transit_lines.sql | 17 +++++++++++++----
.../high_frequency_transit_lines_union.sql | 8 ++------
dbt/models/residential_permits.sql | 2 +-
dbt/models/university.sql | 3 ++-
6 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/dbt/models/city_boundary.sql b/dbt/models/city_boundary.sql
index 1b7fc755..88af8782 100644
--- a/dbt/models/city_boundary.sql
+++ b/dbt/models/city_boundary.sql
@@ -1,4 +1,5 @@
select
- st_transform(geom, {{ var("srid") }}) as geom
+ ogc_id as city_boundary_id
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'city_boundary_minneapolis') }}
diff --git a/dbt/models/downtown.sql b/dbt/models/downtown.sql
index 5514d39e..dc3e09cd 100644
--- a/dbt/models/downtown.sql
+++ b/dbt/models/downtown.sql
@@ -1,4 +1,5 @@
select
- st_transform(geom, {{ var("srid") }}) as geom
+ ogc_fid as downtown_id
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'downtown') }}
diff --git a/dbt/models/high_frequency_transit_lines.sql b/dbt/models/high_frequency_transit_lines.sql
index 459e7400..af4c344c 100644
--- a/dbt/models/high_frequency_transit_lines.sql
+++ b/dbt/models/high_frequency_transit_lines.sql
@@ -1,13 +1,22 @@
with lines as (
select
- line_id
- , year_
+ year_
, geom
from {{ ref('high_frequency_transit_lines_union') }}
)
+, stops as (
+ select
+ year_
+ , geom
+ from {{ ref('high_frequency_transit_stops') }}
+)
select
- {{ dbt_utils.generate_surrogate_key(['line_id', 'year_']) }} as line_id
+ year_ as high_frequency_transit_lines_id
, year_
- , geom
+ , lines.geom
+ -- note units are in meters
+ , st_buffer(lines.geom, 106.7) as blue_zone_geom -- 350 feet
+ , st_union(st_buffer(lines.geom, 402.3), st_buffer(stops.geom, 804.7)) as yellow_zone_geom -- quarter mile around lines and half mile around stops
from
lines
+ inner join stops using (year_)
diff --git a/dbt/models/high_frequency_transit_lines_union.sql b/dbt/models/high_frequency_transit_lines_union.sql
index 8f4eedd2..073ec9a1 100644
--- a/dbt/models/high_frequency_transit_lines_union.sql
+++ b/dbt/models/high_frequency_transit_lines_union.sql
@@ -1,25 +1,21 @@
with lines_2015 as (
select
- ogc_fid as line_id,
- st_transform(geom, {{ var("srid") }}) as geom
+ st_union(st_transform(geom, {{ var("srid") }})) as geom
from
{{ source('minneapolis', 'high_frequency_transit_2015_freq_lines') }}
)
, lines_2016 as (
select
- ogc_fid as line_id,
- st_transform(geom, {{ var("srid") }}) as geom
+ st_union(st_transform(geom, {{ var("srid") }})) as geom
from
{{ source('minneapolis', 'high_frequency_transit_2016_freq_lines') }}
)
select
2015 as year_,
- line_id,
geom
from lines_2015
union all
select
2016 as year_,
- line_id,
geom
from lines_2016
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index 35a68113..c4fb4267 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -9,7 +9,7 @@
}}
select
- sde_id as residential_permit_id
+ sde_id::int as residential_permit_id
, year::int as year_
, tenure
, housing_ty as housing_type
diff --git a/dbt/models/university.sql b/dbt/models/university.sql
index 6ae78ad1..7c6b4309 100644
--- a/dbt/models/university.sql
+++ b/dbt/models/university.sql
@@ -1,4 +1,5 @@
select
- st_transform(geom, {{ var("srid") }}) as geom
+ ogc_fid as university_id
+ , st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'university') }}
From b177443c0abe39ce78fe1b845f9048d58b2585e4 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 19 Aug 2024 18:01:14 -0400
Subject: [PATCH 061/142] add high frequency transit stops model
---
dbt/models/high_frequency_transit_stops.sql | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
create mode 100644 dbt/models/high_frequency_transit_stops.sql
diff --git a/dbt/models/high_frequency_transit_stops.sql b/dbt/models/high_frequency_transit_stops.sql
new file mode 100644
index 00000000..b751153f
--- /dev/null
+++ b/dbt/models/high_frequency_transit_stops.sql
@@ -0,0 +1,21 @@
+with stops_2015 as (
+ select
+ 2015 as year_
+ , st_union(st_transform(geom, {{ var("srid") }}))::geometry(multipoint, {{ var("srid") }}) as geom
+ from {{ source('minneapolis', 'high_frequency_transit_2015_freq_rail_stops') }}
+)
+, stops_2016 as ( -- stops are unchanged in 2016
+ select
+ 2016 as year_
+ , geom
+ from stops_2015
+)
+select
+ year_
+ , geom
+from stops_2015
+union all
+select
+ year_
+ , geom
+from stops_2016
From 190cbebb1161e4ff2b20147c7ebba30dbd51c979 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 19 Aug 2024 18:07:14 -0400
Subject: [PATCH 062/142] remove version 1
---
etl/acs.sql | 27 ----
etl/acs_schema.sql | 50 -------
etl/census_schema.sql | 91 ------------
etl/db.py | 2 -
etl/fair_market_rents.sql | 63 --------
etl/fair_market_rents_schema.sql | 12 --
etl/load_acs_raw.py | 168 ---------------------
etl/load_fair_market_rents_raw.py | 94 ------------
etl/load_parcels.py | 54 -------
etl/load_raw_shapes.py | 90 ------------
etl/load_usps_migration_raw.py | 64 --------
etl/load_zip.py | 22 ---
etl/parcel_schema.sql | 36 -----
etl/parcel_to_bg.sql | 118 ---------------
etl/parcel_to_zip.sql | 118 ---------------
etl/permit_schema.sql | 236 ------------------------------
etl/property_values.sql | 11 --
etl/real_estate_transactions.sql | 73 ---------
etl/segregation.sql | 92 ------------
etl/usps_migration.sql | 156 --------------------
etl/usps_migration_raw_schema.sql | 22 ---
etl/zip_schema.sql | 25 ----
22 files changed, 1624 deletions(-)
delete mode 100644 etl/acs.sql
delete mode 100644 etl/acs_schema.sql
delete mode 100644 etl/census_schema.sql
delete mode 100644 etl/db.py
delete mode 100644 etl/fair_market_rents.sql
delete mode 100644 etl/fair_market_rents_schema.sql
delete mode 100644 etl/load_acs_raw.py
delete mode 100644 etl/load_fair_market_rents_raw.py
delete mode 100644 etl/load_parcels.py
delete mode 100644 etl/load_raw_shapes.py
delete mode 100644 etl/load_usps_migration_raw.py
delete mode 100644 etl/load_zip.py
delete mode 100644 etl/parcel_schema.sql
delete mode 100644 etl/parcel_to_bg.sql
delete mode 100644 etl/parcel_to_zip.sql
delete mode 100644 etl/permit_schema.sql
delete mode 100644 etl/property_values.sql
delete mode 100644 etl/real_estate_transactions.sql
delete mode 100644 etl/segregation.sql
delete mode 100644 etl/usps_migration.sql
delete mode 100644 etl/usps_migration_raw_schema.sql
delete mode 100644 etl/zip_schema.sql
diff --git a/etl/acs.sql b/etl/acs.sql
deleted file mode 100644
index 4026036e..00000000
--- a/etl/acs.sql
+++ /dev/null
@@ -1,27 +0,0 @@
-insert into acs_tract
-select
- id
- , year_
- , name_
- , value_
-from
- acs_tract_raw as t1
- join census_tract as t2 on t1.statefp = t2.statefp
- and t1.countyfp = t2.countyfp
- and t1.tractce = t2.tractce
- and to_date(t1.year_::text , 'YYYY') <@ t2.valid;
-
-insert into acs_bg
-select
- id
- , year_
- , name_
- , value_
-from
- acs_bg_raw as t1
- join census_bg as t2 on t1.statefp = t2.statefp
- and t1.countyfp = t2.countyfp
- and t1.tractce = t2.tractce
- and t1.blkgrpce = t2.blkgrpce
- and to_date(t1.year_::text , 'YYYY') <@ t2.valid;
-
diff --git a/etl/acs_schema.sql b/etl/acs_schema.sql
deleted file mode 100644
index 8acb6088..00000000
--- a/etl/acs_schema.sql
+++ /dev/null
@@ -1,50 +0,0 @@
-drop table if exists acs_variable cascade;
-
-create table acs_variable (
- name_ text primary key
- , description text not null
-);
-
-drop table if exists acs_tract_raw cascade;
-
-create table acs_tract_raw (
- statefp text
- , countyfp text
- , tractce text
- , year_ int
- , name_ text
- , value_ numeric
-);
-
-drop table if exists acs_bg_raw cascade;
-
-create table acs_bg_raw (
- statefp text
- , countyfp text
- , tractce text
- , blkgrpce text
- , year_ int
- , name_ text
- , value_ numeric
-);
-
-drop table if exists acs_tract cascade;
-
-create table acs_tract (
- id int references census_tract (id)
- , year_ int not null
- , name_ text references acs_variable (name_)
- , value_ numeric
- , primary key (id , year_ , name_)
-);
-
-drop table if exists acs_bg cascade;
-
-create table acs_bg (
- id int references census_bg (id)
- , year_ int not null
- , name_ text references acs_variable (name_)
- , value_ numeric
- , primary key (id , year_ , name_)
-);
-
diff --git a/etl/census_schema.sql b/etl/census_schema.sql
deleted file mode 100644
index b90b5a90..00000000
--- a/etl/census_schema.sql
+++ /dev/null
@@ -1,91 +0,0 @@
-drop table if exists census_tract cascade;
-
-create table census_tract (
- id serial primary key
- , statefp text not null
- , countyfp text not null
- , tractce text not null
- , geoidfq text not null
- , valid daterange not null
- , geom geometry(MultiPolygon , 4269) not null
-);
-
-create index census_tract_geom_idx on census_tract using gist (geom);
-
-create index census_tract_valid_idx on census_tract using gist (valid);
-
-insert into census_tract (statefp , countyfp , tractce , geoidfq , valid , geom)
-select
- statefp
- , countyfp
- , tractce
- , affgeoid
- , '[2010-01-01,2020-01-01)'::daterange
- , geom
-from
- cb_2018_27_tract_500k
-union all
-select
- statefp
- , countyfp
- , tractce
- , geoidfq
- , '[2020-01-01,2030-01-01)'::daterange
- , geom
-from
- cb_2023_27_tract_500k;
-
-drop table if exists census_bg cascade;
-
-create table census_bg (
- id serial primary key
- , statefp text not null
- , countyfp text not null
- , tractce text not null
- , blkgrpce text not null
- , geoidfq text not null
- , tract_id int references census_tract (id)
- , valid daterange not null
- , geom geometry(MultiPolygon , 4269) not null
-);
-
-create index census_bg_geom_idx on census_bg using gist (geom);
-
-create index census_bg_valid_idx on census_bg using gist (valid);
-
-insert into census_bg (statefp , countyfp , tractce , blkgrpce , geoidfq , tract_id , valid , geom)
-select
- statefp
- , countyfp
- , tractce
- , blkgrpce
- , bg.geoidfq
- , census_tract.id
- , bg.valid
- , bg.geom
-from (
- select
- statefp
- , countyfp
- , tractce
- , blkgrpce
- , affgeoid as geoidfq
- , '[2010-01-01,2020-01-01)'::daterange as valid
- , geom
- from
- cb_2018_27_bg_500k
- union all
- select
- statefp
- , countyfp
- , tractce
- , blkgrpce
- , geoidfq
- , '[2020-01-01,2030-01-01)'::daterange as valid
- , geom
- from
- cb_2023_27_bg_500k) as bg
- join census_tract using (statefp , countyfp , tractce)
-where
- census_tract.valid && bg.valid;
-
diff --git a/etl/db.py b/etl/db.py
deleted file mode 100644
index acaa0053..00000000
--- a/etl/db.py
+++ /dev/null
@@ -1,2 +0,0 @@
-HOST = "34.123.100.76"
-USER = "postgres"
diff --git a/etl/fair_market_rents.sql b/etl/fair_market_rents.sql
deleted file mode 100644
index d2eb3137..00000000
--- a/etl/fair_market_rents.sql
+++ /dev/null
@@ -1,63 +0,0 @@
-drop table if exists fair_market_rents cascade;
-
-create table fair_market_rents (
- zip_id int references zip_code (id)
- , rent numeric
- , num_bedrooms int
- , year_ int
-);
-
-insert into fair_market_rents (zip_id , rent , num_bedrooms , year_)
-with fmr_zip as (
- select
- zip_code.id as zip_id
- , rent_br0
- , rent_br1
- , rent_br2
- , rent_br3
- , rent_br4
- , year_
- from
- fair_market_rents_raw
- join zip_code on zip_code.zip_code = fair_market_rents_raw.zip
- and zip_code.valid @> to_date(year_::text , 'YYYY'))
- select
- zip_id
- , rent_br0
- , 0
- , year_
- from
- fmr_zip
- union
- select
- zip_id
- , rent_br1
- , 1
- , year_
- from
- fmr_zip
- union
- select
- zip_id
- , rent_br2
- , 2
- , year_
- from
- fmr_zip
- union
- select
- zip_id
- , rent_br3
- , 3
- , year_
- from
- fmr_zip
- union
- select
- zip_id
- , rent_br4
- , 4
- , year_
- from
- fmr_zip;
-
diff --git a/etl/fair_market_rents_schema.sql b/etl/fair_market_rents_schema.sql
deleted file mode 100644
index 4fd2ac52..00000000
--- a/etl/fair_market_rents_schema.sql
+++ /dev/null
@@ -1,12 +0,0 @@
-drop table if exists fair_market_rents_raw cascade;
-
-create table fair_market_rents_raw (
- zip text
- , rent_br0 numeric
- , rent_br1 numeric
- , rent_br2 numeric
- , rent_br3 numeric
- , rent_br4 numeric
- , year_ int
-);
-
diff --git a/etl/load_acs_raw.py b/etl/load_acs_raw.py
deleted file mode 100644
index 4ed52239..00000000
--- a/etl/load_acs_raw.py
+++ /dev/null
@@ -1,168 +0,0 @@
-#!/usr/bin/env python
-
-import logging
-import os
-import psycopg2
-
-from db import HOST, USER
-
-log = logging.getLogger(__name__)
-
-YEAR_RANGE = range(2013, 2023)
-ACS_CODES = {
- "B03002_003E": "population_white_non_hispanic",
- "B03002_004E": "population_black_non_hispanic",
- "B03002_005E": "population_asian_non_hispanic",
- "B03002_006E": "population_native_hawaiian_or_pacific_islander_non_hispanic",
- "B03002_007E": "population_american_indian_or_alaska_native_non_hispanic",
- "B03002_008E": "population_other_non_hispanic",
- "B03002_009E": "population_multiple_races_non_hispanic",
- "B03002_010E": "population_multiple_races_and_other_non_hispanic",
- "B07204_001E": "geographic_mobility_total_responses",
- "B07204_002E": "geographic_mobility_same_house_1_year_ago",
- "B07204_004E": "geographic_mobility_different_house_1_year_ago_same_city",
- "B07204_005E": "geographic_mobility_different_house_1_year_ago_same_county",
- "B07204_006E": "geographic_mobility_different_house_1_year_ago_same_state",
- "B07204_007E": "geographic_mobility_different_house_1_year_ago_same_country",
- "B07204_016E": "geographic_mobility_different_house_1_year_ago_abroad",
- "B01003_001E": "population",
- "B02001_002E": "white",
- "B02001_003E": "black",
- "B02001_004E": "american_indian_or_alaska_native",
- "B02001_005E": "asian",
- "B02001_006E": "native_hawaiian_or_pacific_islander",
- "B03001_003E": "population_hispanic_or_latino",
- "B02001_007E": "other_race",
- "B02001_008E": "multiple_races",
- "B02001_009E": "multiple_races_and_other_race",
- "B02001_010E": "two_or_more_races_excluding_other",
- "B02015_002E": "east_asian_chinese",
- "B02015_003E": "east_asian_hmong",
- "B02015_004E": "east_asian_japanese",
- "B02015_005E": "east_asian_korean",
- "B02015_006E": "east_asian_mongolian",
- "B02015_007E": "east_asian_okinawan",
- "B02015_008E": "east_asian_taiwanese",
- "B02015_009E": "east_asian_other",
- "B02015_010E": "southeast_asian_burmese",
- "B02015_011E": "southeast_asian_cambodian",
- "B02015_012E": "southeast_asian_filipino",
- "B02015_013E": "southeast_asian_indonesian",
- "B02015_014E": "southeast_asian_laotian",
- "B02015_015E": "southeast_asian_malaysian",
- "B02015_016E": "southeast_asian_mien",
- "B02015_017E": "southeast_asian_singaporean",
- "B02015_018E": "southeast_asian_thai",
- "B02015_019E": "southeast_asian_viet",
- "B02015_020E": "southeast_asian_other",
- "B02015_021E": "south_asian_asian_indian",
- "B02015_022E": "south_asian_bangladeshi",
- "B02015_023E": "south_asian_bhutanese",
- "B02015_024E": "south_asian_nepalese",
- "B02015_025E": "south_asian_pakistani",
- "B02015_026E": "south_asian_sikh",
- "B02015_027E": "south_asian_sri_lankan",
- "B02015_028E": "south_asian_other",
- "B02015_029E": "central_asian_kazakh",
- "B02015_030E": "central_asian_uzbek",
- "B02015_031E": "central_asian_other",
- "B02015_032E": "other_asian_specified",
- "B02015_033E": "other_asian_not_specified",
- "B19013_001E": "median_household_income",
- "B19013A_001E": "median_household_income_white",
- "B19013H_001E": "median_household_income_white_non_hispanic",
- "B19013I_001E": "median_household_income_hispanic",
- "B19013B_001E": "median_household_income_black",
- "B19013C_001E": "median_household_income_american_indian_or_alaska_native",
- "B19013D_001E": "median_household_income_asian",
- "B19013E_001E": "median_household_income_native_hawaiian_or_pacific_islander",
- "B19013F_001E": "median_household_income_other_race",
- "B19013G_001E": "median_household_income_multiple_races",
- "B19019_002E": "median_household_income_1_person_households",
- "B19019_003E": "median_household_income_2_person_households",
- "B19019_004E": "median_household_income_3_person_households",
- "B19019_005E": "median_household_income_4_person_households",
- "B19019_006E": "median_household_income_5_person_households",
- "B19019_007E": "median_household_income_6_person_households",
- "B19019_008E": "median_household_income_7_or_more_person_households",
- "B01002_001E": "median_age",
- "B01002_002E": "median_age_male",
- "B01002_003E": "median_age_female",
- "B25031_001E": "median_gross_rent",
- "B25031_002E": "median_gross_rent_0_bedrooms",
- "B25031_003E": "median_gross_rent_1_bedrooms",
- "B25031_004E": "median_gross_rent_2_bedrooms",
- "B25031_005E": "median_gross_rent_3_bedrooms",
- "B25031_006E": "median_gross_rent_4_bedrooms",
- "B25031_007E": "median_gross_rent_5_bedrooms",
- "B25032_001E": "total_housing_units",
- "B25032_002E": "total_owner_occupied_housing_units",
- "B25032_013E": "total_renter_occupied_housing_units",
- "B25070_001E": "median_gross_rent_as_percentage_of_household_income",
-}
-
-
-def main():
- conn = psycopg2.connect(host=HOST, user=USER, database="cities")
- cur = conn.cursor()
-
- with open("etl/acs_schema.sql", "r") as f:
- cur.execute(f.read())
-
- for code, desc in ACS_CODES.items():
- cur.execute("insert into acs_variable values (%s, %s)", (code, desc))
- conn.commit()
-
- cur.execute("drop table if exists acs_tract_temp")
- cur.execute(
- "create temp table acs_tract_temp (statefp text, countyfp text, tractce text, value numeric)"
- )
-
- for code in ACS_CODES.keys():
- desc = ACS_CODES[code]
- for year in YEAR_RANGE:
- log.info(f"Loading {desc} for {year}")
- filename = f"zoning/data/raw/demographics/tracts/{desc}/{year}.csv"
- if not os.path.isfile(filename):
- logging.info(f"File {filename} does not exist")
- continue
-
- cur.execute("truncate acs_tract_temp")
-
- with open(filename, "r") as f:
- cur.copy_expert("copy acs_tract_temp from stdin with csv header", f)
-
- cur.execute(
- "insert into acs_tract_raw select statefp, countyfp, tractce, %s, %s, value from acs_tract_temp",
- (year, code),
- )
- conn.commit()
-
- cur.execute("drop table if exists acs_bg_temp")
- cur.execute(
- "create temp table acs_bg_temp (statefp text, countyfp text, tractce text, blkgrpce text, value numeric)"
- )
-
- for code in ACS_CODES.keys():
- desc = ACS_CODES[code]
- for year in YEAR_RANGE:
- log.info(f"Loading {desc} for {year}")
- filename = f"zoning/data/raw/demographics/block_groups/{desc}/{year}.csv"
- if not os.path.isfile(filename):
- logging.info(f"File {filename} does not exist")
- continue
-
- cur.execute("truncate acs_bg_temp")
-
- with open(filename, "r") as f:
- cur.copy_expert("copy acs_bg_temp from stdin with csv header", f)
- cur.execute(
- "insert into acs_bg_raw select statefp, countyfp, tractce, blkgrpce, %s, %s, value from acs_bg_temp",
- (year, code),
- )
- conn.commit()
-
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.INFO)
- main()
diff --git a/etl/load_fair_market_rents_raw.py b/etl/load_fair_market_rents_raw.py
deleted file mode 100644
index 565c8a05..00000000
--- a/etl/load_fair_market_rents_raw.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env python
-
-import logging
-import os
-import glob
-from io import StringIO
-
-import psycopg2
-import pandas as pd
-
-from db import HOST, USER
-
-log = logging.getLogger(__name__)
-
-RAW_DATA_DIRECTORY = "zoning/data/raw/demographics/zip_codes/fair_market_rents"
-
-
-def preprocess_csv_to_df(filename):
- year = filename.split("_")[-1].replace(".csv", "")
-
- df = pd.read_csv(filename, dtype=str, na_values={})
-
- rename_dict = {}
- for col in list(df.columns):
- if "zip" in col.lower() or col == "zcta":
- rename_dict[col] = "zip_code"
- elif "BR" in col and "90" not in col and "110" not in col:
- rename_dict[col] = "rent_br" + col.lower().split("br")[0][-1]
- elif "area_rent_br" in col:
- rename_dict[col] = "rent_br" + col[-1]
- elif "safmr" in col and "90" not in col and "110" not in col:
- rename_dict[col] = "rent_br" + col.split("_")[-1][0]
-
- df = df.rename(columns=rename_dict)[
- [
- "zip_code",
- "rent_br0",
- "rent_br1",
- "rent_br2",
- "rent_br3",
- "rent_br4",
- ]
- ]
-
- for col in df.columns:
- if "rent_" in col:
- df[col] = [x.replace("$", "").replace(",", "") for x in df[col]]
-
- return (year, df)
-
-
-def copy_from_stringio(cur, df, table):
- """Here we are going save the dataframe in memory and use copy_from() to copy it to the table"""
- buf = StringIO()
- df.to_csv(buf, index=False, header=False)
- buf.seek(0)
- cur.copy_from(buf, table, sep=",")
-
-
-def main():
- conn = psycopg2.connect(host=HOST, user=USER, database="cities")
- cur = conn.cursor()
-
- with open("etl/fair_market_rents_schema.sql", "r") as f:
- cur.execute(f.read())
-
- cur.execute("drop table if exists fmr_temp")
- cur.execute(
- """
- create temp table fmr_temp (
- zip text
- , rent_br0 numeric
- , rent_br1 numeric
- , rent_br2 numeric
- , rent_br3 numeric
- , rent_br4 numeric)
- """
- )
-
- for filename in glob.glob(f"{RAW_DATA_DIRECTORY}/*.csv"):
- (year, df) = preprocess_csv_to_df(filename)
- cur.execute("truncate fmr_temp")
- copy_from_stringio(cur, df, "fmr_temp")
-
- cur.execute(
- "insert into fair_market_rents_raw select *, %s as year from fmr_temp",
- (year,),
- )
- conn.commit()
-
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.INFO)
- main()
diff --git a/etl/load_parcels.py b/etl/load_parcels.py
deleted file mode 100644
index 7a114b34..00000000
--- a/etl/load_parcels.py
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-
-import logging
-import psycopg2
-
-from db import HOST, USER
-
-log = logging.getLogger(__name__)
-
-PARCEL_YEARS = range(2002, 2024)
-COUNTY_ID = "053"
-
-
-def main():
- conn = psycopg2.connect(host=HOST, user=USER, database="cities")
- cur = conn.cursor()
-
- with open("etl/parcel_schema.sql", "r") as f:
- cur.execute(f.read())
- conn.commit()
-
- # select distinct geometry from all parcel tables
- distinct_geom = " union ".join(
- f"select geom from parcel_raw_{year} where upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'"
- for year in PARCEL_YEARS
- )
- parcel_geom_load = f"insert into parcel_geom (geom) {distinct_geom};"
- log.info("Executing: %s", parcel_geom_load)
- cur.execute(parcel_geom_load)
- conn.commit()
-
- # insert parcel data into parcel table
- parcel_data = " union all ".join(
- f"""
- select replace(pin, '{COUNTY_ID}-', ''), '[{year-1}-01-01,{year}-01-01)'::daterange, nullif(emv_land, 0), nullif(emv_bldg, 0), nullif(emv_total, 0), nullif(year_built, 0), sale_date, nullif(sale_value, 0), parcel_geom.id
- from parcel_raw_{year}, parcel_geom
- where parcel_raw_{year}.geom = parcel_geom.geom
- and upper({'city' if year < 2018 else 'ctu_name'}) = 'MINNEAPOLIS'
- """
- for year in PARCEL_YEARS
- )
-
- parcel_load = f"""
- insert into parcel (pid, valid, emv_land, emv_building, emv_total, year_built, sale_date, sale_value, geom_id)
- {parcel_data}
- """
- log.info("Executing: %s", parcel_load)
- cur.execute(parcel_load)
- conn.commit()
-
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.INFO)
- main()
diff --git a/etl/load_raw_shapes.py b/etl/load_raw_shapes.py
deleted file mode 100644
index 41119a47..00000000
--- a/etl/load_raw_shapes.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/env python
-
-import glob
-import subprocess
-import logging
-import os
-
-from db import HOST, USER
-
-log = logging.getLogger(__name__)
-
-BASE_DIR = "zoning/data/raw"
-OGR2OGR_OPTS = [
- "--config",
- "PG_USE_COPY", # use postgres specific copy
- "-progress",
- "-lco",
- "PRECISION=NO", # disable use of numeric types (required when shapefiles mis-specify numeric precision)
- "-overwrite", # overwrite existing tables
- "-lco",
- "GEOMETRY_NAME=geom", # name of geometry column
- "-nlt",
- "PROMOTE_TO_MULTI", # promote all POLYGONs to MULTIPOLYGONs
-]
-DB_OPTS = [f"Pg:dbname=cities host={HOST} user={USER} port=5432"]
-
-# (shapefile, table_name) pairs. shapefiles are relative to BASE_DIR
-REL_SHAPES = [
- (
- "base/shp_society_census2000tiger_zcta/Census2000TigerZipCodeTabAreas.shp",
- "zip_raw_2000",
- ),
- (
- "base/shp_bdry_zip_code_tabulation_areas/zip_code_tabulation_areas.shp",
- "zip_raw_2020",
- ),
- (
- "base/hennepin_county_census_tracts_2018/cb_2018_27_tract_500k.shp",
- "census_tract_raw_2018",
- ),
- (
- "base/hennepin_county_census_block_groups_2018/cb_2018_27_bg_500k.shp",
- "census_block_group_raw_2018",
- ),
- (
- "base/hennepin_county_census_tracts_2023/cb_2023_27_tract_500k.shp",
- "census_tract_raw_2023",
- ),
- (
- "base/hennepin_county_census_block_groups_2023/cb_2023_27_bg_500k.shp",
- "census_block_group_raw_2023",
- ),
- (
- "commercial_permits/shp_struc_non_res_construction/NonresidentialConstruction.shp",
- "commercial_permits_raw",
- ),
- (
- "residential_permits/shp_econ_residential_building_permts/ResidentialPermits.shp",
- "residential_permits_raw",
- ),
-]
-
-
-def main():
- # convert relative paths to absolute paths
- abs_shapes = [(os.path.join(BASE_DIR, shape), table) for shape, table in REL_SHAPES]
-
- for parcel_shape_dir in glob.glob(
- os.path.join(BASE_DIR, "property_values/shp_plan_regional_parcels_*/")
- ):
- year = int(parcel_shape_dir.split("/")[-2].split("_")[-1])
- shape = os.path.join(parcel_shape_dir, f"Parcels{year}Hennepin.shp")
- table = f"parcel_raw_{year}"
- abs_shapes.append((shape, table))
-
- for shape, table in abs_shapes:
- if os.path.exists(shape):
- log.info("Loading %s into %s", shape, table)
- else:
- log.warn("Skipping %s because it does not exist", shape)
- continue
-
- subprocess.check_call(
- ["ogr2ogr"] + OGR2OGR_OPTS + ["-nln", table] + DB_OPTS + [shape]
- )
-
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.INFO)
- main()
diff --git a/etl/load_usps_migration_raw.py b/etl/load_usps_migration_raw.py
deleted file mode 100644
index c05f8e0b..00000000
--- a/etl/load_usps_migration_raw.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-
-import glob
-import logging
-import psycopg2
-
-from db import HOST, USER
-
-log = logging.getLogger(__name__)
-
-
-RAW_DATA_DIRECTORY = "zoning/data/raw/demographics/zip_codes/usps_migration"
-
-
-def main():
- conn = psycopg2.connect(host=HOST, user=USER, database="cities")
- cur = conn.cursor()
-
- with open("etl/usps_migration_raw_schema.sql", "r") as f:
- cur.execute(f.read())
-
- cur.execute("drop table if exists m_temp")
- cur.execute(
- """
- create temp table m_temp (
- yyyymm text
- , zip_code text
- , city text
- , state text
- , total_from_zip numeric
- , total_from_zip_business numeric
- , total_from_zip_family numeric
- , total_from_zip_individual numeric
- , total_from_zip_perm numeric
- , total_from_zip_temp numeric
- , total_to_zip numeric
- , total_to_zip_business numeric
- , total_to_zip_family numeric
- , total_to_zip_individual numeric
- , total_to_zip_perm numeric
- , total_to_zip_temp numeric
- )
- """
- )
-
- for filename in glob.glob(f"{RAW_DATA_DIRECTORY}/*.csv"):
- log.info(f"Loading {filename}")
- year = filename.split("/")[-1].split(".")[0].replace("Y", "")
-
- cur.execute("truncate m_temp")
-
- with open(filename, "r") as f:
- cur.copy_expert("copy m_temp from stdin with csv header", f)
-
- cur.execute(
- "insert into usps_migration_raw select *, %s from m_temp",
- (year,),
- )
- conn.commit()
-
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.INFO)
- main()
diff --git a/etl/load_zip.py b/etl/load_zip.py
deleted file mode 100644
index 02a27cbc..00000000
--- a/etl/load_zip.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import psycopg2
-
-from db import HOST, USER
-
-PARCEL_YEARS = range(2002, 2024)
-COUNTY_ID = "053"
-
-conn = psycopg2.connect(host=HOST, user=USER, database="cities")
-cur = conn.cursor()
-
-with open("etl/zip_schema.sql", "r") as f:
- cur.execute(f.read())
-conn.commit()
-
-zip_load = """
-insert into zip_code(zip_code, valid, geom)
-select zcta5ce20, '[2020-01-01,)'::daterange, geom from zip_raw_2020
-union select zcta, '[2000-01-01,2020-01-01)'::daterange, ST_Transform(geom, 4269) from zip_raw_2000
-"""
-print("Executing:", zip_load)
-cur.execute(zip_load)
-conn.commit()
diff --git a/etl/parcel_schema.sql b/etl/parcel_schema.sql
deleted file mode 100644
index 24bf8523..00000000
--- a/etl/parcel_schema.sql
+++ /dev/null
@@ -1,36 +0,0 @@
-create extension if not exists postgis;
-
-drop table if exists parcel_geom cascade;
-
-create table parcel_geom (
- id serial primary key
- , geom geometry(MultiPolygon , 26915) not null
-);
-
-create index parcel_geom_idx on parcel_geom using gist (geom);
-
-drop table if exists parcel cascade;
-
-create table parcel (
- id serial primary key
- , pid text not null
- , valid daterange not null
- , emv_land numeric
- , emv_building numeric
- , emv_total numeric
- , year_built int
- , sale_date date
- , sale_value numeric
- , geom_id int references parcel_geom (id)
-);
-
-comment on column parcel.valid is 'Dates for which this parcel is valid';
-
-comment on column parcel.pid is 'Municipal parcel ID';
-
-comment on column parcel.emv_land is 'Estimated Market Value, land';
-
-comment on column parcel.emv_building is 'Estimated Market Value, buildings';
-
-comment on column parcel.emv_total is 'Estimated Market Value, total (may be more than sum of land and building)';
-
diff --git a/etl/parcel_to_bg.sql b/etl/parcel_to_bg.sql
deleted file mode 100644
index 5dbc7ac9..00000000
--- a/etl/parcel_to_bg.sql
+++ /dev/null
@@ -1,118 +0,0 @@
-drop type if exists parcel_census_bg_type cascade;
-
-create type parcel_census_bg_type as enum (
- 'within'
- , 'most_overlap'
- , 'closest'
-);
-
-drop table if exists parcel_census_bg;
-
-create table parcel_census_bg (
- parcel_id int references parcel (id)
- , census_bg_id int references census_bg (id)
- , valid daterange not null
- , type parcel_census_bg_type not null
-);
-
-with parcel_with_geom as (
- select
- parcel.id
- , geom_id
- , valid
- , ST_Transform (geom
- , 4269) as geom
- from
- parcel
- join parcel_geom on geom_id = parcel_geom.id
-)
-, parcel_within as (
- -- easy case: one parcel in one bg
- select
- parcel.id as parcel_id
- , census_bg.id as census_bg_id
- , parcel.valid * census_bg.valid as valid
- from
- parcel_with_geom as parcel
- join census_bg on ST_Within (parcel.geom
- , census_bg.geom)
- and parcel.valid && census_bg.valid
-)
-, parcel_not_within as (
- -- parcels that are not fully within any bg
- select
- *
- from
- parcel_with_geom
- where
- not exists (
- select
- parcel_id
- from
- parcel_within
- where
- parcel_id = id)
-)
-, parcel_largest_overlap as (
- -- parcels that overlap multiple bgs map to the one with the largest overlap
- select distinct on (parcel.id)
- parcel.id as parcel_id
- , census_bg.id as census_bg_id
- , parcel.valid * census_bg.valid as valid
- from
- parcel_not_within as parcel
- join census_bg on ST_Intersects (parcel.geom
- , census_bg.geom)
- and parcel.valid && census_bg.valid
- order by
- parcel_id
- , ST_Area (ST_Intersection (parcel.geom
- , census_bg.geom)) desc
-)
-, parcel_no_overlap as (
- -- parcels that do not overlap any bg
- select
- *
- from
- parcel_not_within
- where
- not exists (
- select
- parcel_id
- from
- parcel_largest_overlap
- where
- parcel_id = id)
-)
-, parcel_closest as (
- -- parcels that overlap no bgs map to the closest one
- select distinct on (parcel.id)
- parcel.id as parcel_id
- , census_bg.id as census_bg_id
- , parcel.valid * census_bg.valid as valid
- from
- parcel_no_overlap as parcel
- join census_bg on parcel.valid && census_bg.valid
- order by
- parcel_id
- , ST_Distance (parcel.geom
- , census_bg.geom))
- insert into parcel_census_bg
- select
- *
- , 'within'::parcel_census_bg_type
- from
- parcel_within
- union all
- select
- *
- , 'most_overlap'::parcel_census_bg_type
- from
- parcel_largest_overlap
- union all
- select
- *
- , 'closest'::parcel_census_bg_type
- from
- parcel_closest;
-
diff --git a/etl/parcel_to_zip.sql b/etl/parcel_to_zip.sql
deleted file mode 100644
index 669e48a1..00000000
--- a/etl/parcel_to_zip.sql
+++ /dev/null
@@ -1,118 +0,0 @@
-drop type if exists parcel_zip_type;
-
-create type parcel_zip_type as enum (
- 'within'
- , 'most_overlap'
- , 'closest'
-);
-
-drop table if exists parcel_zip;
-
-create table parcel_zip (
- parcel_id int references parcel (id)
- , zip_code_id int references zip_code (id)
- , valid daterange not null
- , type parcel_zip_type not null
-);
-
-with parcel_with_geom as (
- select
- parcel.id
- , geom_id
- , valid
- , ST_Transform (geom
- , 4269) as geom
- from
- parcel
- join parcel_geom on geom_id = parcel_geom.id
-)
-, parcel_in_zip as (
- -- easy case: one parcel in one zip code
- select
- parcel.id as parcel_id
- , zip_code.id as zip_code_id
- , parcel.valid * zip_code.valid as valid
- from
- parcel_with_geom as parcel
- join zip_code on ST_Within (parcel.geom
- , zip_code.geom)
- and parcel.valid && zip_code.valid
-)
-, parcel_not_within_zip as (
- -- parcels that are not fully within any zip code
- select
- *
- from
- parcel_with_geom
- where
- not exists (
- select
- parcel_id
- from
- parcel_in_zip
- where
- parcel_id = id)
-)
-, parcel_largest_overlap as (
- -- parcels that overlap multiple zip codes map to the one with the largest overlap
- select distinct on (parcel.id)
- parcel.id as parcel_id
- , zip_code.id as zip_code_id
- , parcel.valid * zip_code.valid as valid
- from
- parcel_not_within_zip as parcel
- join zip_code on ST_Intersects (parcel.geom
- , zip_code.geom)
- and parcel.valid && zip_code.valid
- order by
- parcel_id
- , ST_Area (ST_Intersection (parcel.geom
- , zip_code.geom)) desc
-)
-, parcel_no_overlap as (
- -- parcels that do not overlap any zip code
- select
- *
- from
- parcel_not_within_zip
- where
- not exists (
- select
- parcel_id
- from
- parcel_largest_overlap
- where
- parcel_id = id)
-)
-, parcel_closest as (
- -- parcels that overlap no zip codes map to the closest one
- select distinct on (parcel.id)
- parcel.id as parcel_id
- , zip_code.id as zip_code_id
- , parcel.valid * zip_code.valid as valid
- from
- parcel_no_overlap as parcel
- join zip_code on parcel.valid && zip_code.valid
- order by
- parcel_id
- , ST_Distance (parcel.geom
- , zip_code.geom))
- insert into parcel_zip
- select
- *
- , 'within'::parcel_zip_type
- from
- parcel_in_zip
- union all
- select
- *
- , 'most_overlap'::parcel_zip_type
- from
- parcel_largest_overlap
- union all
- select
- *
- , 'closest'::parcel_zip_type
- from
- parcel_closest;
-
diff --git a/etl/permit_schema.sql b/etl/permit_schema.sql
deleted file mode 100644
index c5c7e5e9..00000000
--- a/etl/permit_schema.sql
+++ /dev/null
@@ -1,236 +0,0 @@
-drop table if exists residential_permit cascade;
-
-create table residential_permit (
- id serial primary key
- , ctu_id text
- , coctu_id text
- , year int
- , tenure text
- , housing_ty text
- , res_permit text
- , address text
- , zip_code text
- , name text
- , buildings int
- , units int
- , age_restri int
- , memory_car int
- , assisted int
- , com_off_re boolean
- , sqf numeric
- , public_fun boolean
- , permit_val numeric
- , community_ text
- , notes text
- , pin text
- , geom geometry(multipoint , 26915)
-);
-
-create index residential_permit_geom_idx on residential_permit using gist (geom);
-
-insert into residential_permit (ctu_id , coctu_id , year , tenure , housing_ty , res_permit , address , zip_code , name , buildings , units , age_restri , memory_car , assisted , com_off_re , sqf , public_fun , permit_val , community_ , notes , pin , geom)
-select
- ctu_id
- , coctu_id
- , year::int
- , tenure
- , housing_ty
- , res_permit
- , address
- , zip_code
- , name
- , buildings
- , units
- , age_restri
- , memory_car
- , assisted
- , com_off_re = 'Y'
- , sqf
- , public_fun = 'Y'
- , permit_val
- , community_
- , notes
- , pin
- , geom
-from
- residential_permits_raw
-where
- co_code = '053'
- and lower(ctu_name) = 'minneapolis';
-
-drop table if exists residential_permit_parcel;
-
-create table residential_permit_parcel (
- permit_id int references residential_permit (id)
- , parcel_id int references parcel (id)
- , type_ region_tag_type
-);
-
-with within as (
- select
- residential_permit.id as permit_id
- , parcel.id as parcel_id
- from
- parcel_with_geom as parcel
- join residential_permit on st_within (residential_permit.geom
- , parcel.geom)
- and to_date(year::text
- , 'YYYY') <@ parcel.valid
-)
-, not_within as (
- select
- id
- , year
- , geom
- from
- residential_permit
- where
- not exists (
- select
- permit_id
- from
- within
- where
- permit_id = id)
-)
-, closest as (
- select distinct on (permit.id)
- permit.id as permit_id
- , parcel.id as parcel_id
- from
- not_within as permit
- join parcel_with_geom as parcel on st_dwithin (permit.geom
- , parcel.geom
- , 100.0)
- and to_date(year::text
- , 'YYYY') <@ parcel.valid
- order by
- permit_id
- , st_distance (permit.geom
- , parcel.geom))
- insert into residential_permit_parcel
- select
- permit_id
- , parcel_id
- , 'within'::region_tag_type
- from
- within
- union all
- select
- permit_id
- , parcel_id
- , 'closest'::region_tag_type
-from
- closest;
-
-drop table if exists commercial_permit cascade;
-
-create table commercial_permit (
- id serial primary key
- , ctu_id text
- , coctu_id text
- , year int
- , nonres_gro text
- , nonres_sub text
- , nonres_typ text
- , bldg_name text
- , bldg_desc text
- , permit_typ text
- , permit_val numeric
- , sqf int
- , address text
- , zip_code text
- , pin text
- , geom geometry(multipoint , 26915)
-);
-
-create index commercial_permit_geom_idx on commercial_permit using gist (geom);
-
-insert into commercial_permit (ctu_id , coctu_id , year , nonres_gro , nonres_sub , nonres_typ , bldg_name , bldg_desc , permit_typ , permit_val , sqf , address , zip_code , pin , geom)
-select
- ctu_id
- , coctu_id
- , year::int
- , nonres_gro
- , nonres_sub
- , nonres_typ
- , bldg_name
- , bldg_desc
- , permit_typ
- , permit_val
- , sqf
- , address
- , zip_code
- , pin
- , geom
-from
- commercial_permits_raw
-where
- co_code = '053'
- and lower(ctu_name) = 'minneapolis';
-
-drop table if exists commercial_permit_parcel;
-
-create table commercial_permit_parcel (
- permit_id int references commercial_permit (id)
- , parcel_id int references parcel (id)
- , type_ region_tag_type
-);
-
-with within as (
- select
- commercial_permit.id as permit_id
- , parcel.id as parcel_id
- from
- parcel_with_geom as parcel
- join commercial_permit on st_within (commercial_permit.geom
- , parcel.geom)
- and to_date(year::text
- , 'YYYY') <@ parcel.valid
-)
-, not_within as (
- select
- id
- , year
- , geom
- from
- commercial_permit
- where
- not exists (
- select
- permit_id
- from
- within
- where
- permit_id = id)
-)
-, closest as (
- select distinct on (permit.id)
- permit.id as permit_id
- , parcel.id as parcel_id
- from
- not_within as permit
- join parcel_with_geom as parcel on st_dwithin (permit.geom
- , parcel.geom
- , 100.0)
- and to_date(year::text
- , 'YYYY') <@ parcel.valid
- order by
- permit_id
- , st_distance (permit.geom
- , parcel.geom))
- insert into commercial_permit_parcel
- select
- permit_id
- , parcel_id
- , 'within'::region_tag_type
- from
- within
- union all
- select
- permit_id
- , parcel_id
- , 'closest'::region_tag_type
-from
- closest;
-
diff --git a/etl/property_values.sql b/etl/property_values.sql
deleted file mode 100644
index 3e163f9c..00000000
--- a/etl/property_values.sql
+++ /dev/null
@@ -1,11 +0,0 @@
-drop view if exists property_values;
-
-create view property_values as (
- select
- id
- , pid
- , emv_total as value_
- , valid
- from
- parcel);
-
diff --git a/etl/real_estate_transactions.sql b/etl/real_estate_transactions.sql
deleted file mode 100644
index 6980b5ed..00000000
--- a/etl/real_estate_transactions.sql
+++ /dev/null
@@ -1,73 +0,0 @@
-drop table if exists real_estate_transactions_scraped;
-
-create table real_estate_transactions_scraped (
- parcel_id text
- , address text
- , sale_date date
- , sale_price numeric
- , building_area numeric
- , beds numeric
- , baths numeric
- , stories numeric
- , year_built numeric
- , neighborhood text
- , property_type text
-);
-
-\copy real_estate_transactions_scraped from 'zoning/data/processed/real_estate_transactions/real_estate_transactions.csv' with csv header delimiter ',';
-drop table if exists real_estate_transactions_raw;
-
-create table real_estate_transactions_raw (
- sale_id int
- , ecrv text
- , sale_date date
- , excluded_from_ratio_study text
- , pin text
- , num_parcels_in_sale int
- , formatted_address text
- , land_sale text
- , community_cd int
- , community_desc text
- , nbhd_cd int
- , nbhd_desc text
- , ward int
- , proptype_cd text
- , proptype_desc text
- , grantee1 text
- , grantee2 text
- , grantor1 text
- , grantor2 text
- , adj_sale_price int
- , gross_sale_price int
- , downpayment int
- , x numeric
- , y numeric
- , fid int
-);
-
-\copy real_estate_transactions_raw from 'zoning/data/raw/real_estate_transactions/Property_Sales_2019_to_2023.csv' with csv header delimiter ',';
-drop table if exists real_estate_transactions;
-
-create table real_estate_transactions (
- id serial primary key
- , parcel_id int references parcel (id)
- , address text
- , sale_date date
- , sale_price numeric
- , neighborhood text
- , property_type text
-);
-
-insert into real_estate_transactions (parcel_id , address , sale_date , sale_price , neighborhood , property_type)
-select
- parcel.id
- , address
- , scraped.sale_date
- , sale_price
- , neighborhood
- , property_type
-from
- real_estate_transactions_scraped as scraped
- join parcel on pid = parcel_id
- and scraped.sale_date <@ valid;
-
diff --git a/etl/segregation.sql b/etl/segregation.sql
deleted file mode 100644
index ba25e911..00000000
--- a/etl/segregation.sql
+++ /dev/null
@@ -1,92 +0,0 @@
-create or replace view categories as select * from (
- values
- ('population_white_non_hispanic'),
- ('population_black_non_hispanic'),
- ('population_hispanic_or_latino'),
- ('population_asian_non_hispanic'),
- ('population_native_hawaiian_or_pacific_islander_non_hispanic'),
- ('population_american_indian_or_alaska_native_non_hispanic'),
- ('population_multiple_races_non_hispanic'),
- ('population_other_non_hispanic')
-) as t (description);
-
-drop type if exists reference_distribution cascade;
-create type reference_distribution as enum (
- 'uniform'
- , 'annual_city'
- , 'average_city'
-);
-
-
--- Segregation index for each tract for each year, computed for each reference
--- distribution.
---
--- The segregation index is the KL-divergence between the distribution of
--- population in a tract and a reference distribution. For example, a tract that
--- has many more white people than the average for the city will have a high
--- segregation index for the 'average_city' distribution.
-
-drop table if exists segregation;
-
-create table segregation as (
-with
- pop_tyc as
- ( -- Population by tract, year, and category
- select id, year_, description, value_
- from acs_tract
- join acs_variable using (name_)
- join categories using (description)
- ),
- pop_ty as
- ( -- Population by tract and year (note: using 'population' variable instead of aggregating categories)
- select id, year_, value_
- from acs_tract join acs_variable using (name_)
- where description = 'population'
- ),
- pop_yc as
- ( -- Population by year and category
- select year_, description, sum(value_) as value_
- from pop_tyc group by year_, description
- ),
- pop_y as
- ( -- Population by year
- select year_, sum(value_) as value_ from pop_ty group by year_
- ),
- dist_yc as
- ( -- Distribution of population by year and category
- select description, c.year_,
- case t.value_ when 0 then 0 else c.value_ / t.value_ end as value_
- from pop_yc as c join pop_y as t using (year_)
- ),
- dist_tyc as
- ( -- Distribution of population by tract, year, and category
- select id, year_, description,
- case t.value_ when 0 then 0 else p.value_ / t.value_ end as value_
- from pop_tyc as p join pop_ty as t using (year_, id)
- ),
- uniform_dist as
- ( -- Uniform distribution across categories
- with n_cat as (select count(*) as n_cat from categories)
- select description, 1.0 / n_cat as value_
- from categories, n_cat
- ),
- average_dist as
- ( -- Average of the annual citywide distributions
- select description, avg(value_) as value_
- from dist_yc
- group by description
- )
-select id, year_, dist, sum(case when p = 0 or q = 0 then 0 else p * ln(p / q) end) as segregation_index
- from
- (
- select id, year_, 'uniform'::reference_distribution as dist, dist_tyc.value_ as p, uniform_dist.value_ as q
- from dist_tyc join uniform_dist using (description)
- union all
- select id, year_, 'annual_city'::reference_distribution as dist, dist_tyc.value_ as p, dist_yc.value_ as q
- from dist_tyc join dist_yc using (year_, description)
- union all
- select id, year_, 'average_city'::reference_distribution as dist, dist_tyc.value_ as p, average_dist.value_ as q
- from dist_tyc join average_dist using (description)
- )
- group by id, year_, dist
-);
diff --git a/etl/usps_migration.sql b/etl/usps_migration.sql
deleted file mode 100644
index 9a123bb4..00000000
--- a/etl/usps_migration.sql
+++ /dev/null
@@ -1,156 +0,0 @@
-drop type if exists usps_migration_flow_direction cascade;
-
-create type usps_migration_flow_direction as enum (
- 'in'
- , 'out'
-);
-
-drop enum if exists usps_migration_flow_type cascade;
-
-create type usps_migration_flow_type as enum (
- 'total'
- , 'business'
- , 'family'
- , 'individual'
- , 'perm'
- , 'temp'
-);
-
-drop table if exists usps_migration cascade;
-
-create table usps_migration (
- date_ date not null check (extract(day from date_) = 1) -- granularity is year-month
- , zip_id int references zip_code (id)
- , direction usps_migration_flow_direction not null
- , type_ usps_migration_flow_type not null
- , flow numeric
- , primary key (date_ , zip_id , direction , type_)
-);
-
-insert into usps_migration with process_date as (
- select
- to_date(yyyymm
- , 'YYYYMM') as date_
- , *
- from
- usps_migration_raw
-)
-, add_zip_id as (
- select
- zip_code.id as zip_id
- , mr.*
- from
- process_date as mr
- join zip_code on zip_code.zip_code = replace(mr.zip_code
- , '='
- , '')
- and zip_code.valid @> to_date(year_::text
- , 'YYYY'))
- select
- date_
- , zip_id
- , 'in'::usps_migration_flow_direction
- , 'total'::usps_migration_flow_type
- , total_from_zip
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'in'::usps_migration_flow_direction
- , 'business'::usps_migration_flow_type
- , total_from_zip_business
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'in'::usps_migration_flow_direction
- , 'family'::usps_migration_flow_type
- , total_from_zip_family
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'in'::usps_migration_flow_direction
- , 'individual'::usps_migration_flow_type
- , total_from_zip_individual
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'in'::usps_migration_flow_direction
- , 'perm'::usps_migration_flow_type
- , total_from_zip_perm
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'in'::usps_migration_flow_direction
- , 'temp'::usps_migration_flow_type
- , total_from_zip_temp
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'out'::usps_migration_flow_direction
- , 'total'::usps_migration_flow_type
- , total_to_zip
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'out'::usps_migration_flow_direction
- , 'business'::usps_migration_flow_type
- , total_to_zip_business
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'out'::usps_migration_flow_direction
- , 'family'::usps_migration_flow_type
- , total_to_zip_family
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'out'::usps_migration_flow_direction
- , 'individual'::usps_migration_flow_type
- , total_to_zip_individual
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'out'::usps_migration_flow_direction
- , 'perm'::usps_migration_flow_type
- , total_to_zip_perm
- from
- add_zip_id
- union all
- select
- date_
- , zip_id
- , 'out'::usps_migration_flow_direction
- , 'temp'::usps_migration_flow_type
- , total_to_zip_temp
- from
- add_zip_id;
-
diff --git a/etl/usps_migration_raw_schema.sql b/etl/usps_migration_raw_schema.sql
deleted file mode 100644
index 50a823ff..00000000
--- a/etl/usps_migration_raw_schema.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-drop table if exists usps_migration_raw cascade;
-
-create table usps_migration_raw (
- yyyymm text
- , zip_code text
- , city text
- , state text
- , total_from_zip numeric
- , total_from_zip_business numeric
- , total_from_zip_family numeric
- , total_from_zip_individual numeric
- , total_from_zip_perm numeric
- , total_from_zip_temp numeric
- , total_to_zip numeric
- , total_to_zip_business numeric
- , total_to_zip_family numeric
- , total_to_zip_individual numeric
- , total_to_zip_perm numeric
- , total_to_zip_temp numeric
- , year_ int
-);
-
diff --git a/etl/zip_schema.sql b/etl/zip_schema.sql
deleted file mode 100644
index 1d9ab6c8..00000000
--- a/etl/zip_schema.sql
+++ /dev/null
@@ -1,25 +0,0 @@
-drop table if exists zip_code cascade;
-
-create table zip_code (
- id serial primary key
- , zip_code text not null
- , valid daterange not null
- , geom geometry(MultiPolygon , 4269) not null
-);
-
-create index zip_code_geom_idx on zip_code using gist (geom);
-
-insert into zip_code (zip_code , valid , geom)
-select
- zcta5ce20
- , '[2020-01-01,)'::daterange
- , geom
-from
- zip_raw_2020
-union
-select
- zcta
- , '[2000-01-01,2020-01-01)'::daterange
- , ST_Transform (geom , 4269)
-from
- zip_raw_2000
From 028368fb246fb2250072a086296f345ee26bb851 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 20 Aug 2024 13:14:58 -0400
Subject: [PATCH 063/142] add dependencies to setup.py
---
setup.py | 52 +++++++++++++++++++++++++++++++---------------------
1 file changed, 31 insertions(+), 21 deletions(-)
diff --git a/setup.py b/setup.py
index 61a0324a..419f142a 100644
--- a/setup.py
+++ b/setup.py
@@ -4,23 +4,27 @@
VERSION = "0.1.0"
TEST_REQUIRES = [
- "pytest",
- "pytest-cov",
- "pytest-xdist",
- "mypy",
- "black",
- "flake8",
- "isort",
- "nbval",
- "nbqa",
- "autoflake",
- ]
+ "pytest",
+ "pytest-cov",
+ "pytest-xdist",
+ "mypy",
+ "black",
+ "flake8",
+ "isort",
+ "nbval",
+ "nbqa",
+ "autoflake",
+]
DEV_REQUIRES = [
"pyro-ppl>=1.8.5",
- "torch", "plotly.express",
- "scipy",
- "chirho", "graphviz",
+ "torch",
+ "plotly.express",
+ "scipy",
+ "chirho",
+ "graphviz",
+ "python-dotenv",
+ "google-cloud-storage",
]
setup(
@@ -31,14 +35,20 @@
author="Basis",
url="https://www.basis.ai/",
project_urls={
- # "Documentation": "",
+ # "Documentation": "",
"Source": "https://github.com/BasisResearch/cities",
},
- install_requires=["jupyter","pandas", "numpy", "scikit-learn","dill", "plotly", "matplotlib>=3.8.2"],
- extras_require={
- "test": TEST_REQUIRES,
- "dev": DEV_REQUIRES + TEST_REQUIRES
- },
+ install_requires=[
+ "jupyter",
+ "pandas",
+ "numpy",
+ "scikit-learn",
+ "dill",
+ "plotly",
+ "matplotlib>=3.8.2",
+ ],
+ extras_require={"test": TEST_REQUIRES, "dev": DEV_REQUIRES + TEST_REQUIRES},
python_requires=">=3.10",
keywords="similarity, causal inference, policymaking, chirho",
- license="Apache 2.0",)
+ license="Apache 2.0",
+)
From 8e7ebf8f0a7b08787f82b89a46864e377a767182 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 20 Aug 2024 13:20:35 -0400
Subject: [PATCH 064/142] format
---
load_data_server/load_server.py | 259 ++++++++++++++++++++------------
1 file changed, 165 insertions(+), 94 deletions(-)
diff --git a/load_data_server/load_server.py b/load_data_server/load_server.py
index cbf82881..602b7295 100644
--- a/load_data_server/load_server.py
+++ b/load_data_server/load_server.py
@@ -14,61 +14,76 @@
load_dotenv()
# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
# DATA INFO
-PROJECT_NAME = os.getenv('GOOGLE_CLOUD_PROJECT')
-BUCKET_NAME = os.getenv('GOOGLE_CLOUD_BUCKET')
+PROJECT_NAME = os.getenv("GOOGLE_CLOUD_PROJECT")
+BUCKET_NAME = os.getenv("GOOGLE_CLOUD_BUCKET")
# Paths inside the bucket
FOLDERS = [
- 'fair_market_rents',
+ "fair_market_rents",
]
# DATABASE INFO
-SCHEMA = os.getenv('SCHEMA')
-HOST = os.getenv('HOST')
-DATABASE = os.getenv('DATABASE')
-USERNAME = os.getenv('USERNAME')
-PASSWORD = os.getenv('PASSWORD')
+SCHEMA = os.getenv("SCHEMA")
+HOST = os.getenv("HOST")
+DATABASE = os.getenv("DATABASE")
+USERNAME = os.getenv("USERNAME")
+PASSWORD = os.getenv("PASSWORD")
OGR2OGR_OPTS = [
- "--config", "PG_USE_COPY", "YES",
+ "--config",
+ "PG_USE_COPY",
+ "YES",
"-progress",
- "-lco", "PRECISION=NO",
+ "-lco",
+ "PRECISION=NO",
"-overwrite",
- "-lco", "GEOMETRY_NAME=geom",
- "-nlt", "PROMOTE_TO_MULTI",
+ "-lco",
+ "GEOMETRY_NAME=geom",
+ "-nlt",
+ "PROMOTE_TO_MULTI",
+]
+DB_OPTS = [
+ f"PG:dbname={DATABASE} host={HOST} user={USERNAME} password={PASSWORD} port=5432"
]
-DB_OPTS = [f"PG:dbname={DATABASE} host={HOST} user={USERNAME} password={PASSWORD} port=5432"]
MAX_RETRIES = 3
RETRY_DELAY = 5 # seconds
+
def get_db_connection():
"""Create a database connection with retries."""
for attempt in range(MAX_RETRIES):
try:
conn = psycopg2.connect(
- host=HOST,
- database=DATABASE,
- user=USERNAME,
- password=PASSWORD
+ host=HOST, database=DATABASE, user=USERNAME, password=PASSWORD
)
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
return conn
except psycopg2.OperationalError as e:
if attempt < MAX_RETRIES - 1:
- logging.warning(f"Connection attempt {attempt + 1} failed. Retrying in {RETRY_DELAY} seconds...")
+ logging.warning(
+ f"Connection attempt {attempt + 1} failed. Retrying in {RETRY_DELAY} seconds..."
+ )
time.sleep(RETRY_DELAY)
else:
- logging.error(f"Failed to connect to the database after {MAX_RETRIES} attempts: {e}")
+ logging.error(
+ f"Failed to connect to the database after {MAX_RETRIES} attempts: {e}"
+ )
raise
+
def create_schema_if_not_exists(conn):
"""Create the schema if it doesn't exist."""
with conn.cursor() as cur:
- cur.execute(f"SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = %s);", (SCHEMA,))
+ cur.execute(
+ "SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = %s);",
+ (SCHEMA,),
+ )
schema_exists = cur.fetchone()[0]
if not schema_exists:
@@ -77,68 +92,87 @@ def create_schema_if_not_exists(conn):
else:
logging.info(f"Schema '{SCHEMA}' already exists.")
+
def generate_table_name(blob_name):
"""Generate a PostgreSQL-friendly table name from the blob name, including all parent folders and removing duplicates."""
table_name = os.path.splitext(blob_name)[0]
- path_components = table_name.split('/')
-
+ path_components = table_name.split("/")
+
# Remove any leading empty components
path_components = [comp for comp in path_components if comp]
-
- table_name = '_'.join(path_components)
- table_name = table_name.replace('-', '_').replace('.', '_')
-
- words = table_name.split('_')
+
+ table_name = "_".join(path_components)
+ table_name = table_name.replace("-", "_").replace(".", "_")
+
+ words = table_name.split("_")
unique_words = []
for word in words:
if word.lower() not in (w.lower() for w in unique_words):
unique_words.append(word)
-
- table_name = '_'.join(unique_words)
- table_name = re.sub('_+', '_', table_name)
-
+
+ table_name = "_".join(unique_words)
+ table_name = re.sub("_+", "_", table_name)
+
if table_name[0].isdigit():
- table_name = 'f_' + table_name
-
+ table_name = "f_" + table_name
+
if len(table_name) > 63:
table_name = table_name[:63]
-
- table_name = table_name.rstrip('_')
-
+
+ table_name = table_name.rstrip("_")
+
return table_name.lower()
+
def table_exists(conn, table_name):
"""Check if a table exists in the specified schema."""
with conn.cursor() as cur:
- cur.execute("""
+ cur.execute(
+ """
SELECT EXISTS (
- SELECT FROM information_schema.tables
+ SELECT FROM information_schema.tables
WHERE table_schema = %s AND table_name = %s
);
- """, (SCHEMA, table_name))
+ """,
+ (SCHEMA, table_name),
+ )
return cur.fetchone()[0]
+
def drop_table_if_exists(conn, table_name):
"""Drop the table if it exists."""
with conn.cursor() as cur:
cur.execute(f"DROP TABLE IF EXISTS {SCHEMA}.{table_name} CASCADE;")
+
def load_into_server(conn, file_path, file_type):
table_name = os.path.splitext(os.path.basename(file_path))[0]
full_table_name = f"{SCHEMA}.{table_name}"
-
+
if table_exists(conn, table_name):
drop_table_if_exists(conn, table_name)
-
+
# Upload the file based on its type
- if file_type == 'shp':
- upload_command = ["ogr2ogr"] + OGR2OGR_OPTS + ["-nln", full_table_name] + DB_OPTS + [file_path]
- elif file_type == 'geojson':
- upload_command = ["ogr2ogr"] + OGR2OGR_OPTS + ["-f", "PostgreSQL"] + DB_OPTS + [file_path, "-nln", full_table_name]
+ if file_type == "shp":
+ upload_command = (
+ ["ogr2ogr"]
+ + OGR2OGR_OPTS
+ + ["-nln", full_table_name]
+ + DB_OPTS
+ + [file_path]
+ )
+ elif file_type == "geojson":
+ upload_command = (
+ ["ogr2ogr"]
+ + OGR2OGR_OPTS
+ + ["-f", "PostgreSQL"]
+ + DB_OPTS
+ + [file_path, "-nln", full_table_name]
+ )
else:
logging.error(f"Unsupported file type: {file_type}")
return False
-
+
for attempt in range(MAX_RETRIES):
try:
subprocess.check_call(upload_command)
@@ -146,110 +180,135 @@ def load_into_server(conn, file_path, file_type):
return True
except subprocess.CalledProcessError as e:
if attempt < MAX_RETRIES - 1:
- logging.warning(f"Attempt {attempt + 1} failed for {file_path}. Retrying in {RETRY_DELAY} seconds...")
+ logging.warning(
+ f"Attempt {attempt + 1} failed for {file_path}. Retrying in {RETRY_DELAY} seconds..."
+ )
time.sleep(RETRY_DELAY)
else:
- logging.error(f"Failed to process {file_path} after {MAX_RETRIES} attempts: {e}")
+ logging.error(
+ f"Failed to process {file_path} after {MAX_RETRIES} attempts: {e}"
+ )
return False
+
def group_shapefile_components(blobs):
"""Group Shapefile components together."""
shapefile_groups = {}
for blob in blobs:
name, ext = os.path.splitext(blob.name)
- if ext.lower() in ['.shp', '.shx', '.dbf', '.prj']:
+ if ext.lower() in [".shp", ".shx", ".dbf", ".prj"]:
if name not in shapefile_groups:
shapefile_groups[name] = []
shapefile_groups[name].append(blob)
return shapefile_groups
+
def process_geojson(conn, blob):
table_name = generate_table_name(blob.name)
if table_exists(conn, table_name):
return False # Table already exists, skip processing
-
+
full_table_name = f"{SCHEMA}.{table_name}"
- file_path = os.path.join('/tmp', os.path.basename(blob.name))
+ file_path = os.path.join("/tmp", os.path.basename(blob.name))
blob.download_to_filename(file_path)
-
- upload_command = ["ogr2ogr"] + OGR2OGR_OPTS + ["-f", "PostgreSQL"] + DB_OPTS + [file_path, "-nln", full_table_name]
-
+
+ upload_command = (
+ ["ogr2ogr"]
+ + OGR2OGR_OPTS
+ + ["-f", "PostgreSQL"]
+ + DB_OPTS
+ + [file_path, "-nln", full_table_name]
+ )
+
success = False
for attempt in range(MAX_RETRIES):
try:
- subprocess.check_call(upload_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+ subprocess.check_call(
+ upload_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+ )
success = True
break
except subprocess.CalledProcessError:
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_DELAY)
-
+
os.remove(file_path)
return success
+
def process_shapefile(conn, component_blobs):
- shp_blob = next(blob for blob in component_blobs if blob.name.endswith('.shp'))
+ shp_blob = next(blob for blob in component_blobs if blob.name.endswith(".shp"))
table_name = generate_table_name(shp_blob.name)
-
+
if table_exists(conn, table_name):
return False # Table already exists, skip processing
- temp_dir = os.path.join('/tmp', table_name)
+ temp_dir = os.path.join("/tmp", table_name)
os.makedirs(temp_dir, exist_ok=True)
-
+
for blob in component_blobs:
file_ext = os.path.splitext(blob.name)[1]
file_name = f"{table_name}{file_ext}"
file_path = os.path.join(temp_dir, file_name)
blob.download_to_filename(file_path)
-
+
shp_file = f"{table_name}.shp"
shp_path = os.path.join(temp_dir, shp_file)
-
+
full_table_name = f"{SCHEMA}.{table_name}"
-
- upload_command = ["ogr2ogr"] + OGR2OGR_OPTS + ["-nln", full_table_name] + DB_OPTS + [shp_path]
-
+
+ upload_command = (
+ ["ogr2ogr"] + OGR2OGR_OPTS + ["-nln", full_table_name] + DB_OPTS + [shp_path]
+ )
+
success = False
for attempt in range(MAX_RETRIES):
try:
- subprocess.check_call(upload_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+ subprocess.check_call(
+ upload_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+ )
success = True
break
except subprocess.CalledProcessError:
if attempt < MAX_RETRIES - 1:
time.sleep(RETRY_DELAY)
-
+
for file in os.listdir(temp_dir):
os.remove(os.path.join(temp_dir, file))
os.rmdir(temp_dir)
-
+
return success
+
def load_csv_into_server(conn, file_path, full_table_name):
"""Load a CSV file into the PostgreSQL server."""
try:
- with open(file_path, 'r') as f:
+ with open(file_path, "r") as f:
cursor = conn.cursor()
# Read and sanitize the header row
- header = f.readline().strip().split(',')
- sanitized_header = [re.sub(r'[^a-zA-Z0-9_]', '_', col.strip('"').strip()) for col in header]
-
+ header = f.readline().strip().split(",")
+ sanitized_header = [
+ re.sub(r"[^a-zA-Z0-9_]", "_", col.strip('"').strip()) for col in header
+ ]
+
# Ensure column names are unique
seen = set()
- sanitized_header = [col if col not in seen and not seen.add(col) else f"{col}_dup" for col in sanitized_header]
-
+ sanitized_header = [
+ col if col not in seen and not seen.add(col) else f"{col}_dup"
+ for col in sanitized_header
+ ]
+
create_table_sql = f"""
CREATE TABLE {full_table_name} (
{','.join([f'"{col}" TEXT' for col in sanitized_header])}
);
"""
cursor.execute(create_table_sql)
-
+
# Reset file pointer to beginning
f.seek(0)
-
+
# Use COPY to load the data into the table
cursor.copy_expert(f"COPY {full_table_name} FROM STDIN WITH CSV HEADER", f)
conn.commit()
@@ -259,19 +318,20 @@ def load_csv_into_server(conn, file_path, full_table_name):
conn.rollback()
return False
+
def process_csv(conn, blob):
"""Process a CSV file from Google Cloud Storage and load it into the database."""
# Generate a table name based on the blob name
table_name = generate_table_name(blob.name)
full_table_name = f"{SCHEMA}.{table_name}"
-
+
# Check if the table already exists
if table_exists(conn, table_name):
return False # Table already exists, skip processing
-
+
# Download the CSV file to a temporary location
temp_file_name = f"temp_{table_name}.csv"
- temp_file_path = os.path.join('/tmp', temp_file_name)
+ temp_file_path = os.path.join("/tmp", temp_file_name)
blob.download_to_filename(temp_file_path)
try:
@@ -283,45 +343,48 @@ def process_csv(conn, blob):
if os.path.exists(temp_file_path):
os.remove(temp_file_path)
+
def count_processable_files(blobs):
"""Count the number of files that will be processed."""
count = 0
shapefile_groups = group_shapefile_components(blobs)
for blob in blobs:
- if blob.name.endswith('.geojson') or blob.name.endswith('.csv'):
+ if blob.name.endswith(".geojson") or blob.name.endswith(".csv"):
count += 1
- elif blob.name.endswith('.shp'):
+ elif blob.name.endswith(".shp"):
base_name = os.path.splitext(blob.name)[0]
if base_name in shapefile_groups:
count += 1
return count
+
def process_file(conn, blob, shapefile_groups, processed_shapefiles):
"""Process a single file and return whether it was processed."""
- if blob.name.endswith('.geojson'):
+ if blob.name.endswith(".geojson"):
return process_geojson(conn, blob)
- elif blob.name.endswith('.shp'):
+ elif blob.name.endswith(".shp"):
base_name = os.path.splitext(blob.name)[0]
if base_name in shapefile_groups and base_name not in processed_shapefiles:
success = process_shapefile(conn, shapefile_groups[base_name])
if success:
processed_shapefiles.add(base_name)
return success
- elif blob.name.endswith('.csv'):
+ elif blob.name.endswith(".csv"):
return process_csv(conn, blob)
return False
-def download_and_process_files(bucket, conn, folder_prefix=''):
+
+def download_and_process_files(bucket, conn, folder_prefix=""):
"""Download and process files from the specified folder and its subfolders in the GCS bucket."""
blobs = list(bucket.list_blobs(prefix=folder_prefix))
total_files = count_processable_files(blobs)
shapefile_groups = group_shapefile_components(blobs)
-
+
processed_shapefiles = set()
-
+
with tqdm(total=total_files, desc="Processing files", unit="file") as pbar:
for blob in blobs:
- if blob.name.endswith('/'): # This is a folder
+ if blob.name.endswith("/"): # This is a folder
continue
processed = process_file(conn, blob, shapefile_groups, processed_shapefiles)
if processed:
@@ -330,12 +393,13 @@ def download_and_process_files(bucket, conn, folder_prefix=''):
pbar.total -= 1
pbar.refresh()
+
def main(process_entire_bucket=False):
try:
# Initialize Google Cloud Storage client
storage_client = storage.Client(project=PROJECT_NAME)
bucket = storage_client.bucket(BUCKET_NAME)
-
+
# Connect to the database
conn = get_db_connection()
create_schema_if_not_exists(conn)
@@ -354,12 +418,19 @@ def main(process_entire_bucket=False):
except Exception as e:
print(f"An error occurred: {e}")
finally:
- if 'conn' in locals() and conn:
+ if "conn" in locals() and conn:
conn.close()
+
if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Process files from Google Cloud Storage bucket")
- parser.add_argument('--full-bucket', action='store_true', help='Process the entire bucket instead of specific folders')
+ parser = argparse.ArgumentParser(
+ description="Process files from Google Cloud Storage bucket"
+ )
+ parser.add_argument(
+ "--full-bucket",
+ action="store_true",
+ help="Process the entire bucket instead of specific folders",
+ )
args = parser.parse_args()
- main(process_entire_bucket=args.full_bucket)
\ No newline at end of file
+ main(process_entire_bucket=args.full_bucket)
From d2cfa81035b1778b8fcf4cb0e68ecd8fb5fa21c4 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 20 Aug 2024 13:25:08 -0400
Subject: [PATCH 065/142] ensure postgis is loaded
---
load_data_server/load_server.py | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)
diff --git a/load_data_server/load_server.py b/load_data_server/load_server.py
index 602b7295..f0466aad 100644
--- a/load_data_server/load_server.py
+++ b/load_data_server/load_server.py
@@ -80,17 +80,8 @@ def get_db_connection():
def create_schema_if_not_exists(conn):
"""Create the schema if it doesn't exist."""
with conn.cursor() as cur:
- cur.execute(
- "SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = %s);",
- (SCHEMA,),
- )
- schema_exists = cur.fetchone()[0]
-
- if not schema_exists:
- cur.execute(f"CREATE SCHEMA {SCHEMA};")
- logging.info(f"Schema '{SCHEMA}' created.")
- else:
- logging.info(f"Schema '{SCHEMA}' already exists.")
+ cur.execute(f"create schema if not exists {SCHEMA};")
+ cur.execute("create extension if not exists postgis;")
def generate_table_name(blob_name):
From 6870b936e51f0dc23b4eecc0776882d86d830e9f Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 20 Aug 2024 13:28:26 -0400
Subject: [PATCH 066/142] unconditionally load all data
---
load_data_server/load_server.py | 29 ++---------------------------
1 file changed, 2 insertions(+), 27 deletions(-)
diff --git a/load_data_server/load_server.py b/load_data_server/load_server.py
index f0466aad..4dd98015 100644
--- a/load_data_server/load_server.py
+++ b/load_data_server/load_server.py
@@ -115,21 +115,6 @@ def generate_table_name(blob_name):
return table_name.lower()
-def table_exists(conn, table_name):
- """Check if a table exists in the specified schema."""
- with conn.cursor() as cur:
- cur.execute(
- """
- SELECT EXISTS (
- SELECT FROM information_schema.tables
- WHERE table_schema = %s AND table_name = %s
- );
- """,
- (SCHEMA, table_name),
- )
- return cur.fetchone()[0]
-
-
def drop_table_if_exists(conn, table_name):
"""Drop the table if it exists."""
with conn.cursor() as cur:
@@ -140,8 +125,7 @@ def load_into_server(conn, file_path, file_type):
table_name = os.path.splitext(os.path.basename(file_path))[0]
full_table_name = f"{SCHEMA}.{table_name}"
- if table_exists(conn, table_name):
- drop_table_if_exists(conn, table_name)
+ drop_table_if_exists(conn, table_name)
# Upload the file based on its type
if file_type == "shp":
@@ -196,9 +180,6 @@ def group_shapefile_components(blobs):
def process_geojson(conn, blob):
table_name = generate_table_name(blob.name)
- if table_exists(conn, table_name):
- return False # Table already exists, skip processing
-
full_table_name = f"{SCHEMA}.{table_name}"
file_path = os.path.join("/tmp", os.path.basename(blob.name))
@@ -232,9 +213,6 @@ def process_shapefile(conn, component_blobs):
shp_blob = next(blob for blob in component_blobs if blob.name.endswith(".shp"))
table_name = generate_table_name(shp_blob.name)
- if table_exists(conn, table_name):
- return False # Table already exists, skip processing
-
temp_dir = os.path.join("/tmp", table_name)
os.makedirs(temp_dir, exist_ok=True)
@@ -291,6 +269,7 @@ def load_csv_into_server(conn, file_path, full_table_name):
]
create_table_sql = f"""
+ drop table if exists {full_table_name};
CREATE TABLE {full_table_name} (
{','.join([f'"{col}" TEXT' for col in sanitized_header])}
);
@@ -316,10 +295,6 @@ def process_csv(conn, blob):
table_name = generate_table_name(blob.name)
full_table_name = f"{SCHEMA}.{table_name}"
- # Check if the table already exists
- if table_exists(conn, table_name):
- return False # Table already exists, skip processing
-
# Download the CSV file to a temporary location
temp_file_name = f"temp_{table_name}.csv"
temp_file_path = os.path.join("/tmp", temp_file_name)
From a72f6f0e02d61fcf4bc5e600cd3cd2d07e246306 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 20 Aug 2024 14:06:52 -0400
Subject: [PATCH 067/142] null out missing data in residential_permits
---
dbt/models/residential_permits.sql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index c4fb4267..35018922 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -22,9 +22,9 @@ select
, memory_car as num_memory_care_units
, assisted as num_assisted_living_units
, com_off_re = 'Y' as is_commercial_and_residential
- , sqf as square_feet
+ , nullif(sqf, 0) as square_feet
, public_fun = 'Y' as is_public_funded
- , permit_val as permit_value
+ , nullif(permit_val, 0) as permit_value
, community_ as community_designation
, notes
, st_transform(geom, {{ var("srid") }}) as geom
From d33588ebad1aad27fb8d9073e47a4e6469b260fc Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 20 Aug 2024 15:33:05 -0400
Subject: [PATCH 068/142] ensure table drop succeeds
---
load_data_server/load_server.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/load_data_server/load_server.py b/load_data_server/load_server.py
index 4dd98015..8c3089e8 100644
--- a/load_data_server/load_server.py
+++ b/load_data_server/load_server.py
@@ -269,7 +269,7 @@ def load_csv_into_server(conn, file_path, full_table_name):
]
create_table_sql = f"""
- drop table if exists {full_table_name};
+ drop table if exists {full_table_name} cascade;
CREATE TABLE {full_table_name} (
{','.join([f'"{col}" TEXT' for col in sanitized_header])}
);
From 6c3b5757e14249abf599a6733929d6f0f8381a01 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 20 Aug 2024 15:57:31 -0400
Subject: [PATCH 069/142] start adding tables to be used by the model
---
dbt/models/housing_units_by_census_tracts.sql | 62 +++++++++++++++++++
.../property_values_by_census_tracts.sql | 22 +++++++
.../residential_permits_to_census_tracts.sql | 31 ++++++++++
3 files changed, 115 insertions(+)
create mode 100644 dbt/models/housing_units_by_census_tracts.sql
create mode 100644 dbt/models/property_values_by_census_tracts.sql
create mode 100644 dbt/models/residential_permits_to_census_tracts.sql
diff --git a/dbt/models/housing_units_by_census_tracts.sql b/dbt/models/housing_units_by_census_tracts.sql
new file mode 100644
index 00000000..f73b49f5
--- /dev/null
+++ b/dbt/models/housing_units_by_census_tracts.sql
@@ -0,0 +1,62 @@
+with census_tracts as (
+ select
+ census_tract_id
+ , statefp || countyfp || tractce as census_tract
+ from {{ ref('census_tracts') }}
+)
+, parcels as (
+ select
+ parcel_id
+ , geom
+ from {{ ref('parcels') }}
+)
+, residential_permits as (
+ select
+ residential_permit_id
+ , year_
+ , permit_value
+ from {{ ref('residential_permits') }}
+)
+, residential_permits_to_parcels as (
+ select
+ residential_permit_id
+ , parcel_id
+ from {{ ref('residential_permits_to_parcels') }}
+)
+, residential_permits_to_census_tracts as (
+ select
+ residential_permit_id
+ , census_tract_id
+ from {{ ref('residential_permits_to_census_tracts') }}
+)
+, residential as (
+ select
+ census_tracts.census_tract
+ , residential_permits.year_
+ , residential_permits.housing_units
+ , st_area(parcels.geom) as parcel_sqm
+ , residential_permits.permit_value
+ from
+ residential_permits
+ inner join residential_permits_to_parcels using (residential_permit_id)
+ inner join parcels using (parcel_id)
+ inner join residential_permits_to_census_tracts using (residential_permit_id)
+ inner join census_tracts using (census_tract_id)
+ where year_ <= 2020
+)
+, agg_residential as (
+ select
+ census_tract
+ , year_
+ , sum(housing_units) as housing_units
+ from residential
+ group by census_tract, year_
+)
+
+select
+ census_tract
+ , year_
+ , housing_units -- do we really want the total _applied_ units, or should we
+ -- be looking at the total unit estimates from ACS?
+from
+ agg_residential
diff --git a/dbt/models/property_values_by_census_tracts.sql b/dbt/models/property_values_by_census_tracts.sql
new file mode 100644
index 00000000..0c187448
--- /dev/null
+++ b/dbt/models/property_values_by_census_tracts.sql
@@ -0,0 +1,22 @@
+-- Median and total parcel property values aggregated by census tract.
+
+with parcels as (
+ select
+ parcel_id
+ , emv_total
+ from {{ ref('parcels_base') }}
+)
+, parcels_to_census_tracts as (
+ select
+ parcel_id
+ , census_tract_id
+ from {{ ref('parcels_to_census_tracts') }}
+)
+select
+ parcels_to_census_tracts.census_tract_id
+ , sum(parcels.emv_total) as total_value
+ , percentile_cont(0.5) within group (order by parcels.emv_total) as median_value
+from
+ parcels_to_census_tracts using (parcel_id)
+ inner join parcels using (parcel_id)
+group by census_tracts.census_tract_id
diff --git a/dbt/models/residential_permits_to_census_tracts.sql b/dbt/models/residential_permits_to_census_tracts.sql
new file mode 100644
index 00000000..79a48be4
--- /dev/null
+++ b/dbt/models/residential_permits_to_census_tracts.sql
@@ -0,0 +1,31 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['residential_permit_id']},
+ {'columns': ['census_tract_id']}
+ ]
+ )
+}}
+
+with
+residential_permits as (
+ select
+ residential_permit_id as id
+ , daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid
+ , geom
+ from {{ ref("residential_permits") }}
+)
+, census_tracts as (
+ select
+ census_tract_id as id
+ , valid
+ , geom
+ from {{ ref("census_tracts") }}
+)
+select
+ child_id as residential_permit_id
+ , parent_id as census_tract_id
+ , valid
+ , type_
+from {{ tag_regions("residential_permits", "census_tracts") }}
From a820807868005e02adf7a501c67fd7eb7a619058 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 20 Aug 2024 16:59:18 -0400
Subject: [PATCH 070/142] fix misnamed field
---
dbt/models/city_boundary.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dbt/models/city_boundary.sql b/dbt/models/city_boundary.sql
index 88af8782..d9bfa060 100644
--- a/dbt/models/city_boundary.sql
+++ b/dbt/models/city_boundary.sql
@@ -1,5 +1,5 @@
select
- ogc_id as city_boundary_id
+ ogc_fid as city_boundary_id
, st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'city_boundary_minneapolis') }}
From 9590725f0d9ec89f56e3525d9f4e2cb82dc37728 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 21 Aug 2024 11:43:15 -0400
Subject: [PATCH 071/142] remove last references to old raw schema
---
dbt/models/acs_block_group.sql | 2 +-
dbt/models/acs_block_group_clean.sql | 22 ++++++++++++++++++++++
dbt/models/acs_tract_clean.sql | 14 +++++++-------
dbt/models/schema.yml | 8 ++------
4 files changed, 32 insertions(+), 14 deletions(-)
create mode 100644 dbt/models/acs_block_group_clean.sql
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
index 98545ebb..6165c8ac 100644
--- a/dbt/models/acs_block_group.sql
+++ b/dbt/models/acs_block_group.sql
@@ -29,7 +29,7 @@ census_block_groups as (
, name_
, value_
from
- {{ source('minneapolis_old', 'acs_bg_raw') }}
+ {{ ref('acs_block_group_clean') }}
)
select
census_block_groups.census_block_group_id
diff --git a/dbt/models/acs_block_group_clean.sql b/dbt/models/acs_block_group_clean.sql
new file mode 100644
index 00000000..d629d782
--- /dev/null
+++ b/dbt/models/acs_block_group_clean.sql
@@ -0,0 +1,22 @@
+with
+acs_bg_raw as (
+ select
+ statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , year
+ , code
+ , value
+ from {{ source('minneapolis', 'acs_bg_raw') }}
+)
+select
+ statefp
+ , countyfp
+ , tractce
+ , blkgrpce
+ , year as year_
+ , code as name_
+ , case when "value" < 0 then null else "value" end as value_
+from
+ acs_bg_raw
diff --git a/dbt/models/acs_tract_clean.sql b/dbt/models/acs_tract_clean.sql
index bd5638a8..1c631ff4 100644
--- a/dbt/models/acs_tract_clean.sql
+++ b/dbt/models/acs_tract_clean.sql
@@ -4,17 +4,17 @@ acs_tract_raw as (
statefp
, countyfp
, tractce
- , year_
- , name_
- , value_
- from {{ source('minneapolis_old', 'acs_tract_raw') }}
+ , year
+ , code
+ , value
+ from {{ source('minneapolis', 'acs_tract_raw') }}
)
select
statefp
, countyfp
, tractce
- , year_
- , name_
- , case when value_ < 0 then null else value_ end as value_
+ , year as year_
+ , code as name_
+ , case when "value" < 0 then null else "value" end as value_
from
acs_tract_raw
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index f78e1251..f9b0ddab 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -1,14 +1,10 @@
sources:
- - name: minneapolis_old
- database: cities
- schema: public
- tables:
- - name: acs_bg_raw
- - name: acs_tract_raw
- name: minneapolis
database: cities
schema: minneapolis
tables:
+ - name: acs_bg_raw
+ - name: acs_tract_raw
- name: residential_permits_residentialpermits
- name: commercial_permits_nonresidentialconstruction
- name: high_frequency_transit_2015_freq_350_ft_buffer
From 42847e5e8382548386839fef3f894c485e9559ab Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 21 Aug 2024 11:43:29 -0400
Subject: [PATCH 072/142] add new loader for acs data
---
load_data_server/load_acs.py | 183 +++++++++++++++++++++++++++++++++++
1 file changed, 183 insertions(+)
create mode 100644 load_data_server/load_acs.py
diff --git a/load_data_server/load_acs.py b/load_data_server/load_acs.py
new file mode 100644
index 00000000..23ae704e
--- /dev/null
+++ b/load_data_server/load_acs.py
@@ -0,0 +1,183 @@
+import logging
+import os
+import tempfile
+
+from dotenv import load_dotenv
+from google.cloud import storage
+import psycopg2
+from tqdm import tqdm
+
+# Load environment variables
+load_dotenv()
+
+# Set up logging
+logging.basicConfig(
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+# DATA INFO
+PROJECT_NAME = os.getenv("GOOGLE_CLOUD_PROJECT")
+BUCKET_NAME = os.getenv("GOOGLE_CLOUD_BUCKET")
+
+# DATABASE INFO
+SCHEMA = os.getenv("SCHEMA")
+HOST = os.getenv("HOST")
+DATABASE = os.getenv("DATABASE")
+USERNAME = os.getenv("USERNAME")
+PASSWORD = os.getenv("PASSWORD")
+
+YEAR_RANGE = range(2013, 2023)
+ACS_CODES = {
+ "B03002_003E": "population_white_non_hispanic",
+ "B03002_004E": "population_black_non_hispanic",
+ "B03002_005E": "population_asian_non_hispanic",
+ "B03002_006E": "population_native_hawaiian_or_pacific_islander_non_hispanic",
+ "B03002_007E": "population_american_indian_or_alaska_native_non_hispanic",
+ "B03002_008E": "population_other_non_hispanic",
+ "B03002_009E": "population_multiple_races_non_hispanic",
+ "B03002_010E": "population_multiple_races_and_other_non_hispanic",
+ "B07204_001E": "geographic_mobility_total_responses",
+ "B07204_002E": "geographic_mobility_same_house_1_year_ago",
+ "B07204_004E": "geographic_mobility_different_house_1_year_ago_same_city",
+ "B07204_005E": "geographic_mobility_different_house_1_year_ago_same_county",
+ "B07204_006E": "geographic_mobility_different_house_1_year_ago_same_state",
+ "B07204_007E": "geographic_mobility_different_house_1_year_ago_same_country",
+ "B07204_016E": "geographic_mobility_different_house_1_year_ago_abroad",
+ "B01003_001E": "population",
+ "B02001_002E": "white",
+ "B02001_003E": "black",
+ "B02001_004E": "american_indian_or_alaska_native",
+ "B02001_005E": "asian",
+ "B02001_006E": "native_hawaiian_or_pacific_islander",
+ "B03001_003E": "population_hispanic_or_latino",
+ "B02001_007E": "other_race",
+ "B02001_008E": "multiple_races",
+ "B02001_009E": "multiple_races_and_other_race",
+ "B02001_010E": "two_or_more_races_excluding_other",
+ "B02015_002E": "east_asian_chinese",
+ "B02015_003E": "east_asian_hmong",
+ "B02015_004E": "east_asian_japanese",
+ "B02015_005E": "east_asian_korean",
+ "B02015_006E": "east_asian_mongolian",
+ "B02015_007E": "east_asian_okinawan",
+ "B02015_008E": "east_asian_taiwanese",
+ "B02015_009E": "east_asian_other",
+ "B02015_010E": "southeast_asian_burmese",
+ "B02015_011E": "southeast_asian_cambodian",
+ "B02015_012E": "southeast_asian_filipino",
+ "B02015_013E": "southeast_asian_indonesian",
+ "B02015_014E": "southeast_asian_laotian",
+ "B02015_015E": "southeast_asian_malaysian",
+ "B02015_016E": "southeast_asian_mien",
+ "B02015_017E": "southeast_asian_singaporean",
+ "B02015_018E": "southeast_asian_thai",
+ "B02015_019E": "southeast_asian_viet",
+ "B02015_020E": "southeast_asian_other",
+ "B02015_021E": "south_asian_asian_indian",
+ "B02015_022E": "south_asian_bangladeshi",
+ "B02015_023E": "south_asian_bhutanese",
+ "B02015_024E": "south_asian_nepalese",
+ "B02015_025E": "south_asian_pakistani",
+ "B02015_026E": "south_asian_sikh",
+ "B02015_027E": "south_asian_sri_lankan",
+ "B02015_028E": "south_asian_other",
+ "B02015_029E": "central_asian_kazakh",
+ "B02015_030E": "central_asian_uzbek",
+ "B02015_031E": "central_asian_other",
+ "B02015_032E": "other_asian_specified",
+ "B02015_033E": "other_asian_not_specified",
+ "B19013_001E": "median_household_income",
+ "B19013A_001E": "median_household_income_white",
+ "B19013H_001E": "median_household_income_white_non_hispanic",
+ "B19013I_001E": "median_household_income_hispanic",
+ "B19013B_001E": "median_household_income_black",
+ "B19013C_001E": "median_household_income_american_indian_or_alaska_native",
+ "B19013D_001E": "median_household_income_asian",
+ "B19013E_001E": "median_household_income_native_hawaiian_or_pacific_islander",
+ "B19013F_001E": "median_household_income_other_race",
+ "B19013G_001E": "median_household_income_multiple_races",
+ "B19019_002E": "median_household_income_1_person_households",
+ "B19019_003E": "median_household_income_2_person_households",
+ "B19019_004E": "median_household_income_3_person_households",
+ "B19019_005E": "median_household_income_4_person_households",
+ "B19019_006E": "median_household_income_5_person_households",
+ "B19019_007E": "median_household_income_6_person_households",
+ "B19019_008E": "median_household_income_7_or_more_person_households",
+ "B01002_001E": "median_age",
+ "B01002_002E": "median_age_male",
+ "B01002_003E": "median_age_female",
+ "B25031_001E": "median_gross_rent",
+ "B25031_002E": "median_gross_rent_0_bedrooms",
+ "B25031_003E": "median_gross_rent_1_bedrooms",
+ "B25031_004E": "median_gross_rent_2_bedrooms",
+ "B25031_005E": "median_gross_rent_3_bedrooms",
+ "B25031_006E": "median_gross_rent_4_bedrooms",
+ "B25031_007E": "median_gross_rent_5_bedrooms",
+ "B25032_001E": "total_housing_units",
+ "B25032_002E": "total_owner_occupied_housing_units",
+ "B25032_013E": "total_renter_occupied_housing_units",
+ "B25070_001E": "median_gross_rent_as_percentage_of_household_income",
+}
+
+
+if __name__ == "__main__":
+ conn = psycopg2.connect(
+ host=HOST, database=DATABASE, user=USERNAME, password=PASSWORD
+ )
+ storage_client = storage.Client(project=PROJECT_NAME)
+ bucket = storage_client.bucket(BUCKET_NAME)
+ cur = conn.cursor()
+
+ cur.execute(f"drop table if exists {SCHEMA}.acs_tract_raw")
+ cur.execute(
+ f"create table {SCHEMA}.acs_tract_raw (statefp text, countyfp text, tractce text, year int, code text, value numeric)"
+ )
+
+ temp_table = f"{SCHEMA}.acs_tract_temp"
+ cur.execute(f"drop table if exists {temp_table}")
+ cur.execute(
+ f"create table {temp_table} (statefp text, countyfp text, tractce text, value numeric)"
+ )
+ for code in tqdm(ACS_CODES.keys()):
+ desc = ACS_CODES[code]
+
+ for blob in bucket.list_blobs(prefix=f"acs/tracts/{desc}/"):
+ year = blob.name.split("/")[-1].split(".")[0]
+ cur.execute(f"truncate {temp_table}")
+ with tempfile.NamedTemporaryFile() as temp:
+ blob.download_to_filename(temp.name)
+ cur.copy_expert(f"copy {temp_table} from stdin with csv header", temp)
+
+ cur.execute(
+ f"insert into {SCHEMA}.acs_tract_raw select statefp, countyfp, tractce, %s, %s, value from {temp_table}",
+ (year, code),
+ )
+ cur.execute(f"drop table {temp_table}")
+ conn.commit()
+
+ cur.execute(f"drop table if exists {SCHEMA}.acs_bg_raw")
+ cur.execute(
+ f"create table {SCHEMA}.acs_bg_raw (statefp text, countyfp text, tractce text, blkgrpce text, year int, code text, value numeric)"
+ )
+
+ temp_table = f"{SCHEMA}.acs_tract_temp"
+ cur.execute(f"drop table if exists {temp_table}")
+ cur.execute(
+ f"create table {temp_table} (statefp text, countyfp text, tractce text, blkgrpce text, value numeric)"
+ )
+
+ for code in tqdm(ACS_CODES.keys()):
+ desc = ACS_CODES[code]
+ for blob in bucket.list_blobs(prefix=f"acs/block_groups/{desc}/"):
+ year = blob.name.split("/")[-1].split(".")[0]
+ cur.execute(f"truncate {temp_table}")
+ with tempfile.NamedTemporaryFile() as temp:
+ blob.download_to_filename(temp.name)
+ cur.copy_expert(f"copy {temp_table} from stdin with csv header", temp)
+
+            cur.execute(
+                f"insert into {SCHEMA}.acs_bg_raw select statefp, countyfp, tractce, blkgrpce, %s, %s, value from {temp_table}",
+                (year, code),
+            )
+ cur.execute(f"drop table {temp_table}")
+ conn.commit()
From 686ee09639cfd733a4b2785c08aec877eafd3bdd Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 21 Aug 2024 11:43:48 -0400
Subject: [PATCH 073/142] correctly refer to seed
---
dbt/models/segregation_indexes.sql | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/dbt/models/segregation_indexes.sql b/dbt/models/segregation_indexes.sql
index 2d2c3cac..8d50587f 100644
--- a/dbt/models/segregation_indexes.sql
+++ b/dbt/models/segregation_indexes.sql
@@ -17,18 +17,24 @@ with
, value_
from {{ ref("acs_tract") }}
)
+ , acs_variables as (
+ select
+ "variable" as name_
+ , description
+ from {{ ref("acs_variables") }}
+ )
, pop_tyc as
( -- Population by tract, year, and category
select acs_tract.census_tract_id, acs_tract.year_, categories.category, acs_tract.value_
from acs_tract
- join acs_variable using (name_)
- join categories on categories.category = acs_variable.description
+ join acs_variables using (name_)
+ join categories on categories.category = acs_variables.description
),
pop_ty as
( -- Population by tract and year (note: using 'population' variable instead of aggregating categories)
select census_tract_id, year_, value_
- from acs_tract join acs_variable using (name_)
- where acs_variable.description = 'population'
+ from acs_tract join acs_variables using (name_)
+ where acs_variables.description = 'population'
),
pop_yc as
( -- Population by year and category
From 30323c4a3a92c3366cc64a493c326132862fa374 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 21 Aug 2024 11:49:08 -0400
Subject: [PATCH 074/142] more work on tables for model
---
dbt/models/high_frequency_transit_lines.sql | 26 ++++++++++++-------
.../high_frequency_transit_lines_union.sql | 4 +--
dbt/models/high_frequency_transit_stops.sql | 16 ++----------
dbt/models/housing_units_by_census_tracts.sql | 9 ++++---
dbt/models/parcels_distance_to_transit.sql | 21 +++++++++++++++
dbt/models/parcels_to_census_tracts.sql | 25 ++++++++++++++++++
.../property_values_by_census_tracts.sql | 4 +--
7 files changed, 74 insertions(+), 31 deletions(-)
create mode 100644 dbt/models/parcels_distance_to_transit.sql
create mode 100644 dbt/models/parcels_to_census_tracts.sql
diff --git a/dbt/models/high_frequency_transit_lines.sql b/dbt/models/high_frequency_transit_lines.sql
index af4c344c..34cd9779 100644
--- a/dbt/models/high_frequency_transit_lines.sql
+++ b/dbt/models/high_frequency_transit_lines.sql
@@ -1,22 +1,30 @@
with lines as (
select
- year_
+ valid
, geom
from {{ ref('high_frequency_transit_lines_union') }}
)
, stops as (
select
- year_
+ valid
, geom
from {{ ref('high_frequency_transit_stops') }}
)
+, lines_and_stops as (
+ select
+ lines.valid * stops.valid as valid
+ , lines.geom as line_geom
+ , stops.geom as stop_geom
+ from
+ lines
+ inner join stops on lines.valid && stops.valid
+)
select
- year_ as high_frequency_transit_lines_id
- , year_
- , lines.geom
+ {{ dbt_utils.generate_surrogate_key(['valid']) }} as high_frequency_transit_line_id
+ , valid
+ , line_geom as geom
-- note units are in meters
- , st_buffer(lines.geom, 106.7) as blue_zone_geom -- 350 feet
- , st_union(st_buffer(lines.geom, 402.3), st_buffer(stops.geom, 804.7)) as yellow_zone_geom -- quarter mile around lines and half mile around stops
+ , st_buffer(line_geom, 106.7) as blue_zone_geom -- 350 feet
+ , st_union(st_buffer(line_geom, 402.3), st_buffer(stop_geom, 804.7)) as yellow_zone_geom -- quarter mile around lines and half mile around stops
from
- lines
- inner join stops using (year_)
+ lines_and_stops
diff --git a/dbt/models/high_frequency_transit_lines_union.sql b/dbt/models/high_frequency_transit_lines_union.sql
index 073ec9a1..8b361587 100644
--- a/dbt/models/high_frequency_transit_lines_union.sql
+++ b/dbt/models/high_frequency_transit_lines_union.sql
@@ -11,11 +11,11 @@ with lines_2015 as (
{{ source('minneapolis', 'high_frequency_transit_2016_freq_lines') }}
)
select
- 2015 as year_,
+ '(,2016-01-01)'::daterange as valid,
geom
from lines_2015
union all
select
- 2016 as year_,
+ '[2016-01-01,)'::daterange as valid,
geom
from lines_2016
diff --git a/dbt/models/high_frequency_transit_stops.sql b/dbt/models/high_frequency_transit_stops.sql
index b751153f..9d9a0459 100644
--- a/dbt/models/high_frequency_transit_stops.sql
+++ b/dbt/models/high_frequency_transit_stops.sql
@@ -1,21 +1,9 @@
with stops_2015 as (
select
- 2015 as year_
- , st_union(st_transform(geom, {{ var("srid") }}))::geometry(multipoint, {{ var("srid") }}) as geom
+ st_union(st_transform(geom, {{ var("srid") }}))::geometry(multipoint, {{ var("srid") }}) as geom
from {{ source('minneapolis', 'high_frequency_transit_2015_freq_rail_stops') }}
)
-, stops_2016 as ( -- stops are unchanged in 2016
- select
- 2016 as year_
- , geom
- from stops_2015
-)
select
- year_
+ '[,]'::daterange as valid
, geom
from stops_2015
-union all
-select
- year_
- , geom
-from stops_2016
diff --git a/dbt/models/housing_units_by_census_tracts.sql b/dbt/models/housing_units_by_census_tracts.sql
index f73b49f5..208c12f3 100644
--- a/dbt/models/housing_units_by_census_tracts.sql
+++ b/dbt/models/housing_units_by_census_tracts.sql
@@ -15,6 +15,7 @@ with census_tracts as (
residential_permit_id
, year_
, permit_value
+ , num_units
from {{ ref('residential_permits') }}
)
, residential_permits_to_parcels as (
@@ -33,7 +34,7 @@ with census_tracts as (
select
census_tracts.census_tract
, residential_permits.year_
- , residential_permits.housing_units
+ , residential_permits.num_units
, st_area(parcels.geom) as parcel_sqm
, residential_permits.permit_value
from
@@ -48,7 +49,7 @@ with census_tracts as (
select
census_tract
, year_
- , sum(housing_units) as housing_units
+ , sum(num_units) as num_units
from residential
group by census_tract, year_
)
@@ -56,7 +57,7 @@ with census_tracts as (
select
census_tract
, year_
- , housing_units -- do we really want the total _applied_ units, or should we
- -- be looking at the total unit estimates from ACS?
+ , num_units -- do we really want the total _applied_ units, or should we be
+ -- looking at the total unit estimates from ACS?
from
agg_residential
diff --git a/dbt/models/parcels_distance_to_transit.sql b/dbt/models/parcels_distance_to_transit.sql
new file mode 100644
index 00000000..c7881209
--- /dev/null
+++ b/dbt/models/parcels_distance_to_transit.sql
@@ -0,0 +1,21 @@
+with
+ parcels as (
+ select
+ parcel_id
+ , valid
+ , geom
+ from {{ ref('parcels_base') }}
+ )
+ , high_frequency_transit_lines as (
+ select
+ valid
+ , geom
+ from {{ ref('high_frequency_transit_lines') }}
+ )
+select
+ parcels.parcel_id
+ , st_distance(parcels.geom, high_frequency_transit_lines.geom) as distance
+from
+ parcels
+ inner join high_frequency_transit_lines
+ on parcels.valid && high_frequency_transit_lines.valid
diff --git a/dbt/models/parcels_to_census_tracts.sql b/dbt/models/parcels_to_census_tracts.sql
new file mode 100644
index 00000000..d3c2d6e0
--- /dev/null
+++ b/dbt/models/parcels_to_census_tracts.sql
@@ -0,0 +1,25 @@
+with
+parcels as (
+ select
+ parcel_id
+ from {{ ref("parcels_base") }}
+)
+, census_block_groups as (
+ select
+ census_block_group_id
+ , census_tract_id
+ from {{ ref("census_block_groups") }}
+)
+, parcels_to_census_block_groups as (
+ select
+ parcel_id
+ , census_block_group_id
+ from {{ ref("parcels_to_census_block_groups") }}
+)
+select
+ parcels.parcel_id
+ , census_block_groups.census_tract_id
+from
+ parcels
+ left join parcels_to_census_block_groups using (parcel_id)
+ left join census_block_groups using (census_block_group_id)
diff --git a/dbt/models/property_values_by_census_tracts.sql b/dbt/models/property_values_by_census_tracts.sql
index 0c187448..51efa233 100644
--- a/dbt/models/property_values_by_census_tracts.sql
+++ b/dbt/models/property_values_by_census_tracts.sql
@@ -17,6 +17,6 @@ select
, sum(parcels.emv_total) as total_value
, percentile_cont(0.5) within group (order by parcels.emv_total) as median_value
from
- parcels_to_census_tracts using (parcel_id)
+ parcels_to_census_tracts
inner join parcels using (parcel_id)
-group by census_tracts.census_tract_id
+group by parcels_to_census_tracts.census_tract_id
From 5800b5dffb20f91e6f28574c08fdeb310c361295 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 21 Aug 2024 14:57:21 -0400
Subject: [PATCH 075/142] more work on tables for model
---
dbt/macros/median.sql | 3 +
dbt/macros/standardize.sql | 6 ++
dbt/models/census_block_groups.sql | 2 +
dbt/models/census_tracts.sql | 14 +++-
.../census_tracts_distance_to_transit.sql | 36 ++++++++++
dbt/models/census_tracts_housing_units.sql | 36 ++++++++++
dbt/models/census_tracts_parcel_area.sql | 34 ++++++++++
dbt/models/census_tracts_property_values.sql | 37 +++++++++++
dbt/models/census_tracts_wide.sql | 66 +++++++++++++++++++
dbt/models/high_frequency_transit_lines.sql | 10 +++
dbt/models/housing_units_by_census_tracts.sql | 63 ------------------
dbt/models/parcels_distance_to_transit.sql | 9 +++
dbt/models/parcels_to_census_tracts.sql | 10 +++
.../property_values_by_census_tracts.sql | 22 -------
14 files changed, 262 insertions(+), 86 deletions(-)
create mode 100644 dbt/macros/median.sql
create mode 100644 dbt/macros/standardize.sql
create mode 100644 dbt/models/census_tracts_distance_to_transit.sql
create mode 100644 dbt/models/census_tracts_housing_units.sql
create mode 100644 dbt/models/census_tracts_parcel_area.sql
create mode 100644 dbt/models/census_tracts_property_values.sql
create mode 100644 dbt/models/census_tracts_wide.sql
delete mode 100644 dbt/models/housing_units_by_census_tracts.sql
delete mode 100644 dbt/models/property_values_by_census_tracts.sql
diff --git a/dbt/macros/median.sql b/dbt/macros/median.sql
new file mode 100644
index 00000000..131339f9
--- /dev/null
+++ b/dbt/macros/median.sql
@@ -0,0 +1,3 @@
+{% macro median(attr) %}
+(percentile_cont(0.5) within group (order by {{ attr }}))
+{% endmacro %}
diff --git a/dbt/macros/standardize.sql b/dbt/macros/standardize.sql
new file mode 100644
index 00000000..795ebad2
--- /dev/null
+++ b/dbt/macros/standardize.sql
@@ -0,0 +1,6 @@
+{% macro standardize(columns) %}{# Emits one z-score expression "std_<column>" per entry of columns; yields NULL instead of a division-by-zero error when a column's sample stddev is 0. #}
+ {% for c in columns %}
+ (({{ c }} - (avg({{ c }}) over ())) / nullif(stddev_samp({{ c }}) over (), 0)) as std_{{ c }}
+ {% if not loop.last %},{% endif %}
+ {% endfor %}
+{% endmacro %}
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
index d3d8ac72..15783fa6 100644
--- a/dbt/models/census_block_groups.sql
+++ b/dbt/models/census_block_groups.sql
@@ -37,6 +37,7 @@ census_block_groups as (
, {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq
, '[{{ year_ }}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
{% endif %}
+ , {{ year_ }} as year_
, st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_bg_500k') }}
@@ -67,5 +68,6 @@ select
, geoidfq
, census_tract_id
, valid
+ , year_
, geom
from census_block_groups_with_tracts
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 1119140c..31f09322 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -1,3 +1,13 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_tract_id'], 'unique': true},
+ {'columns': ['valid', 'geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
with census_tracts as (
{% for year_ in var('census_years') %}
select
@@ -13,7 +23,8 @@ select
, tractce
, {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq
, '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
-{% endif %}
+ {% endif %}
+ , {{ year_ }} as year_
, st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_tract_500k') }}
@@ -27,6 +38,7 @@ select
, tractce
, geoidfq
, valid
+ , year_
, geom
from
census_tracts
diff --git a/dbt/models/census_tracts_distance_to_transit.sql b/dbt/models/census_tracts_distance_to_transit.sql
new file mode 100644
index 00000000..8073ce5b
--- /dev/null
+++ b/dbt/models/census_tracts_distance_to_transit.sql
@@ -0,0 +1,36 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_tract_id'], 'unique': true},
+ ]
+ )
+}}
+
+with
+ parcels_distance_to_transit as (
+ select
+ parcel_id
+ , distance
+ from {{ ref('parcels_distance_to_transit') }}
+ )
+ , census_tracts as (
+ select
+ census_tract_id
+ from {{ ref('census_tracts') }}
+ )
+ , parcels_to_census_tracts as (
+ select
+ parcel_id
+ , census_tract_id
+ from {{ ref('parcels_to_census_tracts') }}
+ )
+select
+ census_tracts.census_tract_id
+ , avg(parcels_distance_to_transit.distance) as mean_distance_to_transit
+ , {{ median('parcels_distance_to_transit.distance') }} as median_distance_to_transit
+from
+ census_tracts
+ left join parcels_to_census_tracts using (census_tract_id)
+ left join parcels_distance_to_transit using (parcel_id)
+group by 1
diff --git a/dbt/models/census_tracts_housing_units.sql b/dbt/models/census_tracts_housing_units.sql
new file mode 100644
index 00000000..fe712f95
--- /dev/null
+++ b/dbt/models/census_tracts_housing_units.sql
@@ -0,0 +1,36 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_tract_id'], 'unique': true},
+ ]
+ )
+}}
+
+with census_tracts as (
+ select
+ census_tract_id
+ from {{ ref('census_tracts') }}
+)
+, residential_permits as (
+ select
+ residential_permit_id
+ , year_
+ , permit_value
+ , num_units
+ from {{ ref('residential_permits') }}
+)
+, residential_permits_to_census_tracts as (
+ select
+ residential_permit_id
+ , census_tract_id
+ from {{ ref('residential_permits_to_census_tracts') }}
+)
+select
+ census_tracts.census_tract_id
+ , sum(residential_permits.num_units) as num_units
+from
+ census_tracts
+ left join residential_permits_to_census_tracts using (census_tract_id)
+ left join residential_permits using (residential_permit_id)
+group by 1
diff --git a/dbt/models/census_tracts_parcel_area.sql b/dbt/models/census_tracts_parcel_area.sql
new file mode 100644
index 00000000..24d21cff
--- /dev/null
+++ b/dbt/models/census_tracts_parcel_area.sql
@@ -0,0 +1,34 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_tract_id'], 'unique': true},
+ ]
+ )
+}}
+
+with census_tracts as (
+ select
+ census_tract_id
+ from {{ ref('census_tracts') }}
+)
+, parcels as (
+ select
+ parcel_id
+ , geom
+ from {{ ref('parcels_base') }}
+)
+, parcels_to_census_tracts as (
+ select
+ parcel_id
+ , census_tract_id
+ from {{ ref('parcels_to_census_tracts') }}
+)
+select
+ census_tract_id
+ , sum(st_area(parcels.geom)) as parcel_sqm
+from
+ census_tracts
+ left join parcels_to_census_tracts using (census_tract_id)
+ left join parcels using (parcel_id)
+group by 1
diff --git a/dbt/models/census_tracts_property_values.sql b/dbt/models/census_tracts_property_values.sql
new file mode 100644
index 00000000..e2a4531b
--- /dev/null
+++ b/dbt/models/census_tracts_property_values.sql
@@ -0,0 +1,37 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_tract_id'], 'unique': true},
+ ]
+ )
+}}
+
+-- Median and total parcel property values aggregated by census tract.
+
+with parcels as (
+ select
+ parcel_id
+ , emv_total
+ from {{ ref('parcels_base') }}
+)
+, census_tracts as (
+ select
+ census_tract_id
+ from {{ ref('census_tracts') }}
+)
+, parcels_to_census_tracts as (
+ select
+ parcel_id
+ , census_tract_id
+ from {{ ref('parcels_to_census_tracts') }}
+)
+select
+ census_tracts.census_tract_id
+ , sum(parcels.emv_total) as total_value
+ , {{ median('parcels.emv_total') }} as median_value
+from
+ census_tracts
+ left join parcels_to_census_tracts using (census_tract_id)
+ left join parcels using (parcel_id)
+group by 1
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
new file mode 100644
index 00000000..adab9e44
--- /dev/null
+++ b/dbt/models/census_tracts_wide.sql
@@ -0,0 +1,66 @@
+with
+census_tracts as (
+ select
+ census_tract_id
+ , statefp || countyfp || tractce as census_tract
+ , year_
+ from {{ ref('census_tracts') }}
+)
+, census_tracts_housing_units as (
+ select
+ census_tract_id
+ , num_units
+ from {{ ref('census_tracts_housing_units') }}
+)
+, census_tracts_property_values as (
+ select
+ census_tract_id
+ , median_value
+ , total_value
+ from {{ ref('census_tracts_property_values') }}
+)
+, census_tracts_distance_to_transit as (
+ select
+ census_tract_id
+ , median_distance_to_transit
+ , mean_distance_to_transit
+ from {{ ref('census_tracts_distance_to_transit') }}
+)
+, census_tracts_parcel_area as (
+ select
+ census_tract_id
+ , parcel_sqm
+ from {{ ref('census_tracts_parcel_area') }}
+)
+, raw_data as (
+select
+ census_tracts.census_tract
+ , census_tracts.year_
+ , coalesce(census_tracts_housing_units.num_units, 0) as num_units
+ , census_tracts_property_values.total_value
+ , census_tracts_property_values.median_value
+ , census_tracts_distance_to_transit.median_distance_to_transit
+ , census_tracts_distance_to_transit.mean_distance_to_transit
+ , census_tracts_parcel_area.parcel_sqm
+from
+ census_tracts_housing_units
+ inner join census_tracts_property_values using(census_tract_id)
+ inner join census_tracts_distance_to_transit using (census_tract_id)
+ inner join census_tracts_parcel_area using (census_tract_id)
+ inner join census_tracts using (census_tract_id)
+where
+ census_tracts.year_ <= 2020
+ and census_tracts.census_tract_id in (select census_tract_id from {{ ref('census_tracts_in_city_boundary') }})
+)
+select
+ census_tract
+ , year_
+ , num_units
+ , total_value
+ , median_value
+ , median_distance_to_transit
+ , mean_distance_to_transit
+ , parcel_sqm
+ , {{ standardize(['num_units', 'total_value', 'median_value', 'median_distance_to_transit', 'mean_distance_to_transit', 'parcel_sqm']) }}
+from
+ raw_data
diff --git a/dbt/models/high_frequency_transit_lines.sql b/dbt/models/high_frequency_transit_lines.sql
index 34cd9779..42f13975 100644
--- a/dbt/models/high_frequency_transit_lines.sql
+++ b/dbt/models/high_frequency_transit_lines.sql
@@ -1,3 +1,13 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['high_frequency_transit_line_id'], 'unique': true},
+ {'columns': ['valid', 'geom'], 'type': 'gist'},
+ ]
+ )
+}}
+
with lines as (
select
valid
diff --git a/dbt/models/housing_units_by_census_tracts.sql b/dbt/models/housing_units_by_census_tracts.sql
deleted file mode 100644
index 208c12f3..00000000
--- a/dbt/models/housing_units_by_census_tracts.sql
+++ /dev/null
@@ -1,63 +0,0 @@
-with census_tracts as (
- select
- census_tract_id
- , statefp || countyfp || tractce as census_tract
- from {{ ref('census_tracts') }}
-)
-, parcels as (
- select
- parcel_id
- , geom
- from {{ ref('parcels') }}
-)
-, residential_permits as (
- select
- residential_permit_id
- , year_
- , permit_value
- , num_units
- from {{ ref('residential_permits') }}
-)
-, residential_permits_to_parcels as (
- select
- residential_permit_id
- , parcel_id
- from {{ ref('residential_permits_to_parcels') }}
-)
-, residential_permits_to_census_tracts as (
- select
- residential_permit_id
- , census_tract_id
- from {{ ref('residential_permits_to_census_tracts') }}
-)
-, residential as (
- select
- census_tracts.census_tract
- , residential_permits.year_
- , residential_permits.num_units
- , st_area(parcels.geom) as parcel_sqm
- , residential_permits.permit_value
- from
- residential_permits
- inner join residential_permits_to_parcels using (residential_permit_id)
- inner join parcels using (parcel_id)
- inner join residential_permits_to_census_tracts using (residential_permit_id)
- inner join census_tracts using (census_tract_id)
- where year_ <= 2020
-)
-, agg_residential as (
- select
- census_tract
- , year_
- , sum(num_units) as num_units
- from residential
- group by census_tract, year_
-)
-
-select
- census_tract
- , year_
- , num_units -- do we really want the total _applied_ units, or should we be
- -- looking at the total unit estimates from ACS?
-from
- agg_residential
diff --git a/dbt/models/parcels_distance_to_transit.sql b/dbt/models/parcels_distance_to_transit.sql
index c7881209..9edd1ff1 100644
--- a/dbt/models/parcels_distance_to_transit.sql
+++ b/dbt/models/parcels_distance_to_transit.sql
@@ -1,3 +1,12 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['parcel_id'], 'unique': true},
+ ]
+ )
+}}
+
with
parcels as (
select
diff --git a/dbt/models/parcels_to_census_tracts.sql b/dbt/models/parcels_to_census_tracts.sql
index d3c2d6e0..4b742396 100644
--- a/dbt/models/parcels_to_census_tracts.sql
+++ b/dbt/models/parcels_to_census_tracts.sql
@@ -1,3 +1,13 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['parcel_id'], 'unique': true},
+ {'columns': ['census_tract_id']}
+ ]
+ )
+}}
+
with
parcels as (
select
diff --git a/dbt/models/property_values_by_census_tracts.sql b/dbt/models/property_values_by_census_tracts.sql
deleted file mode 100644
index 51efa233..00000000
--- a/dbt/models/property_values_by_census_tracts.sql
+++ /dev/null
@@ -1,22 +0,0 @@
--- Median and total parcel property values aggregated by census tract.
-
-with parcels as (
- select
- parcel_id
- , emv_total
- from {{ ref('parcels_base') }}
-)
-, parcels_to_census_tracts as (
- select
- parcel_id
- , census_tract_id
- from {{ ref('parcels_to_census_tracts') }}
-)
-select
- parcels_to_census_tracts.census_tract_id
- , sum(parcels.emv_total) as total_value
- , percentile_cont(0.5) within group (order by parcels.emv_total) as median_value
-from
- parcels_to_census_tracts
- inner join parcels using (parcel_id)
-group by parcels_to_census_tracts.census_tract_id
From 13952dca56ae8967daae982c334e10e06eca4c0f Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 21 Aug 2024 16:00:45 -0400
Subject: [PATCH 076/142] cleanup
---
dbt/models/acs_block_group.sql | 25 +------
dbt/models/acs_block_group_clean.sql | 14 +---
dbt/models/acs_tract.sql | 22 +-----
dbt/models/acs_tract_wide.sql | 25 ++-----
dbt/models/census_block_groups.sql | 24 ++-----
dbt/models/census_tracts.sql | 9 +--
.../census_tracts_distance_to_transit.sql | 14 +---
dbt/models/census_tracts_housing_units.sql | 16 +----
dbt/models/census_tracts_in_city_boundary.sql | 9 +--
dbt/models/census_tracts_parcel_area.sql | 14 +---
dbt/models/census_tracts_property_values.sql | 14 +---
dbt/models/census_tracts_wide.sql | 68 +++++++++----------
dbt/models/fair_market_rents.sql | 20 +-----
dbt/models/high_frequency_transit_lines.sql | 14 +---
dbt/models/parcels_base.sql | 13 +---
dbt/models/parcels_to_census_tracts.sql | 18 +----
dbt/models/segregation_indexes.sql | 20 +-----
dbt/models/usps_migration.sql | 12 +---
18 files changed, 74 insertions(+), 277 deletions(-)
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
index 6165c8ac..37d6f96e 100644
--- a/dbt/models/acs_block_group.sql
+++ b/dbt/models/acs_block_group.sql
@@ -8,29 +8,8 @@
}}
with
-census_block_groups as (
- select
- census_block_group_id
- , statefp
- , countyfp
- , tractce
- , blkgrpce
- , valid
- from
- {{ ref('census_block_groups') }}
-)
-, acs_bg as (
- select
- statefp
- , countyfp
- , tractce
- , blkgrpce
- , year_
- , name_
- , value_
- from
- {{ ref('acs_block_group_clean') }}
-)
+census_block_groups as (select * from {{ ref('census_block_groups') }})
+, acs_bg as (select * from {{ ref('acs_block_group_clean') }})
select
census_block_groups.census_block_group_id
, acs_bg.year_
diff --git a/dbt/models/acs_block_group_clean.sql b/dbt/models/acs_block_group_clean.sql
index d629d782..22cf94e4 100644
--- a/dbt/models/acs_block_group_clean.sql
+++ b/dbt/models/acs_block_group_clean.sql
@@ -1,15 +1,3 @@
-with
-acs_bg_raw as (
- select
- statefp
- , countyfp
- , tractce
- , blkgrpce
- , year
- , code
- , value
- from {{ source('minneapolis', 'acs_bg_raw') }}
-)
select
statefp
, countyfp
@@ -19,4 +7,4 @@ select
, code as name_
, case when "value" < 0 then null else "value" end as value_
from
- acs_bg_raw
+ {{ source('minneapolis', 'acs_bg_raw') }}
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
index 7482f43a..3909b2dc 100644
--- a/dbt/models/acs_tract.sql
+++ b/dbt/models/acs_tract.sql
@@ -8,26 +8,8 @@
}}
with
-census_tracts as (
- select
- census_tract_id
- , statefp
- , countyfp
- , tractce
- , valid
-
- from {{ ref("census_tracts") }}
-)
-, acs_tract as (
- select
- statefp
- , countyfp
- , tractce
- , year_
- , name_
- , value_
- from {{ ref('acs_tract_clean') }}
-)
+census_tracts as (select * from {{ ref("census_tracts") }})
+, acs_tract as (select * from {{ ref('acs_tract_clean') }})
select
census_tract_id
, acs_tract.year_
diff --git a/dbt/models/acs_tract_wide.sql b/dbt/models/acs_tract_wide.sql
index 434d2d9e..0d142795 100644
--- a/dbt/models/acs_tract_wide.sql
+++ b/dbt/models/acs_tract_wide.sql
@@ -9,21 +9,12 @@
{% set years = range(2013, 2023) %}
-with acs_tract as (
- select
- census_tract_id
- , year_
- , name_
- , value_
- from {{ ref('acs_tract') }}
-)
-
+with
+acs_tract as (select * from {{ ref('acs_tract') }})
+, acs_variables as (select * from {{ ref("acs_variables") }})
, census_tracts_in_city_boundary as (
- select
- census_tract_id
- from {{ ref("census_tracts_in_city_boundary") }}
+ select * from {{ ref("census_tracts_in_city_boundary") }}
)
-
, census_tracts as (
select
census_tract_id
@@ -31,14 +22,6 @@ with acs_tract as (
from {{ ref("census_tracts") }}
where census_tract_id in (select census_tract_id from census_tracts_in_city_boundary)
)
-
-, acs_variables as (
- select
- "variable"
- , description
- from {{ ref("acs_variables") }}
-)
-
, acs_tract_extended as (
select
acs_tract.census_tract_id
diff --git a/dbt/models/census_block_groups.sql b/dbt/models/census_block_groups.sql
index 15783fa6..b33a6aea 100644
--- a/dbt/models/census_block_groups.sql
+++ b/dbt/models/census_block_groups.sql
@@ -10,15 +10,7 @@
}}
with
-census_tracts as (
- select
- census_tract_id
- , statefp
- , countyfp
- , tractce
- , valid
- from {{ ref("census_tracts") }}
-),
+census_tracts as (select * from {{ ref("census_tracts") }}),
census_block_groups as (
{% for year_ in var('census_years') %}
select
@@ -55,19 +47,11 @@ census_block_groups_with_tracts as (
, (census_block_groups.valid * census_tracts.valid) as valid
, census_block_groups.geom
from census_block_groups
- inner join census_tracts using (statefp , countyfp , tractce)
+ inner join census_tracts using (statefp, countyfp, tractce)
where
census_tracts.valid && census_block_groups.valid
)
select
- {{ dbt_utils.generate_surrogate_key(['geoidfq', 'valid']) }} as census_block_group_id
- , statefp
- , countyfp
- , tractce
- , blkgrpce
- , geoidfq
- , census_tract_id
- , valid
- , year_
- , geom
+ {{ dbt_utils.generate_surrogate_key(['geoidfq', 'valid']) }} as census_block_group_id,
+ *
from census_block_groups_with_tracts
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 31f09322..c6a67620 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -32,13 +32,6 @@ from
{% endfor %}
)
select
- {{ dbt_utils.generate_surrogate_key(['geoidfq', 'valid']) }} as census_tract_id
- , statefp
- , countyfp
- , tractce
- , geoidfq
- , valid
- , year_
- , geom
+ {{ dbt_utils.generate_surrogate_key(['geoidfq', 'valid']) }} as census_tract_id, *
from
census_tracts
diff --git a/dbt/models/census_tracts_distance_to_transit.sql b/dbt/models/census_tracts_distance_to_transit.sql
index 8073ce5b..2c905572 100644
--- a/dbt/models/census_tracts_distance_to_transit.sql
+++ b/dbt/models/census_tracts_distance_to_transit.sql
@@ -9,21 +9,13 @@
with
parcels_distance_to_transit as (
- select
- parcel_id
- , distance
- from {{ ref('parcels_distance_to_transit') }}
+ select * from {{ ref('parcels_distance_to_transit') }}
)
, census_tracts as (
- select
- census_tract_id
- from {{ ref('census_tracts') }}
+ select * from {{ ref('census_tracts') }}
)
, parcels_to_census_tracts as (
- select
- parcel_id
- , census_tract_id
- from {{ ref('parcels_to_census_tracts') }}
+ select * from {{ ref('parcels_to_census_tracts') }}
)
select
census_tracts.census_tract_id
diff --git a/dbt/models/census_tracts_housing_units.sql b/dbt/models/census_tracts_housing_units.sql
index fe712f95..c60779e2 100644
--- a/dbt/models/census_tracts_housing_units.sql
+++ b/dbt/models/census_tracts_housing_units.sql
@@ -8,23 +8,13 @@
}}
with census_tracts as (
- select
- census_tract_id
- from {{ ref('census_tracts') }}
+ select * from {{ ref('census_tracts') }}
)
, residential_permits as (
- select
- residential_permit_id
- , year_
- , permit_value
- , num_units
- from {{ ref('residential_permits') }}
+ select * from {{ ref('residential_permits') }}
)
, residential_permits_to_census_tracts as (
- select
- residential_permit_id
- , census_tract_id
- from {{ ref('residential_permits_to_census_tracts') }}
+ select * from {{ ref('residential_permits_to_census_tracts') }}
)
select
census_tracts.census_tract_id
diff --git a/dbt/models/census_tracts_in_city_boundary.sql b/dbt/models/census_tracts_in_city_boundary.sql
index 51e1d4e2..be4771e3 100644
--- a/dbt/models/census_tracts_in_city_boundary.sql
+++ b/dbt/models/census_tracts_in_city_boundary.sql
@@ -1,13 +1,8 @@
with census_tracts as (
- select
- census_tract_id
- , geom
- from {{ ref('census_tracts') }}
+ select * from {{ ref('census_tracts') }}
)
, city_boundary as (
- select
- geom
- from {{ ref('city_boundary') }}
+ select * from {{ ref('city_boundary') }}
)
select
census_tracts.census_tract_id
diff --git a/dbt/models/census_tracts_parcel_area.sql b/dbt/models/census_tracts_parcel_area.sql
index 24d21cff..4751c4ea 100644
--- a/dbt/models/census_tracts_parcel_area.sql
+++ b/dbt/models/census_tracts_parcel_area.sql
@@ -8,21 +8,13 @@
}}
with census_tracts as (
- select
- census_tract_id
- from {{ ref('census_tracts') }}
+ select * from {{ ref('census_tracts') }}
)
, parcels as (
- select
- parcel_id
- , geom
- from {{ ref('parcels_base') }}
+ select * from {{ ref('parcels_base') }}
)
, parcels_to_census_tracts as (
- select
- parcel_id
- , census_tract_id
- from {{ ref('parcels_to_census_tracts') }}
+ select * from {{ ref('parcels_to_census_tracts') }}
)
select
census_tract_id
diff --git a/dbt/models/census_tracts_property_values.sql b/dbt/models/census_tracts_property_values.sql
index e2a4531b..0b140a92 100644
--- a/dbt/models/census_tracts_property_values.sql
+++ b/dbt/models/census_tracts_property_values.sql
@@ -10,21 +10,13 @@
-- Median and total parcel property values aggregated by census tract.
with parcels as (
- select
- parcel_id
- , emv_total
- from {{ ref('parcels_base') }}
+ select * from {{ ref('parcels_base') }}
)
, census_tracts as (
- select
- census_tract_id
- from {{ ref('census_tracts') }}
+ select * from {{ ref('census_tracts') }}
)
, parcels_to_census_tracts as (
- select
- parcel_id
- , census_tract_id
- from {{ ref('parcels_to_census_tracts') }}
+ select * from {{ ref('parcels_to_census_tracts') }}
)
select
census_tracts.census_tract_id
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index adab9e44..49433109 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -1,57 +1,49 @@
with
-census_tracts as (
+census_tracts_in_city_boundary as (
+ select
+ census_tract_id
+ from {{ ref('census_tracts_in_city_boundary') }}
+)
+, census_tracts as (
select
census_tract_id
, statefp || countyfp || tractce as census_tract
, year_
from {{ ref('census_tracts') }}
+ where
+ year_ <= 2020
+ and census_tract_id in (select * from census_tracts_in_city_boundary)
)
-, census_tracts_housing_units as (
- select
- census_tract_id
- , num_units
- from {{ ref('census_tracts_housing_units') }}
+, housing_units as (
+ select * from {{ ref('census_tracts_housing_units') }}
)
-, census_tracts_property_values as (
- select
- census_tract_id
- , median_value
- , total_value
- from {{ ref('census_tracts_property_values') }}
+, property_values as (
+ select * from {{ ref('census_tracts_property_values') }}
)
-, census_tracts_distance_to_transit as (
- select
- census_tract_id
- , median_distance_to_transit
- , mean_distance_to_transit
- from {{ ref('census_tracts_distance_to_transit') }}
+, distance_to_transit as (
+ select * from {{ ref('census_tracts_distance_to_transit') }}
)
-, census_tracts_parcel_area as (
- select
- census_tract_id
- , parcel_sqm
- from {{ ref('census_tracts_parcel_area') }}
+, parcel_area as (
+ select * from {{ ref('census_tracts_parcel_area') }}
)
, raw_data as (
select
census_tracts.census_tract
, census_tracts.year_
- , coalesce(census_tracts_housing_units.num_units, 0) as num_units
- , census_tracts_property_values.total_value
- , census_tracts_property_values.median_value
- , census_tracts_distance_to_transit.median_distance_to_transit
- , census_tracts_distance_to_transit.mean_distance_to_transit
- , census_tracts_parcel_area.parcel_sqm
+ , coalesce(housing_units.num_units, 0) as num_units
+ , property_values.total_value
+ , property_values.median_value
+ , distance_to_transit.median_distance_to_transit
+ , distance_to_transit.mean_distance_to_transit
+ , parcel_area.parcel_sqm
from
- census_tracts_housing_units
- inner join census_tracts_property_values using(census_tract_id)
- inner join census_tracts_distance_to_transit using (census_tract_id)
- inner join census_tracts_parcel_area using (census_tract_id)
- inner join census_tracts using (census_tract_id)
-where
- census_tracts.year_ <= 2020
- and census_tracts.census_tract_id in (select census_tract_id from {{ ref('census_tracts_in_city_boundary') }})
+ census_tracts
+ inner join housing_units using (census_tract_id)
+ inner join property_values using (census_tract_id)
+ inner join distance_to_transit using (census_tract_id)
+ inner join parcel_area using (census_tract_id)
)
+, with_std as (
select
census_tract
, year_
@@ -64,3 +56,5 @@ select
, {{ standardize(['num_units', 'total_value', 'median_value', 'median_distance_to_transit', 'mean_distance_to_transit', 'parcel_sqm']) }}
from
raw_data
+)
+select * from with_std
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index 9927b36f..a9a9cdbc 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -1,24 +1,8 @@
{% set num_bedrooms = range(0, 5) %}
with
-zip_codes as (
- select
- zip_code_id
- , zip_code
- , valid
- from {{ ref('zip_codes') }}
-)
-, fair_market_rents as (
- select
- zip_code
- , rent_br0
- , rent_br1
- , rent_br2
- , rent_br3
- , rent_br4
- , year_
- from {{ ref('fair_market_rents_union') }}
-)
+zip_codes as (select * from {{ ref('zip_codes') }})
+, fair_market_rents as (select * from {{ ref('fair_market_rents_union') }})
, fmr_zip as (
select
zip_codes.zip_code_id
diff --git a/dbt/models/high_frequency_transit_lines.sql b/dbt/models/high_frequency_transit_lines.sql
index 42f13975..34d1238a 100644
--- a/dbt/models/high_frequency_transit_lines.sql
+++ b/dbt/models/high_frequency_transit_lines.sql
@@ -8,18 +8,8 @@
)
}}
-with lines as (
- select
- valid
- , geom
- from {{ ref('high_frequency_transit_lines_union') }}
-)
-, stops as (
- select
- valid
- , geom
- from {{ ref('high_frequency_transit_stops') }}
-)
+with lines as (select * from {{ ref('high_frequency_transit_lines_union') }})
+, stops as (select * from {{ ref('high_frequency_transit_stops') }})
, lines_and_stops as (
select
lines.valid * stops.valid as valid
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index 6fb778f1..29055aa4 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -24,21 +24,12 @@ with parcels as (
nullif(year_built, 0) as year_built,
sale_date,
nullif(sale_value, 0) as sale_value,
- geom
+ st_transform(geom, {{ var("srid") }}) as geom
from {{ source('minneapolis', 'parcels_shp_plan_regonal_' ~ year_ ~ '_parcels' ~ year_ ~ 'hennepin') }}
where upper({{ "city" if year_ < 2018 else "ctu_name" }}) = '{{ city }}'
{% if not loop.last %}union all{% endif %}
{% endfor %}
)
select
- {{ dbt_utils.generate_surrogate_key(['ogc_fid', 'valid']) }} as parcel_id
- , pin
- , valid
- , emv_land
- , emv_bldg
- , emv_total
- , year_built
- , sale_date
- , sale_value
- , st_transform(geom, {{ var("srid") }}) as geom
+ {{ dbt_utils.generate_surrogate_key(['ogc_fid', 'valid']) }} as parcel_id, *
from parcels
diff --git a/dbt/models/parcels_to_census_tracts.sql b/dbt/models/parcels_to_census_tracts.sql
index 4b742396..75f5f360 100644
--- a/dbt/models/parcels_to_census_tracts.sql
+++ b/dbt/models/parcels_to_census_tracts.sql
@@ -9,22 +9,10 @@
}}
with
-parcels as (
- select
- parcel_id
- from {{ ref("parcels_base") }}
-)
-, census_block_groups as (
- select
- census_block_group_id
- , census_tract_id
- from {{ ref("census_block_groups") }}
-)
+parcels as (select * from {{ ref("parcels_base") }})
+, census_block_groups as (select * from {{ ref("census_block_groups") }})
, parcels_to_census_block_groups as (
- select
- parcel_id
- , census_block_group_id
- from {{ ref("parcels_to_census_block_groups") }}
+ select * from {{ ref("parcels_to_census_block_groups") }}
)
select
parcels.parcel_id
diff --git a/dbt/models/segregation_indexes.sql b/dbt/models/segregation_indexes.sql
index 8d50587f..206545e5 100644
--- a/dbt/models/segregation_indexes.sql
+++ b/dbt/models/segregation_indexes.sql
@@ -6,23 +6,9 @@
-- has many more white people than the average for the city will have a high
-- segregation index for the 'average_city' distribution.
with
- categories as (
- select category from {{ ref("population_categories") }}
- )
- , acs_tract as (
- select
- census_tract_id
- , year_
- , name_
- , value_
- from {{ ref("acs_tract") }}
- )
- , acs_variables as (
- select
- "variable" as name_
- , description
- from {{ ref("acs_variables") }}
- )
+ categories as (select * from {{ ref("population_categories") }})
+ , acs_tract as (select * from {{ ref("acs_tract") }})
+ , acs_variables as (select * from {{ ref("acs_variables") }})
, pop_tyc as
( -- Population by tract, year, and category
select acs_tract.census_tract_id, acs_tract.year_, categories.category, acs_tract.value_
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index 7550f0d0..2cafef6c 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -10,18 +10,12 @@
{% set usps_migration_flow_types = ['business', 'family', 'individual', 'perm', 'temp'] %}
{% set usps_migration_flow_directions = ['from', 'to'] %}
-with process_date as (
+with
+zip_codes as (select * from {{ ref('zip_codes') }})
+, process_date as (
select to_date(yyyy_mm, 'YYYYMM') as date_, *
from {{ ref('usps_migration_union') }}
)
-, zip_codes as (
- select
- zip_code_id
- , zip_code
- , valid
- from
- {{ ref('zip_codes') }}
-)
, add_zip_id as (
select zip_code_id, process_date.*
from
From 92f6285d8070c05d062ca57f5c51770978f3311d Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 21 Aug 2024 17:28:11 -0400
Subject: [PATCH 077/142] finish adding columns to census_tracts_wide
---
.../census_tracts_distance_to_transit.sql | 10 ++--
dbt/models/census_tracts_parcel_area.sql | 15 ++----
dbt/models/census_tracts_parking_limits.sql | 50 +++++++++++++++++++
dbt/models/census_tracts_property_values.sql | 15 ++----
dbt/models/census_tracts_wide.sql | 33 ++++++------
dbt/models/commercial_permits_to_parcels.sql | 2 +-
dbt/models/parcels.sql | 40 ++++++++-------
dbt/models/parcels_distance_to_transit.sql | 15 +-----
dbt/models/parcels_to_census_tracts.sql | 23 ---------
dbt/models/residential_permits_to_parcels.sql | 2 +-
dbt/models/segregation_indexes.sql | 7 ++-
11 files changed, 108 insertions(+), 104 deletions(-)
create mode 100644 dbt/models/census_tracts_parking_limits.sql
delete mode 100644 dbt/models/parcels_to_census_tracts.sql
diff --git a/dbt/models/census_tracts_distance_to_transit.sql b/dbt/models/census_tracts_distance_to_transit.sql
index 2c905572..edf28211 100644
--- a/dbt/models/census_tracts_distance_to_transit.sql
+++ b/dbt/models/census_tracts_distance_to_transit.sql
@@ -11,18 +11,14 @@ with
parcels_distance_to_transit as (
select * from {{ ref('parcels_distance_to_transit') }}
)
- , census_tracts as (
- select * from {{ ref('census_tracts') }}
- )
- , parcels_to_census_tracts as (
- select * from {{ ref('parcels_to_census_tracts') }}
- )
+ , census_tracts as (select * from {{ ref('census_tracts') }})
+ , parcels as (select * from {{ ref('parcels') }})
select
census_tracts.census_tract_id
, avg(parcels_distance_to_transit.distance) as mean_distance_to_transit
, {{ median('parcels_distance_to_transit.distance') }} as median_distance_to_transit
from
census_tracts
- left join parcels_to_census_tracts using (census_tract_id)
+ left join parcels using (census_tract_id)
left join parcels_distance_to_transit using (parcel_id)
group by 1
diff --git a/dbt/models/census_tracts_parcel_area.sql b/dbt/models/census_tracts_parcel_area.sql
index 4751c4ea..687d2274 100644
--- a/dbt/models/census_tracts_parcel_area.sql
+++ b/dbt/models/census_tracts_parcel_area.sql
@@ -7,20 +7,13 @@
)
}}
-with census_tracts as (
- select * from {{ ref('census_tracts') }}
-)
-, parcels as (
- select * from {{ ref('parcels_base') }}
-)
-, parcels_to_census_tracts as (
- select * from {{ ref('parcels_to_census_tracts') }}
-)
+with
+census_tracts as (select * from {{ ref('census_tracts') }}),
+parcels as (select * from {{ ref('parcels') }})
select
census_tract_id
, sum(st_area(parcels.geom)) as parcel_sqm
from
census_tracts
- left join parcels_to_census_tracts using (census_tract_id)
- left join parcels using (parcel_id)
+ left join parcels using (census_tract_id)
group by 1
diff --git a/dbt/models/census_tracts_parking_limits.sql b/dbt/models/census_tracts_parking_limits.sql
new file mode 100644
index 00000000..b40f89ea
--- /dev/null
+++ b/dbt/models/census_tracts_parking_limits.sql
@@ -0,0 +1,50 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_tract_id'], 'unique': true},
+ ]
+ )
+}}
+
+with
+parcels as (select * from {{ ref('parcels') }}),
+transit as (select * from {{ ref('high_frequency_transit_lines') }}),
+with_parking_limit as (
+ select
+ parcel_id,
+ census_tract_id,
+ case
+ when parcels.valid << '[2015-01-01,)'::daterange then 'full'
+ else
+ case
+ when st_intersects(parcels.geom, transit.blue_zone_geom) then 'eliminated'
+ when st_intersects(parcels.geom, transit.yellow_zone_geom) then 'reduced'
+ else 'full'
+ end
+ end as limit_
+ from
+ parcels
+ left join transit
+ on parcels.valid && transit.valid
+),
+with_limit_numeric as (
+ select
+ parcel_id,
+ census_tract_id,
+ limit_,
+ case limit_
+ when 'full' then 1
+ when 'reduced' then 0.5
+ when 'eliminated' then 0
+ end as limit_numeric
+ from with_parking_limit
+),
+by_census_tract as (
+ select
+ census_tract_id,
+ avg(limit_numeric) as mean_limit
+ from with_limit_numeric
+ group by census_tract_id
+)
+select * from by_census_tract
diff --git a/dbt/models/census_tracts_property_values.sql b/dbt/models/census_tracts_property_values.sql
index 0b140a92..7bb18e72 100644
--- a/dbt/models/census_tracts_property_values.sql
+++ b/dbt/models/census_tracts_property_values.sql
@@ -9,21 +9,14 @@
-- Median and total parcel property values aggregated by census tract.
-with parcels as (
- select * from {{ ref('parcels_base') }}
-)
-, census_tracts as (
- select * from {{ ref('census_tracts') }}
-)
-, parcels_to_census_tracts as (
- select * from {{ ref('parcels_to_census_tracts') }}
-)
+with
+parcels as (select * from {{ ref('parcels') }})
+, census_tracts as (select * from {{ ref('census_tracts') }})
select
census_tracts.census_tract_id
, sum(parcels.emv_total) as total_value
, {{ median('parcels.emv_total') }} as median_value
from
census_tracts
- left join parcels_to_census_tracts using (census_tract_id)
- left join parcels using (parcel_id)
+ left join parcels using (census_tract_id)
group by 1
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index 49433109..d080f37e 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -1,9 +1,16 @@
+{{
+ config(
+ materialized='table',
+ )
+}}
+
with
-census_tracts_in_city_boundary as (
- select
- census_tract_id
- from {{ ref('census_tracts_in_city_boundary') }}
-)
+in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
+, housing_units as (select * from {{ ref('census_tracts_housing_units') }})
+, property_values as (select * from {{ ref('census_tracts_property_values') }})
+, distance_to_transit as (select * from {{ ref('census_tracts_distance_to_transit') }})
+, parcel_area as (select * from {{ ref('census_tracts_parcel_area') }})
+, parking_limits as (select * from {{ ref('census_tracts_parking_limits') }})
, census_tracts as (
select
census_tract_id
@@ -12,19 +19,7 @@ census_tracts_in_city_boundary as (
from {{ ref('census_tracts') }}
where
year_ <= 2020
- and census_tract_id in (select * from census_tracts_in_city_boundary)
-)
-, housing_units as (
- select * from {{ ref('census_tracts_housing_units') }}
-)
-, property_values as (
- select * from {{ ref('census_tracts_property_values') }}
-)
-, distance_to_transit as (
- select * from {{ ref('census_tracts_distance_to_transit') }}
-)
-, parcel_area as (
- select * from {{ ref('census_tracts_parcel_area') }}
+ and census_tract_id in (select census_tract_id from in_city_boundary)
)
, raw_data as (
select
@@ -36,12 +31,14 @@ select
, distance_to_transit.median_distance_to_transit
, distance_to_transit.mean_distance_to_transit
, parcel_area.parcel_sqm
+ , parking_limits.mean_limit
from
census_tracts
inner join housing_units using (census_tract_id)
inner join property_values using (census_tract_id)
inner join distance_to_transit using (census_tract_id)
inner join parcel_area using (census_tract_id)
+ inner join parking_limits using (census_tract_id)
)
, with_std as (
select
diff --git a/dbt/models/commercial_permits_to_parcels.sql b/dbt/models/commercial_permits_to_parcels.sql
index de1df444..b74a47f4 100644
--- a/dbt/models/commercial_permits_to_parcels.sql
+++ b/dbt/models/commercial_permits_to_parcels.sql
@@ -21,7 +21,7 @@ commercial_permits as (
parcel_id as id
, valid
, geom
- from {{ ref("parcels_base") }}
+ from {{ ref("parcels") }}
)
select
child_id as commercial_permit_id
diff --git a/dbt/models/parcels.sql b/dbt/models/parcels.sql
index f3482927..12a48c54 100644
--- a/dbt/models/parcels.sql
+++ b/dbt/models/parcels.sql
@@ -1,21 +1,25 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['parcel_id'], 'unique': true},
+ {'columns': ['valid', 'geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
with
-parcels_to_zip_codes as (
- select
- parcel_id
- , zip_code_id
- from {{ref('parcels_to_zip_codes')}}
-),
-parcels_to_census_block_groups as (
- select
- parcel_id
- , census_block_group_id
- from {{ref('parcels_to_census_block_groups')}}
-)
+parcels as (select * from {{ ref('parcels_base') }}),
+to_zip_codes as (select * from {{ref('parcels_to_zip_codes')}}),
+to_census_bgs as (select * from {{ref('parcels_to_census_block_groups')}}),
+census_bgs as (select * from {{ref('census_block_groups')}})
select
- {{ dbt_utils.star(ref('parcels_base')) }}
- , zip_code_id
- , census_block_group_id
+ parcels.*
+ , to_zip_codes.zip_code_id
+ , to_census_bgs.census_block_group_id
+ , census_bgs.census_tract_id
from
- {{ ref('parcels_base') }}
- left join parcels_to_zip_codes using (parcel_id)
- left join parcels_to_census_block_groups using (parcel_id)
+ parcels
+ left join to_zip_codes using (parcel_id)
+ left join to_census_bgs using (parcel_id)
+ left join census_bgs using (census_block_group_id)
diff --git a/dbt/models/parcels_distance_to_transit.sql b/dbt/models/parcels_distance_to_transit.sql
index 9edd1ff1..77a5daf6 100644
--- a/dbt/models/parcels_distance_to_transit.sql
+++ b/dbt/models/parcels_distance_to_transit.sql
@@ -8,19 +8,8 @@
}}
with
- parcels as (
- select
- parcel_id
- , valid
- , geom
- from {{ ref('parcels_base') }}
- )
- , high_frequency_transit_lines as (
- select
- valid
- , geom
- from {{ ref('high_frequency_transit_lines') }}
- )
+ parcels as (select * from {{ ref('parcels') }})
+ , high_freq_transit as (select * from {{ ref('high_frequency_transit_lines') }})
select
parcels.parcel_id
, st_distance(parcels.geom, high_frequency_transit_lines.geom) as distance
diff --git a/dbt/models/parcels_to_census_tracts.sql b/dbt/models/parcels_to_census_tracts.sql
deleted file mode 100644
index 75f5f360..00000000
--- a/dbt/models/parcels_to_census_tracts.sql
+++ /dev/null
@@ -1,23 +0,0 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['parcel_id'], 'unique': true},
- {'columns': ['census_tract_id']}
- ]
- )
-}}
-
-with
-parcels as (select * from {{ ref("parcels_base") }})
-, census_block_groups as (select * from {{ ref("census_block_groups") }})
-, parcels_to_census_block_groups as (
- select * from {{ ref("parcels_to_census_block_groups") }}
-)
-select
- parcels.parcel_id
- , census_block_groups.census_tract_id
-from
- parcels
- left join parcels_to_census_block_groups using (parcel_id)
- left join census_block_groups using (census_block_group_id)
diff --git a/dbt/models/residential_permits_to_parcels.sql b/dbt/models/residential_permits_to_parcels.sql
index 7f9ea59c..daedfab1 100644
--- a/dbt/models/residential_permits_to_parcels.sql
+++ b/dbt/models/residential_permits_to_parcels.sql
@@ -21,7 +21,7 @@ residential_permits as (
parcel_id as id
, valid
, geom
- from {{ ref("parcels_base") }}
+ from {{ ref("parcels") }}
)
select
child_id as residential_permit_id
diff --git a/dbt/models/segregation_indexes.sql b/dbt/models/segregation_indexes.sql
index 206545e5..ea47ba0f 100644
--- a/dbt/models/segregation_indexes.sql
+++ b/dbt/models/segregation_indexes.sql
@@ -8,7 +8,12 @@
with
categories as (select * from {{ ref("population_categories") }})
, acs_tract as (select * from {{ ref("acs_tract") }})
- , acs_variables as (select * from {{ ref("acs_variables") }})
+ , acs_variables as (
+ select
+ variable as name_,
+ description
+ from {{ ref("acs_variables") }}
+ )
, pop_tyc as
( -- Population by tract, year, and category
select acs_tract.census_tract_id, acs_tract.year_, categories.category, acs_tract.value_
From 2526c3188369b4323df2167ace9148855b9953bb Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 21 Aug 2024 17:46:31 -0400
Subject: [PATCH 078/142] remove unused line in clean.sh
---
scripts/clean.sh | 2 --
1 file changed, 2 deletions(-)
diff --git a/scripts/clean.sh b/scripts/clean.sh
index 2bd06083..bf0eccb4 100755
--- a/scripts/clean.sh
+++ b/scripts/clean.sh
@@ -8,5 +8,3 @@ autoflake --remove-all-unused-imports --in-place --recursive ./cities ./tests
nbqa black docs/guides/
nbqa autoflake --remove-all-unused-imports --recursive --in-place docs/guides/
nbqa isort -in-place docs/guides/
-
-pg_format -c .pg_format -i etl/*.sql
From 8da0487a379a964110206031ee72192a3292b11b Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 22 Aug 2024 14:14:40 -0400
Subject: [PATCH 079/142] switch api to use public schema
---
api/schema.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/api/schema.sql b/api/schema.sql
index 8578cdbf..7c639ef9 100644
--- a/api/schema.sql
+++ b/api/schema.sql
@@ -3,7 +3,7 @@ drop schema if exists api cascade;
create schema api;
create view api.acs_tract_wide as (
- select * from dbt.acs_tract_wide
+ select * from acs_tract_wide
order by random()
);
From 9319009a15f4d1befb00085a726d1a8a7089efff Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 22 Aug 2024 17:41:24 -0400
Subject: [PATCH 080/142] distance to transit should include lines and stops
---
dbt/models/high_frequency_transit_stops.sql | 5 +++--
dbt/models/parcels_distance_to_transit.sql | 17 +++++++++++++----
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/dbt/models/high_frequency_transit_stops.sql b/dbt/models/high_frequency_transit_stops.sql
index 9d9a0459..38f40aa0 100644
--- a/dbt/models/high_frequency_transit_stops.sql
+++ b/dbt/models/high_frequency_transit_stops.sql
@@ -1,9 +1,10 @@
with stops_2015 as (
select
- st_union(st_transform(geom, {{ var("srid") }}))::geometry(multipoint, {{ var("srid") }}) as geom
+ st_union(st_transform(geom, {{ var("srid") }})) as geom
from {{ source('minneapolis', 'high_frequency_transit_2015_freq_rail_stops') }}
)
select
- '[,]'::daterange as valid
+ 0 as high_frequency_transit_stop_id
+ , '[,]'::daterange as valid
, geom
from stops_2015
diff --git a/dbt/models/parcels_distance_to_transit.sql b/dbt/models/parcels_distance_to_transit.sql
index 77a5daf6..6580ca58 100644
--- a/dbt/models/parcels_distance_to_transit.sql
+++ b/dbt/models/parcels_distance_to_transit.sql
@@ -7,13 +7,22 @@
)
}}
+-- This model calculates the distance from each parcel to the nearest high
+-- frequency transit line or stop
with
parcels as (select * from {{ ref('parcels') }})
- , high_freq_transit as (select * from {{ ref('high_frequency_transit_lines') }})
+ , lines as (select * from {{ ref('high_frequency_transit_lines') }})
+ , stops as (select * from {{ ref('high_frequency_transit_stops') }})
+ , lines_and_stops as materialized (
+ select
+ lines.valid * stops.valid as valid
+ , st_union(lines.geom, stops.geom) as geom
+ from
+ lines inner join stops on lines.valid && stops.valid
+)
select
parcels.parcel_id
- , st_distance(parcels.geom, high_frequency_transit_lines.geom) as distance
+ , st_distance(parcels.geom, lines_and_stops.geom) as distance
from
parcels
- inner join high_frequency_transit_lines
- on parcels.valid && high_frequency_transit_lines.valid
+ inner join lines_and_stops on parcels.valid && lines_and_stops.valid
From d35316799103ac627601341f5d8d497e5184be57 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 22 Aug 2024 17:41:47 -0400
Subject: [PATCH 081/142] correct year range for parcels
---
dbt/models/parcels_base.sql | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index 29055aa4..4e7a6fbb 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -12,24 +12,35 @@
{% set city = 'MINNEAPOLIS' %}
{% set county_id = '053' %}
-with parcels as (
+with
+-- This is a union of all the parcels from the years 2002 to 2023
+parcels_union as (
{% for year_ in years %}
select
ogc_fid,
replace(pin, '{{ county_id }}-', '') as pin,
- '[{{ year_ - 1 }}-01-01,{{ year_ }}-01-01)'::daterange as valid,
+
+ -- parcels are a year-end snapshot, named after the year they cover
+ '[{{ year_ }}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid,
nullif(emv_land, 0) as emv_land,
nullif(emv_bldg, 0) as emv_bldg,
nullif(emv_total, 0) as emv_total,
nullif(year_built, 0) as year_built,
- sale_date,
+ nullif(sale_date, '1899-12-30'::date),
nullif(sale_value, 0) as sale_value,
st_transform(geom, {{ var("srid") }}) as geom
from {{ source('minneapolis', 'parcels_shp_plan_regonal_' ~ year_ ~ '_parcels' ~ year_ ~ 'hennepin') }}
where upper({{ "city" if year_ < 2018 else "ctu_name" }}) = '{{ city }}'
{% if not loop.last %}union all{% endif %}
{% endfor %}
+),
+
+-- Some of the parcel datasets contain exact duplicates that we remove. Note
+-- that duplicate pin/year pairs may remain.
+parcels_distinct as (
+ select distinct on (pin, valid, emv_land, emv_bldg, emv_total, year_built, sale_date, sale_value, geom) *
+ from parcels_union
)
select
{{ dbt_utils.generate_surrogate_key(['ogc_fid', 'valid']) }} as parcel_id, *
-from parcels
+from parcels_distinct
From 24af31a06eb1f6b70acd87110266fcfc17513b67 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 22 Aug 2024 17:42:03 -0400
Subject: [PATCH 082/142] add census_tract field
---
dbt/models/census_tracts.sql | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index c6a67620..4ffe0004 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -8,7 +8,7 @@
)
}}
-with census_tracts as (
+with census_tracts_union as (
{% for year_ in var('census_years') %}
select
{% if year_ == 2010 %}
@@ -30,8 +30,12 @@ from
{{ source('minneapolis', 'census_cb_' ~ year_ ~ '_27_tract_500k') }}
{% if not loop.last %}union all{% endif %}
{% endfor %}
+),
+with_census_tract as (
+ select *, statefp || countyfp || tractce as census_tract
+ from census_tracts_union
)
select
{{ dbt_utils.generate_surrogate_key(['geoidfq', 'valid']) }} as census_tract_id, *
from
- census_tracts
+ with_census_tract
From 5a00c7ffcfe3de02e03c7fd893d966bb654ee0f4 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 22 Aug 2024 17:42:12 -0400
Subject: [PATCH 083/142] add acs fields to census_tract_wide
---
dbt/models/census_tracts_wide.sql | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index d080f37e..e078af39 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -12,15 +12,20 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
, parcel_area as (select * from {{ ref('census_tracts_parcel_area') }})
, parking_limits as (select * from {{ ref('census_tracts_parking_limits') }})
, census_tracts as (
- select
- census_tract_id
- , statefp || countyfp || tractce as census_tract
- , year_
+ select *
from {{ ref('census_tracts') }}
where
year_ <= 2020
and census_tract_id in (select census_tract_id from in_city_boundary)
)
+, white as (
+ select * from {{ ref('acs_tract') }}
+ where name_ = 'B02001_002E' -- white population
+)
+, income as (
+ select * from {{ ref('acs_tract') }}
+ where name_ = 'B19013_001E' -- median household income
+)
, raw_data as (
select
census_tracts.census_tract
@@ -32,6 +37,8 @@ select
, distance_to_transit.mean_distance_to_transit
, parcel_area.parcel_sqm
, parking_limits.mean_limit
+ , white.value_ as white
+ , income.value_ as income
from
census_tracts
inner join housing_units using (census_tract_id)
@@ -39,6 +46,8 @@ from
inner join distance_to_transit using (census_tract_id)
inner join parcel_area using (census_tract_id)
inner join parking_limits using (census_tract_id)
+ inner join white using (census_tract_id)
+ inner join income using (census_tract_id)
)
, with_std as (
select
@@ -50,7 +59,9 @@ select
, median_distance_to_transit
, mean_distance_to_transit
, parcel_sqm
- , {{ standardize(['num_units', 'total_value', 'median_value', 'median_distance_to_transit', 'mean_distance_to_transit', 'parcel_sqm']) }}
+ , {{ standardize(['num_units', 'total_value', 'median_value',
+ 'median_distance_to_transit', 'mean_distance_to_transit',
+ 'parcel_sqm', 'white', 'income' ]) }}
from
raw_data
)
From 3ab57e7cab165d249fb387e4ff0362297c6dc0dc Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 23 Aug 2024 15:26:17 -0400
Subject: [PATCH 084/142] re-add sale_date column alias
---
dbt/models/parcels_base.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index 4e7a6fbb..4929f82b 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -26,7 +26,7 @@ parcels_union as (
nullif(emv_bldg, 0) as emv_bldg,
nullif(emv_total, 0) as emv_total,
nullif(year_built, 0) as year_built,
- nullif(sale_date, '1899-12-30'::date),
+ nullif(sale_date, '1899-12-30'::date) as sale_date,
nullif(sale_value, 0) as sale_value,
st_transform(geom, {{ var("srid") }}) as geom
from {{ source('minneapolis', 'parcels_shp_plan_regonal_' ~ year_ ~ '_parcels' ~ year_ ~ 'hennepin') }}
From 353bcfeec501c079715381338b446ed9a444192b Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 23 Aug 2024 15:27:19 -0400
Subject: [PATCH 085/142] add whiteness and income demographic variables to
census_tracts_wide
---
dbt/models/acs_tract.sql | 9 +++----
dbt/models/census_tracts.sql | 37 ++++++++++++++++++++++++----
dbt/models/census_tracts_wide.sql | 40 ++++++++++++++++++++++++++-----
dbt/models/schema.yml | 15 ------------
4 files changed, 70 insertions(+), 31 deletions(-)
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
index 3909b2dc..96fc0a02 100644
--- a/dbt/models/acs_tract.sql
+++ b/dbt/models/acs_tract.sql
@@ -2,7 +2,7 @@
config(
materialized='table',
indexes = [
- {'columns': ['census_tract_id', 'year_', 'name_'], 'unique': true},
+ {'columns': ['census_tract', 'year_', 'name_'], 'unique': true},
]
)
}}
@@ -11,13 +11,10 @@ with
census_tracts as (select * from {{ ref("census_tracts") }})
, acs_tract as (select * from {{ ref('acs_tract_clean') }})
select
- census_tract_id
+ census_tract
, acs_tract.year_
, acs_tract.name_
, acs_tract.value_
from
acs_tract
- inner join census_tracts
- using (statefp, countyfp, tractce)
- where
- to_date(acs_tract.year_::text , 'YYYY') <@ census_tracts.valid
+ inner join census_tracts using (statefp, countyfp, tractce, year_)
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 4ffe0004..28ff79e6 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -16,14 +16,13 @@ select
, county as countyfp
, tract as tractce
, geo_id as geoidfq
- , '[,2013-01-01)'::daterange as valid
{% else %}
statefp
, countyfp
, tractce
, {{ 'geoidfq' if year_ >= 2023 else 'affgeoid' }} as geoidfq
- , '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
{% endif %}
+ , '[{{year_}}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid
, {{ year_ }} as year_
, st_transform(geom, {{ var("srid") }}) as geom
from
@@ -31,11 +30,41 @@ from
{% if not loop.last %}union all{% endif %}
{% endfor %}
),
+years_2011_2012 as (
+ select
+ statefp
+ , countyfp
+ , tractce
+ , geoidfq
+ , '[2011-01-01,2012-01-01)'::daterange as valid
+ , 2011 as year_
+ , geom
+ from census_tracts_union
+ where year_ = 2010
+ union all
+ select
+ statefp
+ , countyfp
+ , tractce
+ , geoidfq
+ , '[2012-01-01,2013-01-01)'::daterange as valid
+ , 2012 as year_
+ , geom
+ from census_tracts_union
+ where year_ = 2010
+),
+add_2011_2012 as (
+ select *
+ from census_tracts_union
+ union all
+ select *
+ from years_2011_2012
+),
with_census_tract as (
select *, statefp || countyfp || tractce as census_tract
- from census_tracts_union
+ from add_2011_2012
)
select
- {{ dbt_utils.generate_surrogate_key(['geoidfq', 'valid']) }} as census_tract_id, *
+ {{ dbt_utils.generate_surrogate_key(['geoidfq', 'year_']) }} as census_tract_id, *
from
with_census_tract
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index e078af39..677aff99 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -11,6 +11,7 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
, distance_to_transit as (select * from {{ ref('census_tracts_distance_to_transit') }})
, parcel_area as (select * from {{ ref('census_tracts_parcel_area') }})
, parking_limits as (select * from {{ ref('census_tracts_parking_limits') }})
+, acs_tract as (select * from {{ ref('acs_tract') }})
, census_tracts as (
select *
from {{ ref('census_tracts') }}
@@ -18,12 +19,37 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
year_ <= 2020
and census_tract_id in (select census_tract_id from in_city_boundary)
)
+
+-- Fill in data for 2011 and 2012 using the closest available year (2013).
+-- Replace 2020 data with 2019 data to avoid pandemic effects.
+, acs_replace_years as (
+ select * from acs_tract where year_ != 2020
+ union all
+ select census_tract, 2020 as year_, name_, value_
+ from acs_tract where year_ = 2019
+ union all
+ -- 2011 and 2012 reuse the 2013 rows, relabelled with the target year.
+ -- NOTE(review): assumes acs_tract has no 2011/2012 rows of its own; confirm.
+ select census_tract, 2011 as year_, name_, value_
+ from acs_tract where year_ = 2013
+ union all
+ select census_tract, 2012 as year_, name_, value_
+ from acs_tract where year_ = 2013
+)
, white as (
- select * from {{ ref('acs_tract') }}
- where name_ = 'B02001_002E' -- white population
+ select * from acs_replace_years
+ where name_ = 'B03002_003E' -- white non-hispanic population
+)
+, population as (
+ select * from acs_replace_years
+ where name_ = 'B01003_001E' -- total population
+)
+, white_frac as (
+ select white.census_tract, white.year_, {{ safe_divide('white.value_', 'population.value_') }} as value_
+ from white inner join population using (census_tract, year_)
)
, income as (
- select * from {{ ref('acs_tract') }}
+ select * from acs_replace_years
where name_ = 'B19013_001E' -- median household income
)
, raw_data as (
@@ -37,7 +63,7 @@ select
, distance_to_transit.mean_distance_to_transit
, parcel_area.parcel_sqm
, parking_limits.mean_limit
- , white.value_ as white
+ , white_frac.value_ as white
, income.value_ as income
from
census_tracts
@@ -46,8 +72,8 @@ from
inner join distance_to_transit using (census_tract_id)
inner join parcel_area using (census_tract_id)
inner join parking_limits using (census_tract_id)
- inner join white using (census_tract_id)
- inner join income using (census_tract_id)
+ left join white_frac using (census_tract, year_)
+ left join income using (census_tract, year_)
)
, with_std as (
select
@@ -59,6 +85,8 @@ select
, median_distance_to_transit
, mean_distance_to_transit
, parcel_sqm
+ , white
+ , income
, {{ standardize(['num_units', 'total_value', 'median_value',
'median_distance_to_transit', 'mean_distance_to_transit',
'parcel_sqm', 'white', 'income' ]) }}
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index f9b0ddab..b6d23411 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -108,20 +108,6 @@ models:
to: ref('census_tracts')
field: census_tract_id
- - name: acs_tract
- data_tests:
- - dbt_utils.unique_combination_of_columns:
- combination_of_columns:
- - census_tract_id
- - year_
- - name_
- columns:
- - name: census_tract_id
- data_tests:
- - relationships:
- to: ref('census_tracts')
- field: census_tract_id
-
- name: acs_block_group
data_tests:
- dbt_utils.unique_combination_of_columns:
@@ -164,7 +150,6 @@ models:
field: zip_code_id
- name: census_block_group_id
data_tests:
- - not_null
- relationships:
to: ref('census_block_groups')
field: census_block_group_id
From a5b311fae391d1168689104a01d457f71f0dec3f Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 23 Aug 2024 16:06:47 -0400
Subject: [PATCH 086/142] correctly handle the downtown parking limit
elimination
---
dbt/models/census_tracts_parking_limits.sql | 3 +++
1 file changed, 3 insertions(+)
diff --git a/dbt/models/census_tracts_parking_limits.sql b/dbt/models/census_tracts_parking_limits.sql
index b40f89ea..be6a25f1 100644
--- a/dbt/models/census_tracts_parking_limits.sql
+++ b/dbt/models/census_tracts_parking_limits.sql
@@ -10,11 +10,14 @@
with
parcels as (select * from {{ ref('parcels') }}),
transit as (select * from {{ ref('high_frequency_transit_lines') }}),
+downtown as (select * from {{ ref('downtown') }}),
with_parking_limit as (
select
parcel_id,
census_tract_id,
case
+ -- Semi-join on downtown: a cross join would repeat each row per downtown row and return nothing if downtown were empty.
+ when exists (select 1 from downtown where st_intersects(parcels.geom, downtown.geom)) then 'eliminated'
when parcels.valid << '[2015-01-01,)'::daterange then 'full'
else
case
From ea007ac8159bddc234474cd972f8ce67f33d841f Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 23 Aug 2024 16:06:59 -0400
Subject: [PATCH 087/142] add mean_limit to the wide table
---
dbt/models/census_tracts_wide.sql | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index 677aff99..24333eb2 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -87,9 +87,10 @@ select
, parcel_sqm
, white
, income
+ , mean_limit
, {{ standardize(['num_units', 'total_value', 'median_value',
'median_distance_to_transit', 'mean_distance_to_transit',
- 'parcel_sqm', 'white', 'income' ]) }}
+ 'parcel_sqm', 'white', 'income', 'mean_limit' ]) }}
from
raw_data
)
From 95973f636bf92640ef1671b9ce3c6f59d1b1caff Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 26 Aug 2024 11:17:40 -0400
Subject: [PATCH 088/142] correctly load hispanic_or_latino acs data
---
dbt/models/schema.yml | 11 ++++++++++
dbt/seeds/acs_variables.csv | 2 +-
dbt/seeds/population_categories.csv | 2 +-
load_data_server/load_acs.py | 33 ++++++++++++++++-------------
4 files changed, 31 insertions(+), 17 deletions(-)
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index b6d23411..2b6698eb 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -295,3 +295,14 @@ models:
data_tests:
- not_null
- unique
+
+seeds:
+ - name: population_categories
+ columns:
+ - name: category
+ data_tests:
+ - unique
+ - not_null
+ - relationships:
+ to: ref('acs_variables')
+ field: description
diff --git a/dbt/seeds/acs_variables.csv b/dbt/seeds/acs_variables.csv
index 8cdeba7c..5520ef20 100644
--- a/dbt/seeds/acs_variables.csv
+++ b/dbt/seeds/acs_variables.csv
@@ -20,7 +20,7 @@ B02001_003E,black
B02001_004E,american_indian_or_alaska_native
B02001_005E,asian
B02001_006E,native_hawaiian_or_pacific_islander
-B03001_003E,population_hispanic_or_latino
+B03001_003E,hispanic_or_latino
B02001_007E,other_race
B02001_008E,multiple_races
B02001_009E,multiple_races_and_other_race
diff --git a/dbt/seeds/population_categories.csv b/dbt/seeds/population_categories.csv
index 79e93b14..501dbf73 100644
--- a/dbt/seeds/population_categories.csv
+++ b/dbt/seeds/population_categories.csv
@@ -1,7 +1,7 @@
category
population_white_non_hispanic
population_black_non_hispanic
-population_hispanic_or_latino
+hispanic_or_latino
population_asian_non_hispanic
population_native_hawaiian_or_pacific_islander_non_hispanic
population_american_indian_or_alaska_native_non_hispanic
diff --git a/load_data_server/load_acs.py b/load_data_server/load_acs.py
index 23ae704e..9ace57b6 100644
--- a/load_data_server/load_acs.py
+++ b/load_data_server/load_acs.py
@@ -49,7 +49,7 @@
"B02001_004E": "american_indian_or_alaska_native",
"B02001_005E": "asian",
"B02001_006E": "native_hawaiian_or_pacific_islander",
- "B03001_003E": "population_hispanic_or_latino",
+ "B03001_003E": "hispanic_or_latino",
"B02001_007E": "other_race",
"B02001_008E": "multiple_races",
"B02001_009E": "multiple_races_and_other_race",
@@ -78,14 +78,6 @@
"B02015_023E": "south_asian_bhutanese",
"B02015_024E": "south_asian_nepalese",
"B02015_025E": "south_asian_pakistani",
- "B02015_026E": "south_asian_sikh",
- "B02015_027E": "south_asian_sri_lankan",
- "B02015_028E": "south_asian_other",
- "B02015_029E": "central_asian_kazakh",
- "B02015_030E": "central_asian_uzbek",
- "B02015_031E": "central_asian_other",
- "B02015_032E": "other_asian_specified",
- "B02015_033E": "other_asian_not_specified",
"B19013_001E": "median_household_income",
"B19013A_001E": "median_household_income_white",
"B19013H_001E": "median_household_income_white_non_hispanic",
@@ -128,10 +120,10 @@
bucket = storage_client.bucket(BUCKET_NAME)
cur = conn.cursor()
- cur.execute(f"drop table if exists {SCHEMA}.acs_tract_raw")
cur.execute(
- f"create table {SCHEMA}.acs_tract_raw (statefp text, countyfp text, tractce text, year int, code text, value numeric)"
+ f"create table if not exists {SCHEMA}.acs_tract_raw (statefp text, countyfp text, tractce text, year int, code text, value numeric)"
)
+ cur.execute(f"truncate table {SCHEMA}.acs_tract_raw")
temp_table = f"{SCHEMA}.acs_tract_temp"
cur.execute(f"drop table if exists {temp_table}")
@@ -141,7 +133,12 @@
for code in tqdm(ACS_CODES.keys()):
desc = ACS_CODES[code]
- for blob in bucket.list_blobs(prefix=f"acs/tracts/{desc}/"):
+ blobs = list(bucket.list_blobs(prefix=f"acs/tracts/{desc}/"))
+ if len(blobs) == 0:
+ logging.info(f"No blobs found for {desc}")
+ continue
+
+ for blob in blobs:
year = blob.name.split("/")[-1].split(".")[0]
cur.execute(f"truncate {temp_table}")
with tempfile.NamedTemporaryFile() as temp:
@@ -155,10 +152,10 @@
cur.execute(f"drop table {temp_table}")
conn.commit()
- cur.execute(f"drop table if exists {SCHEMA}.acs_bg_raw")
cur.execute(
- f"create table {SCHEMA}.acs_bg_raw (statefp text, countyfp text, tractce text, blkgrpce text, year int, code text, value numeric)"
+ f"create table if not exists {SCHEMA}.acs_bg_raw (statefp text, countyfp text, tractce text, blkgrpce text, year int, code text, value numeric)"
)
+ cur.execute(f"truncate table {SCHEMA}.acs_bg_raw")
temp_table = f"{SCHEMA}.acs_tract_temp"
cur.execute(f"drop table if exists {temp_table}")
@@ -168,7 +165,13 @@
for code in tqdm(ACS_CODES.keys()):
desc = ACS_CODES[code]
- for blob in bucket.list_blobs(prefix=f"acs/block_groups/{desc}/"):
+
+ blobs = list(bucket.list_blobs(prefix=f"acs/block_groups/{desc}/"))
+ if len(blobs) == 0:
+ logging.info(f"No blobs found for {desc}")
+ continue
+
+ for blob in blobs:
year = blob.name.split("/")[-1].split(".")[0]
cur.execute(f"truncate {temp_table}")
with tempfile.NamedTemporaryFile() as temp:
From 860565241dd236e992c9c3f4bcc135faf070eae5 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 26 Aug 2024 11:18:19 -0400
Subject: [PATCH 089/142] filter census tracts to city boundary when computing
segregation indexes
---
dbt/models/census_tracts_in_city_boundary.sql | 2 +
dbt/models/segregation_indexes.sql | 64 +++++++++++--------
2 files changed, 38 insertions(+), 28 deletions(-)
diff --git a/dbt/models/census_tracts_in_city_boundary.sql b/dbt/models/census_tracts_in_city_boundary.sql
index be4771e3..b19de633 100644
--- a/dbt/models/census_tracts_in_city_boundary.sql
+++ b/dbt/models/census_tracts_in_city_boundary.sql
@@ -6,6 +6,8 @@ with census_tracts as (
)
select
census_tracts.census_tract_id
+ , census_tracts.census_tract
+ , census_tracts.year_
from
census_tracts
, city_boundary
diff --git a/dbt/models/segregation_indexes.sql b/dbt/models/segregation_indexes.sql
index ea47ba0f..4722ecef 100644
--- a/dbt/models/segregation_indexes.sql
+++ b/dbt/models/segregation_indexes.sql
@@ -1,3 +1,12 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['census_tract', 'year_', 'distribution'], 'unique': true},
+ ]
+ )
+}}
+
-- Segregation index for each tract for each year, computed for each reference
-- distribution.
--
@@ -7,37 +16,41 @@
-- segregation index for the 'average_city' distribution.
with
categories as (select * from {{ ref("population_categories") }})
- , acs_tract as (select * from {{ ref("acs_tract") }})
+ , acs_tract_all as (select * from {{ ref("acs_tract") }})
, acs_variables as (
select
variable as name_,
description
from {{ ref("acs_variables") }}
)
+ , census_tracts_in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
+ , acs_tract as (
+ select * from acs_tract_all inner join census_tracts_in_city_boundary using (census_tract, year_)
+ )
, pop_tyc as
( -- Population by tract, year, and category
- select acs_tract.census_tract_id, acs_tract.year_, categories.category, acs_tract.value_
+ select acs_tract.census_tract, acs_tract.year_, categories.category, acs_tract.value_
from acs_tract
- join acs_variables using (name_)
- join categories on categories.category = acs_variables.description
+ inner join acs_variables using (name_)
+ inner join categories on categories.category = acs_variables.description
),
pop_ty as
- ( -- Population by tract and year (note: using 'population' variable instead of aggregating categories)
- select census_tract_id, year_, value_
- from acs_tract join acs_variables using (name_)
- where acs_variables.description = 'population'
+ ( -- Population by tract and year, summed over the categories in pop_tyc
+ select census_tract, year_, sum(value_) as value_
+ from pop_tyc
+ group by 1, 2
),
pop_yc as
( -- Population by year and category
select year_, category, sum(value_) as value_
from pop_tyc
- group by year_, category
+ group by 1, 2
),
pop_y as
( -- Population by year
select year_, sum(value_) as value_
- from pop_ty
- group by year_
+ from pop_tyc
+ group by 1
),
dist_yc as
( -- Distribution of population by year and category
@@ -45,18 +58,16 @@ with
pop_yc.year_,
pop_yc.category,
{{ safe_divide('pop_yc.value_', 'pop_y.value_') }} as value_
- from pop_yc
- inner join pop_y using (year_)
+ from pop_yc inner join pop_y using (year_)
),
dist_tyc as
( -- Distribution of population by tract, year, and category
select
- pop_tyc.census_tract_id,
+ pop_tyc.census_tract,
pop_tyc.year_,
pop_tyc.category,
{{ safe_divide('pop_tyc.value_', 'pop_ty.value_') }} as value_
- from pop_tyc
- inner join pop_ty using (year_, census_tract_id)
+ from pop_tyc inner join pop_ty using (year_, census_tract)
),
uniform_dist as
( -- Uniform distribution across categories
@@ -68,40 +79,37 @@ with
( -- Average of the annual citywide distributions
select category, avg(value_) as value_
from dist_yc
- group by category
+ group by 1
)
select
- census_tract_id,
+ census_tract,
year_,
dist as distribution,
sum(case when p = 0 or q = 0 then 0 else p * ln(p / q) end) as segregation_index
from
(
select
- dist_tyc.census_tract_id,
+ dist_tyc.census_tract,
dist_tyc.year_,
dist_tyc.value_ as p,
uniform_dist.value_ as q,
'uniform' as dist
- from dist_tyc
- inner join uniform_dist using (category)
+ from dist_tyc inner join uniform_dist using (category)
union all
select
- dist_tyc.census_tract_id,
+ dist_tyc.census_tract,
dist_tyc.year_,
dist_tyc.value_ as p,
dist_yc.value_ as q,
'annual_city' as dist
- from dist_tyc
- inner join dist_yc using (year_, category)
+ from dist_tyc inner join dist_yc using (year_, category)
union all
select
- dist_tyc.census_tract_id,
+ dist_tyc.census_tract,
dist_tyc.year_,
dist_tyc.value_ as p,
average_dist.value_ as q,
'average_city' as dist
- from dist_tyc
- inner join average_dist using (category)
+ from dist_tyc inner join average_dist using (category)
)
-group by census_tract_id, year_, dist
+group by 1, 2, 3
From f69a868bb110656894913d3b58a71e0ebe3d817a Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Mon, 26 Aug 2024 13:12:55 -0400
Subject: [PATCH 090/142] unify approach to year replacement for demographic
data
---
dbt/models/census_tracts_wide.sql | 41 ++++++++++++++++++++++---------
dbt/models/schema.yml | 6 ++---
2 files changed, 32 insertions(+), 15 deletions(-)
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index 24333eb2..f92d7a9a 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -11,7 +11,6 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
, distance_to_transit as (select * from {{ ref('census_tracts_distance_to_transit') }})
, parcel_area as (select * from {{ ref('census_tracts_parcel_area') }})
, parking_limits as (select * from {{ ref('census_tracts_parking_limits') }})
-, acs_tract as (select * from {{ ref('acs_tract') }})
, census_tracts as (
select *
from {{ ref('census_tracts') }}
@@ -20,28 +19,38 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
and census_tract_id in (select census_tract_id from in_city_boundary)
)
+-- Demographic data
+, acs_tract as (select * from {{ ref('acs_tract') }})
+, segregation_indexes as (
+ select census_tract, year_, 'segregation', segregation_index as value_
+ from {{ ref('segregation_indexes') }}
+ where distribution = 'annual_city'
+)
+, demographics as (
+ select * from acs_tract
+ union all
+ select * from segregation_indexes
+)
-- Fill in data for 2011, 2012 using closest available year. Replace 2020 data
-- with 2019 data to avoid pandemic effects.
-, acs_replace_years as (
- select * from acs_tract where year_ != 2020
+, demographics_replace_years as (
+ select * from demographics where year_ != 2020
union all
select census_tract, 2020 as year_, name_, value_
- from acs_tract where year_ = 2019
+ from demographics where year_ = 2019
union all
- -- select * from acs_tract
- -- union all
select census_tract, 2011 as year_, name_, value_
- from acs_tract where year_ = 2013
+ from demographics where year_ = 2013
union all
select census_tract, 2012 as year_, name_, value_
- from acs_tract where year_ = 2013
+ from demographics where year_ = 2013
)
, white as (
- select * from acs_replace_years
+ select * from demographics_replace_years
where name_ = 'B03002_003E' -- white non-hispanic population
)
, population as (
- select * from acs_replace_years
+ select * from demographics_replace_years
where name_ = 'B01003_001E' -- total population
)
, white_frac as (
@@ -49,9 +58,14 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
from white inner join population using (census_tract, year_)
)
, income as (
- select * from acs_replace_years
+ select * from demographics_replace_years
where name_ = 'B19013_001E' -- median household income
)
+, segregation as (
+ select * from demographics_replace_years
+ where name_ = 'segregation'
+)
+
, raw_data as (
select
census_tracts.census_tract
@@ -65,6 +79,7 @@ select
, parking_limits.mean_limit
, white_frac.value_ as white
, income.value_ as income
+ , segregation.value_ as segregation
from
census_tracts
inner join housing_units using (census_tract_id)
@@ -72,6 +87,7 @@ from
inner join distance_to_transit using (census_tract_id)
inner join parcel_area using (census_tract_id)
inner join parking_limits using (census_tract_id)
+ inner join segregation using (census_tract, year_)
left join white_frac using (census_tract, year_)
left join income using (census_tract, year_)
)
@@ -88,9 +104,10 @@ select
, white
, income
, mean_limit
+ , segregation
, {{ standardize(['num_units', 'total_value', 'median_value',
'median_distance_to_transit', 'mean_distance_to_transit',
- 'parcel_sqm', 'white', 'income', 'mean_limit' ]) }}
+ 'parcel_sqm', 'white', 'income', 'mean_limit', 'segregation' ]) }}
from
raw_data
)
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 2b6698eb..2ed844c8 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -126,15 +126,15 @@ models:
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- - census_tract_id
+ - census_tract
- year_
- distribution
columns:
- - name: census_tract_id
+ - name: census_tract
data_tests:
- relationships:
to: ref('census_tracts')
- field: census_tract_id
+ field: census_tract
- name: parcels
columns:
From b77631093db8e47236c0a0cf67e61b906dc28f9d Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 27 Aug 2024 13:55:28 -0400
Subject: [PATCH 091/142] fix up acs_tract_wide
---
dbt/models/acs_tract_wide.sql | 50 +++++++++++------------------------
1 file changed, 16 insertions(+), 34 deletions(-)
diff --git a/dbt/models/acs_tract_wide.sql b/dbt/models/acs_tract_wide.sql
index 0d142795..543c38e7 100644
--- a/dbt/models/acs_tract_wide.sql
+++ b/dbt/models/acs_tract_wide.sql
@@ -11,53 +11,35 @@
with
acs_tract as (select * from {{ ref('acs_tract') }})
-, acs_variables as (select * from {{ ref("acs_variables") }})
-, census_tracts_in_city_boundary as (
- select * from {{ ref("census_tracts_in_city_boundary") }}
+, acs_variables as (select * from {{ ref('acs_variables') }})
+, census_tracts as (select * from {{ ref('census_tracts_in_city_boundary') }})
+, acs_tract_filtered as (
+ select acs_tract.*, description
+ from acs_tract
+ inner join census_tracts using (census_tract, year_)
+ inner join acs_variables on acs_tract.name_ = acs_variables.variable
)
-, census_tracts as (
- select
- census_tract_id
- , substring(geoidfq from 10) as geoidfq
- from {{ ref("census_tracts") }}
- where census_tract_id in (select census_tract_id from census_tracts_in_city_boundary)
-)
-, acs_tract_extended as (
- select
- acs_tract.census_tract_id
- , census_tracts.geoidfq
- , acs_tract.year_
- , acs_tract.name_
- , acs_tract.value_
- from
- acs_tract
- inner join census_tracts using (census_tract_id)
-)
-
, distinct_tracts_and_variables as (
select distinct
- geoidfq
+ census_tract
, name_
- from acs_tract_extended
+ , description
+ from acs_tract_filtered
)
-
select
- acs_variables.description
- , distinct_tracts_and_variables.geoidfq as tract_id
+ description
+ , census_tract as tract_id
{% for year_ in years %}
, "{{ year_ }}"
{% endfor %}
-from
-distinct_tracts_and_variables
-inner join acs_variables
- on distinct_tracts_and_variables.name_ = acs_variables.variable
+from distinct_tracts_and_variables
{% for year_ in years %}
left join
(select
- geoidfq
+ census_tract
, name_
, value_ as "{{ year_}}"
-from acs_tract_extended
+from acs_tract_filtered
where year_ = {{ year_ }})
-using (geoidfq, name_)
+using (census_tract, name_)
{% endfor %}
From 465bbfe1267558dcb46ec5eb0bcc436928a298d6 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 27 Aug 2024 15:20:42 -0400
Subject: [PATCH 092/142] add type casts
---
dbt/macros/standardize.sql | 2 +-
dbt/models/census_tracts_housing_units.sql | 2 +-
dbt/models/commercial_permits.sql | 20 ++++++++--------
dbt/models/fair_market_rents.sql | 4 ++--
dbt/models/parcels_base.sql | 10 ++++----
dbt/models/parking.sql | 14 +++++------
dbt/models/residential_permits.sql | 28 +++++++++++-----------
dbt/models/segregation_indexes.sql | 8 +++----
dbt/models/usps_migration.sql | 4 ++--
9 files changed, 46 insertions(+), 46 deletions(-)
diff --git a/dbt/macros/standardize.sql b/dbt/macros/standardize.sql
index 795ebad2..63ec955e 100644
--- a/dbt/macros/standardize.sql
+++ b/dbt/macros/standardize.sql
@@ -1,6 +1,6 @@
{% macro standardize(columns) %}
{% for c in columns %}
- (({{ c }} - (avg({{ c }}) over ())) / (stddev_samp({{ c }}) over ())) as std_{{ c }}
+ (({{ c }} - (avg({{ c }}) over ())) / nullif(stddev_samp({{ c }}) over (), 0))::double precision as std_{{ c }} {# z-score over all rows; nullif yields NULL instead of a division-by-zero error when the column is constant #}
{% if not loop.last %},{% endif %}
{% endfor %}
{% endmacro %}
diff --git a/dbt/models/census_tracts_housing_units.sql b/dbt/models/census_tracts_housing_units.sql
index c60779e2..38c91359 100644
--- a/dbt/models/census_tracts_housing_units.sql
+++ b/dbt/models/census_tracts_housing_units.sql
@@ -18,7 +18,7 @@ with census_tracts as (
)
select
census_tracts.census_tract_id
- , sum(residential_permits.num_units) as num_units
+ , sum(residential_permits.num_units)::int as num_units
from
census_tracts
left join residential_permits_to_census_tracts using (census_tract_id)
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
index b51cb23d..4687eb30 100644
--- a/dbt/models/commercial_permits.sql
+++ b/dbt/models/commercial_permits.sql
@@ -11,17 +11,17 @@
select
sde_id as commercial_permit_id
, year::int as year_
- , nonres_gro as group_
- , nonres_sub as subgroup
- , nonres_typ as type_category
- , bldg_name as building_name
- , bldg_desc as building_description
- , permit_typ as permit_type
- , permit_val as permit_value
- , sqf as square_feet
- , address
+ , nonres_gro::text as group_
+ , nonres_sub::text as subgroup
+ , nonres_typ::text as type_category
+ , bldg_name::text as building_name
+ , bldg_desc::text as building_description
+ , permit_typ::text as permit_type
+ , permit_val::int as permit_value
+ , nullif(sqf, 0)::int as square_feet
+ , address::text
, st_transform(geom, {{ var("srid") }}) as geom
- from
+from
{{ source('minneapolis', 'commercial_permits_nonresidentialconstruction') }}
where
co_code = '053'
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index a9a9cdbc..847d19e1 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -19,9 +19,9 @@ zip_codes as (select * from {{ ref('zip_codes') }})
{% for bedroom in num_bedrooms %}
select
zip_code_id
- , rent_br{{ bedroom }} as rent
+ , rent_br{{ bedroom }}::int as rent
, {{ bedroom }} as num_bedrooms
- , year_
+ , year_::int
from fmr_zip
{% if not loop.last %} union all {% endif %}
{% endfor %}
diff --git a/dbt/models/parcels_base.sql b/dbt/models/parcels_base.sql
index 4929f82b..d4b0a7c9 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/parcels_base.sql
@@ -22,12 +22,12 @@ parcels_union as (
-- parcels are a year-end snapshot, named after the year they cover
'[{{ year_ }}-01-01,{{ year_ + 1 }}-01-01)'::daterange as valid,
- nullif(emv_land, 0) as emv_land,
- nullif(emv_bldg, 0) as emv_bldg,
- nullif(emv_total, 0) as emv_total,
- nullif(year_built, 0) as year_built,
+ nullif(emv_land, 0)::int as emv_land,
+ nullif(emv_bldg, 0)::int as emv_bldg,
+ nullif(emv_total, 0)::int as emv_total,
+ nullif(year_built, 0)::int as year_built,
nullif(sale_date, '1899-12-30'::date) as sale_date,
- nullif(sale_value, 0) as sale_value,
+ nullif(sale_value, 0)::int as sale_value,
st_transform(geom, {{ var("srid") }}) as geom
from {{ source('minneapolis', 'parcels_shp_plan_regonal_' ~ year_ ~ '_parcels' ~ year_ ~ 'hennepin') }}
where upper({{ "city" if year_ < 2018 else "ctu_name" }}) = '{{ city }}'
diff --git a/dbt/models/parking.sql b/dbt/models/parking.sql
index cd0b874e..ac31de4a 100644
--- a/dbt/models/parking.sql
+++ b/dbt/models/parking.sql
@@ -18,13 +18,13 @@ with
select
ogc_fid as parking_id
, to_date("year" || '-' || "date", 'YYYY-DD-Mon') as date_
- , "project na" as project_name
- , address
- , neighborho as neighborhood
- , ward
+ , "project na"::text as project_name
+ , address::text
+ , neighborho::text as neighborhood
+ , ward::int
, "downtown y" = 'Y' as is_downtown
- , "housing un" as num_housing_units
- , "car parkin" as num_car_parking_spaces
- , "bike parki" as num_bike_parking_spaces
+ , "housing un"::int as num_housing_units
+ , "car parkin"::int as num_car_parking_spaces
+ , "bike parki"::int as num_bike_parking_spaces
, st_transform(geom, {{ var("srid") }}) as geom
from parking_raw
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index 35018922..6f994a0a 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -11,22 +11,22 @@
select
sde_id::int as residential_permit_id
, year::int as year_
- , tenure
- , housing_ty as housing_type
- , res_permit as permit_type
- , address
- , name as name_
- , buildings as num_buildings
- , units as num_units
- , age_restri as num_age_restricted_units
- , memory_car as num_memory_care_units
- , assisted as num_assisted_living_units
+ , tenure::text
+ , housing_ty::text as housing_type
+ , res_permit::text as permit_type
+ , address::text
+ , name::text as name_
+ , buildings::int as num_buildings
+ , units::int as num_units
+ , age_restri::int as num_age_restricted_units
+ , memory_car::int as num_memory_care_units
+ , assisted::int as num_assisted_living_units
, com_off_re = 'Y' as is_commercial_and_residential
- , nullif(sqf, 0) as square_feet
+ , nullif(sqf, 0)::int as square_feet
, public_fun = 'Y' as is_public_funded
- , nullif(permit_val, 0) as permit_value
- , community_ as community_designation
- , notes
+ , nullif(permit_val, 0)::int as permit_value
+ , community_::text as community_designation
+ , notes::text
, st_transform(geom, {{ var("srid") }}) as geom
from
{{ source('minneapolis', 'residential_permits_residentialpermits') }}
diff --git a/dbt/models/segregation_indexes.sql b/dbt/models/segregation_indexes.sql
index 4722ecef..90f48b7b 100644
--- a/dbt/models/segregation_indexes.sql
+++ b/dbt/models/segregation_indexes.sql
@@ -57,7 +57,7 @@ with
select
pop_yc.year_,
pop_yc.category,
- {{ safe_divide('pop_yc.value_', 'pop_y.value_') }} as value_
+ ({{ safe_divide('pop_yc.value_', 'pop_y.value_') }})::double precision as value_
from pop_yc inner join pop_y using (year_)
),
dist_tyc as
@@ -66,18 +66,18 @@ with
pop_tyc.census_tract,
pop_tyc.year_,
pop_tyc.category,
- {{ safe_divide('pop_tyc.value_', 'pop_ty.value_') }} as value_
+ ({{ safe_divide('pop_tyc.value_', 'pop_ty.value_') }})::double precision as value_
from pop_tyc inner join pop_ty using (year_, census_tract)
),
uniform_dist as
( -- Uniform distribution across categories
with n_cat as (select count(*) as n_cat from categories)
- select category, 1.0 / n_cat as value_
+ select category, (1.0 / n_cat)::double precision as value_
from categories, n_cat
),
average_dist as
( -- Average of the annual citywide distributions
- select category, avg(value_) as value_
+ select category, avg(value_)::double precision as value_
from dist_yc
group by 1
)
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index 2cafef6c..541b32c1 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -30,7 +30,7 @@ zip_codes as (select * from {{ ref('zip_codes') }})
, zip_code_id
, '{{ flow_direction }}' as flow_direction
, 'total' as flow_type
- , total_{{ flow_direction }}_zip as flow_value
+ , total_{{ flow_direction }}_zip::int as flow_value
from add_zip_id
union all
{% for flow_type in usps_migration_flow_types %}
@@ -39,7 +39,7 @@ zip_codes as (select * from {{ ref('zip_codes') }})
, zip_code_id
, '{{ flow_direction }}' as flow_direction
, '{{ flow_type }}' as flow_type
- , total_{{ flow_direction }}_zip_{{ flow_type }} as flow_value
+ , total_{{ flow_direction }}_zip_{{ flow_type }}::int as flow_value
from add_zip_id
{% if not loop.last %} union all {% endif %}
{% endfor %}
From be166d267e08dd93c220cfb5ab9b0512ed37a191 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 27 Aug 2024 16:05:03 -0400
Subject: [PATCH 093/142] extract demographics table
---
dbt/models/census_tracts_wide.sql | 36 +++++--------------------
dbt/models/demographics.sql | 45 +++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+), 30 deletions(-)
create mode 100644 dbt/models/demographics.sql
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index f92d7a9a..05bede3e 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -11,6 +11,7 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
, distance_to_transit as (select * from {{ ref('census_tracts_distance_to_transit') }})
, parcel_area as (select * from {{ ref('census_tracts_parcel_area') }})
, parking_limits as (select * from {{ ref('census_tracts_parking_limits') }})
+, demographics as (select * from {{ ref('demographics') }})
, census_tracts as (
select *
from {{ ref('census_tracts') }}
@@ -20,37 +21,12 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
)
-- Demographic data
-, acs_tract as (select * from {{ ref('acs_tract') }})
-, segregation_indexes as (
- select census_tract, year_, 'segregation', segregation_index as value_
- from {{ ref('segregation_indexes') }}
- where distribution = 'annual_city'
-)
-, demographics as (
- select * from acs_tract
- union all
- select * from segregation_indexes
-)
--- Fill in data for 2011, 2012 using closest available year. Replace 2020 data
--- with 2019 data to avoid pandemic effects.
-, demographics_replace_years as (
- select * from demographics where year_ != 2020
- union all
- select census_tract, 2020 as year_, name_, value_
- from demographics where year_ = 2019
- union all
- select census_tract, 2011 as year_, name_, value_
- from demographics where year_ = 2013
- union all
- select census_tract, 2012 as year_, name_, value_
- from demographics where year_ = 2013
-)
, white as (
- select * from demographics_replace_years
+ select * from demographics
where name_ = 'B03002_003E' -- white non-hispanic population
)
, population as (
- select * from demographics_replace_years
+ select * from demographics
where name_ = 'B01003_001E' -- total population
)
, white_frac as (
@@ -58,12 +34,12 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
from white inner join population using (census_tract, year_)
)
, income as (
- select * from demographics_replace_years
+ select * from demographics
where name_ = 'B19013_001E' -- median household income
)
, segregation as (
- select * from demographics_replace_years
- where name_ = 'segregation'
+ select * from demographics
+ where description = 'segregation_index_annual_city'
)
, raw_data as (
diff --git a/dbt/models/demographics.sql b/dbt/models/demographics.sql
new file mode 100644
index 00000000..3720dac5
--- /dev/null
+++ b/dbt/models/demographics.sql
@@ -0,0 +1,45 @@
+-- Demographic data in long form, with columns (census_tract, year_, name_,
+-- description, value_). Combines the ACS variables with the computed segregation indexes.
+with
+acs_tract as (select * from {{ ref('acs_tract') }}),
+acs_variables as (select * from {{ ref('acs_variables') }}),
+acs_tract_with_description as (
+    select
+        acs_tract.census_tract,
+        acs_tract.year_,
+        acs_tract.name_,
+        acs_variables.description,
+        acs_tract.value_
+    from acs_tract
+    inner join acs_variables on acs_tract.name_ = acs_variables.variable
+),
+segregation_indexes as (
+    select
+        census_tract,
+        year_,
+        null::text as name_,  -- comes from segregation_indexes, so there is no ACS variable code
+        'segregation_index_' || distribution as description,
+        segregation_index as value_
+    from {{ ref('segregation_indexes') }}
+),
+demographics as (
+    select * from acs_tract_with_description
+    union all
+    select * from segregation_indexes
+),
+-- Fill in 2011 and 2012 with copies of the 2013 rows (closest available year),
+-- and replace 2020 with a copy of 2019 to avoid pandemic effects.
+-- NOTE(review): existing 2011/2012 rows are not filtered out; assumes there are none.
+demographics_replace_years as (
+    select * from demographics where year_ != 2020
+    union all
+    select census_tract, 2020 as year_, name_, description, value_
+    from demographics where year_ = 2019
+    union all
+    select census_tract, 2011 as year_, name_, description, value_
+    from demographics where year_ = 2013
+    union all
+    select census_tract, 2012 as year_, name_, description, value_
+    from demographics where year_ = 2013
+)
+select * from demographics_replace_years
From 4cd4b3652014a8130fb4c9337a4502a121a84c2c Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 27 Aug 2024 16:39:50 -0400
Subject: [PATCH 094/142] switch api over to demographics wide table
---
api/schema.sql | 5 ++---
dbt/models/demographics_wide.sql | 34 ++++++++++++++++++++++++++++++++
2 files changed, 36 insertions(+), 3 deletions(-)
create mode 100644 dbt/models/demographics_wide.sql
diff --git a/api/schema.sql b/api/schema.sql
index 7c639ef9..694076f6 100644
--- a/api/schema.sql
+++ b/api/schema.sql
@@ -2,9 +2,8 @@ drop schema if exists api cascade;
create schema api;
-create view api.acs_tract_wide as (
- select * from acs_tract_wide
- order by random()
+create view api.demographics as (
+ select * from demographics_wide
);
drop role if exists web_anon;
diff --git a/dbt/models/demographics_wide.sql b/dbt/models/demographics_wide.sql
new file mode 100644
index 00000000..ca9104bd
--- /dev/null
+++ b/dbt/models/demographics_wide.sql
@@ -0,0 +1,34 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['description']}
+ ]
+ )
+}}
+
+-- This is used by the web app. It has one row per (tract, demographic
+-- variable) pair and one column per year.
+with
+demographics as (select * from {{ ref('demographics') }}),
+census_tracts as (select * from {{ ref('census_tracts_in_city_boundary') }}),
+demographics_filtered as (
+ select demographics.*
+ from demographics
+ inner join census_tracts using (census_tract, year_)
+),
+final_ as (
+ select
+ description,
+ census_tract as tract_id,
+ {{ dbt_utils.pivot('year_',
+ dbt_utils.get_column_values(ref('demographics'),
+ 'year_',
+ order_by='year_'),
+ then_value='value_',
+ else_value='null',
+ agg='max') }}
+ from demographics_filtered
+ group by 1, 2
+)
+select * from final_
From 5ca1ba8cdf4d242f3c48fcc81c7dac137ff66276 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 27 Aug 2024 18:08:39 -0400
Subject: [PATCH 095/142] add census_tracts api endpoint
---
api/schema.sql | 17 ++++++++++++++++-
dbt/models/census_tracts.sql | 1 +
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/api/schema.sql b/api/schema.sql
index 694076f6..85783ad7 100644
--- a/api/schema.sql
+++ b/api/schema.sql
@@ -6,8 +6,23 @@ create view api.demographics as (
select * from demographics_wide
);
-drop role if exists web_anon;
+create view api.census_tracts as (
+ select
+ census_tract,
+ year_,
+ geom
+ from census_tracts
+);
+
+do $$
+begin
create role web_anon nologin;
+exception when duplicate_object then raise notice '%, skipping', sqlerrm using errcode = sqlstate;
+end
+$$;
+
+grant usage on schema public to web_anon;
+grant select on table public.spatial_ref_sys TO web_anon;
grant usage on schema api to web_anon;
grant select on all tables in schema api to web_anon;
grant web_anon to postgres;
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 28ff79e6..496470a9 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -4,6 +4,7 @@
indexes = [
{'columns': ['census_tract_id'], 'unique': true},
{'columns': ['valid', 'geom'], 'type': 'gist'}
+ {'columns': ['year']}
]
)
}}
From ef1fb5b1be40f35316833245a07bc9135d6d5b98 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 28 Aug 2024 14:36:20 -0400
Subject: [PATCH 096/142] use filtered census tracts in api endpoint
---
api/schema.sql | 2 +-
dbt/models/census_tracts.sql | 4 ++--
dbt/models/census_tracts_in_city_boundary.sql | 10 ++++++++++
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/api/schema.sql b/api/schema.sql
index 85783ad7..1ee655e0 100644
--- a/api/schema.sql
+++ b/api/schema.sql
@@ -11,7 +11,7 @@ create view api.census_tracts as (
census_tract,
year_,
geom
- from census_tracts
+ from census_tracts_in_city_boundary
);
do $$
diff --git a/dbt/models/census_tracts.sql b/dbt/models/census_tracts.sql
index 496470a9..50462489 100644
--- a/dbt/models/census_tracts.sql
+++ b/dbt/models/census_tracts.sql
@@ -3,8 +3,8 @@
materialized='table',
indexes = [
{'columns': ['census_tract_id'], 'unique': true},
- {'columns': ['valid', 'geom'], 'type': 'gist'}
- {'columns': ['year']}
+ {'columns': ['valid', 'geom'], 'type': 'gist'},
+ {'columns': ['year_']}
]
)
}}
diff --git a/dbt/models/census_tracts_in_city_boundary.sql b/dbt/models/census_tracts_in_city_boundary.sql
index b19de633..266332b0 100644
--- a/dbt/models/census_tracts_in_city_boundary.sql
+++ b/dbt/models/census_tracts_in_city_boundary.sql
@@ -1,3 +1,12 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['year_']}
+ ]
+ )
+}}
+
with census_tracts as (
select * from {{ ref('census_tracts') }}
)
@@ -8,6 +17,7 @@ select
census_tracts.census_tract_id
, census_tracts.census_tract
, census_tracts.year_
+ , census_tracts.geom
from
census_tracts
, city_boundary
From 2912c19ef57da321e6f0a08477aa509d40fd9c8e Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 28 Aug 2024 15:57:04 -0400
Subject: [PATCH 097/142] add dbt to dev depends
---
setup.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/setup.py b/setup.py
index 419f142a..3f14029a 100644
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,8 @@
"graphviz",
"python-dotenv",
"google-cloud-storage",
+ "dbt-core",
+ "dbt-postgres",
]
setup(
From 027085e3b6c42141e7d0f000a2e0a0648b3a3586 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 28 Aug 2024 15:57:10 -0400
Subject: [PATCH 098/142] add numeric encoding of census tracts
---
dbt/models/census_tracts_wide.sql | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index 05bede3e..eac750b5 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -40,11 +40,17 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
, segregation as (
select * from demographics
where description = 'segregation_index_annual_city'
+),
+census_tract_numeric as (
+ select
+ census_tract
+ , row_number() over () as census_tract_numeric
+ from (select distinct census_tract from census_tracts order by 1)
)
-
, raw_data as (
select
census_tracts.census_tract
+ , census_tract_numeric.census_tract_numeric
, census_tracts.year_
, coalesce(housing_units.num_units, 0) as num_units
, property_values.total_value
@@ -58,6 +64,7 @@ select
, segregation.value_ as segregation
from
census_tracts
+ inner join census_tract_numeric using (census_tract)
inner join housing_units using (census_tract_id)
inner join property_values using (census_tract_id)
inner join distance_to_transit using (census_tract_id)
@@ -70,6 +77,7 @@ from
, with_std as (
select
census_tract
+ , census_tract_numeric
, year_
, num_units
, total_value
From 8a00c91783a24bdc9b143963771c700ba1f66d42 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 28 Aug 2024 16:15:43 -0400
Subject: [PATCH 099/142] use original column names in census_tracts_wide
---
dbt/macros/standardize.sql | 2 +-
dbt/models/census_tracts_wide.sql | 32 +++++++++++--------------------
2 files changed, 12 insertions(+), 22 deletions(-)
diff --git a/dbt/macros/standardize.sql b/dbt/macros/standardize.sql
index 63ec955e..83d71af6 100644
--- a/dbt/macros/standardize.sql
+++ b/dbt/macros/standardize.sql
@@ -1,6 +1,6 @@
{% macro standardize(columns) %}
{% for c in columns %}
- (({{ c }} - (avg({{ c }}) over ())) / (stddev_samp({{ c }}) over ()))::double precision as std_{{ c }}
+ {{ c }} as {{ c }}_original, (({{ c }} - (avg({{ c }}) over ())) / (stddev_samp({{ c }}) over ()))::double precision as {{ c }}
{% if not loop.last %},{% endif %}
{% endfor %}
{% endmacro %}
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index eac750b5..66361721 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -49,14 +49,14 @@ census_tract_numeric as (
)
, raw_data as (
select
- census_tracts.census_tract
- , census_tract_numeric.census_tract_numeric
- , census_tracts.year_
- , coalesce(housing_units.num_units, 0) as num_units
+ census_tracts.census_tract as census_tract_fips
+ , census_tract_numeric.census_tract_numeric as census_tract
+ , census_tracts.year_ as "year"
+ , coalesce(housing_units.num_units, 0) as housing_units
, property_values.total_value
, property_values.median_value
- , distance_to_transit.median_distance_to_transit
- , distance_to_transit.mean_distance_to_transit
+ , distance_to_transit.median_distance_to_transit as median_distance
+ , distance_to_transit.mean_distance_to_transit as mean_distance
, parcel_area.parcel_sqm
, parking_limits.mean_limit
, white_frac.value_ as white
@@ -76,21 +76,11 @@ from
)
, with_std as (
select
- census_tract
- , census_tract_numeric
- , year_
- , num_units
- , total_value
- , median_value
- , median_distance_to_transit
- , mean_distance_to_transit
- , parcel_sqm
- , white
- , income
- , mean_limit
- , segregation
- , {{ standardize(['num_units', 'total_value', 'median_value',
- 'median_distance_to_transit', 'mean_distance_to_transit',
+ census_tract_fips
+ , census_tract
+ , "year"
+ , {{ standardize(['housing_units', 'total_value', 'median_value',
+ 'median_distance', 'mean_distance',
'parcel_sqm', 'white', 'income', 'mean_limit', 'segregation' ]) }}
from
raw_data
From 0ede91404417bd51e73565489469be7e40e71d40 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 28 Aug 2024 16:36:39 -0400
Subject: [PATCH 100/142] add standardize functions for categorical variables
---
dbt/macros/standardize.sql | 9 ++++++++-
dbt/models/census_tracts_wide.sql | 21 ++++++---------------
2 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/dbt/macros/standardize.sql b/dbt/macros/standardize.sql
index 83d71af6..742e971f 100644
--- a/dbt/macros/standardize.sql
+++ b/dbt/macros/standardize.sql
@@ -1,6 +1,13 @@
-{% macro standardize(columns) %}
+{% macro standardize_cont(columns) %}
{% for c in columns %}
{{ c }} as {{ c }}_original, (({{ c }} - (avg({{ c }}) over ())) / (stddev_samp({{ c }}) over ()))::double precision as {{ c }}
{% if not loop.last %},{% endif %}
{% endfor %}
{% endmacro %}
+
+{% macro standardize_cat(columns) %}
+ {% for c in columns %}
+ {{ c }} as {{ c }}_original, (dense_rank() over (order by {{ c }})) - 1 as {{ c }}
+ {% if not loop.last %},{% endif %}
+ {% endfor %}
+{% endmacro %}
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/census_tracts_wide.sql
index 66361721..f9014586 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/census_tracts_wide.sql
@@ -40,17 +40,10 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
, segregation as (
select * from demographics
where description = 'segregation_index_annual_city'
-),
-census_tract_numeric as (
- select
- census_tract
- , row_number() over () as census_tract_numeric
- from (select distinct census_tract from census_tracts order by 1)
)
, raw_data as (
select
- census_tracts.census_tract as census_tract_fips
- , census_tract_numeric.census_tract_numeric as census_tract
+ census_tracts.census_tract::numeric
, census_tracts.year_ as "year"
, coalesce(housing_units.num_units, 0) as housing_units
, property_values.total_value
@@ -64,7 +57,6 @@ select
, segregation.value_ as segregation
from
census_tracts
- inner join census_tract_numeric using (census_tract)
inner join housing_units using (census_tract_id)
inner join property_values using (census_tract_id)
inner join distance_to_transit using (census_tract_id)
@@ -76,12 +68,11 @@ from
)
, with_std as (
select
- census_tract_fips
- , census_tract
- , "year"
- , {{ standardize(['housing_units', 'total_value', 'median_value',
- 'median_distance', 'mean_distance',
- 'parcel_sqm', 'white', 'income', 'mean_limit', 'segregation' ]) }}
+ census_tract::numeric
+ , {{ standardize_cat(['year']) }}
+ , {{ standardize_cont(['housing_units', 'total_value', 'median_value',
+ 'median_distance', 'mean_distance', 'parcel_sqm',
+ 'white', 'income', 'mean_limit', 'segregation' ]) }}
from
raw_data
)
From ca3f8e756103d5b1a37dea21b617ddca7a798796 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 29 Aug 2024 10:19:07 -0400
Subject: [PATCH 101/142] census tracts need to be in 4269 for the api
---
api/schema.sql | 2 +-
dbt/models/census_tracts_api.sql | 16 ++++++++++++++++
dbt/models/census_tracts_in_city_boundary.sql | 9 ---------
3 files changed, 17 insertions(+), 10 deletions(-)
create mode 100644 dbt/models/census_tracts_api.sql
diff --git a/api/schema.sql b/api/schema.sql
index 1ee655e0..3af7fea9 100644
--- a/api/schema.sql
+++ b/api/schema.sql
@@ -11,7 +11,7 @@ create view api.census_tracts as (
census_tract,
year_,
geom
- from census_tracts_in_city_boundary
+ from census_tracts_api
);
do $$
diff --git a/dbt/models/census_tracts_api.sql b/dbt/models/census_tracts_api.sql
new file mode 100644
index 00000000..5208ae44
--- /dev/null
+++ b/dbt/models/census_tracts_api.sql
@@ -0,0 +1,16 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['year_']}
+ ]
+ )
+}}
+
+with census_tracts as (select * from {{ ref('census_tracts_in_city_boundary') }})
+select
+ census_tract
+ , year_
+ , st_transform(geom, 4269) as geom
+from
+ census_tracts
diff --git a/dbt/models/census_tracts_in_city_boundary.sql b/dbt/models/census_tracts_in_city_boundary.sql
index 266332b0..3604fb04 100644
--- a/dbt/models/census_tracts_in_city_boundary.sql
+++ b/dbt/models/census_tracts_in_city_boundary.sql
@@ -1,12 +1,3 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['year_']}
- ]
- )
-}}
-
with census_tracts as (
select * from {{ ref('census_tracts') }}
)
From eccbd849b8ad50db96dc248dd96514c11e9c1f2d Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 29 Aug 2024 11:50:27 -0400
Subject: [PATCH 102/142] reorganize dbt models to support census tract model
---
.../census_tracts_distance_to_transit.sql | 0
.../census_tracts_housing_units.sql | 0
.../census_tracts_parcel_area.sql | 0
.../census_tracts_parking_limits.sql | 8 ++++
.../census_tracts_property_values.sql | 0
.../parcels_distance_to_transit.sql | 13 +----
.../intermediate/parcels_parking_limits.sql} | 47 ++++++++-----------
.../tracts_model__census_tracts.sql} | 2 +
.../tracts_model/tracts_model__parcels.sql | 23 +++++++++
9 files changed, 54 insertions(+), 39 deletions(-)
rename dbt/models/{ => tracts_model/intermediate}/census_tracts_distance_to_transit.sql (100%)
rename dbt/models/{ => tracts_model/intermediate}/census_tracts_housing_units.sql (100%)
rename dbt/models/{ => tracts_model/intermediate}/census_tracts_parcel_area.sql (100%)
create mode 100644 dbt/models/tracts_model/intermediate/census_tracts_parking_limits.sql
rename dbt/models/{ => tracts_model/intermediate}/census_tracts_property_values.sql (100%)
rename dbt/models/{ => tracts_model/intermediate}/parcels_distance_to_transit.sql (74%)
rename dbt/models/{census_tracts_parking_limits.sql => tracts_model/intermediate/parcels_parking_limits.sql} (54%)
rename dbt/models/{census_tracts_wide.sql => tracts_model/tracts_model__census_tracts.sql} (98%)
create mode 100644 dbt/models/tracts_model/tracts_model__parcels.sql
diff --git a/dbt/models/census_tracts_distance_to_transit.sql b/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql
similarity index 100%
rename from dbt/models/census_tracts_distance_to_transit.sql
rename to dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql
diff --git a/dbt/models/census_tracts_housing_units.sql b/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
similarity index 100%
rename from dbt/models/census_tracts_housing_units.sql
rename to dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
diff --git a/dbt/models/census_tracts_parcel_area.sql b/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
similarity index 100%
rename from dbt/models/census_tracts_parcel_area.sql
rename to dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_parking_limits.sql b/dbt/models/tracts_model/intermediate/census_tracts_parking_limits.sql
new file mode 100644
index 00000000..430e5fd6
--- /dev/null
+++ b/dbt/models/tracts_model/intermediate/census_tracts_parking_limits.sql
@@ -0,0 +1,8 @@
+with
+parcels_parking_limits as (select * from {{ ref('parcels_parking_limits') }}),
+parcels as (select * from {{ ref('parcels') }})
+select
+ census_tract_id,
+ avg(limit_numeric) as mean_limit
+from parcels join parcels_parking_limits using (parcel_id)
+group by census_tract_id
diff --git a/dbt/models/census_tracts_property_values.sql b/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql
similarity index 100%
rename from dbt/models/census_tracts_property_values.sql
rename to dbt/models/tracts_model/intermediate/census_tracts_property_values.sql
diff --git a/dbt/models/parcels_distance_to_transit.sql b/dbt/models/tracts_model/intermediate/parcels_distance_to_transit.sql
similarity index 74%
rename from dbt/models/parcels_distance_to_transit.sql
rename to dbt/models/tracts_model/intermediate/parcels_distance_to_transit.sql
index 6580ca58..eb543f29 100644
--- a/dbt/models/parcels_distance_to_transit.sql
+++ b/dbt/models/tracts_model/intermediate/parcels_distance_to_transit.sql
@@ -1,12 +1,3 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['parcel_id'], 'unique': true},
- ]
- )
-}}
-
-- This model calculates the distance from each parcel to the nearest high
-- frequency transit line or stop
with
@@ -21,8 +12,8 @@ with
lines inner join stops on lines.valid && stops.valid
)
select
- parcels.parcel_id
- , st_distance(parcels.geom, lines_and_stops.geom) as distance
+ parcels.parcel_id,
+ st_distance(parcels.geom, lines_and_stops.geom) as distance
from
parcels
inner join lines_and_stops on parcels.valid && lines_and_stops.valid
diff --git a/dbt/models/census_tracts_parking_limits.sql b/dbt/models/tracts_model/intermediate/parcels_parking_limits.sql
similarity index 54%
rename from dbt/models/census_tracts_parking_limits.sql
rename to dbt/models/tracts_model/intermediate/parcels_parking_limits.sql
index be6a25f1..3436ae30 100644
--- a/dbt/models/census_tracts_parking_limits.sql
+++ b/dbt/models/tracts_model/intermediate/parcels_parking_limits.sql
@@ -1,22 +1,21 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['census_tract_id'], 'unique': true},
- ]
- )
-}}
-
with
parcels as (select * from {{ ref('parcels') }}),
transit as (select * from {{ ref('high_frequency_transit_lines') }}),
downtown as (select * from {{ ref('downtown') }}),
-with_parking_limit as (
+with_is_downtown as (
select
- parcel_id,
- census_tract_id,
+ parcels.parcel_id,
+ parcels.valid,
+ parcels.geom,
+ st_intersects(parcels.geom, downtown.geom) as is_downtown
+ from downtown, parcels
+),
+with_limit as (
+ select
+ parcels.parcel_id,
+ parcels.is_downtown,
case
- when st_intersects(parcels.geom, downtown.geom) then 'eliminated'
+ when parcels.is_downtown then 'eliminated'
when parcels.valid << '[2015-01-01,)'::daterange then 'full'
else
case
@@ -26,27 +25,19 @@ with_parking_limit as (
end
end as limit_
from
- downtown, parcels
- left join transit
- on parcels.valid && transit.valid
+ with_is_downtown as parcels
+ join transit on parcels.valid && transit.valid
),
with_limit_numeric as (
select
- parcel_id,
- census_tract_id,
- limit_,
+ parcels.parcel_id,
+ parcels.is_downtown,
+ parcels.limit_,
case limit_
when 'full' then 1
when 'reduced' then 0.5
when 'eliminated' then 0
end as limit_numeric
- from with_parking_limit
-),
-by_census_tract as (
- select
- census_tract_id,
- avg(limit_numeric) as mean_limit
- from with_limit_numeric
- group by census_tract_id
+ from with_limit as parcels
)
-select * from by_census_tract
+select * from with_limit_numeric
diff --git a/dbt/models/census_tracts_wide.sql b/dbt/models/tracts_model/tracts_model__census_tracts.sql
similarity index 98%
rename from dbt/models/census_tracts_wide.sql
rename to dbt/models/tracts_model/tracts_model__census_tracts.sql
index f9014586..f00225a4 100644
--- a/dbt/models/census_tracts_wide.sql
+++ b/dbt/models/tracts_model/tracts_model__census_tracts.sql
@@ -12,6 +12,7 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
, parcel_area as (select * from {{ ref('census_tracts_parcel_area') }})
, parking_limits as (select * from {{ ref('census_tracts_parking_limits') }})
, demographics as (select * from {{ ref('demographics') }})
+, downtown as (select * from {{ ref('downtown') }})
, census_tracts as (
select *
from {{ ref('census_tracts') }}
@@ -41,6 +42,7 @@ in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
select * from demographics
where description = 'segregation_index_annual_city'
)
+
, raw_data as (
select
census_tracts.census_tract::numeric
diff --git a/dbt/models/tracts_model/tracts_model__parcels.sql b/dbt/models/tracts_model/tracts_model__parcels.sql
new file mode 100644
index 00000000..c2f1eecb
--- /dev/null
+++ b/dbt/models/tracts_model/tracts_model__parcels.sql
@@ -0,0 +1,23 @@
+{{
+ config(
+ materialized='table',
+ )
+}}
+
+with
+parcels_parking_limits as (select * from {{ ref('parcels_parking_limits') }}),
+parcels_distance_to_transit as (select * from {{ ref('parcels_distance_to_transit') }}),
+parcels as (select * from {{ ref('parcels') }}),
+census_tracts as (select * from {{ ref('census_tracts') }})
+select
+ parcels.pin,
+ census_tracts.census_tract,
+ census_tracts.year_,
+ parcels_distance_to_transit.distance as distance_to_transit,
+ parcels_parking_limits.limit_numeric as limit_con,
+ parcels_parking_limits.is_downtown as downtown_yn
+from
+ parcels
+ join parcels_parking_limits using (parcel_id)
+ join parcels_distance_to_transit using (parcel_id)
+ join census_tracts using (census_tract_id)
From 1fdbf2b14b7151b234215de93277840475be770b Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 29 Aug 2024 15:53:36 -0400
Subject: [PATCH 103/142] refactor
---
dbt/models/census_tracts_in_city_boundary.sql | 2 +-
.../census_tracts_distance_to_transit.sql | 25 ++++++-------------
.../census_tracts_housing_units.sql | 20 +++------------
.../census_tracts_parcel_area.sql | 16 +++---------
.../census_tracts_property_values.sql | 23 +++++------------
.../tracts_model__census_tracts.sql | 10 +++-----
.../tracts_model/tracts_model__parcels.sql | 4 +--
7 files changed, 27 insertions(+), 73 deletions(-)
diff --git a/dbt/models/census_tracts_in_city_boundary.sql b/dbt/models/census_tracts_in_city_boundary.sql
index 3604fb04..18a8d773 100644
--- a/dbt/models/census_tracts_in_city_boundary.sql
+++ b/dbt/models/census_tracts_in_city_boundary.sql
@@ -13,4 +13,4 @@ from
census_tracts
, city_boundary
where st_intersects(census_tracts.geom, city_boundary.geom)
- and st_area(st_intersection(census_tracts.geom, city_boundary.geom)) / st_area(census_tracts.geom) > 0.2
+ and st_area(st_intersection(census_tracts.geom, city_boundary.geom)) / st_area(census_tracts.geom) > 0.9
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql b/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql
index edf28211..abe15828 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql
@@ -1,24 +1,15 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['census_tract_id'], 'unique': true},
- ]
- )
-}}
-
with
parcels_distance_to_transit as (
select * from {{ ref('parcels_distance_to_transit') }}
- )
- , census_tracts as (select * from {{ ref('census_tracts') }})
- , parcels as (select * from {{ ref('parcels') }})
+ ),
+ census_tracts as (select * from {{ ref('census_tracts') }}),
+ parcels as (select * from {{ ref('parcels') }})
select
- census_tracts.census_tract_id
- , avg(parcels_distance_to_transit.distance) as mean_distance_to_transit
- , {{ median('parcels_distance_to_transit.distance') }} as median_distance_to_transit
+ census_tracts.census_tract_id,
+ avg(parcels_distance_to_transit.distance) as mean_distance_to_transit,
+ {{ median('parcels_distance_to_transit.distance') }} as median_distance_to_transit
from
census_tracts
- left join parcels using (census_tract_id)
- left join parcels_distance_to_transit using (parcel_id)
+ left join parcels using (census_tract_id)
+ left join parcels_distance_to_transit using (parcel_id)
group by 1
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql b/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
index 38c91359..0b5aa907 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
@@ -1,19 +1,7 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['census_tract_id'], 'unique': true},
- ]
- )
-}}
-
-with census_tracts as (
- select * from {{ ref('census_tracts') }}
-)
-, residential_permits as (
- select * from {{ ref('residential_permits') }}
-)
-, residential_permits_to_census_tracts as (
+with
+census_tracts as (select * from {{ ref('census_tracts') }}),
+residential_permits as (select * from {{ ref('residential_permits') }}),
+residential_permits_to_census_tracts as (
select * from {{ ref('residential_permits_to_census_tracts') }}
)
select
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql b/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
index 687d2274..d2e9b5d5 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
@@ -1,19 +1,9 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['census_tract_id'], 'unique': true},
- ]
- )
-}}
-
with
census_tracts as (select * from {{ ref('census_tracts') }}),
parcels as (select * from {{ ref('parcels') }})
select
- census_tract_id
- , sum(st_area(parcels.geom)) as parcel_sqm
+ census_tract_id,
+ sum(st_area(parcels.geom)) as parcel_sqm
from
- census_tracts
- left join parcels using (census_tract_id)
+ census_tracts left join parcels using (census_tract_id)
group by 1
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql b/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql
index 7bb18e72..60cf69c9 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql
@@ -1,22 +1,11 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['census_tract_id'], 'unique': true},
- ]
- )
-}}
-
-- Median and total parcel property values aggregated by census tract.
-
with
-parcels as (select * from {{ ref('parcels') }})
-, census_tracts as (select * from {{ ref('census_tracts') }})
+parcels as (select * from {{ ref('parcels') }}),
+census_tracts as (select * from {{ ref('census_tracts') }})
select
- census_tracts.census_tract_id
- , sum(parcels.emv_total) as total_value
- , {{ median('parcels.emv_total') }} as median_value
+ census_tracts.census_tract_id,
+ sum(parcels.emv_total) as total_value,
+ {{ median('parcels.emv_total') }} as median_value
from
- census_tracts
- left join parcels using (census_tract_id)
+ census_tracts left join parcels using (census_tract_id)
group by 1
diff --git a/dbt/models/tracts_model/tracts_model__census_tracts.sql b/dbt/models/tracts_model/tracts_model__census_tracts.sql
index f00225a4..a2c31c40 100644
--- a/dbt/models/tracts_model/tracts_model__census_tracts.sql
+++ b/dbt/models/tracts_model/tracts_model__census_tracts.sql
@@ -5,20 +5,16 @@
}}
with
-in_city_boundary as (select * from {{ ref('census_tracts_in_city_boundary') }})
-, housing_units as (select * from {{ ref('census_tracts_housing_units') }})
+housing_units as (select * from {{ ref('census_tracts_housing_units') }})
, property_values as (select * from {{ ref('census_tracts_property_values') }})
, distance_to_transit as (select * from {{ ref('census_tracts_distance_to_transit') }})
, parcel_area as (select * from {{ ref('census_tracts_parcel_area') }})
, parking_limits as (select * from {{ ref('census_tracts_parking_limits') }})
, demographics as (select * from {{ ref('demographics') }})
-, downtown as (select * from {{ ref('downtown') }})
, census_tracts as (
select *
- from {{ ref('census_tracts') }}
- where
- year_ <= 2020
- and census_tract_id in (select census_tract_id from in_city_boundary)
+ from {{ ref('census_tracts_in_city_boundary') }}
+ where year_ <= 2020
)
-- Demographic data
diff --git a/dbt/models/tracts_model/tracts_model__parcels.sql b/dbt/models/tracts_model/tracts_model__parcels.sql
index c2f1eecb..92e471eb 100644
--- a/dbt/models/tracts_model/tracts_model__parcels.sql
+++ b/dbt/models/tracts_model/tracts_model__parcels.sql
@@ -8,9 +8,9 @@ with
parcels_parking_limits as (select * from {{ ref('parcels_parking_limits') }}),
parcels_distance_to_transit as (select * from {{ ref('parcels_distance_to_transit') }}),
parcels as (select * from {{ ref('parcels') }}),
-census_tracts as (select * from {{ ref('census_tracts') }})
+census_tracts as (select * from {{ ref('census_tracts_in_city_boundary') }})
select
- parcels.pin,
+ parcels.*,
census_tracts.census_tract,
census_tracts.year_,
parcels_distance_to_transit.distance as distance_to_transit,
From cfa9713b9d1594a2545c5abf6f61fcf93e141af4 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 29 Aug 2024 16:33:44 -0400
Subject: [PATCH 104/142] move tract filters to shared view
---
.../intermediate/census_tracts_distance_to_transit.sql | 10 +++-------
.../intermediate/census_tracts_housing_units.sql | 2 +-
.../intermediate/census_tracts_parcel_area.sql | 4 ++--
.../intermediate/census_tracts_parking_limits.sql | 6 +++---
.../intermediate/census_tracts_property_values.sql | 4 ++--
.../intermediate/parcels_distance_to_transit.sql | 3 ++-
.../intermediate/parcels_parking_limits.sql | 5 ++++-
.../tracts_model_int__census_tracts_filtered.sql | 3 +++
.../tracts_model_int__parcels_filtered.sql | 4 ++++
.../tracts_model/tracts_model__census_tracts.sql | 6 +-----
dbt/models/tracts_model/tracts_model__parcels.sql | 6 +++---
11 files changed, 28 insertions(+), 25 deletions(-)
create mode 100644 dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
create mode 100644 dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql b/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql
index abe15828..a25c6005 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_distance_to_transit.sql
@@ -1,15 +1,11 @@
with
- parcels_distance_to_transit as (
- select * from {{ ref('parcels_distance_to_transit') }}
- ),
- census_tracts as (select * from {{ ref('census_tracts') }}),
- parcels as (select * from {{ ref('parcels') }})
+parcels_distance_to_transit as (select * from {{ ref('parcels_distance_to_transit') }}),
+census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }})
select
census_tracts.census_tract_id,
avg(parcels_distance_to_transit.distance) as mean_distance_to_transit,
{{ median('parcels_distance_to_transit.distance') }} as median_distance_to_transit
from
census_tracts
- left join parcels using (census_tract_id)
- left join parcels_distance_to_transit using (parcel_id)
+ left join parcels_distance_to_transit using (census_tract_id)
group by 1
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql b/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
index 0b5aa907..e0654c55 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
@@ -1,5 +1,5 @@
with
-census_tracts as (select * from {{ ref('census_tracts') }}),
+census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }}),
residential_permits as (select * from {{ ref('residential_permits') }}),
residential_permits_to_census_tracts as (
select * from {{ ref('residential_permits_to_census_tracts') }}
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql b/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
index d2e9b5d5..cb6760fe 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
@@ -1,6 +1,6 @@
with
-census_tracts as (select * from {{ ref('census_tracts') }}),
-parcels as (select * from {{ ref('parcels') }})
+census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }}),
+parcels as (select * from {{ ref('tracts_model_int__parcels_filtered') }})
select
census_tract_id,
sum(st_area(parcels.geom)) as parcel_sqm
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_parking_limits.sql b/dbt/models/tracts_model/intermediate/census_tracts_parking_limits.sql
index 430e5fd6..cf99bf05 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_parking_limits.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_parking_limits.sql
@@ -1,8 +1,8 @@
with
-parcels_parking_limits as (select * from {{ ref('parcels_parking_limits') }}),
-parcels as (select * from {{ ref('parcels') }})
+census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }}),
+parcels_parking_limits as (select * from {{ ref('parcels_parking_limits') }})
select
census_tract_id,
avg(limit_numeric) as mean_limit
-from parcels join parcels_parking_limits using (parcel_id)
+from census_tracts left join parcels_parking_limits using (census_tract_id)
group by census_tract_id
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql b/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql
index 60cf69c9..71f8b74a 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_property_values.sql
@@ -1,7 +1,7 @@
-- Median and total parcel property values aggregated by census tract.
with
-parcels as (select * from {{ ref('parcels') }}),
-census_tracts as (select * from {{ ref('census_tracts') }})
+parcels as (select * from {{ ref('tracts_model_int__parcels_filtered') }}),
+census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }})
select
census_tracts.census_tract_id,
sum(parcels.emv_total) as total_value,
diff --git a/dbt/models/tracts_model/intermediate/parcels_distance_to_transit.sql b/dbt/models/tracts_model/intermediate/parcels_distance_to_transit.sql
index eb543f29..18cdbf48 100644
--- a/dbt/models/tracts_model/intermediate/parcels_distance_to_transit.sql
+++ b/dbt/models/tracts_model/intermediate/parcels_distance_to_transit.sql
@@ -1,7 +1,7 @@
-- This model calculates the distance from each parcel to the nearest high
-- frequency transit line or stop
with
- parcels as (select * from {{ ref('parcels') }})
+ parcels as (select * from {{ ref('tracts_model_int__parcels_filtered') }})
, lines as (select * from {{ ref('high_frequency_transit_lines') }})
, stops as (select * from {{ ref('high_frequency_transit_stops') }})
, lines_and_stops as materialized (
@@ -13,6 +13,7 @@ with
)
select
parcels.parcel_id,
+ parcels.census_tract_id,
st_distance(parcels.geom, lines_and_stops.geom) as distance
from
parcels
diff --git a/dbt/models/tracts_model/intermediate/parcels_parking_limits.sql b/dbt/models/tracts_model/intermediate/parcels_parking_limits.sql
index 3436ae30..aebd7b00 100644
--- a/dbt/models/tracts_model/intermediate/parcels_parking_limits.sql
+++ b/dbt/models/tracts_model/intermediate/parcels_parking_limits.sql
@@ -1,10 +1,11 @@
with
-parcels as (select * from {{ ref('parcels') }}),
+parcels as (select * from {{ ref('tracts_model_int__parcels_filtered') }}),
transit as (select * from {{ ref('high_frequency_transit_lines') }}),
downtown as (select * from {{ ref('downtown') }}),
with_is_downtown as (
select
parcels.parcel_id,
+ parcels.census_tract_id,
parcels.valid,
parcels.geom,
st_intersects(parcels.geom, downtown.geom) as is_downtown
@@ -13,6 +14,7 @@ with_is_downtown as (
with_limit as (
select
parcels.parcel_id,
+ parcels.census_tract_id,
parcels.is_downtown,
case
when parcels.is_downtown then 'eliminated'
@@ -31,6 +33,7 @@ with_limit as (
with_limit_numeric as (
select
parcels.parcel_id,
+ parcels.census_tract_id,
parcels.is_downtown,
parcels.limit_,
case limit_
diff --git a/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql b/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
new file mode 100644
index 00000000..7bd1a884
--- /dev/null
+++ b/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
@@ -0,0 +1,3 @@
+select *
+from {{ ref('census_tracts_in_city_boundary') }}
+where year_ <= 2020
diff --git a/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql b/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
new file mode 100644
index 00000000..30fe050c
--- /dev/null
+++ b/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
@@ -0,0 +1,4 @@
+with
+census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }})
+select parcels.*
+from {{ ref('parcels') }} join census_tracts using (census_tract_id)
diff --git a/dbt/models/tracts_model/tracts_model__census_tracts.sql b/dbt/models/tracts_model/tracts_model__census_tracts.sql
index a2c31c40..77131d20 100644
--- a/dbt/models/tracts_model/tracts_model__census_tracts.sql
+++ b/dbt/models/tracts_model/tracts_model__census_tracts.sql
@@ -11,11 +11,7 @@ housing_units as (select * from {{ ref('census_tracts_housing_units') }})
, parcel_area as (select * from {{ ref('census_tracts_parcel_area') }})
, parking_limits as (select * from {{ ref('census_tracts_parking_limits') }})
, demographics as (select * from {{ ref('demographics') }})
-, census_tracts as (
- select *
- from {{ ref('census_tracts_in_city_boundary') }}
- where year_ <= 2020
-)
+, census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }})
-- Demographic data
, white as (
diff --git a/dbt/models/tracts_model/tracts_model__parcels.sql b/dbt/models/tracts_model/tracts_model__parcels.sql
index 92e471eb..d11f4605 100644
--- a/dbt/models/tracts_model/tracts_model__parcels.sql
+++ b/dbt/models/tracts_model/tracts_model__parcels.sql
@@ -7,8 +7,8 @@
with
parcels_parking_limits as (select * from {{ ref('parcels_parking_limits') }}),
parcels_distance_to_transit as (select * from {{ ref('parcels_distance_to_transit') }}),
-parcels as (select * from {{ ref('parcels') }}),
-census_tracts as (select * from {{ ref('census_tracts_in_city_boundary') }})
+parcels as (select * from {{ ref('tracts_model_int__parcels_filtered') }}),
+census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }})
select
parcels.*,
census_tracts.census_tract,
@@ -18,6 +18,6 @@ select
parcels_parking_limits.is_downtown as downtown_yn
from
parcels
+ join census_tracts using (census_tract_id)
join parcels_parking_limits using (parcel_id)
join parcels_distance_to_transit using (parcel_id)
- join census_tracts using (census_tract_id)
From d602f9c7fdfee43853a395c91902ae9c138c8b18 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 29 Aug 2024 16:50:24 -0400
Subject: [PATCH 105/142] filter out data before 2011, because we don't have
demographics
---
.../intermediate/tracts_model_int__census_tracts_filtered.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql b/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
index 7bd1a884..28656986 100644
--- a/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
+++ b/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
@@ -1,3 +1,3 @@
select *
from {{ ref('census_tracts_in_city_boundary') }}
-where year_ <= 2020
+where 2010 < year_ and year_ <= 2020
From 08cc805840d9c96ea9f91775426b2d097d84661e Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 30 Aug 2024 09:48:44 -0400
Subject: [PATCH 106/142] reorganize api and add high frequency transit
---
api/schema.sql | 12 ++++++------
.../api__census_tracts.sql} | 0
.../api__demographics.sql} | 0
.../api/api__high_frequency_transit_lines.sql | 17 +++++++++++++++++
4 files changed, 23 insertions(+), 6 deletions(-)
rename dbt/models/{census_tracts_api.sql => api/api__census_tracts.sql} (100%)
rename dbt/models/{demographics_wide.sql => api/api__demographics.sql} (100%)
create mode 100644 dbt/models/api/api__high_frequency_transit_lines.sql
diff --git a/api/schema.sql b/api/schema.sql
index 3af7fea9..42fd4e4c 100644
--- a/api/schema.sql
+++ b/api/schema.sql
@@ -3,15 +3,15 @@ drop schema if exists api cascade;
create schema api;
create view api.demographics as (
- select * from demographics_wide
+ select * from api__demographics
);
create view api.census_tracts as (
- select
- census_tract,
- year_,
- geom
- from census_tracts_api
+ select * from api__census_tracts
+);
+
+create view api.high_frequency_transit_lines as (
+ select * from api__high_frequency_transit_lines
);
do $$
diff --git a/dbt/models/census_tracts_api.sql b/dbt/models/api/api__census_tracts.sql
similarity index 100%
rename from dbt/models/census_tracts_api.sql
rename to dbt/models/api/api__census_tracts.sql
diff --git a/dbt/models/demographics_wide.sql b/dbt/models/api/api__demographics.sql
similarity index 100%
rename from dbt/models/demographics_wide.sql
rename to dbt/models/api/api__demographics.sql
diff --git a/dbt/models/api/api__high_frequency_transit_lines.sql b/dbt/models/api/api__high_frequency_transit_lines.sql
new file mode 100644
index 00000000..d48fb342
--- /dev/null
+++ b/dbt/models/api/api__high_frequency_transit_lines.sql
@@ -0,0 +1,17 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['valid']}
+ ]
+ )
+}}
+
+select
+ high_frequency_transit_line_id,
+ valid,
+ geom,
+ blue_zone_geom,
+ yellow_zone_geom
+from
+ {{ ref('high_frequency_transit_lines') }}
From 68de90becbe1d6a103b83164a1824af3e1634ae1 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 30 Aug 2024 13:20:17 -0400
Subject: [PATCH 107/142] replace 2020 census tract geometry with 2019
- mirrors the replacement of demographic data
- requires parcels & residential permits to be retagged, since census_tract_id changes
---
dbt/models/census_tracts_in_city_boundary.sql | 1 +
.../census_tracts_housing_units.sql | 22 ++++++++++--
...acts_model_int__census_tracts_filtered.sql | 36 +++++++++++++++++--
.../tracts_model_int__parcels_filtered.sql | 24 +++++++++++--
.../tracts_model__census_tracts.sql | 2 +-
5 files changed, 75 insertions(+), 10 deletions(-)
diff --git a/dbt/models/census_tracts_in_city_boundary.sql b/dbt/models/census_tracts_in_city_boundary.sql
index 18a8d773..5a2955fc 100644
--- a/dbt/models/census_tracts_in_city_boundary.sql
+++ b/dbt/models/census_tracts_in_city_boundary.sql
@@ -6,6 +6,7 @@ with census_tracts as (
)
select
census_tracts.census_tract_id
+ , census_tracts.valid
, census_tracts.census_tract
, census_tracts.year_
, census_tracts.geom
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql b/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
index e0654c55..42033743 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_housing_units.sql
@@ -2,11 +2,27 @@ with
census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }}),
residential_permits as (select * from {{ ref('residential_permits') }}),
residential_permits_to_census_tracts as (
- select * from {{ ref('residential_permits_to_census_tracts') }}
+ with
+ residential_permits_tag as (
+ select
+ residential_permit_id as id
+ , daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid
+ , geom
+ from residential_permits
+ ),
+ census_tracts_tag as (
+ select census_tract_id as id, valid, geom from census_tracts
+ )
+ select
+ child_id as residential_permit_id,
+ parent_id as census_tract_id,
+ valid,
+ type_
+ from {{ tag_regions("residential_permits_tag", "census_tracts_tag") }}
)
select
- census_tracts.census_tract_id
- , sum(residential_permits.num_units)::int as num_units
+ census_tracts.census_tract_id,
+ sum(residential_permits.num_units)::int as num_units
from
census_tracts
left join residential_permits_to_census_tracts using (census_tract_id)
diff --git a/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql b/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
index 28656986..eeb99fcd 100644
--- a/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
+++ b/dbt/models/tracts_model/intermediate/tracts_model_int__census_tracts_filtered.sql
@@ -1,3 +1,33 @@
-select *
-from {{ ref('census_tracts_in_city_boundary') }}
-where 2010 < year_ and year_ <= 2020
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['valid', 'geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
+-- Keep only the 2011-2019 tracts inside the city boundary, reuse the 2019 tracts
+-- (relabelled as year 2020) in place of the 2020 tracts, and regenerate the surrogate key.
+with census_tracts_in_city_boundary as (
+ select *
+ from {{ ref('census_tracts_in_city_boundary') }}
+ where 2010 < year_ and year_ < 2020
+),
+census_tracts_union as (
+select census_tract, year_, valid, geom from census_tracts_in_city_boundary
+union all
+select
+ census_tract,
+ 2020 as year_,
+ '[2020-01-01,2021-01-01)'::daterange as valid,
+ geom
+from census_tracts_in_city_boundary where year_ = 2019
+)
+select
+ {{ dbt_utils.generate_surrogate_key(['census_tract', 'year_']) }} as census_tract_id,
+ census_tract,
+ year_,
+ valid,
+ geom
+from census_tracts_union
diff --git a/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql b/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
index 30fe050c..f14ca0fc 100644
--- a/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
+++ b/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
@@ -1,4 +1,22 @@
+{{
+ config(
+ materialized='table'
+ )
+}}
+
+-- Retag parcels with census tracts (because we replaced the 2020 tracts with the 2019 tracts)
with
-census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }})
-select parcels.*
-from {{ ref('parcels') }} join census_tracts using (census_tract_id)
+census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }}),
+parcels as (select * from {{ ref('parcels_base') }}),
+
+parcels_tag as (select parcel_id as id, valid, geom from parcels),
+census_tracts_tag as (select census_tract_id as id, valid, geom from census_tracts),
+parcels_to_census_tracts as (
+ select
+ child_id as parcel_id,
+ parent_id as census_tract_id
+ from {{ tag_regions("parcels_tag", "census_tracts_tag") }}
+)
+
+select parcels.*, parcels_to_census_tracts.census_tract_id
+from parcels join parcels_to_census_tracts using (parcel_id)
diff --git a/dbt/models/tracts_model/tracts_model__census_tracts.sql b/dbt/models/tracts_model/tracts_model__census_tracts.sql
index 77131d20..dea70ec5 100644
--- a/dbt/models/tracts_model/tracts_model__census_tracts.sql
+++ b/dbt/models/tracts_model/tracts_model__census_tracts.sql
@@ -56,7 +56,7 @@ from
inner join distance_to_transit using (census_tract_id)
inner join parcel_area using (census_tract_id)
inner join parking_limits using (census_tract_id)
- inner join segregation using (census_tract, year_)
+ left join segregation using (census_tract, year_)
left join white_frac using (census_tract, year_)
left join income using (census_tract, year_)
)
From 9da68434ad17d6760baebd766182d6c4076be196 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 30 Aug 2024 15:18:45 -0400
Subject: [PATCH 108/142] fix srid for api
---
dbt/models/api/api__high_frequency_transit_lines.sql | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/dbt/models/api/api__high_frequency_transit_lines.sql b/dbt/models/api/api__high_frequency_transit_lines.sql
index d48fb342..3e445e5b 100644
--- a/dbt/models/api/api__high_frequency_transit_lines.sql
+++ b/dbt/models/api/api__high_frequency_transit_lines.sql
@@ -10,8 +10,8 @@
select
high_frequency_transit_line_id,
valid,
- geom,
- blue_zone_geom,
- yellow_zone_geom
+ st_transform(geom, 4269) as geom,
+ st_transform(blue_zone_geom, 4269) as blue_zone_geom,
+ st_transform(yellow_zone_geom, 4269) as yellow_zone_geom
from
{{ ref('high_frequency_transit_lines') }}
From 2de2b05d2111001201e1da99796f4a9b51f90680 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 10:46:03 -0400
Subject: [PATCH 109/142] type conversion
---
dbt/models/tracts_model/tracts_model__census_tracts.sql | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/dbt/models/tracts_model/tracts_model__census_tracts.sql b/dbt/models/tracts_model/tracts_model__census_tracts.sql
index dea70ec5..8d584472 100644
--- a/dbt/models/tracts_model/tracts_model__census_tracts.sql
+++ b/dbt/models/tracts_model/tracts_model__census_tracts.sql
@@ -37,15 +37,15 @@ housing_units as (select * from {{ ref('census_tracts_housing_units') }})
, raw_data as (
select
- census_tracts.census_tract::numeric
- , census_tracts.year_ as "year"
+ census_tracts.census_tract::bigint
+ , census_tracts.year_::smallint as "year"
, coalesce(housing_units.num_units, 0) as housing_units
, property_values.total_value
, property_values.median_value
, distance_to_transit.median_distance_to_transit as median_distance
, distance_to_transit.mean_distance_to_transit as mean_distance
, parcel_area.parcel_sqm
- , parking_limits.mean_limit
+ , parking_limits.mean_limit::double precision
, white_frac.value_ as white
, income.value_ as income
, segregation.value_ as segregation
@@ -62,7 +62,7 @@ from
)
, with_std as (
select
- census_tract::numeric
+ census_tract
, {{ standardize_cat(['year']) }}
, {{ standardize_cont(['housing_units', 'total_value', 'median_value',
'median_distance', 'mean_distance', 'parcel_sqm',
From affab56961d585b1ba5875488d85e5c88833b7d0 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 16:00:33 -0400
Subject: [PATCH 110/142] simplify acs models
---
dbt/models/acs_block_group.sql | 19 ++++--------
dbt/models/acs_block_group_clean.sql | 10 -------
dbt/models/acs_tract.sql | 15 ++++------
dbt/models/acs_tract_clean.sql | 20 -------------
dbt/models/acs_tract_wide.sql | 45 ----------------------------
5 files changed, 11 insertions(+), 98 deletions(-)
delete mode 100644 dbt/models/acs_block_group_clean.sql
delete mode 100644 dbt/models/acs_tract_clean.sql
delete mode 100644 dbt/models/acs_tract_wide.sql
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
index 37d6f96e..ea77a2b4 100644
--- a/dbt/models/acs_block_group.sql
+++ b/dbt/models/acs_block_group.sql
@@ -2,21 +2,14 @@
config(
materialized='table',
indexes = [
- {'columns': ['census_block_group_id', 'year_', 'name_'], 'unique': true},
+ {'columns': ['census_block_group', 'year_', 'name_'], 'unique': true},
]
)
}}
-with
-census_block_groups as (select * from {{ ref('census_block_groups') }})
-, acs_bg as (select * from {{ ref('acs_block_group_clean') }})
select
- census_block_groups.census_block_group_id
- , acs_bg.year_
- , acs_bg.name_
- , acs_bg.value_
-from
- acs_bg
- inner join census_block_groups using (statefp, countyfp, tractce, blkgrpce)
-where
- to_date(acs_bg.year_::text , 'YYYY') <@ census_block_groups.valid
+ year::smallint as year_,
+ code as name_,
+ statefp || countyfp || tractce || blkgrpce as census_block_group,
+ case when "value" < 0 then null else "value" end as value_
+from {{ source('minneapolis', 'acs_bg_raw') }}
diff --git a/dbt/models/acs_block_group_clean.sql b/dbt/models/acs_block_group_clean.sql
deleted file mode 100644
index 22cf94e4..00000000
--- a/dbt/models/acs_block_group_clean.sql
+++ /dev/null
@@ -1,10 +0,0 @@
-select
- statefp
- , countyfp
- , tractce
- , blkgrpce
- , year as year_
- , code as name_
- , case when "value" < 0 then null else "value" end as value_
-from
- {{ source('minneapolis', 'acs_bg_raw') }}
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
index 96fc0a02..3a4d1b74 100644
--- a/dbt/models/acs_tract.sql
+++ b/dbt/models/acs_tract.sql
@@ -7,14 +7,9 @@
)
}}
-with
-census_tracts as (select * from {{ ref("census_tracts") }})
-, acs_tract as (select * from {{ ref('acs_tract_clean') }})
select
- census_tract
- , acs_tract.year_
- , acs_tract.name_
- , acs_tract.value_
-from
- acs_tract
- inner join census_tracts using (statefp, countyfp, tractce, year_)
+ year::smallint as year_,
+ code as name_,
+ statefp || countyfp || tractce as census_tract,
+ case when "value" < 0 then null else "value" end as value_
+from {{ source('minneapolis', 'acs_tract_raw') }}
diff --git a/dbt/models/acs_tract_clean.sql b/dbt/models/acs_tract_clean.sql
deleted file mode 100644
index 1c631ff4..00000000
--- a/dbt/models/acs_tract_clean.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-with
-acs_tract_raw as (
- select
- statefp
- , countyfp
- , tractce
- , year
- , code
- , value
- from {{ source('minneapolis', 'acs_tract_raw') }}
-)
-select
- statefp
- , countyfp
- , tractce
- , year as year_
- , code as name_
- , case when "value" < 0 then null else "value" end as value_
-from
- acs_tract_raw
diff --git a/dbt/models/acs_tract_wide.sql b/dbt/models/acs_tract_wide.sql
deleted file mode 100644
index 543c38e7..00000000
--- a/dbt/models/acs_tract_wide.sql
+++ /dev/null
@@ -1,45 +0,0 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['description']}
- ]
- )
-}}
-
-{% set years = range(2013, 2023) %}
-
-with
-acs_tract as (select * from {{ ref('acs_tract') }})
-, acs_variables as (select * from {{ ref('acs_variables') }})
-, census_tracts as (select * from {{ ref('census_tracts_in_city_boundary') }})
-, acs_tract_filtered as (
- select acs_tract.*, description
- from acs_tract
- inner join census_tracts using (census_tract, year_)
- inner join acs_variables on acs_tract.name_ = acs_variables.variable
-)
-, distinct_tracts_and_variables as (
- select distinct
- census_tract
- , name_
- , description
- from acs_tract_filtered
-)
-select
- description
- , census_tract as tract_id
-{% for year_ in years %}
- , "{{ year_ }}"
-{% endfor %}
-from distinct_tracts_and_variables
-{% for year_ in years %}
-left join
-(select
- census_tract
- , name_
- , value_ as "{{ year_}}"
-from acs_tract_filtered
-where year_ = {{ year_ }})
-using (census_tract, name_)
-{% endfor %}
From cea3777bbdf36642b94aad6e182066645c69109b Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 16:11:29 -0400
Subject: [PATCH 111/142] simplify zip_codes
---
dbt/models/all_zip_codes.sql | 20 --------------------
dbt/models/zip_codes.sql | 30 ++++++++++++++++--------------
2 files changed, 16 insertions(+), 34 deletions(-)
delete mode 100644 dbt/models/all_zip_codes.sql
diff --git a/dbt/models/all_zip_codes.sql b/dbt/models/all_zip_codes.sql
deleted file mode 100644
index ac438099..00000000
--- a/dbt/models/all_zip_codes.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-with
-zip_codes as (
-select
- zip_code,
- '[2020-01-01,)'::daterange as valid,
- geom
-from {{ ref('all_zip_codes_2020') }}
-union all
-select
- zip_code,
- '[,2020-01-01)'::daterange as valid,
- geom
-from {{ ref('all_zip_codes_2010') }}
-)
-select
- {{ dbt_utils.generate_surrogate_key(['zip_code', 'valid']) }} as zip_code_id
- , zip_code
- , valid
- , geom
-from zip_codes
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 77d9ddd3..e218218a 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -8,19 +8,21 @@
)
}}
-with city_boundary as (
- select
- geom
- from
- {{ ref('city_boundary') }}
+with
+zip_codes as (
+select
+ zip_code,
+ '[2020-01-01,)'::daterange as valid,
+ geom
+from {{ ref('all_zip_codes_2020') }}
+union all
+select
+ zip_code,
+ '[,2020-01-01)'::daterange as valid,
+ geom
+from {{ ref('all_zip_codes_2010') }}
)
select
- all_zip_codes.zip_code_id
- , all_zip_codes.zip_code
- , all_zip_codes.valid
- , all_zip_codes.geom
-from
- {{ ref('all_zip_codes') }} as all_zip_codes,
- city_boundary
-where
- st_intersects(all_zip_codes.geom, city_boundary.geom)
+ {{ dbt_utils.generate_surrogate_key(['zip_code', 'valid']) }} as zip_code_id,
+ zip_codes.*
+from zip_codes
From 80e1fb26b1e41ba7857357612acac9dbf4ca65c6 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 16:40:15 -0400
Subject: [PATCH 112/142] document acs data
---
dbt/models/acs_block_group.sql | 11 +++++++++++
dbt/models/acs_tract.sql | 11 +++++++++++
2 files changed, 22 insertions(+)
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
index ea77a2b4..a323d31c 100644
--- a/dbt/models/acs_block_group.sql
+++ b/dbt/models/acs_block_group.sql
@@ -7,6 +7,17 @@
)
}}
+{% docs acs_block_group %}
+
+Contains American Community Survey (ACS) demographic data at a census block
+group granularity.
+
+The `name_` column contains the ACS variable code of the demographic variable
+(e.g. `B03002_003E`), not a descriptive name. See `acs_variables` for a mapping
+of these codes to human-readable names.
+
+{% enddocs %}
+
select
year::smallint as year_,
code as name_,
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
index 3a4d1b74..ae113e66 100644
--- a/dbt/models/acs_tract.sql
+++ b/dbt/models/acs_tract.sql
@@ -7,6 +7,17 @@
)
}}
+{% docs acs_tract %}
+
+Contains American Community Survey (ACS) demographic data at a census tract
+granularity.
+
+The `name_` column contains the ACS variable code of the demographic variable
+(e.g. `B03002_003E`), not a descriptive name. See `acs_variables` for a mapping
+of these codes to human-readable names.
+
+{% enddocs %}
+
select
year::smallint as year_,
code as name_,
From 1b4878532ff7f6f0728a8430505f1399c11bbc4c Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 16:40:45 -0400
Subject: [PATCH 113/142] document and reorganize zip code models
---
.../stg_zip_codes_2010.sql} | 0
.../stg_zip_codes_2020.sql} | 0
dbt/models/zip_codes.sql | 11 +++++++++--
3 files changed, 9 insertions(+), 2 deletions(-)
rename dbt/models/{all_zip_codes_2010.sql => staging/stg_zip_codes_2010.sql} (100%)
rename dbt/models/{all_zip_codes_2020.sql => staging/stg_zip_codes_2020.sql} (100%)
diff --git a/dbt/models/all_zip_codes_2010.sql b/dbt/models/staging/stg_zip_codes_2010.sql
similarity index 100%
rename from dbt/models/all_zip_codes_2010.sql
rename to dbt/models/staging/stg_zip_codes_2010.sql
diff --git a/dbt/models/all_zip_codes_2020.sql b/dbt/models/staging/stg_zip_codes_2020.sql
similarity index 100%
rename from dbt/models/all_zip_codes_2020.sql
rename to dbt/models/staging/stg_zip_codes_2020.sql
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index e218218a..623ab23f 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -8,19 +8,26 @@
)
}}
+{% docs zip_codes %}
+
+Contains the geometry and metadata for all zip code tabulation areas (ZCTAs) in
+the United States.
+
+{% enddocs %}
+
with
zip_codes as (
select
zip_code,
'[2020-01-01,)'::daterange as valid,
geom
-from {{ ref('all_zip_codes_2020') }}
+from {{ ref('stg_zip_codes_2020') }}
union all
select
zip_code,
'[,2020-01-01)'::daterange as valid,
geom
-from {{ ref('all_zip_codes_2010') }}
+from {{ ref('stg_zip_codes_2010') }}
)
select
{{ dbt_utils.generate_surrogate_key(['zip_code', 'valid']) }} as zip_code_id,
From 489cbaafd2796e7d000b64a902555917d7705a3f Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 16:41:24 -0400
Subject: [PATCH 114/142] document and reorganize residential permit model
---
dbt/models/residential_permits.sql | 46 +++++++++----------
.../residential_permits_to_census_tracts.sql | 31 -------------
.../staging/stg_residential_permits.sql | 25 ++++++++++
.../stg_residential_permits_to_parcels.sql} | 10 ----
4 files changed, 48 insertions(+), 64 deletions(-)
delete mode 100644 dbt/models/residential_permits_to_census_tracts.sql
create mode 100644 dbt/models/staging/stg_residential_permits.sql
rename dbt/models/{residential_permits_to_parcels.sql => staging/stg_residential_permits_to_parcels.sql} (75%)
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index 6f994a0a..181af2b5 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -8,28 +8,28 @@
)
}}
+{% docs residential_permits %}
+
+Contains residential building permit applications.
+
+Notes:
+ - Permits are filtered to only include those in Minneapolis.
+ - `square_feet` is treated as missing if it is 0.
+ - `permit_value` is treated as missing if it is 0.
+
+{% enddocs %}
+
+with
+stg_residential_permits as (select * from {{ ref('stg_residential_permits') }}),
+stg_residential_permits_to_parcels as (select * from {{ ref('stg_residential_permits_to_parcels') }}),
+parcels as (select * from {{ ref('parcels') }})
select
- sde_id::int as residential_permit_id
- , year::int as year_
- , tenure::text
- , housing_ty::text as housing_type
- , res_permit::text as permit_type
- , address::text
- , name::text as name_
- , buildings::int as num_buildings
- , units::int as num_units
- , age_restri::int as num_age_restricted_units
- , memory_car::int as num_memory_care_units
- , assisted::int as num_assisted_living_units
- , com_off_re = 'Y' as is_commercial_and_residential
- , nullif(sqf, 0)::int as square_feet
- , public_fun = 'Y' as is_public_funded
- , nullif(permit_val, 0)::int as permit_value
- , community_::text as community_designation
- , notes::text
- , st_transform(geom, {{ var("srid") }}) as geom
+ stg_residential_permits.*,
+ stg_residential_permits_to_parcels.parcel_id,
+ parcels.census_block_group_id,
+ parcels.census_tract_id,
+ parcels.zip_code_id
from
- {{ source('minneapolis', 'residential_permits_residentialpermits') }}
-where
- co_code = '053'
- and lower(ctu_name) = 'minneapolis'
+ stg_residential_permits
+ left join stg_residential_permits_to_parcels using (residential_permit_id) -- left join: permits with no parcel match are kept
+ left join parcels using (parcel_id) -- supplies the census block group / tract / zip code ids selected above
diff --git a/dbt/models/residential_permits_to_census_tracts.sql b/dbt/models/residential_permits_to_census_tracts.sql
deleted file mode 100644
index 79a48be4..00000000
--- a/dbt/models/residential_permits_to_census_tracts.sql
+++ /dev/null
@@ -1,31 +0,0 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['residential_permit_id']},
- {'columns': ['census_tract_id']}
- ]
- )
-}}
-
-with
-residential_permits as (
- select
- residential_permit_id as id
- , daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid
- , geom
- from {{ ref("residential_permits") }}
-)
-, census_tracts as (
- select
- census_tract_id as id
- , valid
- , geom
- from {{ ref("census_tracts") }}
-)
-select
- child_id as residential_permit_id
- , parent_id as census_tract_id
- , valid
- , type_
-from {{ tag_regions("residential_permits", "census_tracts") }}
diff --git a/dbt/models/staging/stg_residential_permits.sql b/dbt/models/staging/stg_residential_permits.sql
new file mode 100644
index 00000000..c6788cc4
--- /dev/null
+++ b/dbt/models/staging/stg_residential_permits.sql
@@ -0,0 +1,25 @@
+select -- staging model: renames and casts the raw residential permit columns
+ sde_id::int as residential_permit_id
+ , year::smallint as year_
+ , tenure::text
+ , housing_ty::text as housing_type
+ , res_permit::text as permit_type
+ , address::text
+ , name::text as name_
+ , buildings::smallint as num_buildings
+ , units::smallint as num_units
+ , age_restri::smallint as num_age_restricted_units
+ , memory_car::smallint as num_memory_care_units
+ , assisted::smallint as num_assisted_living_units
+ , com_off_re = 'Y' as is_commercial_and_residential -- boolean: true only when the raw flag is 'Y'
+ , nullif(sqf, 0)::int as square_feet -- a raw value of 0 is treated as missing (null)
+ , public_fun = 'Y' as is_public_funded -- boolean: true only when the raw flag is 'Y'
+ , nullif(permit_val, 0)::int as permit_value -- a raw value of 0 is treated as missing (null)
+ , community_::text as community_designation
+ , notes::text
+ , st_transform(geom, {{ var("srid") }}) as geom -- reproject to the SRID given by the dbt var "srid"
+from
+ {{ source('minneapolis', 'residential_permits_residentialpermits') }}
+where
+ co_code = '053' -- NOTE(review): presumably the county code containing Minneapolis; confirm against the source data dictionary
+ and lower(ctu_name) = 'minneapolis' -- case-insensitive match on the city name
diff --git a/dbt/models/residential_permits_to_parcels.sql b/dbt/models/staging/stg_residential_permits_to_parcels.sql
similarity index 75%
rename from dbt/models/residential_permits_to_parcels.sql
rename to dbt/models/staging/stg_residential_permits_to_parcels.sql
index daedfab1..2b00cbf0 100644
--- a/dbt/models/residential_permits_to_parcels.sql
+++ b/dbt/models/staging/stg_residential_permits_to_parcels.sql
@@ -1,13 +1,3 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['residential_permit_id']},
- {'columns': ['parcel_id']}
- ]
- )
-}}
-
with
residential_permits as (
select
From d263aae647dce386eeb69e21c18ddce69de70e9a Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 16:45:29 -0400
Subject: [PATCH 115/142] reorganize and document commercial permits
---
dbt/models/commercial_permits.sql | 38 +++++++++++--------
dbt/models/staging/stg_commercial_permits.sql | 18 +++++++++
.../stg_commercial_permits_to_parcels.sql} | 10 -----
3 files changed, 40 insertions(+), 26 deletions(-)
create mode 100644 dbt/models/staging/stg_commercial_permits.sql
rename dbt/models/{commercial_permits_to_parcels.sql => staging/stg_commercial_permits_to_parcels.sql} (75%)
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
index 4687eb30..2db5d798 100644
--- a/dbt/models/commercial_permits.sql
+++ b/dbt/models/commercial_permits.sql
@@ -8,21 +8,27 @@
)
}}
+{% docs commercial_permits %}
+
+Contains commercial building permit applications.
+
+Notes:
+ - Permits are filtered to only include those in Minneapolis.
+ - `square_feet` is treated as missing if it is 0.
+
+{% enddocs %}
+
+with
+stg_commercial_permits as (select * from {{ ref('stg_commercial_permits') }}),
+stg_commercial_permits_to_parcels as (select * from {{ ref('stg_commercial_permits_to_parcels') }}),
+parcels as (select * from {{ ref('parcels') }})
select
- sde_id as commercial_permit_id
- , year::int as year_
- , nonres_gro::text as group_
- , nonres_sub::text as subgroup
- , nonres_typ::text as type_category
- , bldg_name::text as building_name
- , bldg_desc::text as building_description
- , permit_typ::text as permit_type
- , permit_val::int as permit_value
- , nullif(sqf, 0)::int as square_feet
- , address::text
- , st_transform(geom, {{ var("srid") }}) as geom
+ stg_commercial_permits.*,
+ stg_commercial_permits_to_parcels.parcel_id,
+ parcels.census_block_group_id,
+ parcels.census_tract_id,
+ parcels.zip_code_id
from
- {{ source('minneapolis', 'commercial_permits_nonresidentialconstruction') }}
- where
- co_code = '053'
- and lower(ctu_name) = 'minneapolis'
+ stg_commercial_permits
+ left join stg_commercial_permits_to_parcels using (commercial_permit_id)
+ left join parcels using (parcel_id)
diff --git a/dbt/models/staging/stg_commercial_permits.sql b/dbt/models/staging/stg_commercial_permits.sql
new file mode 100644
index 00000000..af5aec34
--- /dev/null
+++ b/dbt/models/staging/stg_commercial_permits.sql
@@ -0,0 +1,18 @@
+select
+ sde_id as commercial_permit_id
+ , year::smallint as year_
+ , nonres_gro::text as group_
+ , nonres_sub::text as subgroup
+ , nonres_typ::text as type_category
+ , bldg_name::text as building_name
+ , bldg_desc::text as building_description
+ , permit_typ::text as permit_type
+ , permit_val::int as permit_value
+ , nullif(sqf, 0)::int as square_feet
+ , address::text
+ , st_transform(geom, {{ var("srid") }}) as geom
+from
+ {{ source('minneapolis', 'commercial_permits_nonresidentialconstruction') }}
+ where
+ co_code = '053'
+ and lower(ctu_name) = 'minneapolis'
diff --git a/dbt/models/commercial_permits_to_parcels.sql b/dbt/models/staging/stg_commercial_permits_to_parcels.sql
similarity index 75%
rename from dbt/models/commercial_permits_to_parcels.sql
rename to dbt/models/staging/stg_commercial_permits_to_parcels.sql
index b74a47f4..fc619f42 100644
--- a/dbt/models/commercial_permits_to_parcels.sql
+++ b/dbt/models/staging/stg_commercial_permits_to_parcels.sql
@@ -1,13 +1,3 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['commercial_permit_id']},
- {'columns': ['parcel_id']}
- ]
- )
-}}
-
with
commercial_permits as (
select
From f5ae90c2e0423ed0806e96400229061fd58ac4ce Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 16:48:17 -0400
Subject: [PATCH 116/142] reorganize fair market rents
---
dbt/models/fair_market_rents.sql | 17 ++++++++++++++++-
.../stg_fair_market_rents_union.sql} | 0
2 files changed, 16 insertions(+), 1 deletion(-)
rename dbt/models/{fair_market_rents_union.sql => staging/stg_fair_market_rents_union.sql} (100%)
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index 847d19e1..71fe57e1 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -1,8 +1,23 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['zip_code_id', 'year_', 'num_bedrooms'], 'unique': true}
+ ]
+ )
+}}
+
{% set num_bedrooms = range(0, 5) %}
+{% doc fair_market_rents %}
+
+Contains fair market rent data for different numbers of bedrooms by zip code.
+
+{% enddoc %}
+
with
zip_codes as (select * from {{ ref('zip_codes') }})
-, fair_market_rents as (select * from {{ ref('fair_market_rents_union') }})
+, fair_market_rents as (select * from {{ ref('stg_fair_market_rents_union') }})
, fmr_zip as (
select
zip_codes.zip_code_id
diff --git a/dbt/models/fair_market_rents_union.sql b/dbt/models/staging/stg_fair_market_rents_union.sql
similarity index 100%
rename from dbt/models/fair_market_rents_union.sql
rename to dbt/models/staging/stg_fair_market_rents_union.sql
From 9a84d87007632d9824a2daad743b507260872ee7 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 17:00:14 -0400
Subject: [PATCH 117/142] document high frequency transit lines
---
dbt/models/high_frequency_transit_lines.sql | 12 +++++++++++-
.../stg_high_frequency_transit_lines_union.sql} | 0
2 files changed, 11 insertions(+), 1 deletion(-)
rename dbt/models/{high_frequency_transit_lines_union.sql => staging/stg_high_frequency_transit_lines_union.sql} (100%)
diff --git a/dbt/models/high_frequency_transit_lines.sql b/dbt/models/high_frequency_transit_lines.sql
index 34d1238a..d59af114 100644
--- a/dbt/models/high_frequency_transit_lines.sql
+++ b/dbt/models/high_frequency_transit_lines.sql
@@ -8,7 +8,17 @@
)
}}
-with lines as (select * from {{ ref('high_frequency_transit_lines_union') }})
+{% doc high_frequency_transit_lines %}
+
+Contains the geometry and metadata for high frequency transit lines in the city of Minneapolis.
+
+Notes:
+- `blue_zone_geom` is a 350 foot buffer around both lines and stops.
+- `yellow_zone_geom` is a quarter mile buffer around lines and a half mile buffer around stops.
+
+{% enddoc %}
+
+with lines as (select * from {{ ref('stg_high_frequency_transit_lines_union') }})
, stops as (select * from {{ ref('high_frequency_transit_stops') }})
, lines_and_stops as (
select
diff --git a/dbt/models/high_frequency_transit_lines_union.sql b/dbt/models/staging/stg_high_frequency_transit_lines_union.sql
similarity index 100%
rename from dbt/models/high_frequency_transit_lines_union.sql
rename to dbt/models/staging/stg_high_frequency_transit_lines_union.sql
From ab2eb4346789f0c9ee39031917b7de68c2b0e67a Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 17:13:50 -0400
Subject: [PATCH 118/142] reorganize parcels
---
dbt/models/parcels.sql | 20 ++++++++++++++++---
.../stg_parcels.sql} | 10 ----------
.../stg_parcels_to_census_block_groups.sql} | 14 ++-----------
.../stg_parcels_to_zip_codes.sql} | 12 +----------
4 files changed, 20 insertions(+), 36 deletions(-)
rename dbt/models/{parcels_base.sql => staging/stg_parcels.sql} (88%)
rename dbt/models/{parcels_to_census_block_groups.sql => staging/stg_parcels_to_census_block_groups.sql} (56%)
rename dbt/models/{parcels_to_zip_codes.sql => staging/stg_parcels_to_zip_codes.sql} (61%)
diff --git a/dbt/models/parcels.sql b/dbt/models/parcels.sql
index 12a48c54..835262d0 100644
--- a/dbt/models/parcels.sql
+++ b/dbt/models/parcels.sql
@@ -8,10 +8,24 @@
)
}}
+{% doc parcels %}
+
+Contains the geometry and metadata for all parcels in the city of Minneapolis.
+
+Notes:
+- Parcels data is released yearly. Parcels are considered valid for the year they were released.
+- Parcels are filtered to only include those in Minneapolis.
+- `emv_total`, `emv_bldg`, `emv_land`, `year_built`, and `sale_value` are treated as missing if they are 0.
+- `sale_date` is treated as missing if it is equal to `1899-12-30`.
+- `pin` is the county-assigned parcel identification number. The county prefix '053-' is removed.
+- Duplicate rows are removed. Note that this is based on the entire row, not just the `pin`. There may still be duplicate `pin, year_` pairs.
+
+{% enddoc %}
+
with
-parcels as (select * from {{ ref('parcels_base') }}),
-to_zip_codes as (select * from {{ref('parcels_to_zip_codes')}}),
-to_census_bgs as (select * from {{ref('parcels_to_census_block_groups')}}),
+parcels as (select * from {{ ref('stg_parcels') }}),
+to_zip_codes as (select * from {{ref('stg_parcels_to_zip_codes')}}),
+to_census_bgs as (select * from {{ref('stg_parcels_to_census_block_groups')}}),
census_bgs as (select * from {{ref('census_block_groups')}})
select
parcels.*
diff --git a/dbt/models/parcels_base.sql b/dbt/models/staging/stg_parcels.sql
similarity index 88%
rename from dbt/models/parcels_base.sql
rename to dbt/models/staging/stg_parcels.sql
index d4b0a7c9..9ffc665e 100644
--- a/dbt/models/parcels_base.sql
+++ b/dbt/models/staging/stg_parcels.sql
@@ -1,13 +1,3 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['parcel_id'], 'unique': true},
- {'columns': ['valid', 'geom'], 'type': 'gist'}
- ]
- )
-}}
-
{% set years = range(2002, 2024) %}
{% set city = 'MINNEAPOLIS' %}
{% set county_id = '053' %}
diff --git a/dbt/models/parcels_to_census_block_groups.sql b/dbt/models/staging/stg_parcels_to_census_block_groups.sql
similarity index 56%
rename from dbt/models/parcels_to_census_block_groups.sql
rename to dbt/models/staging/stg_parcels_to_census_block_groups.sql
index bb6cc212..39f51d45 100644
--- a/dbt/models/parcels_to_census_block_groups.sql
+++ b/dbt/models/staging/stg_parcels_to_census_block_groups.sql
@@ -1,27 +1,17 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['parcel_id'], 'unique': true},
- {'columns': ['census_block_group_id']}
- ]
- )
-}}
-
with
parcels as (
select
parcel_id as id
, valid
, geom
- from {{ ref("parcels_base") }}
+ from {{ ref('stg_parcels_base') }}
),
census_block_groups as (
select
census_block_group_id as id
, valid
, geom
- from {{ ref("census_block_groups") }}
+ from {{ ref('census_block_groups') }}
)
select
child_id as parcel_id
diff --git a/dbt/models/parcels_to_zip_codes.sql b/dbt/models/staging/stg_parcels_to_zip_codes.sql
similarity index 61%
rename from dbt/models/parcels_to_zip_codes.sql
rename to dbt/models/staging/stg_parcels_to_zip_codes.sql
index 6a045300..964215bb 100644
--- a/dbt/models/parcels_to_zip_codes.sql
+++ b/dbt/models/staging/stg_parcels_to_zip_codes.sql
@@ -1,20 +1,10 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['parcel_id'], 'unique': true},
- {'columns': ['zip_code_id']}
- ]
- )
-}}
-
with
parcels as (
select
parcel_id as id
, valid
, geom
- from {{ ref("parcels_base") }}
+ from {{ ref("stg_parcels_base") }}
),
zip_codes as (
select
From 0bda2c779f17f168b21e1410451d610bb392c752 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Tue, 3 Sep 2024 17:59:21 -0400
Subject: [PATCH 119/142] finish reorganization
---
dbt/models/acs_block_group.sql | 11 --
dbt/models/acs_tract.sql | 11 --
dbt/models/commercial_permits.sql | 10 --
dbt/models/docs.md | 121 ++++++++++++++++++
dbt/models/fair_market_rents.sql | 6 -
dbt/models/high_frequency_transit_lines.sql | 10 --
dbt/models/parcels.sql | 14 --
dbt/models/parking.sql | 50 ++++----
dbt/models/residential_permits.sql | 11 --
dbt/models/schema.yml | 115 +++--------------
dbt/models/segregation_indexes.sql | 7 -
dbt/models/staging/schema.yml | 14 ++
.../stg_commercial_permits_to_parcels.sql | 2 +-
.../stg_parcels_to_census_block_groups.sql | 2 +-
.../staging/stg_parcels_to_zip_codes.sql | 2 +-
dbt/models/staging/stg_parking.sql | 15 +++
.../stg_parking_to_parcels.sql} | 14 +-
.../stg_residential_permits_to_parcels.sql | 2 +-
.../stg_usps_migration_union.sql} | 0
.../tracts_model_int__parcels_filtered.sql | 2 +-
dbt/models/usps_migration.sql | 2 +-
dbt/models/zip_codes.sql | 7 -
22 files changed, 198 insertions(+), 230 deletions(-)
create mode 100644 dbt/models/docs.md
create mode 100644 dbt/models/staging/schema.yml
create mode 100644 dbt/models/staging/stg_parking.sql
rename dbt/models/{parking_to_parcels.sql => staging/stg_parking_to_parcels.sql} (61%)
rename dbt/models/{usps_migration_union.sql => staging/stg_usps_migration_union.sql} (100%)
diff --git a/dbt/models/acs_block_group.sql b/dbt/models/acs_block_group.sql
index a323d31c..ea77a2b4 100644
--- a/dbt/models/acs_block_group.sql
+++ b/dbt/models/acs_block_group.sql
@@ -7,17 +7,6 @@
)
}}
-{% docs acs_block_group %}
-
-Contains American Community Survey (ACS) demographic data at a census block
-group granularity.
-
-The `name_` column contains the name of the demographic variable (e.g.
-`B03002_003E`). See `acs_variables` for a mapping of these codes to
-human-readable names.
-
-{% enddocs %}
-
select
year::smallint as year_,
code as name_,
diff --git a/dbt/models/acs_tract.sql b/dbt/models/acs_tract.sql
index ae113e66..3a4d1b74 100644
--- a/dbt/models/acs_tract.sql
+++ b/dbt/models/acs_tract.sql
@@ -7,17 +7,6 @@
)
}}
-{% docs acs_tract %}
-
-Contains American Community Survey (ACS) demographic data at a census tract
-granularity.
-
-The `name_` column contains the name of the demographic variable (e.g.
-`B03002_003E`). See `acs_variables` for a mapping of these codes to
-human-readable names.
-
-{% enddocs %}
-
select
year::smallint as year_,
code as name_,
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
index 2db5d798..58961e23 100644
--- a/dbt/models/commercial_permits.sql
+++ b/dbt/models/commercial_permits.sql
@@ -8,16 +8,6 @@
)
}}
-{% docs commercial_permits %}
-
-Contains commercial building permit applications.
-
-Notes:
- - Permits are filtered to only include those in Minneapolis.
- - `square_feet` is treated as missing if it is 0.
-
-{% enddocs %}
-
with
stg_commercial_permits as (select * from {{ ref('stg_commercial_permits') }}),
stg_commercial_permits_to_parcels as (select * from {{ ref('stg_commercial_permits_to_parcels') }}),
diff --git a/dbt/models/docs.md b/dbt/models/docs.md
new file mode 100644
index 00000000..8e0bafd8
--- /dev/null
+++ b/dbt/models/docs.md
@@ -0,0 +1,121 @@
+{% docs commercial_permits %}
+
+Contains commercial building permit applications.
+
+Notes:
+ - Permits are filtered to only include those in Minneapolis.
+ - `square_feet` is treated as missing if it is 0.
+
+{% enddocs %}
+
+{% docs residential_permits %}
+
+Contains residential building permit applications.
+
+Notes:
+ - Permits are filtered to only include those in Minneapolis.
+ - `square_feet` is treated as missing if it is 0.
+ - `permit_value` is treated as missing if it is 0.
+
+{% enddocs %}
+
+{% docs zip_codes %}
+
+Contains the geometry and metadata for all zip code tabulation areas (ZCTAs) in
+the United States.
+
+{% enddocs %}
+
+{% docs parcels %}
+
+Contains the geometry and metadata for all parcels in the city of Minneapolis.
+
+Notes:
+- Parcels data is released yearly. Parcels are considered valid for the year they were released.
+- Parcels are filtered to only include those in Minneapolis.
+- `emv_total`, `emv_bldg`, `emv_land`, `year_built`, and `sale_value` are treated as missing if they are 0.
+- `sale_date` is treated as missing if it is equal to `1899-12-30`.
+- `pin` is the county-assigned parcel identification number. The county prefix '053-' is removed.
+- Duplicate rows are removed. Note that this is based on the entire row, not just the `pin`. There may still be duplicate `pin, year_` pairs.
+
+{% enddocs %}
+
+{% docs census_tracts %}
+
+Contains geometry and metadata for census tracts. Currently only includes census
+tracts for Minnesota.
+
+{% enddocs %}
+
+{% docs census_block_groups %}
+
+Contains geometry and metadata for census block groups. Currently only includes
+census block groups for Minnesota.
+
+{% enddocs %}
+
+{% docs acs_block_group %}
+
+Contains American Community Survey (ACS) demographic data at a census block
+group granularity.
+
+The `name_` column contains the name of the demographic variable (e.g.
+`B03002_003E`). See `acs_variables` for a mapping of these codes to
+human-readable names.
+
+{% enddocs %}
+
+{% docs acs_tract %}
+
+Contains American Community Survey (ACS) demographic data at a census tract
+granularity.
+
+The `name_` column contains the name of the demographic variable (e.g.
+`B03002_003E`). See `acs_variables` for a mapping of these codes to
+human-readable names.
+
+{% enddocs %}
+
+{% docs fair_market_rents %}
+
+Contains fair market rent data for different numbers of bedrooms by zip code.
+
+{% enddocs %}
+
+{% docs high_frequency_transit_lines %}
+
+Contains the geometry and metadata for high frequency transit lines in the city of Minneapolis.
+
+Notes:
+- `blue_zone_geom` is a 350 foot buffer around both lines and stops.
+- `yellow_zone_geom` is a quarter mile buffer around lines and a half mile buffer around stops.
+
+{% enddocs %}
+
+{% docs segregation_indexes %}
+
+Segregation index for each tract for each year, computed for each reference
+distribution.
+
+The segregation index is the KL-divergence between the distribution of
+population in a tract and a reference distribution. For example, a tract that
+has many more white people than the average for the city will have a high
+segregation index for the 'average_city' distribution.
+
+Available distributions:
+- `uniform`: Uniform distribution.
+- `annual_city`: Citywide distribution for the current year.
+- `average_city`: Citywide distribution averaged over all available years.
+
+{% enddocs %}
+
+{% docs usps_migration %}
+
+Contains USPS migration data sourced from change of address forms. Migrations
+are broken down by month and year, zip_code, flow direction, and flow type. Flow
+directions are either `from` (out of) the zip code or `to` (into) the zip code.
+
+Flow types are one of `business`, `family`, `individual`, `perm` (permanent),
+`temp` (temporary), or `total`.
+
+{% enddocs %}
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index 71fe57e1..92c9bafc 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -9,12 +9,6 @@
{% set num_bedrooms = range(0, 5) %}
-{% doc fair_market_rents %}
-
-Contains fair market rent data for different numbers of bedrooms by zip code.
-
-{% enddoc %}
-
with
zip_codes as (select * from {{ ref('zip_codes') }})
, fair_market_rents as (select * from {{ ref('stg_fair_market_rents_union') }})
diff --git a/dbt/models/high_frequency_transit_lines.sql b/dbt/models/high_frequency_transit_lines.sql
index d59af114..c27885ca 100644
--- a/dbt/models/high_frequency_transit_lines.sql
+++ b/dbt/models/high_frequency_transit_lines.sql
@@ -8,16 +8,6 @@
)
}}
-{% doc high_frequency_transit_lines %}
-
-Contains the geometry and metadata for high frequency transit lines in the city of Minneapolis.
-
-Notes:
-- `blue_zone_geom` is a 350 foot buffer around both lines and stops.
-- `yellow_zone_geom` is a quarter mile buffer around lines and a half mile buffer around stops.
-
-{% enddoc %}
-
with lines as (select * from {{ ref('stg_high_frequency_transit_lines_union') }})
, stops as (select * from {{ ref('high_frequency_transit_stops') }})
, lines_and_stops as (
diff --git a/dbt/models/parcels.sql b/dbt/models/parcels.sql
index 835262d0..bd7b07ab 100644
--- a/dbt/models/parcels.sql
+++ b/dbt/models/parcels.sql
@@ -8,20 +8,6 @@
)
}}
-{% doc parcels %}
-
-Contains the geometry and metadata for all parcels in the city of Minneapolis.
-
-Notes:
-- Parcels data is released yearly. Parcels are considered valid for the year they were released.
-- Parcels are filtered to only include those in Minneapolis.
-- `emv_total`, `emv_bldg`, `emv_land`, `year_built`, and `sale_value` are treated as missing if they are 0.
-- `sale_date` is treated as missing if it is equal to `1899-12-30`.
-- `pin` is the county-assigned parcel identification number. The county prefix '053-' is removed.
-- Duplicate rows are removed. Note that this is based on the entire row, not just the `pin`. There may still be duplicate `pin, year_` pairs.
-
-{% enddoc %}
-
with
parcels as (select * from {{ ref('stg_parcels') }}),
to_zip_codes as (select * from {{ref('stg_parcels_to_zip_codes')}}),
diff --git a/dbt/models/parking.sql b/dbt/models/parking.sql
index ac31de4a..e49574fe 100644
--- a/dbt/models/parking.sql
+++ b/dbt/models/parking.sql
@@ -1,30 +1,24 @@
-with
- parking_raw as (
- select
- ogc_fid
- , "date"
- , "project na"
- , address
- , neighborho
- , ward
- , "downtown y"
- , "housing un"
- , "car parkin"
- , "bike parki"
- , "year"
- , geom
- from {{ source('minneapolis', 'parking_parcels') }}
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['parking_id'], 'unique': true},
+ {'columns': ['geom'], 'type': 'gist'}
+ ]
)
+}}
+
+with
+ stg_parking as (select * from {{ ref('stg_parking') }}),
+ stg_parking_to_parcels as (select * from {{ ref('stg_parking_to_parcels') }}),
+ parcels as (select * from {{ ref('parcels') }})
select
- ogc_fid as parking_id
- , to_date("year" || '-' || "date", 'YYYY-DD-Mon') as date_
- , "project na"::text as project_name
- , address::text
- , neighborho::text as neighborhood
- , ward::int
- , "downtown y" = 'Y' as is_downtown
- , "housing un"::int as num_housing_units
- , "car parkin"::int as num_car_parking_spaces
- , "bike parki"::int as num_bike_parking_spaces
- , st_transform(geom, {{ var("srid") }}) as geom
-from parking_raw
+ stg_parking.*,
+ stg_parking_to_parcels.parcel_id,
+ parcels.census_block_group_id,
+ parcels.census_tract_id,
+ parcels.zip_code_id
+from
+ stg_parking
+ left join stg_parking_to_parcels using (parking_id)
+ left join parcels using (parcel_id)
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index 181af2b5..bcba6ab4 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -8,17 +8,6 @@
)
}}
-{% docs residential_permits %}
-
-Contains residential building permit applications.
-
-Notes:
- - Permits are filtered to only include those in Minneapolis.
- - `square_feet` is treated as missing if it is 0.
- - `permit_value` is treated as missing if it is 0.
-
-{% enddocs %}
-
with
stg_residential_permits as (select * from {{ ref('stg_residential_permits') }}),
stg_residential_permits_to_parcels as (select * from {{ ref('stg_residential_permits_to_parcels') }}),
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 2ed844c8..5022e8ea 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -90,6 +90,7 @@ sources:
models:
- name: census_tracts
+ description: '{{ doc("census_tracts") }}'
columns:
- name: census_tract_id
data_tests:
@@ -97,6 +98,7 @@ models:
- not_null
- name: census_block_groups
+ description: '{{ doc("census_block_groups") }}'
columns:
- name: census_block_group_id
data_tests:
@@ -109,12 +111,7 @@ models:
field: census_tract_id
- name: acs_block_group
- data_tests:
- - dbt_utils.unique_combination_of_columns:
- combination_of_columns:
- - census_block_group_id
- - year_
- - name_
+ description: '{{ doc("acs_block_group") }}'
columns:
- name: census_block_group_id
data_tests:
@@ -122,7 +119,17 @@ models:
to: ref('census_block_groups')
field: census_block_group_id
+ - name: acs_tract
+ description: '{{ doc("acs_tract") }}'
+
+ - name: fair_market_rents
+ description: '{{ doc("fair_market_rents") }}'
+
+ - name: high_frequency_transit_lines
+ description: '{{ doc("high_frequency_transit_lines") }}'
+
- name: segregation_indexes
+ description: '{{ doc("segregation_indexes") }}'
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
@@ -137,6 +144,7 @@ models:
field: census_tract
- name: parcels
+ description: '{{ doc("parcels") }}'
columns:
- name: parcel_id
data_tests:
@@ -154,68 +162,8 @@ models:
to: ref('census_block_groups')
field: census_block_group_id
- - name: parcels_to_census_block_groups
- data_tests:
- - dbt_utils.unique_combination_of_columns:
- combination_of_columns:
- - parcel_id
- - census_block_group_id
- columns:
- - name: parcel_id
- data_tests:
- - not_null
- - relationships:
- to: ref('parcels')
- field: parcel_id
- - name: census_block_group_id
- data_tests:
- - not_null
- - relationships:
- to: ref('census_block_groups')
- field: census_block_group_id
-
- - name: parcels_to_zip_codes
- data_tests:
- - dbt_utils.unique_combination_of_columns:
- combination_of_columns:
- - parcel_id
- - zip_code_id
- columns:
- - name: parcel_id
- data_tests:
- - not_null
- - relationships:
- to: ref('parcels')
- field: parcel_id
- - name: zip_code_id
- data_tests:
- - not_null
- - relationships:
- to: ref('zip_codes')
- field: zip_code_id
-
- - name: all_zip_codes_2010
- columns:
- - name: zip_code
- data_tests:
- - not_null
- - unique
-
- - name: all_zip_codes_2020
- columns:
- - name: zip_code
- data_tests:
- - not_null
- - unique
-
- - name: all_zip_codes
- columns:
- - name: zip_code_id
- data_tests:
- - not_null
- - unique
-
- name: zip_codes
+ description: '{{ doc("zip_codes") }}'
columns:
- name: zip_code_id
data_tests:
@@ -223,6 +171,7 @@ models:
- unique
- name: usps_migration
+ description: '{{ doc("usps_migration") }}'
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
@@ -239,6 +188,7 @@ models:
field: zip_code_id
- name: commercial_permits
+ description: '{{ doc("commercial_permits") }}'
columns:
- name: commercial_permit_id
data_tests:
@@ -246,42 +196,13 @@ models:
- unique
- name: residential_permits
+ description: '{{ doc("residential_permits") }}'
columns:
- name: residential_permit_id
data_tests:
- not_null
- unique
- - name: residential_permits_to_parcels
- columns:
- - name: residential_permit_id
- data_tests:
- - not_null
- - relationships:
- to: ref('residential_permits')
- field: residential_permit_id
- - name: parcel_id
- data_tests:
- - not_null
- - relationships:
- to: ref('parcels')
- field: parcel_id
-
- - name: commercial_permits_to_parcels
- columns:
- - name: commercial_permit_id
- data_tests:
- - not_null
- - relationships:
- to: ref('commercial_permits')
- field: commercial_permit_id
- - name: parcel_id
- data_tests:
- - not_null
- - relationships:
- to: ref('parcels')
- field: parcel_id
-
- name: neighborhoods
columns:
- name: neighborhood_id
diff --git a/dbt/models/segregation_indexes.sql b/dbt/models/segregation_indexes.sql
index 90f48b7b..cdadbc67 100644
--- a/dbt/models/segregation_indexes.sql
+++ b/dbt/models/segregation_indexes.sql
@@ -7,13 +7,6 @@
)
}}
--- Segregation index for each tract for each year, computed for each reference
--- distribution.
---
--- The segregation index is the KL-divergence between the distribution of
--- population in a tract and a reference distribution. For example, a tract that
--- has many more white people than the average for the city will have a high
--- segregation index for the 'average_city' distribution.
with
categories as (select * from {{ ref("population_categories") }})
, acs_tract_all as (select * from {{ ref("acs_tract") }})
diff --git a/dbt/models/staging/schema.yml b/dbt/models/staging/schema.yml
new file mode 100644
index 00000000..5328c7d1
--- /dev/null
+++ b/dbt/models/staging/schema.yml
@@ -0,0 +1,14 @@
+models:
+ - name: stg_zip_codes_2010
+ columns:
+ - name: zip_code
+ data_tests:
+ - not_null
+ - unique
+
+ - name: stg_zip_codes_2020
+ columns:
+ - name: zip_code
+ data_tests:
+ - not_null
+ - unique
diff --git a/dbt/models/staging/stg_commercial_permits_to_parcels.sql b/dbt/models/staging/stg_commercial_permits_to_parcels.sql
index fc619f42..bbc44326 100644
--- a/dbt/models/staging/stg_commercial_permits_to_parcels.sql
+++ b/dbt/models/staging/stg_commercial_permits_to_parcels.sql
@@ -4,7 +4,7 @@ commercial_permits as (
commercial_permit_id as id
, daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid
, geom
- from {{ ref("commercial_permits") }}
+ from {{ ref('stg_commercial_permits') }}
)
, parcels as (
select
diff --git a/dbt/models/staging/stg_parcels_to_census_block_groups.sql b/dbt/models/staging/stg_parcels_to_census_block_groups.sql
index 39f51d45..d65f230f 100644
--- a/dbt/models/staging/stg_parcels_to_census_block_groups.sql
+++ b/dbt/models/staging/stg_parcels_to_census_block_groups.sql
@@ -4,7 +4,7 @@ parcels as (
parcel_id as id
, valid
, geom
- from {{ ref('stg_parcels_base') }}
+ from {{ ref('stg_parcels') }}
),
census_block_groups as (
select
diff --git a/dbt/models/staging/stg_parcels_to_zip_codes.sql b/dbt/models/staging/stg_parcels_to_zip_codes.sql
index 964215bb..15b643c7 100644
--- a/dbt/models/staging/stg_parcels_to_zip_codes.sql
+++ b/dbt/models/staging/stg_parcels_to_zip_codes.sql
@@ -4,7 +4,7 @@ parcels as (
parcel_id as id
, valid
, geom
- from {{ ref("stg_parcels_base") }}
+ from {{ ref("stg_parcels") }}
),
zip_codes as (
select
diff --git a/dbt/models/staging/stg_parking.sql b/dbt/models/staging/stg_parking.sql
new file mode 100644
index 00000000..ed00a8b1
--- /dev/null
+++ b/dbt/models/staging/stg_parking.sql
@@ -0,0 +1,15 @@
+with
+parking_raw as (select * from {{ source('minneapolis', 'parking_parcels') }})
+select
+ ogc_fid as parking_id
+ , to_date("year" || '-' || "date", 'YYYY-DD-Mon') as date_
+ , "project na"::text as project_name
+ , address::text
+ , neighborho::text as neighborhood
+ , ward::smallint
+ , "downtown y" = 'Y' as is_downtown
+ , "housing un"::smallint as num_housing_units
+ , "car parkin"::smallint as num_car_parking_spaces
+ , "bike parki"::smallint as num_bike_parking_spaces
+ , st_transform(geom, {{ var("srid") }}) as geom
+from parking_raw
diff --git a/dbt/models/parking_to_parcels.sql b/dbt/models/staging/stg_parking_to_parcels.sql
similarity index 61%
rename from dbt/models/parking_to_parcels.sql
rename to dbt/models/staging/stg_parking_to_parcels.sql
index 7eb1c755..6e708e17 100644
--- a/dbt/models/parking_to_parcels.sql
+++ b/dbt/models/staging/stg_parking_to_parcels.sql
@@ -1,27 +1,17 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['parking_id']},
- {'columns': ['parcel_id']}
- ]
- )
-}}
-
with
parking as (
select
parking_id as id
, daterange(date_, date_, '[]') as valid
, geom
- from {{ ref('parking') }}
+ from {{ ref('stg_parking') }}
)
, parcels as (
select
parcel_id as id
, valid
, geom
- from {{ ref('parcels_base') }}
+ from {{ ref('parcels') }}
)
select
child_id as parking_id
diff --git a/dbt/models/staging/stg_residential_permits_to_parcels.sql b/dbt/models/staging/stg_residential_permits_to_parcels.sql
index 2b00cbf0..d3b5ae37 100644
--- a/dbt/models/staging/stg_residential_permits_to_parcels.sql
+++ b/dbt/models/staging/stg_residential_permits_to_parcels.sql
@@ -4,7 +4,7 @@ residential_permits as (
residential_permit_id as id
, daterange(to_date(year_::text, 'YYYY'), to_date(year_::text, 'YYYY'), '[]') as valid
, geom
- from {{ ref("residential_permits") }}
+ from {{ ref('stg_residential_permits') }}
)
, parcels as (
select
diff --git a/dbt/models/usps_migration_union.sql b/dbt/models/staging/stg_usps_migration_union.sql
similarity index 100%
rename from dbt/models/usps_migration_union.sql
rename to dbt/models/staging/stg_usps_migration_union.sql
diff --git a/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql b/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
index f14ca0fc..80055cc2 100644
--- a/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
+++ b/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
@@ -7,7 +7,7 @@
-- Retag parcels with census tracts (because we replaced the 2020 tracts with the 2019 tracts)
with
census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }}),
-parcels as (select * from {{ ref('parcels_base') }}),
+parcels as (select * from {{ ref('parcels') }}),
parcels_tag as (select parcel_id as id, valid, geom from parcels),
census_tracts_tag as (select census_tract_id as id, valid, geom from census_tracts),
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index 541b32c1..0bb0ef93 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -14,7 +14,7 @@ with
zip_codes as (select * from {{ ref('zip_codes') }})
, process_date as (
select to_date(yyyy_mm, 'YYYYMM') as date_, *
- from {{ ref('usps_migration_union') }}
+ from {{ ref('stg_usps_migration_union') }}
)
, add_zip_id as (
select zip_code_id, process_date.*
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
index 623ab23f..078f14ef 100644
--- a/dbt/models/zip_codes.sql
+++ b/dbt/models/zip_codes.sql
@@ -8,13 +8,6 @@
)
}}
-{% docs zip_codes %}
-
-Contains the geometry and metadata for all zip code tabulation areas (ZCTAs) in
-the United States.
-
-{% enddocs %}
-
with
zip_codes as (
select
From fd89ca43ebbf9153c38b42c119ef92b4d3d4c3eb Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 11:05:12 -0400
Subject: [PATCH 120/142] reorganize fair market rents data
---
dbt/models/fair_market_rents.sql | 39 +++++++------------
dbt/models/schema.yml | 6 ---
.../staging/stg_fair_market_rents_dedup.sql | 1 +
.../staging/stg_fair_market_rents_union.sql | 10 ++---
.../staging/stg_fair_market_rents_unpivot.sql | 16 ++++++++
5 files changed, 36 insertions(+), 36 deletions(-)
create mode 100644 dbt/models/staging/stg_fair_market_rents_dedup.sql
create mode 100644 dbt/models/staging/stg_fair_market_rents_unpivot.sql
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index 92c9bafc..40979aa7 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -2,35 +2,24 @@
config(
materialized='table',
indexes = [
- {'columns': ['zip_code_id', 'year_', 'num_bedrooms'], 'unique': true}
+ {'columns': ['zip_code_id', 'year_', 'num_bedrooms']}
]
)
}}
-{% set num_bedrooms = range(0, 5) %}
-
with
+stg_fair_market_rents_unpivot as (
+ select * from {{ ref('stg_fair_market_rents_unpivot') }}
+),
zip_codes as (select * from {{ ref('zip_codes') }})
-, fair_market_rents as (select * from {{ ref('stg_fair_market_rents_union') }})
-, fmr_zip as (
- select
- zip_codes.zip_code_id
- {% for bedroom in num_bedrooms %}
- , fair_market_rents.rent_br{{ bedroom }}
- {% endfor %}
- , fair_market_rents.year_
- from
- fair_market_rents
- inner join zip_codes
- on zip_codes.zip_code = fair_market_rents.zip_code
- and zip_codes.valid @> to_date(year_::text , 'YYYY')
-)
-{% for bedroom in num_bedrooms %}
select
- zip_code_id
- , rent_br{{ bedroom }}::int as rent
- , {{ bedroom }} as num_bedrooms
- , year_::int
-from fmr_zip
-{% if not loop.last %} union all {% endif %}
-{% endfor %}
+ zip_codes.zip_code_id,
+ stg_fair_market_rents_unpivot.zip_code,
+ stg_fair_market_rents_unpivot.year_::smallint,
+ stg_fair_market_rents_unpivot.num_bedrooms::smallint,
+ stg_fair_market_rents_unpivot.rent::smallint
+from
+ stg_fair_market_rents_unpivot
+ left join zip_codes
+ on stg_fair_market_rents_unpivot.zip_code = zip_codes.zip_code
+ and (stg_fair_market_rents_unpivot.year_ || '-01-01')::date <@ zip_codes.valid
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 5022e8ea..b2b0ea78 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -112,12 +112,6 @@ models:
- name: acs_block_group
description: '{{ doc("acs_block_group") }}'
- columns:
- - name: census_block_group_id
- data_tests:
- - relationships:
- to: ref('census_block_groups')
- field: census_block_group_id
- name: acs_tract
description: '{{ doc("acs_tract") }}'
diff --git a/dbt/models/staging/stg_fair_market_rents_dedup.sql b/dbt/models/staging/stg_fair_market_rents_dedup.sql
new file mode 100644
index 00000000..fec86c06
--- /dev/null
+++ b/dbt/models/staging/stg_fair_market_rents_dedup.sql
@@ -0,0 +1 @@
+select distinct * from {{ ref('stg_fair_market_rents_unpivot') }}
diff --git a/dbt/models/staging/stg_fair_market_rents_union.sql b/dbt/models/staging/stg_fair_market_rents_union.sql
index 696d0a34..5bf52020 100644
--- a/dbt/models/staging/stg_fair_market_rents_union.sql
+++ b/dbt/models/staging/stg_fair_market_rents_union.sql
@@ -3,11 +3,11 @@
{% for year_ in years %}
select
zip_code
- , rent_br0
- , rent_br1
- , rent_br2
- , rent_br3
- , rent_br4
+ , replace(rent_br0, '.00', '') as rent_br0
+ , replace(rent_br1, '.00', '') as rent_br1
+ , replace(rent_br2, '.00', '') as rent_br2
+ , replace(rent_br3, '.00', '') as rent_br3
+ , replace(rent_br4, '.00', '') as rent_br4
, year as year_
from
{{ source('minneapolis', 'fair_market_rents_' ~ year_) }}
diff --git a/dbt/models/staging/stg_fair_market_rents_unpivot.sql b/dbt/models/staging/stg_fair_market_rents_unpivot.sql
new file mode 100644
index 00000000..92e64612
--- /dev/null
+++ b/dbt/models/staging/stg_fair_market_rents_unpivot.sql
@@ -0,0 +1,16 @@
+with
+stg_fair_market_rents_union as (select * from {{ ref('stg_fair_market_rents_union') }})
+select
+    stg_fair_market_rents_union.zip_code,
+    stg_fair_market_rents_union.year_,
+    x.num_bedrooms,
+    x.rent
+from
+    stg_fair_market_rents_union
+    cross join lateral (  -- unpivot: emits five rows (0-4 bedrooms) per input row
+        values (0, rent_br0),
+               (1, rent_br1),
+               (2, rent_br2),
+               (3, rent_br3),
+               (4, rent_br4)
+    ) as x(num_bedrooms, rent)
From 4d3cf74b564f7c66917112ced359eea93d10ac22 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 11:40:21 -0400
Subject: [PATCH 121/142] add mapping table for translating from zip codes to
zctas
---
dbt/models/docs.md | 6 ++-
dbt/models/fair_market_rents.sql | 14 +++---
dbt/models/parcels.sql | 4 +-
dbt/models/schema.yml | 21 +++++----
dbt/models/staging/schema.yml | 8 ++--
...zip_codes.sql => stg_parcels_to_zctas.sql} | 10 ++--
.../staging/stg_usps_migration_unpivot.sql | 29 ++++++++++++
dbt/models/staging/stg_zctas_2010.sql | 4 ++
..._zip_codes_2020.sql => stg_zctas_2020.sql} | 2 +-
dbt/models/staging/stg_zip_codes_2010.sql | 5 --
dbt/models/usps_migration.sql | 46 +++++--------------
dbt/models/zctas.sql | 28 +++++++++++
dbt/models/zip_codes.sql | 28 -----------
dbt/models/zip_codes_to_zctas.sql | 2 +
14 files changed, 111 insertions(+), 96 deletions(-)
rename dbt/models/staging/{stg_parcels_to_zip_codes.sql => stg_parcels_to_zctas.sql} (57%)
create mode 100644 dbt/models/staging/stg_usps_migration_unpivot.sql
create mode 100644 dbt/models/staging/stg_zctas_2010.sql
rename dbt/models/staging/{stg_zip_codes_2020.sql => stg_zctas_2020.sql} (82%)
delete mode 100644 dbt/models/staging/stg_zip_codes_2010.sql
create mode 100644 dbt/models/zctas.sql
delete mode 100644 dbt/models/zip_codes.sql
create mode 100644 dbt/models/zip_codes_to_zctas.sql
diff --git a/dbt/models/docs.md b/dbt/models/docs.md
index 8e0bafd8..02494f2e 100644
--- a/dbt/models/docs.md
+++ b/dbt/models/docs.md
@@ -19,11 +19,15 @@ Notes:
{% enddocs %}
-{% docs zip_codes %}
+{% docs zctas %}
Contains the geometry and metadata for all zip code tabulation areas (ZCTAs) in
the United States.
+These are not the same as zip codes. Zip codes are created by the postal service, and they change regularly. ZCTAs are created by the census bureau alongside the census. Not every zip code has a corresponding ZCTA (unpopulated zip codes are not represented, for example), and some ZCTAs cover multiple zip codes.
+
+Use the mapping table `zip_codes_to_zctas` to translate from zip codes to ZCTAs.
+
{% enddocs %}
{% docs parcels %}
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index 40979aa7..c82afc4b 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -11,15 +11,17 @@ with
stg_fair_market_rents_unpivot as (
select * from {{ ref('stg_fair_market_rents_unpivot') }}
),
-zip_codes as (select * from {{ ref('zip_codes') }})
+zip_codes_to_zctas as (select * from {{ ref('zip_codes_to_zctas') }}),
+zctas as (select * from {{ ref('zctas') }})
select
- zip_codes.zip_code_id,
stg_fair_market_rents_unpivot.zip_code,
stg_fair_market_rents_unpivot.year_::smallint,
stg_fair_market_rents_unpivot.num_bedrooms::smallint,
- stg_fair_market_rents_unpivot.rent::smallint
+ stg_fair_market_rents_unpivot.rent::smallint,
+ zctas.zcta_id
from
stg_fair_market_rents_unpivot
- left join zip_codes
- on stg_fair_market_rents_unpivot.zip_code = zip_codes.zip_code
- and (stg_fair_market_rents_unpivot.year_ || '-01-01')::date <@ zip_codes.valid
+ left join zip_codes_to_zctas using zip_code
+ left join zctas
+ on zip_codes_to_zctas.zcta = zctas.zcta
+ and (stg_fair_market_rents_unpivot.year_ || '-01-01')::date <@ zctas.valid
diff --git a/dbt/models/parcels.sql b/dbt/models/parcels.sql
index bd7b07ab..9897974f 100644
--- a/dbt/models/parcels.sql
+++ b/dbt/models/parcels.sql
@@ -10,12 +10,12 @@
with
parcels as (select * from {{ ref('stg_parcels') }}),
-to_zip_codes as (select * from {{ref('stg_parcels_to_zip_codes')}}),
+to_zctas as (select * from {{ref('stg_parcels_to_zctas')}}),
to_census_bgs as (select * from {{ref('stg_parcels_to_census_block_groups')}}),
census_bgs as (select * from {{ref('census_block_groups')}})
select
parcels.*
- , to_zip_codes.zip_code_id
+ , to_zctas.zcta_id
, to_census_bgs.census_block_group_id
, census_bgs.census_tract_id
from
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index b2b0ea78..52800c91 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -37,6 +37,7 @@ sources:
- name: usps_y2023
- name: zip_codes_tl_2020_us_zcta510
- name: zip_codes_tl_2020_us_zcta520
+ - name: zip_codes_zcta_xref
- name: census_cb_2010_27_bg_500k
- name: census_cb_2010_27_tract_500k
- name: census_cb_2013_27_bg_500k
@@ -144,22 +145,22 @@ models:
data_tests:
- unique
- not_null
- - name: zip_code_id
+ - name: zcta_id
data_tests:
- not_null
- relationships:
- to: ref('zip_codes')
- field: zip_code_id
+ to: ref('zctas')
+ field: zcta_id
- name: census_block_group_id
data_tests:
- relationships:
to: ref('census_block_groups')
field: census_block_group_id
- - name: zip_codes
- description: '{{ doc("zip_codes") }}'
+ - name: zctas
+ description: '{{ doc("zctas") }}'
columns:
- - name: zip_code_id
+ - name: zcta_id
data_tests:
- not_null
- unique
@@ -170,16 +171,16 @@ models:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date_
- - zip_code_id
+ - zcta_id
- flow_direction
- flow_type
columns:
- - name: zip_code_id
+ - name: zcta_id
data_tests:
- not_null
- relationships:
- to: ref('zip_codes')
- field: zip_code_id
+ to: ref('zctas')
+ field: zcta_id
- name: commercial_permits
description: '{{ doc("commercial_permits") }}'
diff --git a/dbt/models/staging/schema.yml b/dbt/models/staging/schema.yml
index 5328c7d1..dccd58b5 100644
--- a/dbt/models/staging/schema.yml
+++ b/dbt/models/staging/schema.yml
@@ -1,14 +1,14 @@
models:
- - name: stg_zip_codes_2010
+ - name: stg_zctas_2010
columns:
- - name: zip_code
+ - name: zcta
data_tests:
- not_null
- unique
- - name: stg_zip_codes_2020
+ - name: stg_zctas_2020
columns:
- - name: zip_code
+ - name: zcta
data_tests:
- not_null
- unique
diff --git a/dbt/models/staging/stg_parcels_to_zip_codes.sql b/dbt/models/staging/stg_parcels_to_zctas.sql
similarity index 57%
rename from dbt/models/staging/stg_parcels_to_zip_codes.sql
rename to dbt/models/staging/stg_parcels_to_zctas.sql
index 15b643c7..680e304e 100644
--- a/dbt/models/staging/stg_parcels_to_zip_codes.sql
+++ b/dbt/models/staging/stg_parcels_to_zctas.sql
@@ -6,16 +6,16 @@ parcels as (
, geom
from {{ ref("stg_parcels") }}
),
-zip_codes as (
+zctas as (
select
- zip_code_id as id
+ zcta_id as id
, valid
, geom
- from {{ ref("zip_codes") }}
+ from {{ ref("zctas") }}
)
select
child_id as parcel_id
- , parent_id as zip_code_id
+ , parent_id as zcta_id
, valid
, type_
-from {{ tag_regions("parcels", "zip_codes") }}
+from {{ tag_regions("parcels", "zctas") }}
diff --git a/dbt/models/staging/stg_usps_migration_unpivot.sql b/dbt/models/staging/stg_usps_migration_unpivot.sql
new file mode 100644
index 00000000..d8ba1c49
--- /dev/null
+++ b/dbt/models/staging/stg_usps_migration_unpivot.sql
@@ -0,0 +1,29 @@
+{% set usps_migration_flow_types = ['business', 'family', 'individual', 'perm', 'temp'] %}
+{% set usps_migration_flow_directions = ['from', 'to'] %}
+
+with
+process_date as (
+ select to_date(yyyy_mm, 'YYYYMM') as date_, *
+ from {{ ref('stg_usps_migration_union') }}
+)
+{% for flow_direction in usps_migration_flow_directions %}
+ select
+ date_
+ , zip_code
+ , '{{ flow_direction }}' as flow_direction
+ , 'total' as flow_type
+ , total_{{ flow_direction }}_zip::int as flow_value
+ from process_date
+ union all
+ {% for flow_type in usps_migration_flow_types %}
+ select
+ date_
+ , zip_code
+ , '{{ flow_direction }}' as flow_direction
+ , '{{ flow_type }}' as flow_type
+ , total_{{ flow_direction }}_zip_{{ flow_type }}::int as flow_value
+ from process_date
+ {% if not loop.last %} union all {% endif %}
+ {% endfor %}
+{% if not loop.last %} union all {% endif %}
+{% endfor %}
diff --git a/dbt/models/staging/stg_zctas_2010.sql b/dbt/models/staging/stg_zctas_2010.sql
new file mode 100644
index 00000000..51921be6
--- /dev/null
+++ b/dbt/models/staging/stg_zctas_2010.sql
@@ -0,0 +1,4 @@
+select
+ zcta5ce10 as zcta,
+ st_transform(geom, {{ var("srid") }}) as geom
+from {{ source('minneapolis', 'zip_codes_tl_2020_us_zcta510') }}
diff --git a/dbt/models/staging/stg_zip_codes_2020.sql b/dbt/models/staging/stg_zctas_2020.sql
similarity index 82%
rename from dbt/models/staging/stg_zip_codes_2020.sql
rename to dbt/models/staging/stg_zctas_2020.sql
index 9a9a77b0..21c131d1 100644
--- a/dbt/models/staging/stg_zip_codes_2020.sql
+++ b/dbt/models/staging/stg_zctas_2020.sql
@@ -1,4 +1,4 @@
select
- zcta5ce20 as zip_code,
+ zcta5ce20 as zcta,
st_transform(geom, {{ var("srid") }}) as geom
from {{ source('minneapolis', 'zip_codes_tl_2020_us_zcta520') }}
diff --git a/dbt/models/staging/stg_zip_codes_2010.sql b/dbt/models/staging/stg_zip_codes_2010.sql
deleted file mode 100644
index e6f2c5c5..00000000
--- a/dbt/models/staging/stg_zip_codes_2010.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-select
- zcta5ce10 as zip_code,
- st_transform(geom, {{ var("srid") }}) as geom
-from
- {{ source('minneapolis', 'zip_codes_tl_2020_us_zcta510') }}
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index 0bb0ef93..ed028cdc 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -11,37 +11,15 @@
{% set usps_migration_flow_directions = ['from', 'to'] %}
with
-zip_codes as (select * from {{ ref('zip_codes') }})
-, process_date as (
- select to_date(yyyy_mm, 'YYYYMM') as date_, *
- from {{ ref('stg_usps_migration_union') }}
-)
-, add_zip_id as (
- select zip_code_id, process_date.*
- from
- process_date
- inner join zip_codes
- on zip_codes.zip_code = replace(process_date.zip_code, '=', '')
- and process_date.date_ <@ zip_codes.valid
-)
-{% for flow_direction in usps_migration_flow_directions %}
- select
- date_
- , zip_code_id
- , '{{ flow_direction }}' as flow_direction
- , 'total' as flow_type
- , total_{{ flow_direction }}_zip::int as flow_value
- from add_zip_id
- union all
- {% for flow_type in usps_migration_flow_types %}
- select
- date_
- , zip_code_id
- , '{{ flow_direction }}' as flow_direction
- , '{{ flow_type }}' as flow_type
- , total_{{ flow_direction }}_zip_{{ flow_type }}::int as flow_value
- from add_zip_id
- {% if not loop.last %} union all {% endif %}
- {% endfor %}
-{% if not loop.last %} union all {% endif %}
-{% endfor %}
+usps_migration as (select * from {{ ref('stg_usps_migration_union') }}),
+zctas as (select * from {{ ref('zctas') }}),
+zip_codes_to_zctas as (select * from {{ ref('zip_codes_to_zctas') }})
+select
+ usps_migration.*,
+ zctas.zcta_id
+from
+ usps_migration
+ left join zip_codes_to_zctas using zip_code
+ left join zctas
+ on zip_codes_to_zctas.zcta = zctas.zcta and
+ and usps_migration.date_ <@ zctas.valid
diff --git a/dbt/models/zctas.sql b/dbt/models/zctas.sql
new file mode 100644
index 00000000..62212a9b
--- /dev/null
+++ b/dbt/models/zctas.sql
@@ -0,0 +1,28 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['zcta_id'], 'unique': true},
+ {'columns': ['valid', 'geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
+with
+zctas as (
+select
+ zcta,
+ '[2020-01-01,)'::daterange as valid,
+ geom
+from {{ ref('stg_zctas_2020') }}
+union all
+select
+ zcta,
+ '[,2020-01-01)'::daterange as valid,
+ geom
+from {{ ref('stg_zctas_2010') }}
+)
+select
+ {{ dbt_utils.generate_surrogate_key(['zcta', 'valid']) }} as zcta_id,
+ zctas.*
+from zctas
diff --git a/dbt/models/zip_codes.sql b/dbt/models/zip_codes.sql
deleted file mode 100644
index 078f14ef..00000000
--- a/dbt/models/zip_codes.sql
+++ /dev/null
@@ -1,28 +0,0 @@
-{{
- config(
- materialized='table',
- indexes = [
- {'columns': ['zip_code_id'], 'unique': true},
- {'columns': ['valid', 'geom'], 'type': 'gist'}
- ]
- )
-}}
-
-with
-zip_codes as (
-select
- zip_code,
- '[2020-01-01,)'::daterange as valid,
- geom
-from {{ ref('stg_zip_codes_2020') }}
-union all
-select
- zip_code,
- '[,2020-01-01)'::daterange as valid,
- geom
-from {{ ref('stg_zip_codes_2010') }}
-)
-select
- {{ dbt_utils.generate_surrogate_key(['zip_code', 'valid']) }} as zip_code_id,
- zip_codes.*
-from zip_codes
diff --git a/dbt/models/zip_codes_to_zctas.sql b/dbt/models/zip_codes_to_zctas.sql
new file mode 100644
index 00000000..84bffa64
--- /dev/null
+++ b/dbt/models/zip_codes_to_zctas.sql
@@ -0,0 +1,2 @@
+select zip_code, zcta
+from {{ source('minneapolis', 'zip_codes_zcta_xref') }}
From 6639fa0dc117ee970c24e9387c03d642f9b4707e Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 14:28:57 -0400
Subject: [PATCH 122/142] add new api code
---
api/main.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
create mode 100644 api/main.py
diff --git a/api/main.py b/api/main.py
new file mode 100644
index 00000000..ca217e29
--- /dev/null
+++ b/api/main.py
@@ -0,0 +1,78 @@
+import os
+
+from fastapi import FastAPI, Depends
+import psycopg2
+
+# from cities.deployment.tracts_minneapolis.predict import TractsModelPredictor
+
+USERNAME = os.getenv("USERNAME")
+PASSWORD = os.getenv("PASSWORD")
+HOST = os.getenv("HOST")
+DATABASE = os.getenv("DATABASE")
+
+app = FastAPI()
+
+
+def get_db() -> psycopg2.extensions.connection:
+    db = psycopg2.connect(  # settings are read from env vars at import time
+        host=HOST, database=DATABASE, user=USERNAME, password=PASSWORD
+    )
+    try:
+        yield db  # generator dependency: routes receive this via Depends(get_db)
+    finally:
+        db.close()  # runs when the generator is resumed or closed after the yield
+
+
+# def get_predictor(
+# db: psycopg2.extensions.connection = Depends(get_db),
+# ) -> TractsModelPredictor:
+# return TractsModelPredictor(db)
+
+
+@app.get("/demographics")  # `category` is not in the path, so it is a query parameter
+async def read_demographics(category: str, db=Depends(get_db)):
+    cur = db.cursor()
+    cur.execute("select * from api__demographics where description = %s", (category,))
+    return cur.fetchall()  # every row whose description equals `category`
+
+
+@app.get("/census_tracts")
+async def read_census_tracts(year: int, db=Depends(get_db)):
+    cur = db.cursor()
+    cur.execute(  # the GeoJSON FeatureCollection is assembled in SQL, not in Python
+        """
+        with census_tracts as (
+          select census_tract, geom from api__census_tracts
+          where year_ = %s
+        )
+        select json_build_object('type', 'FeatureCollection', 'features', json_agg(ST_AsGeoJSON(census_tracts.*)::json))
+        from census_tracts
+        """,
+        (year,),
+    )
+    return cur.fetchall()  # one row, one column: the FeatureCollection JSON
+
+
+@app.get("/high_frequency_transit_lines")  # NOTE(review): still runs the census-tracts query - TODO
+async def read_high_frequency_transit_lines(year: int, db=Depends(get_db)):
+    cur = db.cursor()
+    cur.execute(  # the GeoJSON FeatureCollection is assembled in SQL, not in Python
+        """
+        with census_tracts as (
+          select census_tract, geom from api__census_tracts
+          where year_ = %s
+        )
+        select json_build_object('type', 'FeatureCollection', 'features', json_agg(ST_AsGeoJSON(census_tracts.*)::json))
+        from census_tracts
+        """,
+        (year,),
+    )
+    return cur.fetchall()  # one row, one column: the FeatureCollection JSON
+
+
+# @app.get("/predict")
+# async def read_predict(
+# samples=100, predictor: TractsModelPredictor = Depends(get_predictor)
+# ):
+# result = predictor.predict(samples=samples)
+# return result.tolist()
From 7b818b91fcd268c5b24e43a2aee7c0704ae0cf59 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 15:04:01 -0400
Subject: [PATCH 123/142] continue refactoring zip codes to zctas
---
dbt/models/docs.md | 4 +++
dbt/models/fair_market_rents.sql | 25 +++++----------
dbt/models/parcels.sql | 2 +-
dbt/models/parking.sql | 12 ++++---
dbt/models/schema.yml | 1 -
.../stg_fair_market_rents_add_zcta.sql | 18 +++++++++++
dbt/models/staging/stg_parcels.sql | 23 +++++++++++--
dbt/models/staging/stg_parking.sql | 2 +-
.../staging/stg_usps_migration_add_zcta.sql | 13 ++++++++
.../staging/stg_usps_migration_union.sql | 32 +++++++++----------
.../staging/stg_usps_migration_unpivot.sql | 9 ++----
.../tracts_model_int__parcels_filtered.sql | 15 +++++++--
dbt/models/usps_migration.sql | 24 ++++++--------
dbt/models/zip_codes_to_zctas.sql | 10 ++++++
14 files changed, 124 insertions(+), 66 deletions(-)
create mode 100644 dbt/models/staging/stg_fair_market_rents_add_zcta.sql
create mode 100644 dbt/models/staging/stg_usps_migration_add_zcta.sql
diff --git a/dbt/models/docs.md b/dbt/models/docs.md
index 02494f2e..2342912d 100644
--- a/dbt/models/docs.md
+++ b/dbt/models/docs.md
@@ -122,4 +122,8 @@ directions are either `from` (out of) the zip code or `to` (in to) the zip code.
Flow types are one of `business`, `family`, `individual`, `perm` (permanent),
`temp` (temporary), or `total`.
+We map zip codes to ZCTAs and report flows aggregated by ZCTA. Zip codes that
+find no match in our zip code to ZCTA mapping table (`zip_codes_to_zctas`) get
+a null `zcta_id`, and their flows are summed together under that null key.
+
{% enddocs %}
diff --git a/dbt/models/fair_market_rents.sql b/dbt/models/fair_market_rents.sql
index c82afc4b..620c0457 100644
--- a/dbt/models/fair_market_rents.sql
+++ b/dbt/models/fair_market_rents.sql
@@ -2,26 +2,17 @@
config(
materialized='table',
indexes = [
- {'columns': ['zip_code_id', 'year_', 'num_bedrooms']}
+ {'columns': ['zcta_id', 'year_', 'num_bedrooms']}
]
)
}}
with
-stg_fair_market_rents_unpivot as (
- select * from {{ ref('stg_fair_market_rents_unpivot') }}
-),
-zip_codes_to_zctas as (select * from {{ ref('zip_codes_to_zctas') }}),
-zctas as (select * from {{ ref('zctas') }})
+fair_market_rents as (select * from {{ ref('stg_fair_market_rents_add_zcta') }})
select
- stg_fair_market_rents_unpivot.zip_code,
- stg_fair_market_rents_unpivot.year_::smallint,
- stg_fair_market_rents_unpivot.num_bedrooms::smallint,
- stg_fair_market_rents_unpivot.rent::smallint,
- zctas.zcta_id
-from
- stg_fair_market_rents_unpivot
- left join zip_codes_to_zctas using zip_code
- left join zctas
- on zip_codes_to_zctas.zcta = zctas.zcta
- and (stg_fair_market_rents_unpivot.year_ || '-01-01')::date <@ zctas.valid
+ zcta_id,
+ year_::smallint,
+ num_bedrooms::smallint,
+ avg(rent) as rent
+from fair_market_rents
+group by 1,2,3
diff --git a/dbt/models/parcels.sql b/dbt/models/parcels.sql
index 9897974f..3cc0f915 100644
--- a/dbt/models/parcels.sql
+++ b/dbt/models/parcels.sql
@@ -20,6 +20,6 @@ select
, census_bgs.census_tract_id
from
parcels
- left join to_zip_codes using (parcel_id)
+ left join to_zctas using (parcel_id)
left join to_census_bgs using (parcel_id)
left join census_bgs using (census_block_group_id)
diff --git a/dbt/models/parking.sql b/dbt/models/parking.sql
index e49574fe..717db5a2 100644
--- a/dbt/models/parking.sql
+++ b/dbt/models/parking.sql
@@ -11,14 +11,18 @@
with
stg_parking as (select * from {{ ref('stg_parking') }}),
stg_parking_to_parcels as (select * from {{ ref('stg_parking_to_parcels') }}),
+ stg_parking_to_first_parcel as (
+ select parking_id, min(parcel_id) as parcel_id
+ from stg_parking_to_parcels group by 1
+ ),
parcels as (select * from {{ ref('parcels') }})
select
stg_parking.*,
- stg_parking_to_parcels.parcel_id,
+ stg_parking_to_first_parcel.parcel_id,
parcels.census_block_group_id,
parcels.census_tract_id,
- parcels.zip_code_id
+ parcels.zcta_id
from
stg_parking
- left join stg_parking_to_parcels using parking_id
- left join parcels using parcel_id
+ left join stg_parking_to_first_parcel using (parking_id)
+ left join parcels using (parcel_id)
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index 52800c91..d807cb4a 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -177,7 +177,6 @@ models:
columns:
- name: zcta_id
data_tests:
- - not_null
- relationships:
to: ref('zctas')
field: zcta_id
diff --git a/dbt/models/staging/stg_fair_market_rents_add_zcta.sql b/dbt/models/staging/stg_fair_market_rents_add_zcta.sql
new file mode 100644
index 00000000..30bee443
--- /dev/null
+++ b/dbt/models/staging/stg_fair_market_rents_add_zcta.sql
@@ -0,0 +1,18 @@
+with
+stg_fair_market_rents_unpivot as (
+ select * from {{ ref('stg_fair_market_rents_unpivot') }}
+),
+zip_codes_to_zctas as (select * from {{ ref('zip_codes_to_zctas') }}),
+zctas as (select * from {{ ref('zctas') }})
+select
+ stg_fair_market_rents_unpivot.zip_code,
+ stg_fair_market_rents_unpivot.year_::smallint,
+ stg_fair_market_rents_unpivot.num_bedrooms::smallint,
+ stg_fair_market_rents_unpivot.rent::smallint,
+ zctas.zcta_id
+from
+ stg_fair_market_rents_unpivot
+ left join zip_codes_to_zctas using (zip_code)
+ left join zctas
+ on zip_codes_to_zctas.zcta = zctas.zcta
+ and (stg_fair_market_rents_unpivot.year_ || '-01-01')::date <@ zctas.valid
diff --git a/dbt/models/staging/stg_parcels.sql b/dbt/models/staging/stg_parcels.sql
index 9ffc665e..83b9c77a 100644
--- a/dbt/models/staging/stg_parcels.sql
+++ b/dbt/models/staging/stg_parcels.sql
@@ -1,3 +1,13 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['parcel_id'], 'unique': true},
+ {'columns': ['valid', 'geom'], 'type': 'gist'}
+ ]
+ )
+}}
+
{% set years = range(2002, 2024) %}
{% set city = 'MINNEAPOLIS' %}
{% set county_id = '053' %}
@@ -15,7 +25,7 @@ parcels_union as (
nullif(emv_land, 0)::int as emv_land,
nullif(emv_bldg, 0)::int as emv_bldg,
nullif(emv_total, 0)::int as emv_total,
- nullif(year_built, 0)::int as year_built,
+ nullif(year_built, 0)::smallint as year_built,
nullif(sale_date, '1899-12-30'::date) as sale_date,
nullif(sale_value, 0)::int as sale_value,
st_transform(geom, {{ var("srid") }}) as geom
@@ -32,5 +42,14 @@ parcels_distinct as (
from parcels_union
)
select
- {{ dbt_utils.generate_surrogate_key(['ogc_fid', 'valid']) }} as parcel_id, *
+ {{ dbt_utils.generate_surrogate_key(['ogc_fid', 'valid']) }} as parcel_id,
+ pin,
+ valid,
+ emv_land,
+ emv_bldg,
+ emv_total,
+ year_built,
+ sale_date,
+ sale_value,
+ geom
from parcels_distinct
diff --git a/dbt/models/staging/stg_parking.sql b/dbt/models/staging/stg_parking.sql
index ed00a8b1..61667cb0 100644
--- a/dbt/models/staging/stg_parking.sql
+++ b/dbt/models/staging/stg_parking.sql
@@ -10,6 +10,6 @@ select
, "downtown y" = 'Y' as is_downtown
, "housing un"::smallint as num_housing_units
, "car parkin"::smallint as num_car_parking_spaces
- , "bike parki"::smallint as num_bike_parking_spaces
+ , replace("bike parki", ',', '')::smallint as num_bike_parking_spaces
, st_transform(geom, {{ var("srid") }}) as geom
from parking_raw
diff --git a/dbt/models/staging/stg_usps_migration_add_zcta.sql b/dbt/models/staging/stg_usps_migration_add_zcta.sql
new file mode 100644
index 00000000..4097c6b2
--- /dev/null
+++ b/dbt/models/staging/stg_usps_migration_add_zcta.sql
@@ -0,0 +1,13 @@
+with
+usps_migration as (select * from {{ ref('stg_usps_migration_unpivot') }}),
+zctas as (select * from {{ ref('zctas') }}),
+zip_codes_to_zctas as (select * from {{ ref('zip_codes_to_zctas') }})
+select
+ usps_migration.*,
+ zctas.zcta_id
+from
+ usps_migration
+ left join zip_codes_to_zctas using (zip_code)
+ left join zctas
+ on zip_codes_to_zctas.zcta = zctas.zcta
+ and usps_migration.date_ <@ zctas.valid
diff --git a/dbt/models/staging/stg_usps_migration_union.sql b/dbt/models/staging/stg_usps_migration_union.sql
index e1e790e7..4ab16fb4 100644
--- a/dbt/models/staging/stg_usps_migration_union.sql
+++ b/dbt/models/staging/stg_usps_migration_union.sql
@@ -2,22 +2,22 @@
{% for year_ in years %}
select
- "YYYYMM" as yyyy_mm
- , "ZIPCODE" as zip_code
- , "CITY" as city
- , "STATE" as state_
- , "TOTAL_FROM_ZIP" as total_from_zip
- , "TOTAL_BUSINESS" as total_from_zip_business
- , "TOTAL_FAMILY" as total_from_zip_family
- , "TOTAL_INDIVIDUAL" as total_from_zip_individual
- , "TOTAL_PERM" as total_from_zip_perm
- , "TOTAL_TEMP" as total_from_zip_temp
- , "TOTAL_TO_ZIP" as total_to_zip
- , "TOTAL_BUSINESS_dup" as total_to_zip_business
- , "TOTAL_FAMILY_dup" as total_to_zip_family
- , "TOTAL_INDIVIDUAL_dup" as total_to_zip_individual
- , "TOTAL_PERM_dup" as total_to_zip_perm
- , "TOTAL_TEMP_dup" as total_to_zip_temp
+ to_date("YYYYMM", 'YYYYMM') as date_,
+ replace("ZIPCODE", '=', '') as zip_code,
+ "CITY" as city,
+ "STATE" as state_,
+ "TOTAL_FROM_ZIP" as total_from_zip,
+ "TOTAL_BUSINESS" as total_from_zip_business,
+ "TOTAL_FAMILY" as total_from_zip_family,
+ "TOTAL_INDIVIDUAL" as total_from_zip_individual,
+ "TOTAL_PERM" as total_from_zip_perm,
+ "TOTAL_TEMP" as total_from_zip_temp,
+ "TOTAL_TO_ZIP" as total_to_zip,
+ "TOTAL_BUSINESS_dup" as total_to_zip_business,
+ "TOTAL_FAMILY_dup" as total_to_zip_family,
+ "TOTAL_INDIVIDUAL_dup" as total_to_zip_individual,
+ "TOTAL_PERM_dup" as total_to_zip_perm,
+ "TOTAL_TEMP_dup" as total_to_zip_temp
from {{ source('minneapolis', 'usps_y' ~ year_) }}
{% if not loop.last %} union all {% endif %}
{% endfor %}
diff --git a/dbt/models/staging/stg_usps_migration_unpivot.sql b/dbt/models/staging/stg_usps_migration_unpivot.sql
index d8ba1c49..86dee67e 100644
--- a/dbt/models/staging/stg_usps_migration_unpivot.sql
+++ b/dbt/models/staging/stg_usps_migration_unpivot.sql
@@ -2,10 +2,7 @@
{% set usps_migration_flow_directions = ['from', 'to'] %}
with
-process_date as (
- select to_date(yyyy_mm, 'YYYYMM') as date_, *
- from {{ ref('stg_usps_migration_union') }}
-)
+usps_migration as (select * from {{ ref('stg_usps_migration_union') }})
{% for flow_direction in usps_migration_flow_directions %}
select
date_
@@ -13,7 +10,7 @@ process_date as (
, '{{ flow_direction }}' as flow_direction
, 'total' as flow_type
, total_{{ flow_direction }}_zip::int as flow_value
- from process_date
+ from usps_migration
union all
{% for flow_type in usps_migration_flow_types %}
select
@@ -22,7 +19,7 @@ process_date as (
, '{{ flow_direction }}' as flow_direction
, '{{ flow_type }}' as flow_type
, total_{{ flow_direction }}_zip_{{ flow_type }}::int as flow_value
- from process_date
+ from usps_migration
{% if not loop.last %} union all {% endif %}
{% endfor %}
{% if not loop.last %} union all {% endif %}
diff --git a/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql b/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
index 80055cc2..42b97bef 100644
--- a/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
+++ b/dbt/models/tracts_model/intermediate/tracts_model_int__parcels_filtered.sql
@@ -8,7 +8,6 @@
with
census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered') }}),
parcels as (select * from {{ ref('parcels') }}),
-
parcels_tag as (select parcel_id as id, valid, geom from parcels),
census_tracts_tag as (select census_tract_id as id, valid, geom from census_tracts),
parcels_to_census_tracts as (
@@ -17,6 +16,16 @@ parcels_to_census_tracts as (
parent_id as census_tract_id
from {{ tag_regions("parcels_tag", "census_tracts_tag") }}
)
-
-select parcels.*, parcels_to_census_tracts.census_tract_id
+select
+ parcels.parcel_id,
+ parcels.pin,
+ parcels.valid,
+ parcels.emv_land,
+ parcels.emv_bldg,
+ parcels.emv_total,
+ parcels.year_built,
+ parcels.sale_date,
+ parcels.sale_value,
+ parcels.geom,
+ parcels_to_census_tracts.census_tract_id
from parcels join parcels_to_census_tracts using (parcel_id)
diff --git a/dbt/models/usps_migration.sql b/dbt/models/usps_migration.sql
index ed028cdc..d7b1fc73 100644
--- a/dbt/models/usps_migration.sql
+++ b/dbt/models/usps_migration.sql
@@ -2,24 +2,18 @@
config(
materialized='table',
indexes = [
- {'columns': ['date_', 'zip_code_id', 'flow_direction', 'flow_type'], 'unique': true},
+ {'columns': ['date_', 'zcta_id', 'flow_direction', 'flow_type'], 'unique': true},
]
)
}}
-{% set usps_migration_flow_types = ['business', 'family', 'individual', 'perm', 'temp'] %}
-{% set usps_migration_flow_directions = ['from', 'to'] %}
-
with
-usps_migration as (select * from {{ ref('stg_usps_migration_union') }}),
-zctas as (select * from {{ ref('zctas') }}),
-zip_codes_to_zctas as (select * from {{ ref('zip_codes_to_zctas') }})
+usps_migration as (select * from {{ ref('stg_usps_migration_add_zcta') }})
select
- usps_migration.*,
- zctas.zcta_id
-from
- usps_migration
- left join zip_codes_to_zctas using zip_code
- left join zctas
- on zip_codes_to_zctas.zcta = zctas.zcta and
- and usps_migration.date_ <@ zctas.valid
+ date_,
+ flow_direction,
+ flow_type,
+ zcta_id,
+ sum(flow_value) as flow_value
+from usps_migration
+group by 1,2,3,4
diff --git a/dbt/models/zip_codes_to_zctas.sql b/dbt/models/zip_codes_to_zctas.sql
index 84bffa64..9ac3a70f 100644
--- a/dbt/models/zip_codes_to_zctas.sql
+++ b/dbt/models/zip_codes_to_zctas.sql
@@ -1,2 +1,12 @@
+{{
+ config(
+ materialized='table',
+ indexes = [
+ {'columns': ['zip_code']},
+ {'columns': ['zcta']}
+ ]
+ )
+}}
+
select zip_code, zcta
from {{ source('minneapolis', 'zip_codes_zcta_xref') }}
From f3c2fc18920610101a9857d93c46a61461344039 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 15:04:24 -0400
Subject: [PATCH 124/142] refactor load_server to no longer require the
password as an env var
---
load_data_server/load_server.py | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/load_data_server/load_server.py b/load_data_server/load_server.py
index 8c3089e8..a68fad0d 100644
--- a/load_data_server/load_server.py
+++ b/load_data_server/load_server.py
@@ -32,7 +32,6 @@
HOST = os.getenv("HOST")
DATABASE = os.getenv("DATABASE")
USERNAME = os.getenv("USERNAME")
-PASSWORD = os.getenv("PASSWORD")
OGR2OGR_OPTS = [
"--config",
@@ -47,9 +46,7 @@
"-nlt",
"PROMOTE_TO_MULTI",
]
-DB_OPTS = [
- f"PG:dbname={DATABASE} host={HOST} user={USERNAME} password={PASSWORD} port=5432"
-]
+DB_OPTS = [f"PG:dbname={DATABASE} host={HOST} user={USERNAME} port=5432"]
MAX_RETRIES = 3
RETRY_DELAY = 5 # seconds
@@ -59,9 +56,7 @@ def get_db_connection():
"""Create a database connection with retries."""
for attempt in range(MAX_RETRIES):
try:
- conn = psycopg2.connect(
- host=HOST, database=DATABASE, user=USERNAME, password=PASSWORD
- )
+ conn = psycopg2.connect(f"postgresql://{USERNAME}@{HOST}/{DATABASE}")
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
return conn
except psycopg2.OperationalError as e:
From 76fa4a7bb21f35a85716ba2e01f3436600c375aa Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 15:31:53 -0400
Subject: [PATCH 125/142] handle one to many mapping problem
---
dbt/models/commercial_permits.sql | 13 +++++++++----
dbt/models/residential_permits.sql | 12 ++++++++----
2 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/dbt/models/commercial_permits.sql b/dbt/models/commercial_permits.sql
index 58961e23..755de463 100644
--- a/dbt/models/commercial_permits.sql
+++ b/dbt/models/commercial_permits.sql
@@ -11,14 +11,19 @@
with
stg_commercial_permits as (select * from {{ ref('stg_commercial_permits') }}),
stg_commercial_permits_to_parcels as (select * from {{ ref('stg_commercial_permits_to_parcels') }}),
+permits_to_first_parcel as (
+ select commercial_permit_id, min(parcel_id) as parcel_id
+ from stg_commercial_permits_to_parcels group by 1
+),
+
parcels as (select * from {{ ref('parcels') }})
select
stg_commercial_permits.*,
- stg_commercial_permits_to_parcels.parcel_id,
+ permits_to_first_parcel.parcel_id,
parcels.census_block_group_id,
parcels.census_tract_id,
- parcels.zip_code_id
+ parcels.zcta_id
from
stg_commercial_permits
- left join stg_commercial_permits_to_parcels using commercial_permit_id
- left join parcels using parcel_id
+ left join permits_to_first_parcel using (commercial_permit_id)
+ left join parcels using (parcel_id)
diff --git a/dbt/models/residential_permits.sql b/dbt/models/residential_permits.sql
index bcba6ab4..6613e374 100644
--- a/dbt/models/residential_permits.sql
+++ b/dbt/models/residential_permits.sql
@@ -11,14 +11,18 @@
with
stg_residential_permits as (select * from {{ ref('stg_residential_permits') }}),
stg_residential_permits_to_parcels as (select * from {{ ref('stg_residential_permits_to_parcels') }}),
+permits_to_first_parcel as (
+ select residential_permit_id, min(parcel_id) as parcel_id
+ from stg_residential_permits_to_parcels group by 1
+),
parcels as (select * from {{ ref('parcels') }})
select
stg_residential_permits.*,
- stg_residential_permits_to_parcels.parcel_id,
+ permits_to_first_parcel.parcel_id,
parcels.census_block_group_id,
parcels.census_tract_id,
- parcels.zip_code_id
+ parcels.zcta_id
from
stg_residential_permits
- left join stg_residential_permits_to_parcels using residential_permit_id
- left join parcels using parcel_id
+ left join permits_to_first_parcel using (residential_permit_id)
+ left join parcels using (parcel_id)
From 85d8325da2a3de4e0043a78a81e74503fff94e45 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 15:32:06 -0400
Subject: [PATCH 126/142] add more documentation
---
dbt/models/docs.md | 55 +++++++++++++++++++++++++++++++++++++++++++
dbt/models/schema.yml | 17 +++++++++++++
2 files changed, 72 insertions(+)
diff --git a/dbt/models/docs.md b/dbt/models/docs.md
index 2342912d..fd74fa38 100644
--- a/dbt/models/docs.md
+++ b/dbt/models/docs.md
@@ -5,6 +5,10 @@ Contains commercial building permit applications.
Notes:
- Permits are filtered to only include those in Minneapolis.
- `square_feet` is treated as missing if it is 0.
+ - When mapping permits to parcels, if more than one parcel contains the permit
+ location, the parcel with the smallest `parcel_id` is kept (an arbitrary but
+ deterministic choice). Multiple matches occur because the same parcel extent
+ can appear several times with different PINs, e.g. for condominium units.
{% enddocs %}
@@ -16,6 +20,16 @@ Notes:
- Permits are filtered to only include those in Minneapolis.
- `square_feet` is treated as missing if it is 0.
- `permit_value` is treated as missing if it is 0.
+ - If more than one parcel contains the permit location, the parcel with the
+ smallest `parcel_id` is kept. See `commercial_permits`.
+
+{% enddocs %}
+
+{% docs parking %}
+
+Notes:
+ - If more than one parcel contains the permit location, a parcel is selected
+ arbitrarily. See `commercial_permits`.
{% enddocs %}
@@ -127,3 +141,44 @@ some zip codes do not find a match in our zip to ZCTA mapping table, so there is
some missingness in this data.
{% enddocs %}
+
+{% docs demographics %}
+
+Contains demographic data at census tract granularity.
+Combines ACS data and segregation indexes in one table.
+
+Notes:
+- Fills in missing demographic data from 2011 and 2012 with data from 2013.
+- Replaces pandemic-affected data from 2020 with data from 2019.
+
+{% enddocs %}
+
+{% docs neighborhoods %}
+
+Neighborhood boundaries in the city of Minneapolis.
+
+{% enddocs %}
+
+{% docs wards %}
+
+Ward boundaries in the city of Minneapolis.
+
+{% enddocs %}
+
+{% docs university %}
+
+Boundary of the University of Minnesota.
+
+{% enddocs %}
+
+{% docs downtown %}
+
+Boundary of the downtown of Minneapolis.
+
+{% enddocs %}
+
+{% docs city_boundary %}
+
+Boundary of the city of Minneapolis.
+
+{% enddocs %}
diff --git a/dbt/models/schema.yml b/dbt/models/schema.yml
index d807cb4a..e3948f2d 100644
--- a/dbt/models/schema.yml
+++ b/dbt/models/schema.yml
@@ -123,6 +123,21 @@ models:
- name: high_frequency_transit_lines
description: '{{ doc("high_frequency_transit_lines") }}'
+ - name: demographics
+ description: '{{ doc("demographics") }}'
+
+ - name: university
+ description: '{{ doc("university") }}'
+
+ - name: downtown
+ description: '{{ doc("downtown") }}'
+
+ - name: city_boundary
+ description: '{{ doc("city_boundary") }}'
+
+ - name: parking
+ description: '{{ doc("parking") }}'
+
- name: segregation_indexes
description: '{{ doc("segregation_indexes") }}'
data_tests:
@@ -198,6 +213,7 @@ models:
- unique
- name: neighborhoods
+ description: '{{ doc("neighborhoods") }}'
columns:
- name: neighborhood_id
data_tests:
@@ -205,6 +221,7 @@ models:
- unique
- name: wards
+ description: '{{ doc("wards") }}'
columns:
- name: ward_id
data_tests:
From e52b63785c0c6bb1a587ef20472949e32839a8fd Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 16:12:23 -0400
Subject: [PATCH 127/142] materialize for performance
---
dbt/models/staging/stg_usps_migration_add_zcta.sql | 6 ++++++
dbt/models/staging/stg_usps_migration_unpivot.sql | 6 ++++++
2 files changed, 12 insertions(+)
diff --git a/dbt/models/staging/stg_usps_migration_add_zcta.sql b/dbt/models/staging/stg_usps_migration_add_zcta.sql
index 4097c6b2..2b45f38e 100644
--- a/dbt/models/staging/stg_usps_migration_add_zcta.sql
+++ b/dbt/models/staging/stg_usps_migration_add_zcta.sql
@@ -1,3 +1,9 @@
+{{
+ config(
+ materialized='table'
+ )
+}}
+
with
usps_migration as (select * from {{ ref('stg_usps_migration_unpivot') }}),
zctas as (select * from {{ ref('zctas') }}),
diff --git a/dbt/models/staging/stg_usps_migration_unpivot.sql b/dbt/models/staging/stg_usps_migration_unpivot.sql
index 86dee67e..5f358c4b 100644
--- a/dbt/models/staging/stg_usps_migration_unpivot.sql
+++ b/dbt/models/staging/stg_usps_migration_unpivot.sql
@@ -1,3 +1,9 @@
+{{
+ config(
+ materialized='table'
+ )
+}}
+
{% set usps_migration_flow_types = ['business', 'family', 'individual', 'perm', 'temp'] %}
{% set usps_migration_flow_directions = ['from', 'to'] %}
From 744d8b6160e8a8bcbb3667dab8df12d925343dd5 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Wed, 4 Sep 2024 16:14:37 -0400
Subject: [PATCH 128/142] add .env
---
.env | 6 ++++++
1 file changed, 6 insertions(+)
create mode 100644 .env
diff --git a/.env b/.env
new file mode 100644
index 00000000..f143c46d
--- /dev/null
+++ b/.env
@@ -0,0 +1,6 @@
+GOOGLE_CLOUD_PROJECT=cities-429602
+GOOGLE_CLOUD_BUCKET=minneapolis-basis
+SCHEMA=minneapolis
+HOST=34.123.100.76
+DATABASE=cities
+USERNAME=postgres
From 266d24fea609e0058453d35c84c82e3f6fa2ee34 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 5 Sep 2024 13:39:00 -0400
Subject: [PATCH 129/142] add mean and median parcel areas per tract
---
.../tracts_model/intermediate/census_tracts_parcel_area.sql | 4 +++-
dbt/models/tracts_model/tracts_model__census_tracts.sql | 4 +++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql b/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
index cb6760fe..1f4216e7 100644
--- a/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
+++ b/dbt/models/tracts_model/intermediate/census_tracts_parcel_area.sql
@@ -3,7 +3,9 @@ census_tracts as (select * from {{ ref('tracts_model_int__census_tracts_filtered
parcels as (select * from {{ ref('tracts_model_int__parcels_filtered') }})
select
census_tract_id,
- sum(st_area(parcels.geom)) as parcel_sqm
+ sum(st_area(parcels.geom)) as parcel_sqm,
+ avg(st_area(parcels.geom)) as parcel_mean_sqm,
+ {{ median('st_area(parcels.geom)') }} as parcel_median_sqm
from
census_tracts left join parcels using (census_tract_id)
group by 1
diff --git a/dbt/models/tracts_model/tracts_model__census_tracts.sql b/dbt/models/tracts_model/tracts_model__census_tracts.sql
index 8d584472..aa6ba710 100644
--- a/dbt/models/tracts_model/tracts_model__census_tracts.sql
+++ b/dbt/models/tracts_model/tracts_model__census_tracts.sql
@@ -44,7 +44,9 @@ select
, property_values.median_value
, distance_to_transit.median_distance_to_transit as median_distance
, distance_to_transit.mean_distance_to_transit as mean_distance
- , parcel_area.parcel_sqm
+ , parcel_area.parcel_sqm::double precision
+ , parcel_area.parcel_mean_sqm::double precision
+ , parcel_area.parcel_median_sqm::double precision
, parking_limits.mean_limit::double precision
, white_frac.value_ as white
, income.value_ as income
From f4a5af4d82917c4734199147737e24f4430c46a6 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 5 Sep 2024 13:41:31 -0400
Subject: [PATCH 130/142] forgot to standardize new columns
---
dbt/models/tracts_model/tracts_model__census_tracts.sql | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/dbt/models/tracts_model/tracts_model__census_tracts.sql b/dbt/models/tracts_model/tracts_model__census_tracts.sql
index aa6ba710..0e7e1ea4 100644
--- a/dbt/models/tracts_model/tracts_model__census_tracts.sql
+++ b/dbt/models/tracts_model/tracts_model__census_tracts.sql
@@ -68,7 +68,8 @@ select
, {{ standardize_cat(['year']) }}
, {{ standardize_cont(['housing_units', 'total_value', 'median_value',
'median_distance', 'mean_distance', 'parcel_sqm',
- 'white', 'income', 'mean_limit', 'segregation' ]) }}
+ 'parcel_mean_sqm', 'parcel_median_sqm', 'white',
+ 'income', 'mean_limit', 'segregation' ]) }}
from
raw_data
)
From 1babc51484e3b65caddd9e4a264fcfdef4d6946a Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Thu, 5 Sep 2024 15:52:00 -0400
Subject: [PATCH 131/142] add temp high frequency transit lines endpoint
---
api/schema.sql | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 44 insertions(+), 5 deletions(-)
diff --git a/api/schema.sql b/api/schema.sql
index 42fd4e4c..2285c2b7 100644
--- a/api/schema.sql
+++ b/api/schema.sql
@@ -1,3 +1,4 @@
+begin;
drop schema if exists api cascade;
create schema api;
@@ -10,9 +11,37 @@ create view api.census_tracts as (
select * from api__census_tracts
);
-create view api.high_frequency_transit_lines as (
- select * from api__high_frequency_transit_lines
-);
+create function api.high_frequency_transit_lines() returns setof dev.api__high_frequency_transit_lines as $$
+ select * from dev.api__high_frequency_transit_lines
+$$ language sql;
+
+create function api.high_frequency_transit_lines(
+ blue_zone_radius double precision,
+ yellow_zone_line_radius double precision,
+ yellow_zone_stop_radius double precision
+) returns table (
+ valid daterange,
+ geom geometry(LineString, 4269),
+ blue_zone_geom geometry(LineString, 4269),
+ yellow_zone_geom geometry(Geometry, 4269)
+) as $$
+ with
+ lines as (select * from dev.stg_high_frequency_transit_lines_union),
+ stops as (select * from dev.high_frequency_transit_stops),
+ lines_and_stops as (
+ select
+ lines.valid * stops.valid as valid,
+ lines.geom as line_geom,
+ stops.geom as stop_geom
+ from lines inner join stops on lines.valid && stops.valid
+ )
+ select
+ valid,
+ st_transform(line_geom, 4269) as geom,
+ st_transform(st_buffer(line_geom, blue_zone_radius), 4269) as blue_zone_geom,
+ st_transform(st_union(st_buffer(line_geom, yellow_zone_line_radius), st_buffer(stop_geom, yellow_zone_stop_radius)), 4269) as yellow_zone_geom
+ from lines_and_stops
+$$ language sql;
do $$
begin
@@ -21,8 +50,18 @@ exception when duplicate_object then raise notice '%, skipping', sqlerrm using e
end
$$;
-grant usage on schema public to web_anon;
+grant all on schema public to web_anon;
+grant all on schema dev to web_anon;
grant select on table public.spatial_ref_sys TO web_anon;
grant usage on schema api to web_anon;
-grant select on all tables in schema api to web_anon;
+grant all on all tables in schema api to web_anon;
+grant all on all functions in schema api to web_anon;
+grant all on schema api to web_anon;
+GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA dev TO web_anon;
+GRANT ALL PRIVILEGES ON ALL functions IN SCHEMA dev TO web_anon;
+GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA api TO web_anon;
+GRANT ALL PRIVILEGES ON ALL functions IN SCHEMA api TO web_anon;
+GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO web_anon;
+GRANT ALL PRIVILEGES ON ALL functions IN SCHEMA public TO web_anon;
grant web_anon to postgres;
+commit;
From 801b7a76a4ebb3d505756d1288876fe73728b795 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 6 Sep 2024 12:41:29 -0400
Subject: [PATCH 132/142] add more documentation to tracts model tables
---
dbt/models/tracts_model/docs.md | 92 ++++++++++++++++++++++++++++++
dbt/models/tracts_model/schema.yml | 51 +++++++++++++++++
2 files changed, 143 insertions(+)
create mode 100644 dbt/models/tracts_model/docs.md
create mode 100644 dbt/models/tracts_model/schema.yml
diff --git a/dbt/models/tracts_model/docs.md b/dbt/models/tracts_model/docs.md
new file mode 100644
index 00000000..a4a3371e
--- /dev/null
+++ b/dbt/models/tracts_model/docs.md
@@ -0,0 +1,92 @@
+{% docs tracts_model_int__census_tracts_filtered %}
+
+Intermediate table that selects census tracts of interest. Considers only tracts
+in the city boundary (tracts must intersect boundary and have at least 90% of
+area overlapping) and only for years 2011 to 2020.
+
+Notes:
+- Census tracts for 2020 are replaced with tracts for 2019. This requires
+ retagging parcels and other spatial entities, because the `census_tract_id`
+ changes with the replacement.
+
+{% enddocs %}
+
+{% docs tracts_model_int__parcels_filtered %}
+
+Retag parcels to account for tract replacement. This also has the effect of
+filtering parcels to the considered tracts.
+
+{% enddocs %}
+
+{% docs census_tracts_distance_to_transit %}
+
+Aggregate `parcels_distance_to_transit` by tract.
+
+{% enddocs %}
+
+{% docs census_tracts_housing_units %}
+
+Aggregate number of units built by tract. Unit data is drawn from
+`residential_permits`.
+
+{% enddocs %}
+
+{% docs census_tracts_parcel_area %}
+
+Total, mean, and median parcel area by tract. Area is computed from the parcel
+geometry, not from the area included in the parcel dataset.
+
+{% enddocs %}
+
+{% docs census_tracts_parking_limits %}
+
+Parking limits aggregated by tract.
+
+{% enddocs %}
+
+{% docs parcels_distance_to_transit %}
+
+Distance from a parcel to the nearest transit (line or stop). This is the
+smallest distance from the parcel geometry to the line geometry, not from the
+parcel centroid.
+
+{% enddocs %}
+
+{% docs parcels_parking_limits %}
+
+Parking limits by parcel. The parking limit is a function of the distance from
+the parcel to the nearest transit line/transit stop.
+
+Notes:
+- Parcels in all years that intersect (any level of intersection) the downtown
+ area have the limit eliminated.
+- Parcels before 2015 have the full limit.
+- Parcels after 2015 and in the blue zone have the limit eliminated.
+- Parcels after 2015 and in the yellow zone have the limit reduced.
+
+{% enddocs %}
+
+{% docs census_tracts_property_values %}
+
+Total and median property value aggregated by tract. Uses total estimated market
+value from the parcel dataset.
+
+{% enddocs %}
+
+{% docs tracts_model__census_tracts %}
+
+Wide table that joins various census tract level aggregates.
+
+Notes:
+- Continuous columns are standardized by default. Categorical columns are
+ remapped to [0, |D|), where D is the domain. The original value of a column
+ `c` is called `c_original`.
+- Demographic variables are drawn from ACS tract level data.
+
+{% enddocs %}
+
+{% docs tracts_model__parcels %}
+
+Parcels filtered by the considered census tracts, with additional data.
+
+{% enddocs %}
diff --git a/dbt/models/tracts_model/schema.yml b/dbt/models/tracts_model/schema.yml
new file mode 100644
index 00000000..250d415e
--- /dev/null
+++ b/dbt/models/tracts_model/schema.yml
@@ -0,0 +1,51 @@
+models:
+ - name: tracts_model_int__census_tracts_filtered
+ description: '{{ doc("tracts_model_int__census_tracts_filtered") }}'
+
+ - name: tracts_model_int__parcels_filtered
+ description: '{{ doc("tracts_model_int__parcels_filtered") }}'
+
+ - name: tracts_model__census_tracts
+ description: '{{ doc("tracts_model__census_tracts") }}'
+ columns:
+ - name: segregation
+ description: Segregation with respect to the annual city distribution.
+ - name: white
+ description: The proportion of white people in the tract, not the absolute number.
+ - name: income
+ description: Median household income in the tract.
+ - name: median_distance
+ description: Median parcel distance to transit in meters.
+ - name: mean_distance
+ description: Mean parcel distance to transit in meters.
+
+ - name: tracts_model__parcels
+ description: '{{ doc("tracts_model__parcels") }}'
+ columns:
+ - name: distance_to_transit
+ description: Minimum distance to transit (lines or stops) in meters.
+ - name: limit_con
+ description: Numeric representation of parking limit (1 for full, 0 for eliminated, 0.5 for reduced).
+ - name: downtown_yn
+ description: Whether the parcel intersects the downtown area.
+
+ - name: census_tracts_distance_to_transit
+ description: '{{ doc("census_tracts_distance_to_transit") }}'
+
+ - name: census_tracts_housing_units
+ description: '{{ doc("census_tracts_housing_units") }}'
+
+ - name: census_tracts_parcel_area
+ description: '{{ doc("census_tracts_parcel_area") }}'
+
+ - name: census_tracts_parking_limits
+ description: '{{ doc("census_tracts_parking_limits") }}'
+
+ - name: parcels_distance_to_transit
+ description: '{{ doc("parcels_distance_to_transit") }}'
+
+ - name: parcels_parking_limits
+ description: '{{ doc("parcels_parking_limits") }}'
+
+ - name: census_tracts_property_values
+ description: '{{ doc("census_tracts_property_values") }}'
From 85eac69d160644e8fbed57282940058ce920daab Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 6 Sep 2024 12:51:19 -0400
Subject: [PATCH 133/142] remove spurious points from transit lines data
---
.../stg_high_frequency_transit_lines_union.sql | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/dbt/models/staging/stg_high_frequency_transit_lines_union.sql b/dbt/models/staging/stg_high_frequency_transit_lines_union.sql
index 8b361587..56d4b680 100644
--- a/dbt/models/staging/stg_high_frequency_transit_lines_union.sql
+++ b/dbt/models/staging/stg_high_frequency_transit_lines_union.sql
@@ -1,21 +1,24 @@
-with lines_2015 as (
+with
+lines_2015 as (
select
st_union(st_transform(geom, {{ var("srid") }})) as geom
from
{{ source('minneapolis', 'high_frequency_transit_2015_freq_lines') }}
-)
-, lines_2016 as (
+ where st_geometrytype(geom) = 'ST_LineString'
+),
+lines_2016 as (
select
st_union(st_transform(geom, {{ var("srid") }})) as geom
from
{{ source('minneapolis', 'high_frequency_transit_2016_freq_lines') }}
+ where st_geometrytype(geom) = 'ST_LineString'
)
select
- '(,2016-01-01)'::daterange as valid,
- geom
+ '(,2016-01-01)'::daterange as valid,
+ geom
from lines_2015
union all
select
- '[2016-01-01,)'::daterange as valid,
- geom
+ '[2016-01-01,)'::daterange as valid,
+ geom
from lines_2016
From f512ec036ac0201a425432b80c9e62a4b63cf829 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 6 Sep 2024 14:22:50 -0400
Subject: [PATCH 134/142] fix dropped model
---
dbt/models/staging/stg_fair_market_rents_add_zcta.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dbt/models/staging/stg_fair_market_rents_add_zcta.sql b/dbt/models/staging/stg_fair_market_rents_add_zcta.sql
index 30bee443..de2fdcba 100644
--- a/dbt/models/staging/stg_fair_market_rents_add_zcta.sql
+++ b/dbt/models/staging/stg_fair_market_rents_add_zcta.sql
@@ -1,6 +1,6 @@
with
stg_fair_market_rents_unpivot as (
- select * from {{ ref('stg_fair_market_rents_unpivot') }}
+ select * from {{ ref('stg_fair_market_rents_dedup') }}
),
zip_codes_to_zctas as (select * from {{ ref('zip_codes_to_zctas') }}),
zctas as (select * from {{ ref('zctas') }})
From 3d5ccd11ba9aa6aa25ac6f493afc23b2edeb46d0 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 6 Sep 2024 14:26:03 -0400
Subject: [PATCH 135/142] remove unused config
---
.pg_format | 2 --
1 file changed, 2 deletions(-)
delete mode 100644 .pg_format
diff --git a/.pg_format b/.pg_format
deleted file mode 100644
index 2a3c25bb..00000000
--- a/.pg_format
+++ /dev/null
@@ -1,2 +0,0 @@
-keyword-case=1
-comma=start
\ No newline at end of file
From ec72e2ca824beb97e4bfc9aa0de81478acaa2edb Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 6 Sep 2024 14:26:23 -0400
Subject: [PATCH 136/142] drop api from pr
---
api/main.py | 78 ---------------------------------
api/postgrest.conf | 107 ---------------------------------------------
api/schema.sql | 67 ----------------------------
3 files changed, 252 deletions(-)
delete mode 100644 api/main.py
delete mode 100644 api/postgrest.conf
delete mode 100644 api/schema.sql
diff --git a/api/main.py b/api/main.py
deleted file mode 100644
index ca217e29..00000000
--- a/api/main.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import os
-
-from fastapi import FastAPI, Depends
-import psycopg2
-
-# from cities.deployment.tracts_minneapolis.predict import TractsModelPredictor
-
-USERNAME = os.getenv("USERNAME")
-PASSWORD = os.getenv("PASSWORD")
-HOST = os.getenv("HOST")
-DATABASE = os.getenv("DATABASE")
-
-app = FastAPI()
-
-
-def get_db() -> psycopg2.extensions.connection:
- db = psycopg2.connect(
- host=HOST, database=DATABASE, user=USERNAME, password=PASSWORD
- )
- try:
- yield db
- finally:
- db.close()
-
-
-# def get_predictor(
-# db: psycopg2.extensions.connection = Depends(get_db),
-# ) -> TractsModelPredictor:
-# return TractsModelPredictor(db)
-
-
-@app.get("/demographics")
-async def read_demographics(category: str, db=Depends(get_db)):
- cur = db.cursor()
- cur.execute("select * from api__demographics where description = %s", (category,))
- return cur.fetchall()
-
-
-@app.get("/census_tracts")
-async def read_census_tracts(year: int, db=Depends(get_db)):
- cur = db.cursor()
- cur.execute(
- """
- with census_tracts as (
- select census_tract, geom from api__census_tracts
- where year_ = %s
- )
- select json_build_object('type', 'FeatureCollection', 'features', json_agg(ST_AsGeoJSON(census_tracts.*)::json))
- from census_tracts
- """,
- (year,),
- )
- return cur.fetchall()
-
-
-@app.get("/high_frequency_transit_lines")
-async def read_census_tracts(year: int, db=Depends(get_db)):
- cur = db.cursor()
- cur.execute(
- """
- with census_tracts as (
- select census_tract, geom from api__census_tracts
- where year_ = %s
- )
- select json_build_object('type', 'FeatureCollection', 'features', json_agg(ST_AsGeoJSON(census_tracts.*)::json))
- from census_tracts
- """,
- (year,),
- )
- return cur.fetchall()
-
-
-# @app.get("/predict")
-# async def read_predict(
-# samples=100, predictor: TractsModelPredictor = Depends(get_predictor)
-# ):
-# result = predictor.predict(samples=samples)
-# return result.tolist()
diff --git a/api/postgrest.conf b/api/postgrest.conf
deleted file mode 100644
index ddb71965..00000000
--- a/api/postgrest.conf
+++ /dev/null
@@ -1,107 +0,0 @@
-## Admin server used for checks. It's disabled by default unless a port is specified.
-# admin-server-port = 3001
-
-## The database role to use when no client authentication is provided
-db-anon-role = "web_anon"
-
-## Notification channel for reloading the schema cache
-db-channel = "pgrst"
-
-## Enable or disable the notification channel
-db-channel-enabled = true
-
-## Enable in-database configuration
-db-config = true
-
-## Function for in-database configuration
-## db-pre-config = "postgrest.pre_config"
-
-## Extra schemas to add to the search_path of every request
-db-extra-search-path = "public"
-
-## Limit rows in response
-# db-max-rows = 1000
-
-## Allow getting the EXPLAIN plan through the `Accept: application/vnd.pgrst.plan` header
-# db-plan-enabled = false
-
-## Number of open connections in the pool
-db-pool = 10
-
-## Time in seconds to wait to acquire a slot from the connection pool
-# db-pool-acquisition-timeout = 10
-
-## Time in seconds after which to recycle pool connections
-# db-pool-max-lifetime = 1800
-
-## Time in seconds after which to recycle unused pool connections
-# db-pool-max-idletime = 30
-
-## Allow automatic database connection retrying
-# db-pool-automatic-recovery = true
-
-## Stored proc to exec immediately after auth
-# db-pre-request = "stored_proc_name"
-
-## Enable or disable prepared statements. disabling is only necessary when behind a connection pooler.
-## When disabled, statements will be parametrized but won't be prepared.
-db-prepared-statements = true
-
-## The name of which database schema to expose to REST clients
-db-schemas = "api"
-
-## How to terminate database transactions
-## Possible values are:
-## commit (default)
-## Transaction is always committed, this can not be overriden
-## commit-allow-override
-## Transaction is committed, but can be overriden with Prefer tx=rollback header
-## rollback
-## Transaction is always rolled back, this can not be overriden
-## rollback-allow-override
-## Transaction is rolled back, but can be overriden with Prefer tx=commit header
-db-tx-end = "commit"
-
-## The standard connection URI format, documented at
-## https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING
-db-uri = "postgresql://postgres@34.123.100.76:5432/cities"
-
-# jwt-aud = "your_audience_claim"
-
-## Jspath to the role claim key
-jwt-role-claim-key = ".role"
-
-## Choose a secret, JSON Web Key (or set) to enable JWT auth
-## (use "@filename" to load from separate file)
-# jwt-secret = "secret_with_at_least_32_characters"
-jwt-secret-is-base64 = false
-
-## Enables and set JWT Cache max lifetime, disables caching with 0
-# jwt-cache-max-lifetime = 0
-
-## Logging level, the admitted values are: crit, error, warn, info and debug.
-log-level = "error"
-
-## Determine if the OpenAPI output should follow or ignore role privileges or be disabled entirely.
-## Admitted values: follow-privileges, ignore-privileges, disabled
-openapi-mode = "follow-privileges"
-
-## Base url for the OpenAPI output
-openapi-server-proxy-uri = ""
-
-## Configurable CORS origins
-# server-cors-allowed-origins = ""
-
-server-host = "!4"
-server-port = 3001
-
-## Allow getting the request-response timing information through the `Server-Timing` header
-server-timing-enabled = true
-
-## Unix socket location
-## if specified it takes precedence over server-port
-# server-unix-socket = "/tmp/pgrst.sock"
-
-## Unix socket file mode
-## When none is provided, 660 is applied by default
-# server-unix-socket-mode = "660"
diff --git a/api/schema.sql b/api/schema.sql
deleted file mode 100644
index 2285c2b7..00000000
--- a/api/schema.sql
+++ /dev/null
@@ -1,67 +0,0 @@
-begin;
-drop schema if exists api cascade;
-
-create schema api;
-
-create view api.demographics as (
- select * from api__demographics
-);
-
-create view api.census_tracts as (
- select * from api__census_tracts
-);
-
-create function api.high_frequency_transit_lines() returns setof dev.api__high_frequency_transit_lines as $$
- select * from dev.api__high_frequency_transit_lines
-$$ language sql;
-
-create function api.high_frequency_transit_lines(
- blue_zone_radius double precision,
- yellow_zone_line_radius double precision,
- yellow_zone_stop_radius double precision
-) returns table (
- valid daterange,
- geom geometry(LineString, 4269),
- blue_zone_geom geometry(LineString, 4269),
- yellow_zone_geom geometry(Geometry, 4269)
-) as $$
- with
- lines as (select * from dev.stg_high_frequency_transit_lines_union),
- stops as (select * from dev.high_frequency_transit_stops),
- lines_and_stops as (
- select
- lines.valid * stops.valid as valid,
- lines.geom as line_geom,
- stops.geom as stop_geom
- from lines inner join stops on lines.valid && stops.valid
- )
- select
- valid,
- st_transform(line_geom, 4269) as geom,
- st_transform(st_buffer(line_geom, blue_zone_radius), 4269) as blue_zone_geom,
- st_transform(st_union(st_buffer(line_geom, yellow_zone_line_radius), st_buffer(stop_geom, yellow_zone_stop_radius)), 4269) as yellow_zone_geom
- from lines_and_stops
-$$ language sql;
-
-do $$
-begin
-create role web_anon nologin;
-exception when duplicate_object then raise notice '%, skipping', sqlerrm using errcode = sqlstate;
-end
-$$;
-
-grant all on schema public to web_anon;
-grant all on schema dev to web_anon;
-grant select on table public.spatial_ref_sys TO web_anon;
-grant usage on schema api to web_anon;
-grant all on all tables in schema api to web_anon;
-grant all on all functions in schema api to web_anon;
-grant all on schema api to web_anon;
-GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA dev TO web_anon;
-GRANT ALL PRIVILEGES ON ALL functions IN SCHEMA dev TO web_anon;
-GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA api TO web_anon;
-GRANT ALL PRIVILEGES ON ALL functions IN SCHEMA api TO web_anon;
-GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO web_anon;
-GRANT ALL PRIVILEGES ON ALL functions IN SCHEMA public TO web_anon;
-grant web_anon to postgres;
-commit;
From 8a76e97fe39d9d160b059d38487d69dec547e1c8 Mon Sep 17 00:00:00 2001
From: Jack Feser
Date: Fri, 6 Sep 2024 14:47:14 -0400
Subject: [PATCH 137/142] select correct geometries
---
dbt/models/staging/stg_high_frequency_transit_lines_union.sql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/dbt/models/staging/stg_high_frequency_transit_lines_union.sql b/dbt/models/staging/stg_high_frequency_transit_lines_union.sql
index 56d4b680..4de6bbdb 100644
--- a/dbt/models/staging/stg_high_frequency_transit_lines_union.sql
+++ b/dbt/models/staging/stg_high_frequency_transit_lines_union.sql
@@ -4,14 +4,14 @@ lines_2015 as (
st_union(st_transform(geom, {{ var("srid") }})) as geom
from
{{ source('minneapolis', 'high_frequency_transit_2015_freq_lines') }}
- where st_geometrytype(geom) = 'ST_LineString'
+ where st_geometrytype(geom) = 'ST_MultiLineString'
),
lines_2016 as (
select
st_union(st_transform(geom, {{ var("srid") }})) as geom
from
{{ source('minneapolis', 'high_frequency_transit_2016_freq_lines') }}
- where st_geometrytype(geom) = 'ST_LineString'
+ where st_geometrytype(geom) = 'ST_MultiLineString'
)
select
'(,2016-01-01)'::daterange as valid,
From d37849a7da698c6d320bfaa3fc173f5526deab2b Mon Sep 17 00:00:00 2001
From: rfl-urbaniak
Date: Mon, 9 Sep 2024 06:22:54 -0400
Subject: [PATCH 138/142] update .gitignore from ru-tracts-model
---
.gitignore | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/.gitignore b/.gitignore
index afc863d2..bbeb945f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,18 @@ tests/.coverage
*.DS_Store
.vscode/launch.json
+data/sql/counties_database.db
+data/sql/msa_database.db
+.Rproj.user
+**/*.RData
+**/*.Rhistory
+
+# data
+data/minneapolis/processed/values_long.csv
+data/minneapolis/processed/values_with_parking.csv
+data/minneapolis/sourced/demographic/**
+data/minneapolis/preds/**
+data/minneapolis/sourced/parcel_to_census_tract_mappings/**
+data/minneapolis/sourced/parcel_to_parking_info_mappings/**
+
+data/minneapolis/.pgpass
From 6e2a361c71e00201b4546245f82cf20ca1c3a661 Mon Sep 17 00:00:00 2001
From: rfl-urbaniak
Date: Mon, 9 Sep 2024 06:33:48 -0400
Subject: [PATCH 139/142] new format lint
---
cities/modeling/model_interactions.py | 2 +-
cities/modeling/modeling_utils.py | 2 +-
cities/queries/causal_insight.py | 5 +++--
scripts/clean.sh | 22 ++++------------------
scripts/lint.sh | 14 +++++---------
5 files changed, 14 insertions(+), 31 deletions(-)
diff --git a/cities/modeling/model_interactions.py b/cities/modeling/model_interactions.py
index 8232410f..2446d6d5 100644
--- a/cities/modeling/model_interactions.py
+++ b/cities/modeling/model_interactions.py
@@ -3,10 +3,10 @@
from typing import Optional
import dill
+import pyro
import pyro.distributions as dist
import torch
-import pyro
from cities.modeling.modeling_utils import (
prep_wide_data_for_inference,
train_interactions_model,
diff --git a/cities/modeling/modeling_utils.py b/cities/modeling/modeling_utils.py
index 966a0ba5..55aaccc6 100644
--- a/cities/modeling/modeling_utils.py
+++ b/cities/modeling/modeling_utils.py
@@ -2,13 +2,13 @@
import matplotlib.pyplot as plt
import pandas as pd
+import pyro
import torch
from pyro.infer import SVI, Trace_ELBO
from pyro.infer.autoguide import AutoNormal
from pyro.optim import Adam # type: ignore
from scipy.stats import spearmanr
-import pyro
from cities.utils.data_grabber import (
DataGrabber,
list_available_features,
diff --git a/cities/queries/causal_insight.py b/cities/queries/causal_insight.py
index 187855ea..7a7a7e98 100644
--- a/cities/queries/causal_insight.py
+++ b/cities/queries/causal_insight.py
@@ -5,10 +5,10 @@
import numpy as np
import pandas as pd
import plotly.graph_objects as go
+import pyro
import torch
from sklearn.preprocessing import StandardScaler
-import pyro
from cities.modeling.model_interactions import model_cities_interaction
from cities.modeling.modeling_utils import prep_wide_data_for_inference
from cities.utils.cleaning_utils import (
@@ -576,7 +576,8 @@ def estimate_ATE(self):
label=f"mean = {tau_samples.mean():.3f}",
)
plt.title(
- f"ATE for {self.intervention_dataset} and {self.outcome_dataset} with forward shift = {self.forward_shift}"
+ f"ATE for {self.intervention_dataset} and {self.outcome_dataset} "
+ f"with forward shift = {self.forward_shift}"
)
plt.ylabel("counts")
plt.xlabel("ATE")
diff --git a/scripts/clean.sh b/scripts/clean.sh
index 6918545f..898f2e55 100755
--- a/scripts/clean.sh
+++ b/scripts/clean.sh
@@ -1,22 +1,8 @@
#!/bin/bash
set -euxo pipefail
-# isort suspended as conflicting with black
-# nbqa isort docs/guides/
-
-
-# this sometimes conflicts with black but does some
-# preliminary import sorting
-# and is then overriden by black
-isort cities/ tests/
-
-black ./cities/ ./tests/ ./docs/guides/
-
-black docs/guides/
-
+isort --profile="black" cities/ tests/
autoflake --remove-all-unused-imports --in-place --recursive ./cities ./tests
-
-nbqa autoflake --nbqa-shell --remove-all-unused-imports --recursive --in-place docs/guides/
-
-#nbqa black docs/guides/
-
+nbqa --nbqa-shell isort --profile="black" docs/guides/
+nbqa --nbqa-shell autoflake --nbqa-shell --remove-all-unused-imports --recursive --in-place docs/guides/
+black ./cities ./tests docs/guides/
diff --git a/scripts/lint.sh b/scripts/lint.sh
index 538aeeb1..2015aa76 100755
--- a/scripts/lint.sh
+++ b/scripts/lint.sh
@@ -1,12 +1,8 @@
#!/bin/bash
set -euxo pipefail
-mypy --ignore-missing-imports cities/
-#isort --check --diff cities/ tests/
-black --check cities/ tests/
-flake8 cities/ tests/ --ignore=E203,W503 --max-line-length=127
-
-
-nbqa autoflake -v --recursive --check docs/guides/
-#nbqa isort --check docs/guides/
-nbqa black --check docs/guides/
+mypy --ignore-missing-imports cities/ tests/
+isort --check --profile="black" --diff cities/ tests/
+black --check cities/ tests/ docs/guides/
+flake8 cities/ tests/
+nbqa --nbqa-shell autoflake --nbqa-shell --recursive --check docs/guides/
From 5cf1ca579e55fd8698ec289e723ec5b563106d6b Mon Sep 17 00:00:00 2001
From: rfl-urbaniak
Date: Mon, 9 Sep 2024 06:53:45 -0400
Subject: [PATCH 140/142] pin pyro to pass inference test
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index 3f14029a..1b6a832d 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
]
DEV_REQUIRES = [
- "pyro-ppl>=1.8.5",
+ "pyro-ppl=1.8.5",
"torch",
"plotly.express",
"scipy",
From e79617c50e1203d68bf2ac1a40a393e81cd27a55 Mon Sep 17 00:00:00 2001
From: rfl-urbaniak
Date: Mon, 9 Sep 2024 07:02:46 -0400
Subject: [PATCH 141/142] getting inference test to work
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index 1b6a832d..6d4e13aa 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
]
DEV_REQUIRES = [
- "pyro-ppl=1.8.5",
+ "pyro-ppl==1.8.5",
"torch",
"plotly.express",
"scipy",
From 348bc0086e7d27c0579e6a906418793ecda3a327 Mon Sep 17 00:00:00 2001
From: rfl-urbaniak
Date: Mon, 9 Sep 2024 07:46:29 -0400
Subject: [PATCH 142/142] add seaborn to setup to pass notebook tests
---
docs/guides/counterfactual-explained.ipynb | 14 +++++++-------
setup.py | 1 +
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/docs/guides/counterfactual-explained.ipynb b/docs/guides/counterfactual-explained.ipynb
index 1f2bcd99..7f1f65da 100644
--- a/docs/guides/counterfactual-explained.ipynb
+++ b/docs/guides/counterfactual-explained.ipynb
@@ -741,7 +741,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -5895,7 +5895,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -5907,7 +5907,7 @@
},
{
"data": {
- "image/png": "",
+ "image/png": "",
"text/plain": [
""
]
@@ -5962,7 +5962,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -6001,12 +6001,12 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
- "image/png": "",
+ "image/png": "",
"text/plain": [
""
]
@@ -6061,7 +6061,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
diff --git a/setup.py b/setup.py
index 6d4e13aa..fc86c68a 100644
--- a/setup.py
+++ b/setup.py
@@ -48,6 +48,7 @@
"dill",
"plotly",
"matplotlib>=3.8.2",
+ "seaborn",
],
extras_require={"test": TEST_REQUIRES, "dev": DEV_REQUIRES + TEST_REQUIRES},
python_requires=">=3.10",