classification n
@@ -10069,17 +10085,17 @@
Show the code
-suppressMessages (
-ggplot (ols_preds, aes (x = permits_count, y = ols_preds)) +
- geom_point (alpha = 0.2 ) +
- labs (title = "Predicted vs. Actual Permits: OLS" ,
- subtitle = "2022 Data" ,
- x = "Actual Permits" ,
- y = "Predicted Permits" ) +
- geom_abline () +
- geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
- theme_minimal ()
- )
+suppressMessages (
+ggplot (ols_preds, aes (x = permits_count, y = ols_preds)) +
+ geom_point (alpha = 0.2 ) +
+ labs (title = "Predicted vs. Actual Permits: OLS" ,
+ subtitle = "2022 Data" ,
+ x = "Actual Permits" ,
+ y = "Predicted Permits" ) +
+ geom_abline () +
+ geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
+ theme_minimal ()
+ )
@@ -10088,21 +10104,21 @@
Show the code
- ols_preds_map <- tmap_theme (tm_shape (ols_preds) +
- tm_polygons (col = "ols_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Predicted Permits: OLS" )
-
- ols_error_map <- tmap_theme (tm_shape (ols_preds) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Absolute Error: OLS" )
-
-tmap_arrange (ols_preds_map, ols_error_map)
+ ols_preds_map <- tmap_theme (tm_shape (ols_preds) +
+ tm_polygons (col = "ols_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Predicted Permits: OLS" )
+
+ ols_error_map <- tmap_theme (tm_shape (ols_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Absolute Error: OLS" )
+
+tmap_arrange (ols_preds_map, ols_error_map)
@@ -10115,17 +10131,17 @@
Show the code
-suppressMessages (
-ggplot (rf_test_preds, aes (x = permits_count, y = rf_test_preds)) +
- geom_point (alpha = 0.2 ) +
- labs (title = "Predicted vs. Actual Permits: RF" ,
- subtitle = "2022 Data" ,
- x = "Actual Permits" ,
- y = "Predicted Permits" ) +
- geom_abline () +
- geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
- theme_minimal ()
- )
+suppressMessages (
+ggplot (rf_test_preds, aes (x = permits_count, y = rf_test_preds)) +
+ geom_point (alpha = 0.2 ) +
+ labs (title = "Predicted vs. Actual Permits: RF" ,
+ subtitle = "2022 Data" ,
+ x = "Actual Permits" ,
+ y = "Predicted Permits" ) +
+ geom_abline () +
+ geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
+ theme_minimal ()
+ )
@@ -10134,21 +10150,21 @@
Show the code
- test_preds_map <- tmap_theme (tm_shape (rf_test_preds) +
- tm_polygons (col = "rf_test_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Predicted Permits: RF Test" )
-
- test_error_map <- tmap_theme (tm_shape (rf_test_preds) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Absolute Error: RF Test" )
-
-tmap_arrange (test_preds_map, test_error_map)
+ test_preds_map <- tmap_theme (tm_shape (rf_test_preds) +
+ tm_polygons (col = "rf_test_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Predicted Permits: RF Test" )
+
+ test_error_map <- tmap_theme (tm_shape (rf_test_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Absolute Error: RF Test" )
+
+tmap_arrange (test_preds_map, test_error_map)
@@ -10159,43 +10175,43 @@
Model Testing
-Model training, validation, and testing involved three steps. First, we partitioned our data into training, validation, and testing sets. We used data from 2013 through 2021 for initial model training. Next, we evaluated our models’ ability to accurately predict 2022 construction permits using our validation set, which consisted of all permits in 2022. We carried out additional feature engineering and model tuning, iterating based on the results of these training and testing splits. We sought to minimize both the mean absolute error (MAE) of our best model and the distribution of absolute error. Finally, when we were satisfied with the results of our best model, we evaluated it again by training it on all data from 2013 through 2022 and validating it on data from 2023 (all but the last two weeks, which we consider negligible for our purposes), which the model had never “seen” before. As Kuhn and Johnson write in Applied Predictive Modeling (2013) , “Ideally, the model should be evaluated on samples that were not used to build or fine-tune the model, so that they provide an unbiased sense of model effectiveness.”
+Model training, validation, and testing involved three steps. First, we partitioned our data into training, validation, and testing sets. We used data from 2013 through 2021 for initial model training. Next, we evaluated our models’ ability to accurately predict 2022 construction permits using our validation set, which consisted of all permits in 2022. We carried out additional feature engineering and model tuning, iterating based on the results of these training and testing splits. We sought to minimize both the mean absolute error (MAE) of our best model and the distribution of absolute error. Finally, when we were satisfied with the results of our best model, we evaluated it again by training it on all data from 2013 through 2022 and validating it on data from 2023 (all but the last two weeks, which we consider negligible for our purposes), which the model had never “seen” before. As Kuhn and Johnson write in Applied Predictive Modeling (2013), “Ideally, the model should be evaluated on samples that were not used to build or fine-tune the model, so that they provide an unbiased sense of model effectiveness.” (Code for all of these steps is available on GitHub.)
Again, testing confirms the strength of our model; based on 2023 data, our random forest model produces a MAE of 2.19. We note again that the range of model error is relatively narrow. Generally, we see that where the model predicts there to be more permits, there is also higher error. This spatial trend is also seen in the distribution of absolute errors clustering in a handful of block groups with high permit counts.
Show the code
- val_preds_map <- tmap_theme (tm_shape (rf_val_preds) +
- tm_polygons (col = "rf_val_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Predicted Permits: RF Validate" )
-
- val_error_map <- tmap_theme (tm_shape (rf_val_preds) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Absolute Error: RF Validate" )
-
-tmap_arrange (val_preds_map, val_error_map)
+ val_preds_map <- tmap_theme (tm_shape (rf_val_preds) +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Predicted Permits: RF Validate" )
+
+ val_error_map <- tmap_theme (tm_shape (rf_val_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Absolute Error: RF Validate" )
+
+tmap_arrange (val_preds_map, val_error_map)
Show the code
-suppressMessages (
-ggplot (rf_val_preds, aes (x = permits_count, y = rf_val_preds)) +
- geom_point (alpha = 0.2 ) +
- labs (title = "Predicted vs. Actual Permits: RF" ,
- subtitle = "2023 Data" ,
- x = "Actual Permits" ,
- y = "Predicted Permits" ) +
- geom_abline () +
- geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
- theme_minimal ()
- )
+suppressMessages (
+ggplot (rf_val_preds, aes (x = permits_count, y = rf_val_preds)) +
+ geom_point (alpha = 0.2 ) +
+ labs (title = "Predicted vs. Actual Permits: RF" ,
+ subtitle = "2023 Data" ,
+ x = "Actual Permits" ,
+ y = "Predicted Permits" ) +
+ geom_abline () +
+ geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
+ theme_minimal ()
+ )
@@ -10209,24 +10225,24 @@
Show the code
- rf_val_preds_long <- rf_val_preds %>%
- pivot_longer (cols = c (rent_burden, percent_nonwhite, total_pop, med_inc),
- names_to = "variable" , values_to = "value" ) %>%
- mutate (variable = case_when (
- variable == "med_inc" ~ "Median Income ($)" ,
- variable == "percent_nonwhite" ~ "Nonwhite (%)" ,
- variable == "rent_burden" ~ "Rent Burden (%)" ,
- TRUE ~ "Total Pop."
- ))
-
-ggplot (rf_val_preds_long, aes (x = value, y = abs_error)) +
- geom_point (alpha = 0.2 ) +
- geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
- facet_wrap (~ variable, scales = "free_x" ) +
- labs (title = "Generalizability of Absolute Error" ,
- x = "Value" ,
- y = "Absolute Error" ) +
- theme_minimal ()
+ rf_val_preds_long <- rf_val_preds %>%
+ pivot_longer (cols = c (rent_burden, percent_nonwhite, total_pop, med_inc),
+ names_to = "variable" , values_to = "value" ) %>%
+ mutate (variable = case_when (
+ variable == "med_inc" ~ "Median Income ($)" ,
+ variable == "percent_nonwhite" ~ "Nonwhite (%)" ,
+ variable == "rent_burden" ~ "Rent Burden (%)" ,
+ TRUE ~ "Total Pop."
+ ))
+
+ggplot (rf_val_preds_long, aes (x = value, y = abs_error)) +
+ geom_point (alpha = 0.2 ) +
+ geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
+ facet_wrap (~ variable, scales = "free_x" ) +
+ labs (title = "Generalizability of Absolute Error" ,
+ x = "Value" ,
+ y = "Absolute Error" ) +
+ theme_minimal ()
@@ -10236,14 +10252,14 @@
Show the code
-suppressMessages (
- ggplot (rf_val_preds, aes (x = reorder (district, abs_error, FUN = mean), y = abs_error)) +
- geom_boxplot (fill = NA , color = palette[3 ], alpha = 0.7 ) +
- labs (title = "MAE by Council District" ,
- y = "Mean Absolute Error" ,
- x = "Council District" ) +
- theme_minimal ()
- )
+suppressMessages (
+ ggplot (rf_val_preds, aes (x = reorder (district, abs_error, FUN = mean), y = abs_error)) +
+ geom_boxplot (fill = NA , color = palette[3 ], alpha = 0.7 ) +
+ labs (title = "MAE by Council District" ,
+ y = "Mean Absolute Error" ,
+ x = "Council District" ) +
+ theme_minimal ()
+ )
@@ -10256,29 +10272,29 @@
Show the code
- filtered_zoning <- zoning %>%
- filter (str_detect (CODE, "RS" ) | str_detect (CODE, "I" ),
- CODE != "I2" ,
- ! str_detect (CODE, "SP" )) %>%
- st_join (., rf_val_preds %>% select (rf_val_preds))
-
-
- zoning_map <- tmap_theme (tm_shape (filtered_zoning) +
- tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" , title = "Zoning Code" , palette = zoning_palette) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ,
- legend.height = 0.4 ),
- "Restrictive Zoning" )
-
- mismatch <- tmap_theme (tm_shape (filtered_zoning) +
- tm_polygons (col = "rf_val_preds" , border.alpha = 0 , colorNA = "lightgrey" , palette = mono_5_orange, style = "fisher" , title = "Predicted New Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Development Pressure" )
-
-tmap_arrange (zoning_map, mismatch)
+ filtered_zoning <- zoning %>%
+ filter (str_detect (CODE, "RS" ) | str_detect (CODE, "I" ),
+ CODE != "I2" ,
+ ! str_detect (CODE, "SP" )) %>%
+ st_join (., rf_val_preds %>% select (rf_val_preds))
+
+
+ zoning_map <- tmap_theme (tm_shape (filtered_zoning) +
+ tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" , title = "Zoning Code" , palette = zoning_palette) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ,
+ legend.height = 0.4 ),
+ "Restrictive Zoning" )
+
+ mismatch <- tmap_theme (tm_shape (filtered_zoning) +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , colorNA = "lightgrey" , palette = mono_5_orange, style = "fisher" , title = "Predicted New Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Development Pressure" )
+
+tmap_arrange (zoning_map, mismatch)
@@ -10288,70 +10304,70 @@
Show the code
-tmap_mode ('view' )
-
- filtered_zoning %>%
- filter (rf_val_preds > 10 ) %>%
-tm_shape () +
- tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ,
- popup.vars = c ('rf_val_preds' , 'CODE' ), palette = zoning_palette) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+tmap_mode ('view' )
+
+ filtered_zoning %>%
+ filter (rf_val_preds > 10 ) %>%
+tm_shape () +
+ tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ,
+ popup.vars = c ('rf_val_preds' , 'CODE' ), palette = zoning_palette) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
Show the code
- nbs <- filtered_zoning %>%
- mutate (nb = st_contiguity (geometry))
-
-# Create edge list while handling cases with no neighbors
- edge_list <- tibble:: tibble (id = 1 : length (nbs$ nb), nbs = nbs$ nb) %>%
- unnest (nbs) %>%
- filter (nbs != 0 )
-
-# Create a graph with a node for each row in filtered_zoning
- g <- make_empty_graph (n = nrow (filtered_zoning))
-V (g)$ name <- as.character (1 : nrow (filtered_zoning))
-
-# Add edges if they exist
-if (nrow (edge_list) > 0 ) {
- edges <- as.matrix (edge_list)
- g <- add_edges (g, c (t (edges)))
- }
-
-# Calculate the number of contiguous neighbors and sum of contiguous areas
- n_contiguous <- numeric (nrow (filtered_zoning))
- sum_contig_area <- numeric (nrow (filtered_zoning))
-
-for (i in 1 : nrow (filtered_zoning)) {
- neighbors <- neighborhood (g, order = 1 , nodes = i)[[1 ]]
- # Exclude the node itself from its list of neighbors
- neighbors <- neighbors[neighbors != i]
- n_contiguous[i] <- length (neighbors)
- sum_contig_area[i] <- sum (filtered_zoning$ Shape__Area[neighbors], na.rm = TRUE )
- }
-
- contig_info <- data.frame (n_contig = unlist (n_contiguous), sum_contig_area = unlist (sum_contig_area))
- filtered_zoning <- cbind (filtered_zoning, contig_info)
-
-
- filtered_zoning %>%
- st_drop_geometry () %>%
- select (rf_val_preds,
- n_contig,
- sum_contig_area,
- CODE) %>%
- filter (rf_val_preds > 10 ,
- n_contig > 2 ) %>%
- arrange (desc (rf_val_preds)) %>%
- kablerize (caption = "Poorly-Zoned Properties with High Development Risk" )
+ nbs <- filtered_zoning %>%
+ mutate (nb = st_contiguity (geometry))
+
+# Create edge list while handling cases with no neighbors
+ edge_list <- tibble:: tibble (id = 1 : length (nbs$ nb), nbs = nbs$ nb) %>%
+ unnest (nbs) %>%
+ filter (nbs != 0 )
+
+# Create a graph with a node for each row in filtered_zoning
+ g <- make_empty_graph (n = nrow (filtered_zoning))
+V (g)$ name <- as.character (1 : nrow (filtered_zoning))
+
+# Add edges if they exist
+if (nrow (edge_list) > 0 ) {
+ edges <- as.matrix (edge_list)
+ g <- add_edges (g, c (t (edges)))
+ }
+
+# Calculate the number of contiguous neighbors and sum of contiguous areas
+ n_contiguous <- numeric (nrow (filtered_zoning))
+ sum_contig_area <- numeric (nrow (filtered_zoning))
+
+for (i in 1 : nrow (filtered_zoning)) {
+ neighbors <- neighborhood (g, order = 1 , nodes = i)[[1 ]]
+ # Exclude the node itself from its list of neighbors
+ neighbors <- neighbors[neighbors != i]
+ n_contiguous[i] <- length (neighbors)
+ sum_contig_area[i] <- sum (filtered_zoning$ Shape__Area[neighbors], na.rm = TRUE )
+ }
+
+ contig_info <- data.frame (n_contig = unlist (n_contiguous), sum_contig_area = unlist (sum_contig_area))
+ filtered_zoning <- cbind (filtered_zoning, contig_info)
+
+
+ filtered_zoning %>%
+ st_drop_geometry () %>%
+ select (rf_val_preds,
+ n_contig,
+ sum_contig_area,
+ CODE) %>%
+ filter (rf_val_preds > 10 ,
+ n_contig > 2 ) %>%
+ arrange (desc (rf_val_preds)) %>%
+ kablerize (caption = "Poorly-Zoned Properties with High Development Risk" )
@@ -10464,16 +10480,16 @@
Show the code
-tmap_mode ('plot' )
-
- preds24 <- tmap_theme (tm_shape (rf_proj_preds) +
- tm_polygons (col = "rf_proj_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Predicted New Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Projected New Development, 2024" )
-
-tmap_arrange (preds24, med_inc)
+tmap_mode ('plot' )
+
+ preds24 <- tmap_theme (tm_shape (rf_proj_preds) +
+ tm_polygons (col = "rf_proj_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Predicted New Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Projected New Development, 2024" )
+
+tmap_arrange (preds24, med_inc)
@@ -10482,7 +10498,7 @@
Web Application
-Here is a preview of the SmartZoning web application. The UX offers key features that leverages this study’s modeling and mapping.
+Below is a wireframe preview of the SmartZoning web application; a prototype built in Shiny is available here. The UX offers key features that leverage this study’s modeling and mapping.
Key Features:
Interactive parcel-level map that makes key information about each parcel easily accessible
diff --git a/index.html b/index.html
index 4312099..d67defb 100644
--- a/index.html
+++ b/index.html
@@ -9336,28 +9336,38 @@
suppressMessages (
tmap_animation (tm, "assets/permits_animation.gif" , delay = 50 )
- )
+ )
+
+ bar_graph <- ggplot (building_permits %>% filter (! year %in% c (2024 )), aes (x = as.factor (year))) +
+ geom_bar (fill = palette[1 ], color = NA , alpha = 0.7 ) +
+ labs (title = "Permits per Year" ,
+ y = "Count" ) +
+ theme_minimal () +
+ theme (axis.title.x = element_blank (),
+ aspect.ratio = .75 )
+
+# Ensure the 'assets' directory exists
+if (! dir.exists ("assets" )) {
+ dir.create ("assets" )
+ }
+
+# Save the plot
+ggsave (bar_graph, filename = "assets/permits_per_year.png" )
+
+
+
-
-
-Show the code
-ggplot (building_permits %>% filter (! year %in% c (2024 )), aes (x = as.factor (year))) +
- geom_bar (fill = palette[1 ], color = NA , alpha = 0.7 ) +
- labs (title = "Permits per Year" ,
- y = "Count" ) +
- theme_minimal () +
- theme (axis.title.x = element_blank (),
- aspect.ratio = .75 )
-
-
-
+
+
+
+
We note a significant uptick in new construction permits as we approach 2021, followed by a sharp decline. It is generally acknowledged that this trend was due to the expiration of a tax abatement program for developers.
@@ -9365,36 +9375,36 @@
Show the code
- perms_x_dist <- st_join (building_permits, council_dists)
-
- perms_x_dist_sum <- perms_x_dist %>%
- st_drop_geometry () %>%
- group_by (DISTRICT, year) %>%
- summarize (permits_count = n ())
-
- perms_x_dist_mean = perms_x_dist_sum %>%
- group_by (year) %>%
- summarize (permits_count = mean (permits_count)) %>%
- mutate (DISTRICT = "Average" )
-
- perms_x_dist_sum <- bind_rows (perms_x_dist_sum, perms_x_dist_mean) %>%
- mutate (color = ifelse (DISTRICT != "Average" , 0 , 1 ))
-
-ggplotly (
-ggplot (perms_x_dist_sum %>% filter (year > 2013 , year < 2024 ), aes (x = year, y = permits_count, color = as.character (color), group = interaction (DISTRICT, color))) +
- geom_line (lwd = 0.7 ) +
- labs (title = "Permits per Year by Council District" ,
- y = "Total Permits" ) +
- # facet_wrap(~DISTRICT) +
- theme_minimal () +
- theme (axis.title.x = element_blank (),
- legend.position = "none" ) +
- scale_color_manual (values = c (palette[5 ], palette[1 ]))
- )
+ perms_x_dist <- st_join (building_permits, council_dists)
+
+ perms_x_dist_sum <- perms_x_dist %>%
+ st_drop_geometry () %>%
+ group_by (DISTRICT, year) %>%
+ summarize (permits_count = n ())
+
+ perms_x_dist_mean = perms_x_dist_sum %>%
+ group_by (year) %>%
+ summarize (permits_count = mean (permits_count)) %>%
+ mutate (DISTRICT = "Average" )
+
+ perms_x_dist_sum <- bind_rows (perms_x_dist_sum, perms_x_dist_mean) %>%
+ mutate (color = ifelse (DISTRICT != "Average" , 0 , 1 ))
+
+ggplotly (
+ggplot (perms_x_dist_sum %>% filter (year > 2013 , year < 2024 ), aes (x = year, y = permits_count, color = as.character (color), group = interaction (DISTRICT, color))) +
+ geom_line (lwd = 0.7 ) +
+ labs (title = "Permits per Year by Council District" ,
+ y = "Total Permits" ) +
+ # facet_wrap(~DISTRICT) +
+ theme_minimal () +
+ theme (axis.title.x = element_blank (),
+ legend.position = "none" ) +
+ scale_color_manual (values = c (palette[5 ], palette[1 ]))
+ )
@@ -9404,26 +9414,26 @@
Show the code
- permits_bg_long <- permits_bg %>%
- filter (! year %in% c (2024 )) %>%
- st_drop_geometry () %>%
- pivot_longer (
- cols = c (starts_with ("lag" )),
- names_to = "Variable" ,
- values_to = "Value"
- )
-
-
-ggscatter (permits_bg_long, x = "permits_count" , y = "Value" , facet.by = "Variable" ,
- add = "reg.line" ,
- add.params = list (color = palette[3 ]),
- conf.int = TRUE , alpha = 0.2
- ) +
- stat_cor (method = "pearson" , p.accuracy = 0.001 , r.accuracy = 0.01 , size = 3 ) +
- labs (title = "Correlation of `permits_count` and Engineered Features" ,
- x = "Value" ,
- y = "Permits Count" ) +
- theme_minimal ()
+ permits_bg_long <- permits_bg %>%
+ filter (! year %in% c (2024 )) %>%
+ st_drop_geometry () %>%
+ pivot_longer (
+ cols = c (starts_with ("lag" )),
+ names_to = "Variable" ,
+ values_to = "Value"
+ )
+
+
+ggscatter (permits_bg_long, x = "permits_count" , y = "Value" , facet.by = "Variable" ,
+ add = "reg.line" ,
+ add.params = list (color = palette[3 ]),
+ conf.int = TRUE , alpha = 0.2
+ ) +
+ stat_cor (method = "pearson" , p.accuracy = 0.001 , r.accuracy = 0.01 , size = 3 ) +
+ labs (title = "Correlation of `permits_count` and Engineered Features" ,
+ x = "Value" ,
+ y = "Permits Count" ) +
+ theme_minimal ()
@@ -9437,28 +9447,28 @@
<
Show the code
- med_inc <- tmap_theme (tm_shape (permits_bg %>% filter (year == 2022 )) +
- tm_polygons (col = "med_inc" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Med. Inc. ($)" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "darkgrey" ) +
- tm_layout (frame = FALSE ),
- "Median Income" )
-
- race <- tmap_theme (tm_shape (permits_bg %>% filter (year == 2022 )) +
- tm_polygons (col = "percent_nonwhite" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Nonwhite (%)" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "darkgrey" ) +
- tm_layout (frame = FALSE ),
- "Race" )
-
- rent_burd <- tmap_theme (tm_shape (permits_bg %>% filter (year == 2022 )) +
- tm_polygons (col = "ext_rent_burden" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Rent Burden (%)" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "darkgrey" ) +
- tm_layout (frame = FALSE ),
- "Extreme Rent Burden" )
-
-tmap_arrange (med_inc, race, rent_burd)
+ med_inc <- tmap_theme (tm_shape (permits_bg %>% filter (year == 2022 )) +
+ tm_polygons (col = "med_inc" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Med. Inc. ($)" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "darkgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Median Income" )
+
+ race <- tmap_theme (tm_shape (permits_bg %>% filter (year == 2022 )) +
+ tm_polygons (col = "percent_nonwhite" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Nonwhite (%)" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "darkgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Race" )
+
+ rent_burd <- tmap_theme (tm_shape (permits_bg %>% filter (year == 2022 )) +
+ tm_polygons (col = "ext_rent_burden" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Rent Burden (%)" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "darkgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Extreme Rent Burden" )
+
+tmap_arrange (med_inc, race, rent_burd)
@@ -9480,25 +9490,25 @@
Show the code
- corr_vars <- c ("total_pop" ,
- "med_inc" ,
- "percent_nonwhite" ,
- "percent_renters" ,
- "rent_burden" ,
- "ext_rent_burden" )
-
- corr_dat <- permits_bg %>% select (all_of (corr_vars), permits_count) %>% select (where (is.numeric)) %>% st_drop_geometry () %>% unique () %>% na.omit ()
-
- corr <- round (cor (corr_dat), 2 )
- p.mat <- cor_pmat (corr_dat)
-
-ggcorrplot (corr, p.mat = p.mat, hc.order = FALSE ,
- type = "full" , insig = "blank" , lab = TRUE , colors = c (palette[2 ], "white" , palette[3 ])) +
- annotate (
- geom = "rect" ,
- xmin = .5 , xmax = 7.5 , ymin = 6.5 , ymax = 7.5 ,
- fill = "transparent" , color = "red" , alpha = 0.5
- )
+ corr_vars <- c ("total_pop" ,
+ "med_inc" ,
+ "percent_nonwhite" ,
+ "percent_renters" ,
+ "rent_burden" ,
+ "ext_rent_burden" )
+
+ corr_dat <- permits_bg %>% select (all_of (corr_vars), permits_count) %>% select (where (is.numeric)) %>% st_drop_geometry () %>% unique () %>% na.omit ()
+
+ corr <- round (cor (corr_dat), 2 )
+ p.mat <- cor_pmat (corr_dat)
+
+ggcorrplot (corr, p.mat = p.mat, hc.order = FALSE ,
+ type = "full" , insig = "blank" , lab = TRUE , colors = c (palette[2 ], "white" , palette[3 ])) +
+ annotate (
+ geom = "rect" ,
+ xmin = .5 , xmax = 7.5 , ymin = 6.5 , ymax = 7.5 ,
+ fill = "transparent" , color = "red" , alpha = 0.5
+ )
@@ -9511,13 +9521,13 @@
Show the code
- ols <- lm (permits_count ~ ., data = permits_bg %>% filter (year < 2024 ) %>% select (- c (mapname, geoid10, year)) %>% st_drop_geometry ())
-vif (ols) %>%
- data.frame () %>%
- clean_names () %>%
- select (- df) %>%
- arrange (desc (gvif)) %>%
- kablerize ()
+ ols <- lm (permits_count ~ ., data = permits_bg %>% filter (year < 2024 ) %>% select (- c (mapname, geoid10, year)) %>% st_drop_geometry ())
+vif (ols) %>%
+ data.frame () %>%
+ clean_names () %>%
+ select (- df) %>%
+ arrange (desc (gvif)) %>%
+ kablerize ()
@@ -9967,15 +9977,15 @@
Show the code
-ggplot (permits_bg %>% st_drop_geometry %>% filter (! year %in% c (2024 )), aes (x = permits_count)) +
- geom_histogram (fill = palette[1 ], color = NA , alpha = 0.7 ) +
- labs (title = "Permits per Block Group per Year" ,
- subtitle = "Log-Transformed" ,
- y = "Count" ) +
- scale_x_log10 () +
- facet_wrap (~ year) +
- theme_minimal () +
- theme (axis.title.x = element_blank ())
+ggplot (permits_bg %>% st_drop_geometry %>% filter (! year %in% c (2024 )), aes (x = permits_count)) +
+ geom_histogram (fill = palette[1 ], color = NA , alpha = 0.7 ) +
+ labs (title = "Permits per Block Group per Year" ,
+ subtitle = "Log-Transformed" ,
+ y = "Count" ) +
+ scale_x_log10 () +
+ facet_wrap (~ year) +
+ theme_minimal () +
+ theme (axis.title.x = element_blank ())
@@ -9989,69 +9999,75 @@
Show the code
- lisa <- permits_bg %>%
- filter (year == 2023 ) %>%
- mutate (nb = st_contiguity (geometry),
- wt = st_weights (nb),
- permits_lag = st_lag (permits_count, nb, wt),
- moran = local_moran (permits_count, nb, wt)) %>%
- tidyr:: unnest (moran) %>%
- mutate (pysal = ifelse (p_folded_sim <= 0.1 , as.character (pysal), NA ),
- hotspot = case_when (
- pysal == "High-High" ~ "Yes" ,
- TRUE ~ "No"
- ))
-
-#
-# palette <- c("High-High" = "#B20016",
-# "Low-Low" = "#1C4769",
-# "Low-High" = "#24975E",
-# "High-Low" = "#EACA97")
-
- morans_i <- tmap_theme (tm_shape (lisa) +
- tm_polygons (col = "ii" , border.alpha = 0 , style = "jenks" , palette = mono_5_green, title = "Moran's I" ),
- "Local Moran's I (2023)" )
-
- p_value <- tmap_theme (tm_shape (lisa) +
- tm_polygons (col = "p_ii" , border.alpha = 0 , style = "jenks" , palette = mono_5_green, title = "P-Value" ),
- "Moran's I P-Value (2023)" )
-
- sig_hotspots <- tmap_theme (tm_shape (lisa) +
- tm_polygons (col = "hotspot" , border.alpha = 0 , style = "cat" , palette = c (mono_5_green[1 ], mono_5_green[5 ]), textNA = "Not a Hotspot" , title = "Hotspot?" ),
- "Construction Hotspots (2023)" )
-
-tmap_arrange (morans_i, p_value, sig_hotspots, ncol = 3 )
+ lisa <- permits_bg %>%
+ filter (year == 2023 ) %>%
+ mutate (nb = st_contiguity (geometry),
+ wt = st_weights (nb),
+ permits_lag = st_lag (permits_count, nb, wt),
+ moran = local_moran (permits_count, nb, wt)) %>%
+ tidyr:: unnest (moran) %>%
+ mutate (pysal = ifelse (p_folded_sim <= 0.1 , as.character (pysal), NA ),
+ hotspot = case_when (
+ pysal == "High-High" ~ "Yes" ,
+ TRUE ~ "No"
+ ))
+
+#
+# palette <- c("High-High" = "#B20016",
+# "Low-Low" = "#1C4769",
+# "Low-High" = "#24975E",
+# "High-Low" = "#EACA97")
+
+ morans_i <- tmap_theme (tm_shape (lisa) +
+ tm_polygons (col = "ii" , border.alpha = 0 , style = "jenks" , palette = mono_5_green, title = "Moran's I" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "darkgrey" ),
+ "Local Moran's I (2023)" )
+
+ p_value <- tmap_theme (tm_shape (lisa) +
+ tm_polygons (col = "p_ii" , border.alpha = 0 , style = "jenks" , palette = mono_5_green, title = "P-Value" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "darkgrey" ),
+ "Moran's I P-Value (2023)" )
+
+ sig_hotspots <- tmap_theme (tm_shape (lisa) +
+ tm_polygons (col = "hotspot" , border.alpha = 0 , style = "cat" , palette = c (mono_5_green[1 ], mono_5_green[5 ]), textNA = "Not a Hotspot" , title = "Hotspot?" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "darkgrey" ),
+ "Construction Hotspots (2023)" )
+
+tmap_arrange (morans_i, p_value, sig_hotspots, ncol = 3 )
-
+
Emerging hotspots…? (Included only if the analysis can be made to work.)
Show the code
-# Prepare the data
- permits_data <- permits_bg %>%
- filter (year < 2024 ,
- year > 2012 ) %>%
- select (permits_count, geoid10, year) %>%
- na.omit ()
-
-# Create spacetime object
- stc <- as_spacetime (permits_data,
- .loc_col = "geoid10" ,
- .time_col = "year" )
-
-# Run emerging hotspot analysis
- ehsa <- emerging_hotspot_analysis (
- x = stc,
- .var = "permits_count" ,
- k = 1 ,
- nsim = 25
- )
-
-# Analyze the result
-count (ehsa, classification)
+# Prepare the data
+ permits_data <- permits_bg %>%
+ filter (year < 2024 ,
+ year > 2012 ) %>%
+ select (permits_count, geoid10, year) %>%
+ na.omit ()
+
+# Create spacetime object
+ stc <- as_spacetime (permits_data,
+ .loc_col = "geoid10" ,
+ .time_col = "year" )
+
+# Run emerging hotspot analysis
+ ehsa <- emerging_hotspot_analysis (
+ x = stc,
+ .var = "permits_count" ,
+ k = 1 ,
+ nsim = 25
+ )
+
+# Analyze the result
+count (ehsa, classification)
classification n
@@ -10069,17 +10085,17 @@
Show the code
-suppressMessages (
-ggplot (ols_preds, aes (x = permits_count, y = ols_preds)) +
- geom_point (alpha = 0.2 ) +
- labs (title = "Predicted vs. Actual Permits: OLS" ,
- subtitle = "2022 Data" ,
- x = "Actual Permits" ,
- y = "Predicted Permits" ) +
- geom_abline () +
- geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
- theme_minimal ()
- )
+suppressMessages (
+ggplot (ols_preds, aes (x = permits_count, y = ols_preds)) +
+ geom_point (alpha = 0.2 ) +
+ labs (title = "Predicted vs. Actual Permits: OLS" ,
+ subtitle = "2022 Data" ,
+ x = "Actual Permits" ,
+ y = "Predicted Permits" ) +
+ geom_abline () +
+ geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
+ theme_minimal ()
+ )
@@ -10088,21 +10104,21 @@
Show the code
- ols_preds_map <- tmap_theme (tm_shape (ols_preds) +
- tm_polygons (col = "ols_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Predicted Permits: OLS" )
-
- ols_error_map <- tmap_theme (tm_shape (ols_preds) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Absolute Error: OLS" )
-
-tmap_arrange (ols_preds_map, ols_error_map)
+ ols_preds_map <- tmap_theme (tm_shape (ols_preds) +
+ tm_polygons (col = "ols_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Predicted Permits: OLS" )
+
+ ols_error_map <- tmap_theme (tm_shape (ols_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Absolute Error: OLS" )
+
+tmap_arrange (ols_preds_map, ols_error_map)
@@ -10115,17 +10131,17 @@
Show the code
-suppressMessages (
-ggplot (rf_test_preds, aes (x = permits_count, y = rf_test_preds)) +
- geom_point (alpha = 0.2 ) +
- labs (title = "Predicted vs. Actual Permits: RF" ,
- subtitle = "2022 Data" ,
- x = "Actual Permits" ,
- y = "Predicted Permits" ) +
- geom_abline () +
- geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
- theme_minimal ()
- )
+suppressMessages (
+ggplot (rf_test_preds, aes (x = permits_count, y = rf_test_preds)) +
+ geom_point (alpha = 0.2 ) +
+ labs (title = "Predicted vs. Actual Permits: RF" ,
+ subtitle = "2022 Data" ,
+ x = "Actual Permits" ,
+ y = "Predicted Permits" ) +
+ geom_abline () +
+ geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
+ theme_minimal ()
+ )
@@ -10134,21 +10150,21 @@
Show the code
- test_preds_map <- tmap_theme (tm_shape (rf_test_preds) +
- tm_polygons (col = "rf_test_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Predicted Permits: RF Test" )
-
- test_error_map <- tmap_theme (tm_shape (rf_test_preds) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Absolute Error: RF Test" )
-
-tmap_arrange (test_preds_map, test_error_map)
+ test_preds_map <- tmap_theme (tm_shape (rf_test_preds) +
+ tm_polygons (col = "rf_test_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Predicted Permits: RF Test" )
+
+ test_error_map <- tmap_theme (tm_shape (rf_test_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Absolute Error: RF Test" )
+
+tmap_arrange (test_preds_map, test_error_map)
@@ -10159,43 +10175,43 @@
Model Testing
-Model training, validation, and testing involved three steps. First, we partitioned our data into training, validation, and testing sets. We used data from 2013 through 2021 for initial model training. Next, we evaluated our models’ ability to accurately predict 2022 construction permits using our validation set, which consisted of all permits in 2022. We carried out additional feature engineering and model tuning, iterating based on the results of these training and testing splits. We sought to minimize both the mean absolute error (MAE) of our best model and the distribution of absolute error. Finally, when we were satisfied with the results of our best model, we evaluated it again by training it on all data from 2013 through 2022 and validating it on data from 2023 (all but the last two weeks, which we consider negligible for our purposes), which the model had never “seen” before. As Kuhn and Johnson write in Applied Predictive Modeling (2013) , “Ideally, the model should be evaluated on samples that were not used to build or fine-tune the model, so that they provide an unbiased sense of model effectiveness.”
+Model training, validation, and testing involved three steps. First, we partitioned our data into training, validation, and testing sets. We used data from 2013 through 2021 for initial model training. Next, we evaluated our models’ ability to accurately predict 2022 construction permits using our validation set, which consisted of all permits in 2022. We carried out additional feature engineering and model tuning, iterating based on the results of these training and testing splits. We sought to minimize both the mean absolute error (MAE) of our best model and the distribution of absolute error. Finally, when we were satisfied with the results of our best model, we evaluated it again by training it on all data from 2013 through 2022 and validating it on data from 2023 (all but the last two weeks, which we consider negligible for our purposes), which the model had never “seen” before. As Kuhn and Johnson write in Applied Predictive Modeling (2013), “Ideally, the model should be evaluated on samples that were not used to build or fine-tune the model, so that they provide an unbiased sense of model effectiveness.” (Code for all of these steps is available on GitHub.)
Again, testing confirms the strength of our model; based on 2023 data, our random forest model produces a MAE of 2.19. We note again that the range of model error is relatively narrow. Generally, we see that where the model predicts there to be more permits, there is also higher error. This spatial trend is also seen in the distribution of absolute errors clustering in a handful of block groups with high permit counts.
Show the code
- val_preds_map <- tmap_theme (tm_shape (rf_val_preds) +
- tm_polygons (col = "rf_val_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Predicted Permits: RF Validate" )
-
- val_error_map <- tmap_theme (tm_shape (rf_val_preds) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Absolute Error: RF Validate" )
-
-tmap_arrange (val_preds_map, val_error_map)
+ val_preds_map <- tmap_theme (tm_shape (rf_val_preds) +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Predicted Permits: RF Validate" )
+
+ val_error_map <- tmap_theme (tm_shape (rf_val_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" , title = "Absolute Error" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Absolute Error: RF Validate" )
+
+tmap_arrange (val_preds_map, val_error_map)
Show the code
-suppressMessages (
-ggplot (rf_val_preds, aes (x = permits_count, y = rf_val_preds)) +
- geom_point (alpha = 0.2 ) +
- labs (title = "Predicted vs. Actual Permits: RF" ,
- subtitle = "2023 Data" ,
- x = "Actual Permits" ,
- y = "Predicted Permits" ) +
- geom_abline () +
- geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
- theme_minimal ()
- )
+suppressMessages (
+ggplot (rf_val_preds, aes (x = permits_count, y = rf_val_preds)) +
+ geom_point (alpha = 0.2 ) +
+ labs (title = "Predicted vs. Actual Permits: RF" ,
+ subtitle = "2023 Data" ,
+ x = "Actual Permits" ,
+ y = "Predicted Permits" ) +
+ geom_abline () +
+ geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
+ theme_minimal ()
+ )
@@ -10209,24 +10225,24 @@
Show the code
- rf_val_preds_long <- rf_val_preds %>%
- pivot_longer (cols = c (rent_burden, percent_nonwhite, total_pop, med_inc),
- names_to = "variable" , values_to = "value" ) %>%
- mutate (variable = case_when (
- variable == "med_inc" ~ "Median Income ($)" ,
- variable == "percent_nonwhite" ~ "Nonwhite (%)" ,
- variable == "rent_burden" ~ "Rent Burden (%)" ,
- TRUE ~ "Total Pop."
- ))
-
-ggplot (rf_val_preds_long, aes (x = value, y = abs_error)) +
- geom_point (alpha = 0.2 ) +
- geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
- facet_wrap (~ variable, scales = "free_x" ) +
- labs (title = "Generalizability of Absolute Error" ,
- x = "Value" ,
- y = "Absolute Error" ) +
- theme_minimal ()
+ rf_val_preds_long <- rf_val_preds %>%
+ pivot_longer (cols = c (rent_burden, percent_nonwhite, total_pop, med_inc),
+ names_to = "variable" , values_to = "value" ) %>%
+ mutate (variable = case_when (
+ variable == "med_inc" ~ "Median Income ($)" ,
+ variable == "percent_nonwhite" ~ "Nonwhite (%)" ,
+ variable == "rent_burden" ~ "Rent Burden (%)" ,
+ TRUE ~ "Total Pop."
+ ))
+
+ggplot (rf_val_preds_long, aes (x = value, y = abs_error)) +
+ geom_point (alpha = 0.2 ) +
+ geom_smooth (method = "lm" , se = FALSE , color = palette[3 ]) +
+ facet_wrap (~ variable, scales = "free_x" ) +
+ labs (title = "Generalizability of Absolute Error" ,
+ x = "Value" ,
+ y = "Absolute Error" ) +
+ theme_minimal ()
@@ -10236,14 +10252,14 @@
Show the code
-suppressMessages (
- ggplot (rf_val_preds, aes (x = reorder (district, abs_error, FUN = mean), y = abs_error)) +
- geom_boxplot (fill = NA , color = palette[3 ], alpha = 0.7 ) +
- labs (title = "MAE by Council District" ,
- y = "Mean Absolute Error" ,
- x = "Council District" ) +
- theme_minimal ()
- )
+suppressMessages (
+ ggplot (rf_val_preds, aes (x = reorder (district, abs_error, FUN = mean), y = abs_error)) +
+ geom_boxplot (fill = NA , color = palette[3 ], alpha = 0.7 ) +
+ labs (title = "MAE by Council District" ,
+ y = "Mean Absolute Error" ,
+ x = "Council District" ) +
+ theme_minimal ()
+ )
@@ -10256,29 +10272,29 @@
Show the code
- filtered_zoning <- zoning %>%
- filter (str_detect (CODE, "RS" ) | str_detect (CODE, "I" ),
- CODE != "I2" ,
- ! str_detect (CODE, "SP" )) %>%
- st_join (., rf_val_preds %>% select (rf_val_preds))
-
-
- zoning_map <- tmap_theme (tm_shape (filtered_zoning) +
- tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" , title = "Zoning Code" , palette = zoning_palette) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ,
- legend.height = 0.4 ),
- "Restrictive Zoning" )
-
- mismatch <- tmap_theme (tm_shape (filtered_zoning) +
- tm_polygons (col = "rf_val_preds" , border.alpha = 0 , colorNA = "lightgrey" , palette = mono_5_orange, style = "fisher" , title = "Predicted New Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Development Pressure" )
-
-tmap_arrange (zoning_map, mismatch)
+ filtered_zoning <- zoning %>%
+ filter (str_detect (CODE, "RS" ) | str_detect (CODE, "I" ),
+ CODE != "I2" ,
+ ! str_detect (CODE, "SP" )) %>%
+ st_join (., rf_val_preds %>% select (rf_val_preds))
+
+
+ zoning_map <- tmap_theme (tm_shape (filtered_zoning) +
+ tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" , title = "Zoning Code" , palette = zoning_palette) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ,
+ legend.height = 0.4 ),
+ "Restrictive Zoning" )
+
+ mismatch <- tmap_theme (tm_shape (filtered_zoning) +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , colorNA = "lightgrey" , palette = mono_5_orange, style = "fisher" , title = "Predicted New Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Development Pressure" )
+
+tmap_arrange (zoning_map, mismatch)
@@ -10288,70 +10304,70 @@
Show the code
-tmap_mode ('view' )
-
- filtered_zoning %>%
- filter (rf_val_preds > 10 ) %>%
-tm_shape () +
- tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ,
- popup.vars = c ('rf_val_preds' , 'CODE' ), palette = zoning_palette) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+tmap_mode ('view' )
+
+ filtered_zoning %>%
+ filter (rf_val_preds > 10 ) %>%
+tm_shape () +
+ tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ,
+ popup.vars = c ('rf_val_preds' , 'CODE' ), palette = zoning_palette) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
Show the code
- nbs <- filtered_zoning %>%
- mutate (nb = st_contiguity (geometry))
-
-# Create edge list while handling cases with no neighbors
- edge_list <- tibble:: tibble (id = 1 : length (nbs$ nb), nbs = nbs$ nb) %>%
- unnest (nbs) %>%
- filter (nbs != 0 )
-
-# Create a graph with a node for each row in filtered_zoning
- g <- make_empty_graph (n = nrow (filtered_zoning))
-V (g)$ name <- as.character (1 : nrow (filtered_zoning))
-
-# Add edges if they exist
-if (nrow (edge_list) > 0 ) {
- edges <- as.matrix (edge_list)
- g <- add_edges (g, c (t (edges)))
- }
-
-# Calculate the number of contiguous neighbors and sum of contiguous areas
- n_contiguous <- numeric (nrow (filtered_zoning))
- sum_contig_area <- numeric (nrow (filtered_zoning))
-
-for (i in 1 : nrow (filtered_zoning)) {
- neighbors <- neighborhood (g, order = 1 , nodes = i)[[1 ]]
- # Exclude the node itself from its list of neighbors
- neighbors <- neighbors[neighbors != i]
- n_contiguous[i] <- length (neighbors)
- sum_contig_area[i] <- sum (filtered_zoning$ Shape__Area[neighbors], na.rm = TRUE )
- }
-
- contig_info <- data.frame (n_contig = unlist (n_contiguous), sum_contig_area = unlist (sum_contig_area))
- filtered_zoning <- cbind (filtered_zoning, contig_info)
-
-
- filtered_zoning %>%
- st_drop_geometry () %>%
- select (rf_val_preds,
- n_contig,
- sum_contig_area,
- CODE) %>%
- filter (rf_val_preds > 10 ,
- n_contig > 2 ) %>%
- arrange (desc (rf_val_preds)) %>%
- kablerize (caption = "Poorly-Zoned Properties with High Development Risk" )
+ nbs <- filtered_zoning %>%
+ mutate (nb = st_contiguity (geometry))
+
+# Create edge list while handling cases with no neighbors
+ edge_list <- tibble:: tibble (id = 1 : length (nbs$ nb), nbs = nbs$ nb) %>%
+ unnest (nbs) %>%
+ filter (nbs != 0 )
+
+# Create a graph with a node for each row in filtered_zoning
+ g <- make_empty_graph (n = nrow (filtered_zoning))
+V (g)$ name <- as.character (1 : nrow (filtered_zoning))
+
+# Add edges if they exist
+if (nrow (edge_list) > 0 ) {
+ edges <- as.matrix (edge_list)
+ g <- add_edges (g, c (t (edges)))
+ }
+
+# Calculate the number of contiguous neighbors and sum of contiguous areas
+ n_contiguous <- numeric (nrow (filtered_zoning))
+ sum_contig_area <- numeric (nrow (filtered_zoning))
+
+for (i in 1 : nrow (filtered_zoning)) {
+ neighbors <- neighborhood (g, order = 1 , nodes = i)[[1 ]]
+ # Exclude the node itself from its list of neighbors
+ neighbors <- neighbors[neighbors != i]
+ n_contiguous[i] <- length (neighbors)
+ sum_contig_area[i] <- sum (filtered_zoning$ Shape__Area[neighbors], na.rm = TRUE )
+ }
+
+ contig_info <- data.frame (n_contig = unlist (n_contiguous), sum_contig_area = unlist (sum_contig_area))
+ filtered_zoning <- cbind (filtered_zoning, contig_info)
+
+
+ filtered_zoning %>%
+ st_drop_geometry () %>%
+ select (rf_val_preds,
+ n_contig,
+ sum_contig_area,
+ CODE) %>%
+ filter (rf_val_preds > 10 ,
+ n_contig > 2 ) %>%
+ arrange (desc (rf_val_preds)) %>%
+ kablerize (caption = "Poorly-Zoned Properties with High Development Risk" )
@@ -10464,16 +10480,16 @@
Show the code
-tmap_mode ('plot' )
-
- preds24 <- tmap_theme (tm_shape (rf_proj_preds) +
- tm_polygons (col = "rf_proj_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Predicted New Permits" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE ),
- "Projected New Development, 2024" )
-
-tmap_arrange (preds24, med_inc)
+tmap_mode ('plot' )
+
+ preds24 <- tmap_theme (tm_shape (rf_proj_preds) +
+ tm_polygons (col = "rf_proj_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" , title = "Predicted New Permits" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE ),
+ "Projected New Development, 2024" )
+
+tmap_arrange (preds24, med_inc)
@@ -10482,7 +10498,7 @@
Web Application
-Here is a preview of the SmartZoning web application. The UX offers key features that leverages this study’s modeling and mapping.
+Below is a wireframe preview of the SmartZoning web application; a prototype built in Shiny is available here. The UX offers key features that leverage this study’s modeling and mapping.
Key Features:
Interactive parcel level map that makes key information about parcel easily accessible