From 8328e9c8db0d7015c751bb01594f03f4550bee89 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 8 Jan 2025 18:15:31 +0000 Subject: [PATCH 01/11] Add variable names and reformat README data dictionary for better readability --- README.Rmd | 17 ++--- README.md | 204 ++++++++++++++++++++++++++--------------------------- 2 files changed, 111 insertions(+), 110 deletions(-) diff --git a/README.Rmd b/README.Rmd index 8ba2a2e4..def44ec8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -316,18 +316,20 @@ param_notes <- param_tbl$value %>% )) %>% unlist() -ccao::vars_dict %>% - inner_join( - param_tbl %>% mutate(description = param_notes), - by = c("var_name_model" = "value") +param_tbl %>% + mutate(description = param_notes) %>% + left_join( + ccao::vars_dict, + by = c("value" = "var_name_model") ) %>% group_by(var_name_pretty) %>% mutate(row = paste0("X", row_number())) %>% distinct( `Feature Name` = var_name_pretty, + `Variable Name` = value, + Description = description, Category = var_type, Type = var_data_type, - Notes = description, var_value, row ) %>% mutate(Category = recode( @@ -337,14 +339,13 @@ ccao::vars_dict %>% meta = "Meta", other = "Other", ccao = "Other" )) %>% pivot_wider( - id_cols = `Feature Name`:`Notes`, + id_cols = `Feature Name`:`Category`, names_from = row, values_from = var_value ) %>% unite("Possible Values", starts_with("X"), sep = ", ", na.rm = TRUE) %>% - mutate(Notes = replace_na(Notes, "")) %>% + mutate(Description = replace_na(Description, "")) %>% arrange(Category) %>% - relocate(Notes, .after = everything()) %>% knitr::kable(format = "markdown") ``` diff --git a/README.md b/README.md index 351267f2..3554ab6c 100644 --- a/README.md +++ b/README.md @@ -368,106 +368,106 @@ districts](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/- and many others. The features in the table below are the ones that made the cut. They’re the right combination of easy to understand and impute, powerfully predictive, and well-behaved. 
Most of them are in use in the -model as of 2024-04-12. - -| Feature Name | Category | Type | Possible Values | Notes | -|:------------------------------------------------------------------------|:---------------|:------------|:---------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------| -| Percent Population Age, Under 19 Years Old | ACS5 | numeric | | Percent of the people 17 years or younger | -| Percent Population Age, Over 65 Years Old | ACS5 | numeric | | Percent of the people 65 years or older | -| Median Population Age | ACS5 | numeric | | Median age for whole population | -| Percent Population Mobility, In Same House 1 Year Ago | ACS5 | numeric | | Percent of people (older than 1 year) who have not moved in the past 12 months | -| Percent Population Mobility, Moved From Other State in Past Year | ACS5 | numeric | | Percent of people (older than 1 year) who moved from another state in the past 12 months | -| Percent Households Family, Married | ACS5 | numeric | | Percent of households that are family, married | -| Percent Households Nonfamily, Living Alone | ACS5 | numeric | | Percent of households that are non-family, alone (single) | -| Percent Population Education, High School Degree | ACS5 | numeric | | Percent of people older than 25 who attained a high school degree | -| Percent Population Education, Bachelor Degree | ACS5 | numeric | | Percent of people older than 25 who attained a bachelor’s degree | -| Percent Population Education, Graduate Degree | ACS5 | numeric | | Percent of people older than 25 who attained a graduate degree | -| Percent Population Income, Below Poverty Level | ACS5 | numeric | | Percent of people above the poverty level in the last 12 months | -| Median Income, Household in Past Year | ACS5 | numeric | | Median income per household in the past 12 months | -| Median 
Income, Per Capita in Past Year | ACS5 | numeric | | Median income per capita in the past 12 months | -| Percent Population Income, Received SNAP in Past Year | ACS5 | numeric | | Percent of households that received SNAP in the past 12 months | -| Percent Population Employment, Unemployed | ACS5 | numeric | | Percent of people 16 years and older unemployed | -| Median Occupied Household, Total, Year Built | ACS5 | numeric | | Median year built for all occupied households | -| Median Occupied Household, Renter, Gross Rent | ACS5 | numeric | | Median gross rent for only renter-occupied units | -| Percent Occupied Households, Owner | ACS5 | numeric | | Percent of households that are owner-occupied | -| Percent Occupied Households, Total, One or More Selected Conditions | ACS5 | numeric | | Percent of occupied households with selected conditions | -| Percent Population Mobility, Moved From Within Same County in Past Year | ACS5 | numeric | | Percent of people (older than 1 year) who moved in county in the past 12 months | -| Year Built | Characteristic | numeric | | Year the property was constructed | -| Central Air Conditioning | Characteristic | categorical | Central A/C, No Central A/C | Indicator for central air | -| Apartments | Characteristic | categorical | Two, Three, Four, Five, Six, None | Number of apartments for class 211 and 212 properties | -| Attic Finish | Characteristic | categorical | Living Area, Partial, None | Attic finish | -| Attic Type | Characteristic | categorical | Full, Partial, None | Attic type | -| Bedrooms | Characteristic | numeric | | Number of bedrooms in the building | -| Building Square Feet | Characteristic | numeric | | Square footage of the building, as measured from the exterior | -| Basement Type | Characteristic | categorical | Full, Slab, Partial, Crawl | Basement type | -| Basement Finish | Characteristic | categorical | Formal Rec Room, Apartment, Unfinished | Basement finish | -| Exterior Wall Material | Characteristic | 
categorical | Frame, Masonry, Frame + Masonry, Stucco | Exterior wall construction | -| Full Baths | Characteristic | numeric | | Number of full bathrooms | -| Fireplaces | Characteristic | numeric | | Number of fireplaces | -| Garage 1 Attached | Characteristic | categorical | Yes, No | Indicator for garage attached | -| Garage 1 Ext. Wall Material | Characteristic | categorical | Frame, Masonry, Frame + Masonry, Stucco | Garage exterior wall construction | -| Garage 1 Size | Characteristic | categorical | 1 cars, 1.5 cars, 2 cars, 2.5 cars, 3 cars, 3.5 cars, 0 cars, 4 cars | Garage size (number of cars) | -| Half Baths | Characteristic | numeric | | Number of half baths | -| Land Square Feet | Characteristic | numeric | | Square footage of the land (not just the building) of the property | -| Central Heating | Characteristic | categorical | Warm Air Furnace, Hot Water Steam, Electric Heater, None | Interior heating type | -| Number of Commercial Units | Characteristic | numeric | | Number of commercial units | -| Porch | Characteristic | categorical | None, Frame Enclosed, Masonry Enclosed | Porch type | -| Roof Material | Characteristic | categorical | Shingle + Asphalt, Tar + Gravel, Slate, Shake, Tile, Other | Roof material / construction | -| Rooms | Characteristic | numeric | | Number of total rooms in the building (excluding baths) | -| Cathedral Ceiling | Characteristic | categorical | Yes, No | Deprecated | -| Type of Residence | Characteristic | categorical | 1 Story, 2 Story, 3 Story +, Split Level, 1.5 Story, Missing | Type of residence | -| Recent Renovation | Characteristic | logical | | Indicates whether or not a property was renovated within the last 3 years | -| Property Class | Characteristic | character | | Card-level property type and/or use | -| Longitude | Location | numeric | | X coordinate in degrees (global longitude) | -| Latitude | Location | numeric | | Y coordinate in degrees (global latitude) | -| Census Tract GEOID | Location | 
character | | 11-digit ACS/Census tract GEOID | -| First Street Factor | Location | numeric | | First Street flood factor The flood factor is a risk score, where 10 is the highest risk and 1 is the lowest risk | -| School Elementary District GEOID | Location | character | | School district (elementary) GEOID | -| School Secondary District GEOID | Location | character | | School district (secondary) GEOID | -| Municipality Name | Location | character | | Taxing district name, as seen on Cook County tax bills | -| CMAP Walkability Score (No Transit) | Location | numeric | | CMAP walkability score for a given PIN, excluding transit walkability | -| CMAP Walkability Total Score | Location | numeric | | CMAP walkability score for a given PIN, including transit walkability | -| Airport Noise DNL | Location | numeric | | O’Hare and Midway noise, measured as DNL | -| Township Code | Meta | character | | Cook County township code | -| Neighborhood Code | Meta | character | | Assessor neighborhood code | -| Number of sales within previous N years of sale/lien date | Meta | numeric | | Number of sales within previous N years of sale/lien date | -| Property Tax Bill Aggregate Rate | Other | numeric | | Tax bill rate for the taxing district containing a given PIN | -| School District (Elementary) GreatSchools Rating | Other | numeric | | Average GreatSchools rating of elementary schools within the district of a given PIN | -| School District (Secondary) GreatSchools Rating | Other | numeric | | Average GreatSchools rating of secondary schools within the district of a given PIN | -| Corner Lot | Other | logical | | Corner lot indicator | -| Active Homeowner Exemption | Other | logical | | Parcel has an active homeowner exemption | -| Number of Years Active Homeowner Exemption | Other | numeric | | Number of years parcel has had an active homeowner exemption | -| Number of PINs in Half Mile | Proximity | numeric | | Number of PINs within half mile | -| Number of Bus Stops in Half 
Mile | Proximity | numeric | | Number of bus stops within half mile | -| Number of Foreclosures Per 1000 PINs (Past 5 Years) | Proximity | numeric | | Number of foreclosures per 1000 PINs, within half mile (past 5 years) | -| Number of Schools in Half Mile | Proximity | numeric | | Number of schools (any kind) within half mile | -| Number of Schools with Rating in Half Mile | Proximity | numeric | | Number of schools (any kind) within half mile | -| Average School Rating in Half Mile | Proximity | numeric | | Average school rating of schools within half mile | -| Nearest Bike Trail Distance (Feet) | Proximity | numeric | | Nearest bike trail distance (feet) | -| Nearest Cemetery Distance (Feet) | Proximity | numeric | | Nearest cemetery distance (feet) | -| Nearest CTA Route Distance (Feet) | Proximity | numeric | | Nearest CTA route distance (feet) | -| Nearest CTA Stop Distance (Feet) | Proximity | numeric | | Nearest CTA stop distance (feet) | -| Nearest Hospital Distance (Feet) | Proximity | numeric | | Nearest hospital distance (feet) | -| Lake Michigan Distance (Feet) | Proximity | numeric | | Distance to Lake Michigan shoreline (feet) | -| Nearest Major Road Distance (Feet) | Proximity | numeric | | Nearest major road distance (feet) | -| Nearest Metra Route Distance (Feet) | Proximity | numeric | | Nearest Metra route distance (feet) | -| Nearest Metra Stop Distance (Feet) | Proximity | numeric | | Nearest Metra stop distance (feet) | -| Nearest Park Distance (Feet) | Proximity | numeric | | Nearest park distance (feet) | -| Nearest Railroad Distance (Feet) | Proximity | numeric | | Nearest railroad distance (feet) | -| Nearest Secondary Road Distance (Feet) | Proximity | numeric | | Nearest secondary road distance (feet) | -| Nearest University Distance (Feet) | Proximity | numeric | | Nearest university distance (feet) | -| Nearest Vacant Land Parcel Distance (Feet) | Proximity | numeric | | Nearest vacant land (class 100) parcel distance (feet) | -| 
Nearest Water Distance (Feet) | Proximity | numeric | | Nearest water distance (feet) | -| Nearest Golf Course Distance (Feet) | Proximity | numeric | | Nearest golf course distance (feet) | -| Total Airport Noise DNL | Proximity | numeric | | Estimated DNL for a PIN, assuming a baseline DNL of 50 (“quiet suburban”) and adding predicted noise from O’Hare and Midway airports to that baseline | -| Sale Year | Time | numeric | | Sale year calculated as the number of years since 0 B.C.E | -| Sale Day | Time | numeric | | Sale day calculated as the number of days since January 1st, 1997 | -| Sale Quarter of Year | Time | character | | Character encoding of quarter of year (Q1 - Q4) | -| Sale Month of Year | Time | character | | Character encoding of month of year (Jan - Dec) | -| Sale Day of Year | Time | numeric | | Numeric encoding of day of year (1 - 365) | -| Sale Day of Month | Time | numeric | | Numeric encoding of day of month (1 - 31) | -| Sale Day of Week | Time | numeric | | Numeric encoding of day of week (1 - 7) | -| Sale After COVID-19 | Time | logical | | Indicator for whether sale occurred after COVID-19 was widely publicized (around March 15, 2020) | +model as of 2025-01-08. 
+ +| Feature Name | Variable Name | Description | Category | Possible Values | +|:------------------------------------------------------------------------|:------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|:---------------------------------------------------------------------| +| Percent Population Age, Under 19 Years Old | acs5_percent_age_children | Percent of the people 17 years or younger | ACS5 | | +| Percent Population Age, Over 65 Years Old | acs5_percent_age_senior | Percent of the people 65 years or older | ACS5 | | +| Median Population Age | acs5_median_age_total | Median age for whole population | ACS5 | | +| Percent Population Mobility, In Same House 1 Year Ago | acs5_percent_mobility_no_move | Percent of people (older than 1 year) who have not moved in the past 12 months | ACS5 | | +| Percent Population Mobility, Moved From Other State in Past Year | acs5_percent_mobility_moved_from_other_state | Percent of people (older than 1 year) who moved from another state in the past 12 months | ACS5 | | +| Percent Households Family, Married | acs5_percent_household_family_married | Percent of households that are family, married | ACS5 | | +| Percent Households Nonfamily, Living Alone | acs5_percent_household_nonfamily_alone | Percent of households that are non-family, alone (single) | ACS5 | | +| Percent Population Education, High School Degree | acs5_percent_education_high_school | Percent of people older than 25 who attained a high school degree | ACS5 | | +| Percent Population Education, Bachelor Degree | acs5_percent_education_bachelor | Percent of people older than 25 who attained a bachelor’s degree | ACS5 | | +| Percent Population Education, Graduate Degree | acs5_percent_education_graduate | Percent of people older than 25 who attained a graduate degree | ACS5 | | +| Percent 
Population Income, Below Poverty Level | acs5_percent_income_below_poverty_level | Percent of people above the poverty level in the last 12 months | ACS5 | | +| Median Income, Household in Past Year | acs5_median_income_household_past_year | Median income per household in the past 12 months | ACS5 | | +| Median Income, Per Capita in Past Year | acs5_median_income_per_capita_past_year | Median income per capita in the past 12 months | ACS5 | | +| Percent Population Income, Received SNAP in Past Year | acs5_percent_income_household_received_snap_past_year | Percent of households that received SNAP in the past 12 months | ACS5 | | +| Percent Population Employment, Unemployed | acs5_percent_employment_unemployed | Percent of people 16 years and older unemployed | ACS5 | | +| Median Occupied Household, Total, Year Built | acs5_median_household_total_occupied_year_built | Median year built for all occupied households | ACS5 | | +| Median Occupied Household, Renter, Gross Rent | acs5_median_household_renter_occupied_gross_rent | Median gross rent for only renter-occupied units | ACS5 | | +| Percent Occupied Households, Owner | acs5_percent_household_owner_occupied | Percent of households that are owner-occupied | ACS5 | | +| Percent Occupied Households, Total, One or More Selected Conditions | acs5_percent_household_total_occupied_w_sel_cond | Percent of occupied households with selected conditions | ACS5 | | +| Percent Population Mobility, Moved From Within Same County in Past Year | acs5_percent_mobility_moved_in_county | Percent of people (older than 1 year) who moved in county in the past 12 months | ACS5 | | +| Year Built | char_yrblt | Year the property was constructed | Characteristic | | +| Central Air Conditioning | char_air | Indicator for central air | Characteristic | Central A/C, No Central A/C | +| Apartments | char_apts | Number of apartments for class 211 and 212 properties | Characteristic | Two, Three, Four, Five, Six, None | +| Attic Finish | 
char_attic_fnsh | Attic finish | Characteristic | Living Area, Partial, None | +| Attic Type | char_attic_type | Attic type | Characteristic | Full, Partial, None | +| Bedrooms | char_beds | Number of bedrooms in the building | Characteristic | | +| Building Square Feet | char_bldg_sf | Square footage of the building, as measured from the exterior | Characteristic | | +| Basement Type | char_bsmt | Basement type | Characteristic | Full, Slab, Partial, Crawl | +| Basement Finish | char_bsmt_fin | Basement finish | Characteristic | Formal Rec Room, Apartment, Unfinished | +| Property Class | char_class | Card-level property type and/or use | Characteristic | | +| Exterior Wall Material | char_ext_wall | Exterior wall construction | Characteristic | Frame, Masonry, Frame + Masonry, Stucco | +| Full Baths | char_fbath | Number of full bathrooms | Characteristic | | +| Fireplaces | char_frpl | Number of fireplaces | Characteristic | | +| Garage 1 Attached | char_gar1_att | Indicator for garage attached | Characteristic | Yes, No | +| Garage 1 Ext. 
Wall Material | char_gar1_cnst | Garage exterior wall construction | Characteristic | Frame, Masonry, Frame + Masonry, Stucco | +| Garage 1 Size | char_gar1_size | Garage size (number of cars) | Characteristic | 1 cars, 1.5 cars, 2 cars, 2.5 cars, 3 cars, 3.5 cars, 0 cars, 4 cars | +| Half Baths | char_hbath | Number of half baths | Characteristic | | +| Land Square Feet | char_land_sf | Square footage of the land (not just the building) of the property | Characteristic | | +| Central Heating | char_heat | Interior heating type | Characteristic | Warm Air Furnace, Hot Water Steam, Electric Heater, None | +| Number of Commercial Units | char_ncu | Number of commercial units | Characteristic | | +| Porch | char_porch | Porch type | Characteristic | None, Frame Enclosed, Masonry Enclosed | +| Roof Material | char_roof_cnst | Roof material / construction | Characteristic | Shingle + Asphalt, Tar + Gravel, Slate, Shake, Tile, Other | +| Rooms | char_rooms | Number of total rooms in the building (excluding baths) | Characteristic | | +| Cathedral Ceiling | char_tp_dsgn | Deprecated | Characteristic | Yes, No | +| Type of Residence | char_type_resd | Type of residence | Characteristic | 1 Story, 2 Story, 3 Story +, Split Level, 1.5 Story, Missing | +| Recent Renovation | char_recent_renovation | Indicates whether or not a property was renovated within the last 3 years | Characteristic | | +| Longitude | loc_longitude | X coordinate in degrees (global longitude) | Location | | +| Latitude | loc_latitude | Y coordinate in degrees (global latitude) | Location | | +| Census Tract GEOID | loc_census_tract_geoid | 11-digit ACS/Census tract GEOID | Location | | +| First Street Factor | loc_env_flood_fs_factor | First Street flood factor The flood factor is a risk score, where 10 is the highest risk and 1 is the lowest risk | Location | | +| Airport Noise DNL | loc_env_airport_noise_dnl | O’Hare and Midway noise, measured as DNL | Location | | +| School Elementary District GEOID 
| loc_school_elementary_district_geoid | School district (elementary) GEOID | Location | | +| School Secondary District GEOID | loc_school_secondary_district_geoid | School district (secondary) GEOID | Location | | +| CMAP Walkability Score (No Transit) | loc_access_cmap_walk_nta_score | CMAP walkability score for a given PIN, excluding transit walkability | Location | | +| CMAP Walkability Total Score | loc_access_cmap_walk_total_score | CMAP walkability score for a given PIN, including transit walkability | Location | | +| Municipality Name | loc_tax_municipality_name | Taxing district name, as seen on Cook County tax bills | Location | | +| Township Code | meta_township_code | Cook County township code | Meta | | +| Neighborhood Code | meta_nbhd_code | Assessor neighborhood code | Meta | | +| Number of sales within previous N years of sale/lien date | meta_sale_count_past_n_years | Number of sales within previous N years of sale/lien date | Meta | | +| Property Tax Bill Aggregate Rate | other_tax_bill_rate | Tax bill rate for the taxing district containing a given PIN | Other | | +| School District (Elementary) GreatSchools Rating | other_school_district_elementary_avg_rating | Average GreatSchools rating of elementary schools within the district of a given PIN | Other | | +| School District (Secondary) GreatSchools Rating | other_school_district_secondary_avg_rating | Average GreatSchools rating of secondary schools within the district of a given PIN | Other | | +| Active Homeowner Exemption | ccao_is_active_exe_homeowner | Parcel has an active homeowner exemption | Other | | +| Corner Lot | ccao_is_corner_lot | Corner lot indicator | Other | | +| Number of Years Active Homeowner Exemption | ccao_n_years_exe_homeowner | Number of years parcel has had an active homeowner exemption | Other | | +| Number of PINs in Half Mile | prox_num_pin_in_half_mile | Number of PINs within half mile | Proximity | | +| Number of Bus Stops in Half Mile | 
prox_num_bus_stop_in_half_mile | Number of bus stops within half mile | Proximity | | +| Number of Foreclosures Per 1000 PINs (Past 5 Years) | prox_num_foreclosure_per_1000_pin_past_5_years | Number of foreclosures per 1000 PINs, within half mile (past 5 years) | Proximity | | +| Number of Schools in Half Mile | prox_num_school_in_half_mile | Number of schools (any kind) within half mile | Proximity | | +| Number of Schools with Rating in Half Mile | prox_num_school_with_rating_in_half_mile | Number of schools (any kind) within half mile | Proximity | | +| Average School Rating in Half Mile | prox_avg_school_rating_in_half_mile | Average school rating of schools within half mile | Proximity | | +| Total Airport Noise DNL | prox_airport_dnl_total | Estimated DNL for a PIN, assuming a baseline DNL of 50 (“quiet suburban”) and adding predicted noise from O’Hare and Midway airports to that baseline | Proximity | | +| Nearest Bike Trail Distance (Feet) | prox_nearest_bike_trail_dist_ft | Nearest bike trail distance (feet) | Proximity | | +| Nearest Cemetery Distance (Feet) | prox_nearest_cemetery_dist_ft | Nearest cemetery distance (feet) | Proximity | | +| Nearest CTA Route Distance (Feet) | prox_nearest_cta_route_dist_ft | Nearest CTA route distance (feet) | Proximity | | +| Nearest CTA Stop Distance (Feet) | prox_nearest_cta_stop_dist_ft | Nearest CTA stop distance (feet) | Proximity | | +| Nearest Hospital Distance (Feet) | prox_nearest_hospital_dist_ft | Nearest hospital distance (feet) | Proximity | | +| Lake Michigan Distance (Feet) | prox_lake_michigan_dist_ft | Distance to Lake Michigan shoreline (feet) | Proximity | | +| Nearest Major Road Distance (Feet) | prox_nearest_major_road_dist_ft | Nearest major road distance (feet) | Proximity | | +| Nearest Metra Route Distance (Feet) | prox_nearest_metra_route_dist_ft | Nearest Metra route distance (feet) | Proximity | | +| Nearest Metra Stop Distance (Feet) | prox_nearest_metra_stop_dist_ft | Nearest Metra stop 
distance (feet) | Proximity | | +| Nearest Park Distance (Feet) | prox_nearest_park_dist_ft | Nearest park distance (feet) | Proximity | | +| Nearest Railroad Distance (Feet) | prox_nearest_railroad_dist_ft | Nearest railroad distance (feet) | Proximity | | +| Nearest Secondary Road Distance (Feet) | prox_nearest_secondary_road_dist_ft | Nearest secondary road distance (feet) | Proximity | | +| Nearest University Distance (Feet) | prox_nearest_university_dist_ft | Nearest university distance (feet) | Proximity | | +| Nearest Vacant Land Parcel Distance (Feet) | prox_nearest_vacant_land_dist_ft | Nearest vacant land (class 100) parcel distance (feet) | Proximity | | +| Nearest Water Distance (Feet) | prox_nearest_water_dist_ft | Nearest water distance (feet) | Proximity | | +| Nearest Golf Course Distance (Feet) | prox_nearest_golf_course_dist_ft | Nearest golf course distance (feet) | Proximity | | +| Sale Year | time_sale_year | Sale year calculated as the number of years since 0 B.C.E | Time | | +| Sale Day | time_sale_day | Sale day calculated as the number of days since January 1st, 1997 | Time | | +| Sale Quarter of Year | time_sale_quarter_of_year | Character encoding of quarter of year (Q1 - Q4) | Time | | +| Sale Month of Year | time_sale_month_of_year | Character encoding of month of year (Jan - Dec) | Time | | +| Sale Day of Year | time_sale_day_of_year | Numeric encoding of day of year (1 - 365) | Time | | +| Sale Day of Month | time_sale_day_of_month | Numeric encoding of day of month (1 - 31) | Time | | +| Sale Day of Week | time_sale_day_of_week | Numeric encoding of day of week (1 - 7) | Time | | +| Sale After COVID-19 | time_sale_post_covid | Indicator for whether sale occurred after COVID-19 was widely publicized (around March 15, 2020) | Time | | #### Data Sources @@ -1465,8 +1465,8 @@ commands: dependency as necessary 4. Run `renv::snapshot()` to update the reporting lockfile with the dependencies defined in the `DESCRIPTION` file -5. 
Run `renv::activate(profile = "default")` if you would like to switch - back to the default renv profile +5. Run `renv::activate(profile = "default")` if you would like to + switch back to the default renv profile ## Troubleshooting From 65cd2eb6cd838ad92c32e44d130e41cfdb36b560 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Wed, 8 Jan 2025 21:25:31 +0000 Subject: [PATCH 02/11] Add data dict to repo --- .gitignore | 1 + README.Rmd | 43 +++++++++++++------- README.md | 6 ++- docs/data-dict.csv | 97 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 15 deletions(-) create mode 100644 docs/data-dict.csv diff --git a/.gitignore b/.gitignore index 5cd2556d..9de0fa30 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ cache/ *.rds *.zip *.csv +!docs/data-dict.csv *.xlsx *.xlsm *.html diff --git a/README.Rmd b/README.Rmd index def44ec8..c14925a8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -231,10 +231,13 @@ Model accuracy for each parameter combination is measured on a validation set us ### Features Used -The residential model uses a variety of individual and aggregate features to determine a property's assessed value. We've tested a long list of possible features over time, including [walk score](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/-/blob/9407d1fae1986c5ce1f5434aa91d3f8cf06c8ea1/output/test_new_variables/county_walkscore.html), [crime rate](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/-/blob/9407d1fae1986c5ce1f5434aa91d3f8cf06c8ea1/output/test_new_variables/chicago_crimerate.html), [school districts](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/-/blob/9407d1fae1986c5ce1f5434aa91d3f8cf06c8ea1/output/test_new_variables/county_school_boundaries_mean_encoded.html), and many others. The features in the table below are the ones that made the cut. They're the right combination of easy to understand and impute, powerfully predictive, and well-behaved. 
Most of them are in use in the model as of `r Sys.Date()`. +The residential model uses a variety of individual and aggregate features to determine a property's assessed value. We've tested a long list of possible features over time, including [walk score](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/-/blob/9407d1fae1986c5ce1f5434aa91d3f8cf06c8ea1/output/test_new_variables/county_walkscore.html), [crime rate](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/-/blob/9407d1fae1986c5ce1f5434aa91d3f8cf06c8ea1/output/test_new_variables/chicago_crimerate.html), [school districts](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/-/blob/9407d1fae1986c5ce1f5434aa91d3f8cf06c8ea1/output/test_new_variables/county_school_boundaries_mean_encoded.html), and many others. The features in the table below are the ones that made the cut. They're the right combination of easy to understand and impute, powerfully predictive, and well-behaved. + +For a machine-readable version of this data dictionary, see [`docs/data-dict.csv`](./docs/data-dict.csv). 
```{r feature_guide, message=FALSE, results='asis', echo=FALSE} library(dplyr) +library(readr) library(tidyr) library(yaml) library(jsonlite) @@ -316,7 +319,7 @@ param_notes <- param_tbl$value %>% )) %>% unlist() -param_tbl %>% +param_tbl_fmt <- param_tbl %>% mutate(description = param_notes) %>% left_join( ccao::vars_dict, @@ -325,27 +328,41 @@ param_tbl %>% group_by(var_name_pretty) %>% mutate(row = paste0("X", row_number())) %>% distinct( - `Feature Name` = var_name_pretty, - `Variable Name` = value, - Description = description, - Category = var_type, - Type = var_data_type, + feature_name = var_name_pretty, + variable_name = value, + description, + category = var_type, + type = var_data_type, var_value, row ) %>% - mutate(Category = recode( - Category, + mutate(category = recode( + category, char = "Characteristic", acs5 = "ACS5", loc = "Location", prox = "Proximity", ind = "Indicator", time = "Time", meta = "Meta", other = "Other", ccao = "Other" )) %>% pivot_wider( - id_cols = `Feature Name`:`Category`, + id_cols = `feature_name`:`category`, names_from = row, values_from = var_value ) %>% - unite("Possible Values", starts_with("X"), sep = ", ", na.rm = TRUE) %>% - mutate(Description = replace_na(Description, "")) %>% - arrange(Category) %>% + unite("possible_values", starts_with("X"), sep = ", ", na.rm = TRUE) %>% + mutate(description = replace_na(description, "")) %>% + arrange(category) + +# Write machine-readable version of the table to file +param_tbl_fmt %>% + write_csv("docs/data-dict.csv") + +# Render human-readable version of the table to the doc +param_tbl_fmt %>% + rename( + "Feature Name" = "feature_name", + "Variable Name" = "variable_name", + "Description" = "description", + "Category" = "category", + "Possible Values" = "possible_values" + ) %>% knitr::kable(format = "markdown") ``` diff --git a/README.md b/README.md index 3554ab6c..59d6cf40 100644 --- a/README.md +++ b/README.md @@ -367,8 +367,10 @@ 
rate](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/-/blob districts](https://gitlab.com/ccao-data-science---modeling/models/ccao_res_avm/-/blob/9407d1fae1986c5ce1f5434aa91d3f8cf06c8ea1/output/test_new_variables/county_school_boundaries_mean_encoded.html), and many others. The features in the table below are the ones that made the cut. They’re the right combination of easy to understand and impute, -powerfully predictive, and well-behaved. Most of them are in use in the -model as of 2025-01-08. +powerfully predictive, and well-behaved. + +For a machine-readable version of this data dictionary, see +[`docs/data-dict.csv`](./docs/data-dict.csv). | Feature Name | Variable Name | Description | Category | Possible Values | |:------------------------------------------------------------------------|:------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|:---------------------------------------------------------------------| diff --git a/docs/data-dict.csv b/docs/data-dict.csv new file mode 100644 index 00000000..e602aeb6 --- /dev/null +++ b/docs/data-dict.csv @@ -0,0 +1,97 @@ +feature_name,variable_name,description,category,possible_values +"Percent Population Age, Under 19 Years Old",acs5_percent_age_children,Percent of the people 17 years or younger,ACS5, +"Percent Population Age, Over 65 Years Old",acs5_percent_age_senior,Percent of the people 65 years or older,ACS5, +Median Population Age,acs5_median_age_total,Median age for whole population,ACS5, +"Percent Population Mobility, In Same House 1 Year Ago",acs5_percent_mobility_no_move,Percent of people (older than 1 year) who have not moved in the past 12 months,ACS5, +"Percent Population Mobility, Moved From Other State in Past Year",acs5_percent_mobility_moved_from_other_state,Percent of people (older than 1 year) who moved from 
another state in the past 12 months,ACS5, +"Percent Households Family, Married",acs5_percent_household_family_married,"Percent of households that are family, married",ACS5, +"Percent Households Nonfamily, Living Alone",acs5_percent_household_nonfamily_alone,"Percent of households that are non-family, alone (single)",ACS5, +"Percent Population Education, High School Degree",acs5_percent_education_high_school,Percent of people older than 25 who attained a high school degree,ACS5, +"Percent Population Education, Bachelor Degree",acs5_percent_education_bachelor,Percent of people older than 25 who attained a bachelor's degree,ACS5, +"Percent Population Education, Graduate Degree",acs5_percent_education_graduate,Percent of people older than 25 who attained a graduate degree,ACS5, +"Percent Population Income, Below Poverty Level",acs5_percent_income_below_poverty_level,Percent of people below the poverty level in the last 12 months,ACS5, +"Median Income, Household in Past Year",acs5_median_income_household_past_year,Median income per household in the past 12 months,ACS5, +"Median Income, Per Capita in Past Year",acs5_median_income_per_capita_past_year,Median income per capita in the past 12 months,ACS5, +"Percent Population Income, Received SNAP in Past Year",acs5_percent_income_household_received_snap_past_year,Percent of households that received SNAP in the past 12 months,ACS5, +"Percent Population Employment, Unemployed",acs5_percent_employment_unemployed,Percent of people 16 years and older unemployed,ACS5, +"Median Occupied Household, Total, Year Built",acs5_median_household_total_occupied_year_built,Median year built for all occupied households,ACS5, +"Median Occupied Household, Renter, Gross Rent",acs5_median_household_renter_occupied_gross_rent,Median gross rent for only renter-occupied units,ACS5, +"Percent Occupied Households, Owner",acs5_percent_household_owner_occupied,Percent of households that are owner-occupied,ACS5, +"Percent Occupied Households, Total, 
One or More Selected Conditions",acs5_percent_household_total_occupied_w_sel_cond,Percent of occupied households with selected conditions,ACS5, +"Percent Population Mobility, Moved From Within Same County in Past Year",acs5_percent_mobility_moved_in_county,Percent of people (older than 1 year) who moved in county in the past 12 months,ACS5, +Year Built,char_yrblt,Year the property was constructed,Characteristic, +Central Air Conditioning,char_air,Indicator for central air,Characteristic,"Central A/C, No Central A/C" +Apartments,char_apts,Number of apartments for class 211 and 212 properties,Characteristic,"Two, Three, Four, Five, Six, None" +Attic Finish,char_attic_fnsh,Attic finish,Characteristic,"Living Area, Partial, None" +Attic Type,char_attic_type,Attic type,Characteristic,"Full, Partial, None" +Bedrooms,char_beds,Number of bedrooms in the building,Characteristic, +Building Square Feet,char_bldg_sf,"Square footage of the building, as measured from the exterior",Characteristic, +Basement Type,char_bsmt,Basement type,Characteristic,"Full, Slab, Partial, Crawl" +Basement Finish,char_bsmt_fin,Basement finish,Characteristic,"Formal Rec Room, Apartment, Unfinished" +Property Class,char_class,Card-level property type and/or use,Characteristic, +Exterior Wall Material,char_ext_wall,Exterior wall construction,Characteristic,"Frame, Masonry, Frame + Masonry, Stucco" +Full Baths,char_fbath,Number of full bathrooms,Characteristic, +Fireplaces,char_frpl,Number of fireplaces,Characteristic, +Garage 1 Attached,char_gar1_att,Indicator for garage attached,Characteristic,"Yes, No" +Garage 1 Ext. 
Wall Material,char_gar1_cnst,Garage exterior wall construction,Characteristic,"Frame, Masonry, Frame + Masonry, Stucco" +Garage 1 Size,char_gar1_size,Garage size (number of cars),Characteristic,"1 cars, 1.5 cars, 2 cars, 2.5 cars, 3 cars, 3.5 cars, 0 cars, 4 cars" +Half Baths,char_hbath,Number of half baths,Characteristic, +Land Square Feet,char_land_sf,Square footage of the land (not just the building) of the property,Characteristic, +Central Heating,char_heat,Interior heating type,Characteristic,"Warm Air Furnace, Hot Water Steam, Electric Heater, None" +Number of Commercial Units,char_ncu,Number of commercial units,Characteristic, +Porch,char_porch,Porch type,Characteristic,"None, Frame Enclosed, Masonry Enclosed" +Roof Material,char_roof_cnst,Roof material / construction,Characteristic,"Shingle + Asphalt, Tar + Gravel, Slate, Shake, Tile, Other" +Rooms,char_rooms,Number of total rooms in the building (excluding baths),Characteristic, +Cathedral Ceiling,char_tp_dsgn,Deprecated,Characteristic,"Yes, No" +Type of Residence,char_type_resd,Type of residence,Characteristic,"1 Story, 2 Story, 3 Story +, Split Level, 1.5 Story, Missing" +Recent Renovation,char_recent_renovation,Indicates whether or not a property was renovated within the last 3 years,Characteristic, +Longitude,loc_longitude,X coordinate in degrees (global longitude),Location, +Latitude,loc_latitude,Y coordinate in degrees (global latitude),Location, +Census Tract GEOID,loc_census_tract_geoid,11-digit ACS/Census tract GEOID,Location, +First Street Factor,loc_env_flood_fs_factor,"First Street flood factor. The flood factor is a risk score, where 10 is the highest risk and 1 is the lowest risk",Location, +Airport Noise DNL,loc_env_airport_noise_dnl,"O'Hare and Midway noise, measured as DNL",Location, +School Elementary District GEOID,loc_school_elementary_district_geoid,School district (elementary) GEOID,Location, +School Secondary District GEOID,loc_school_secondary_district_geoid,School district 
(secondary) GEOID,Location, +CMAP Walkability Score (No Transit),loc_access_cmap_walk_nta_score,"CMAP walkability score for a given PIN, excluding transit walkability",Location, +CMAP Walkability Total Score,loc_access_cmap_walk_total_score,"CMAP walkability score for a given PIN, including transit walkability",Location, +Municipality Name,loc_tax_municipality_name,"Taxing district name, as seen on Cook County tax bills",Location, +Township Code,meta_township_code,Cook County township code,Meta, +Neighborhood Code,meta_nbhd_code,Assessor neighborhood code,Meta, +Number of sales within previous N years of sale/lien date,meta_sale_count_past_n_years,Number of sales within previous N years of sale/lien date,Meta, +Property Tax Bill Aggregate Rate,other_tax_bill_rate,Tax bill rate for the taxing district containing a given PIN,Other, +School District (Elementary) GreatSchools Rating,other_school_district_elementary_avg_rating,Average GreatSchools rating of elementary schools within the district of a given PIN,Other, +School District (Secondary) GreatSchools Rating,other_school_district_secondary_avg_rating,Average GreatSchools rating of secondary schools within the district of a given PIN,Other, +Active Homeowner Exemption,ccao_is_active_exe_homeowner,Parcel has an active homeowner exemption,Other, +Corner Lot,ccao_is_corner_lot,Corner lot indicator,Other, +Number of Years Active Homeowner Exemption,ccao_n_years_exe_homeowner,Number of years parcel has had an active homeowner exemption,Other, +Number of PINs in Half Mile,prox_num_pin_in_half_mile,Number of PINs within half mile,Proximity, +Number of Bus Stops in Half Mile,prox_num_bus_stop_in_half_mile,Number of bus stops within half mile,Proximity, +Number of Foreclosures Per 1000 PINs (Past 5 Years),prox_num_foreclosure_per_1000_pin_past_5_years,"Number of foreclosures per 1000 PINs, within half mile (past 5 years)",Proximity, +Number of Schools in Half Mile,prox_num_school_in_half_mile,Number of schools (any kind) 
within half mile,Proximity, +Number of Schools with Rating in Half Mile,prox_num_school_with_rating_in_half_mile,Number of schools (any kind) within half mile,Proximity, +Average School Rating in Half Mile,prox_avg_school_rating_in_half_mile,Average school rating of schools within half mile,Proximity, +Total Airport Noise DNL,prox_airport_dnl_total,"Estimated DNL for a PIN, assuming a baseline DNL of 50 (""quiet suburban"") and adding predicted noise from O'Hare and Midway airports to that baseline",Proximity, +Nearest Bike Trail Distance (Feet),prox_nearest_bike_trail_dist_ft,Nearest bike trail distance (feet),Proximity, +Nearest Cemetery Distance (Feet),prox_nearest_cemetery_dist_ft,Nearest cemetery distance (feet),Proximity, +Nearest CTA Route Distance (Feet),prox_nearest_cta_route_dist_ft,Nearest CTA route distance (feet),Proximity, +Nearest CTA Stop Distance (Feet),prox_nearest_cta_stop_dist_ft,Nearest CTA stop distance (feet),Proximity, +Nearest Hospital Distance (Feet),prox_nearest_hospital_dist_ft,Nearest hospital distance (feet),Proximity, +Lake Michigan Distance (Feet),prox_lake_michigan_dist_ft,Distance to Lake Michigan shoreline (feet),Proximity, +Nearest Major Road Distance (Feet),prox_nearest_major_road_dist_ft,Nearest major road distance (feet),Proximity, +Nearest Metra Route Distance (Feet),prox_nearest_metra_route_dist_ft,Nearest Metra route distance (feet),Proximity, +Nearest Metra Stop Distance (Feet),prox_nearest_metra_stop_dist_ft,Nearest Metra stop distance (feet),Proximity, +Nearest Park Distance (Feet),prox_nearest_park_dist_ft,Nearest park distance (feet),Proximity, +Nearest Railroad Distance (Feet),prox_nearest_railroad_dist_ft,Nearest railroad distance (feet),Proximity, +Nearest Secondary Road Distance (Feet),prox_nearest_secondary_road_dist_ft,Nearest secondary road distance (feet),Proximity, +Nearest University Distance (Feet),prox_nearest_university_dist_ft,Nearest university distance (feet),Proximity, +Nearest Vacant Land Parcel 
Distance (Feet),prox_nearest_vacant_land_dist_ft,Nearest vacant land (class 100) parcel distance (feet),Proximity, +Nearest Water Distance (Feet),prox_nearest_water_dist_ft,Nearest water distance (feet),Proximity, +Nearest Golf Course Distance (Feet),prox_nearest_golf_course_dist_ft,Nearest golf course distance (feet),Proximity, +Sale Year,time_sale_year,Sale year calculated as the number of years since 0 B.C.E,Time, +Sale Day,time_sale_day,"Sale day calculated as the number of days since January 1st, 1997",Time, +Sale Quarter of Year,time_sale_quarter_of_year,Character encoding of quarter of year (Q1 - Q4),Time, +Sale Month of Year,time_sale_month_of_year,Character encoding of month of year (Jan - Dec),Time, +Sale Day of Year,time_sale_day_of_year,Numeric encoding of day of year (1 - 365),Time, +Sale Day of Month,time_sale_day_of_month,Numeric encoding of day of month (1 - 31),Time, +Sale Day of Week,time_sale_day_of_week,Numeric encoding of day of week (1 - 7),Time, +Sale After COVID-19,time_sale_post_covid,"Indicator for whether sale occurred after COVID-19 was widely publicized (around March 15, 2020)",Time, From 46c163bcfb6984bee6e25ddfc56e5901a78f3a58 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 9 Jan 2025 19:23:05 +0000 Subject: [PATCH 03/11] Add pre-commit hook to check that the data dict is up to date with params --- .pre-commit-config.yaml | 5 +++++ R/hooks/check-data-dict.R | 9 +++++++++ 2 files changed, 14 insertions(+) create mode 100644 R/hooks/check-data-dict.R diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3d9242f2..8d290b45 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,3 +27,8 @@ repos: entry: Cannot commit .Rhistory, .RData, .Rds or .rds. 
language: fail files: '\.(Rhistory|RData|Rds|rds)$' + - id: check-data-dict + name: Data dictionary must be up to date + entry: Rscript R/hooks/check-data-dict.R + files: (^|/)((params\.yaml)|(data-dict\.csv))$ + language: system diff --git a/R/hooks/check-data-dict.R b/R/hooks/check-data-dict.R new file mode 100644 index 00000000..995fa2f9 --- /dev/null +++ b/R/hooks/check-data-dict.R @@ -0,0 +1,9 @@ +#!/usr/bin/env Rscript +# Script to check that the data dictionary file is up to date with the +# latest feature set +library(readr) +library(yaml) + +params <- read_yaml("params.yaml") +data_dict <- read_csv("docs/data-dict.csv", show_col_types = FALSE) +stopifnot(setequal(data_dict$variable_name, params$model$predictor$all)) From 7994e4fd82ac59c1333a8696e5a065334f513112 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 9 Jan 2025 19:58:40 +0000 Subject: [PATCH 04/11] Better error msg and dependencies in check-data-dict.R pre-commit hook --- .pre-commit-config.yaml | 6 ++++-- R/hooks/check-data-dict.R | 29 +++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8d290b45..8bbd70f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,9 @@ repos: language: fail files: '\.(Rhistory|RData|Rds|rds)$' - id: check-data-dict - name: Data dictionary must be up to date + name: Data dictionary must be up to date with params file entry: Rscript R/hooks/check-data-dict.R files: (^|/)((params\.yaml)|(data-dict\.csv))$ - language: system + language: r + additional_dependencies: + - yaml diff --git a/R/hooks/check-data-dict.R b/R/hooks/check-data-dict.R index 995fa2f9..a0899c0a 100644 --- a/R/hooks/check-data-dict.R +++ b/R/hooks/check-data-dict.R @@ -1,9 +1,30 @@ #!/usr/bin/env Rscript # Script to check that the data dictionary file is up to date with the # latest feature set -library(readr) library(yaml) -params <- read_yaml("params.yaml") -data_dict <- 
read_csv("docs/data-dict.csv", show_col_types = FALSE) -stopifnot(setequal(data_dict$variable_name, params$model$predictor$all)) +params_filename <- "params.yaml" +data_dict_filename <- "docs/data-dict.csv" + +params <- read_yaml(params_filename) +data_dict <- read.csv(data_dict_filename) + +symmetric_diff <- c( + setdiff(data_dict$variable_name, params$model$predictor$all), + setdiff(params$model$predictor$all, data_dict$variable_name) +) +symmetric_diff_len <- length(symmetric_diff) + +if (symmetric_diff_len > 0) { + err_msg_prefix <- ifelse(symmetric_diff_len == 1, "Param is", "Params are") + err_msg <- paste0( + err_msg_prefix, + " not present in both ", + params_filename, + " and ", + data_dict_filename, + ": ", + paste(symmetric_diff, collapse = ", ") + ) + stop(err_msg) +} From 5fb7b563c5ed85eadf984eb9b1e6ec2345bc6d50 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Thu, 9 Jan 2025 23:12:17 +0000 Subject: [PATCH 05/11] Update data dict and README to use latest feature info --- README.Rmd | 2 +- README.md | 201 +++++++++++++++++++++++---------------------- docs/data-dict.csv | 23 ++++-- renv.lock | 6 +- 4 files changed, 121 insertions(+), 111 deletions(-) diff --git a/README.Rmd b/README.Rmd index c14925a8..0825cd00 100644 --- a/README.Rmd +++ b/README.Rmd @@ -339,7 +339,7 @@ param_tbl_fmt <- param_tbl %>% category, char = "Characteristic", acs5 = "ACS5", loc = "Location", prox = "Proximity", ind = "Indicator", time = "Time", - meta = "Meta", other = "Other", ccao = "Other" + meta = "Meta", other = "Other", ccao = "Other", shp = "Parcel Shape" )) %>% pivot_wider( id_cols = `feature_name`:`category`, diff --git a/README.md b/README.md index 59d6cf40..b70c9c80 100644 --- a/README.md +++ b/README.md @@ -372,104 +372,109 @@ powerfully predictive, and well-behaved. For a machine-readable version of this data dictionary, see [`docs/data-dict.csv`](./docs/data-dict.csv). 
-| Feature Name | Variable Name | Description | Category | Possible Values | -|:------------------------------------------------------------------------|:------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|:---------------------------------------------------------------------| -| Percent Population Age, Under 19 Years Old | acs5_percent_age_children | Percent of the people 17 years or younger | ACS5 | | -| Percent Population Age, Over 65 Years Old | acs5_percent_age_senior | Percent of the people 65 years or older | ACS5 | | -| Median Population Age | acs5_median_age_total | Median age for whole population | ACS5 | | -| Percent Population Mobility, In Same House 1 Year Ago | acs5_percent_mobility_no_move | Percent of people (older than 1 year) who have not moved in the past 12 months | ACS5 | | -| Percent Population Mobility, Moved From Other State in Past Year | acs5_percent_mobility_moved_from_other_state | Percent of people (older than 1 year) who moved from another state in the past 12 months | ACS5 | | -| Percent Households Family, Married | acs5_percent_household_family_married | Percent of households that are family, married | ACS5 | | -| Percent Households Nonfamily, Living Alone | acs5_percent_household_nonfamily_alone | Percent of households that are non-family, alone (single) | ACS5 | | -| Percent Population Education, High School Degree | acs5_percent_education_high_school | Percent of people older than 25 who attained a high school degree | ACS5 | | -| Percent Population Education, Bachelor Degree | acs5_percent_education_bachelor | Percent of people older than 25 who attained a bachelor’s degree | ACS5 | | -| Percent Population Education, Graduate Degree | acs5_percent_education_graduate | Percent of people older than 25 who attained a graduate degree | ACS5 | | -| Percent 
Population Income, Below Poverty Level | acs5_percent_income_below_poverty_level | Percent of people above the poverty level in the last 12 months | ACS5 | | -| Median Income, Household in Past Year | acs5_median_income_household_past_year | Median income per household in the past 12 months | ACS5 | | -| Median Income, Per Capita in Past Year | acs5_median_income_per_capita_past_year | Median income per capita in the past 12 months | ACS5 | | -| Percent Population Income, Received SNAP in Past Year | acs5_percent_income_household_received_snap_past_year | Percent of households that received SNAP in the past 12 months | ACS5 | | -| Percent Population Employment, Unemployed | acs5_percent_employment_unemployed | Percent of people 16 years and older unemployed | ACS5 | | -| Median Occupied Household, Total, Year Built | acs5_median_household_total_occupied_year_built | Median year built for all occupied households | ACS5 | | -| Median Occupied Household, Renter, Gross Rent | acs5_median_household_renter_occupied_gross_rent | Median gross rent for only renter-occupied units | ACS5 | | -| Percent Occupied Households, Owner | acs5_percent_household_owner_occupied | Percent of households that are owner-occupied | ACS5 | | -| Percent Occupied Households, Total, One or More Selected Conditions | acs5_percent_household_total_occupied_w_sel_cond | Percent of occupied households with selected conditions | ACS5 | | -| Percent Population Mobility, Moved From Within Same County in Past Year | acs5_percent_mobility_moved_in_county | Percent of people (older than 1 year) who moved in county in the past 12 months | ACS5 | | -| Year Built | char_yrblt | Year the property was constructed | Characteristic | | -| Central Air Conditioning | char_air | Indicator for central air | Characteristic | Central A/C, No Central A/C | -| Apartments | char_apts | Number of apartments for class 211 and 212 properties | Characteristic | Two, Three, Four, Five, Six, None | -| Attic Finish | 
char_attic_fnsh | Attic finish | Characteristic | Living Area, Partial, None | -| Attic Type | char_attic_type | Attic type | Characteristic | Full, Partial, None | -| Bedrooms | char_beds | Number of bedrooms in the building | Characteristic | | -| Building Square Feet | char_bldg_sf | Square footage of the building, as measured from the exterior | Characteristic | | -| Basement Type | char_bsmt | Basement type | Characteristic | Full, Slab, Partial, Crawl | -| Basement Finish | char_bsmt_fin | Basement finish | Characteristic | Formal Rec Room, Apartment, Unfinished | -| Property Class | char_class | Card-level property type and/or use | Characteristic | | -| Exterior Wall Material | char_ext_wall | Exterior wall construction | Characteristic | Frame, Masonry, Frame + Masonry, Stucco | -| Full Baths | char_fbath | Number of full bathrooms | Characteristic | | -| Fireplaces | char_frpl | Number of fireplaces | Characteristic | | -| Garage 1 Attached | char_gar1_att | Indicator for garage attached | Characteristic | Yes, No | -| Garage 1 Ext. 
Wall Material | char_gar1_cnst | Garage exterior wall construction | Characteristic | Frame, Masonry, Frame + Masonry, Stucco | -| Garage 1 Size | char_gar1_size | Garage size (number of cars) | Characteristic | 1 cars, 1.5 cars, 2 cars, 2.5 cars, 3 cars, 3.5 cars, 0 cars, 4 cars | -| Half Baths | char_hbath | Number of half baths | Characteristic | | -| Land Square Feet | char_land_sf | Square footage of the land (not just the building) of the property | Characteristic | | -| Central Heating | char_heat | Interior heating type | Characteristic | Warm Air Furnace, Hot Water Steam, Electric Heater, None | -| Number of Commercial Units | char_ncu | Number of commercial units | Characteristic | | -| Porch | char_porch | Porch type | Characteristic | None, Frame Enclosed, Masonry Enclosed | -| Roof Material | char_roof_cnst | Roof material / construction | Characteristic | Shingle + Asphalt, Tar + Gravel, Slate, Shake, Tile, Other | -| Rooms | char_rooms | Number of total rooms in the building (excluding baths) | Characteristic | | -| Cathedral Ceiling | char_tp_dsgn | Deprecated | Characteristic | Yes, No | -| Type of Residence | char_type_resd | Type of residence | Characteristic | 1 Story, 2 Story, 3 Story +, Split Level, 1.5 Story, Missing | -| Recent Renovation | char_recent_renovation | Indicates whether or not a property was renovated within the last 3 years | Characteristic | | -| Longitude | loc_longitude | X coordinate in degrees (global longitude) | Location | | -| Latitude | loc_latitude | Y coordinate in degrees (global latitude) | Location | | -| Census Tract GEOID | loc_census_tract_geoid | 11-digit ACS/Census tract GEOID | Location | | -| First Street Factor | loc_env_flood_fs_factor | First Street flood factor The flood factor is a risk score, where 10 is the highest risk and 1 is the lowest risk | Location | | -| Airport Noise DNL | loc_env_airport_noise_dnl | O’Hare and Midway noise, measured as DNL | Location | | -| School Elementary District GEOID 
| loc_school_elementary_district_geoid | School district (elementary) GEOID | Location | | -| School Secondary District GEOID | loc_school_secondary_district_geoid | School district (secondary) GEOID | Location | | -| CMAP Walkability Score (No Transit) | loc_access_cmap_walk_nta_score | CMAP walkability score for a given PIN, excluding transit walkability | Location | | -| CMAP Walkability Total Score | loc_access_cmap_walk_total_score | CMAP walkability score for a given PIN, including transit walkability | Location | | -| Municipality Name | loc_tax_municipality_name | Taxing district name, as seen on Cook County tax bills | Location | | -| Township Code | meta_township_code | Cook County township code | Meta | | -| Neighborhood Code | meta_nbhd_code | Assessor neighborhood code | Meta | | -| Number of sales within previous N years of sale/lien date | meta_sale_count_past_n_years | Number of sales within previous N years of sale/lien date | Meta | | -| Property Tax Bill Aggregate Rate | other_tax_bill_rate | Tax bill rate for the taxing district containing a given PIN | Other | | -| School District (Elementary) GreatSchools Rating | other_school_district_elementary_avg_rating | Average GreatSchools rating of elementary schools within the district of a given PIN | Other | | -| School District (Secondary) GreatSchools Rating | other_school_district_secondary_avg_rating | Average GreatSchools rating of secondary schools within the district of a given PIN | Other | | -| Active Homeowner Exemption | ccao_is_active_exe_homeowner | Parcel has an active homeowner exemption | Other | | -| Corner Lot | ccao_is_corner_lot | Corner lot indicator | Other | | -| Number of Years Active Homeowner Exemption | ccao_n_years_exe_homeowner | Number of years parcel has had an active homeowner exemption | Other | | -| Number of PINs in Half Mile | prox_num_pin_in_half_mile | Number of PINs within half mile | Proximity | | -| Number of Bus Stops in Half Mile | 
prox_num_bus_stop_in_half_mile | Number of bus stops within half mile | Proximity | | -| Number of Foreclosures Per 1000 PINs (Past 5 Years) | prox_num_foreclosure_per_1000_pin_past_5_years | Number of foreclosures per 1000 PINs, within half mile (past 5 years) | Proximity | | -| Number of Schools in Half Mile | prox_num_school_in_half_mile | Number of schools (any kind) within half mile | Proximity | | -| Number of Schools with Rating in Half Mile | prox_num_school_with_rating_in_half_mile | Number of schools (any kind) within half mile | Proximity | | -| Average School Rating in Half Mile | prox_avg_school_rating_in_half_mile | Average school rating of schools within half mile | Proximity | | -| Total Airport Noise DNL | prox_airport_dnl_total | Estimated DNL for a PIN, assuming a baseline DNL of 50 (“quiet suburban”) and adding predicted noise from O’Hare and Midway airports to that baseline | Proximity | | -| Nearest Bike Trail Distance (Feet) | prox_nearest_bike_trail_dist_ft | Nearest bike trail distance (feet) | Proximity | | -| Nearest Cemetery Distance (Feet) | prox_nearest_cemetery_dist_ft | Nearest cemetery distance (feet) | Proximity | | -| Nearest CTA Route Distance (Feet) | prox_nearest_cta_route_dist_ft | Nearest CTA route distance (feet) | Proximity | | -| Nearest CTA Stop Distance (Feet) | prox_nearest_cta_stop_dist_ft | Nearest CTA stop distance (feet) | Proximity | | -| Nearest Hospital Distance (Feet) | prox_nearest_hospital_dist_ft | Nearest hospital distance (feet) | Proximity | | -| Lake Michigan Distance (Feet) | prox_lake_michigan_dist_ft | Distance to Lake Michigan shoreline (feet) | Proximity | | -| Nearest Major Road Distance (Feet) | prox_nearest_major_road_dist_ft | Nearest major road distance (feet) | Proximity | | -| Nearest Metra Route Distance (Feet) | prox_nearest_metra_route_dist_ft | Nearest Metra route distance (feet) | Proximity | | -| Nearest Metra Stop Distance (Feet) | prox_nearest_metra_stop_dist_ft | Nearest Metra stop 
distance (feet) | Proximity | | -| Nearest Park Distance (Feet) | prox_nearest_park_dist_ft | Nearest park distance (feet) | Proximity | | -| Nearest Railroad Distance (Feet) | prox_nearest_railroad_dist_ft | Nearest railroad distance (feet) | Proximity | | -| Nearest Secondary Road Distance (Feet) | prox_nearest_secondary_road_dist_ft | Nearest secondary road distance (feet) | Proximity | | -| Nearest University Distance (Feet) | prox_nearest_university_dist_ft | Nearest university distance (feet) | Proximity | | -| Nearest Vacant Land Parcel Distance (Feet) | prox_nearest_vacant_land_dist_ft | Nearest vacant land (class 100) parcel distance (feet) | Proximity | | -| Nearest Water Distance (Feet) | prox_nearest_water_dist_ft | Nearest water distance (feet) | Proximity | | -| Nearest Golf Course Distance (Feet) | prox_nearest_golf_course_dist_ft | Nearest golf course distance (feet) | Proximity | | -| Sale Year | time_sale_year | Sale year calculated as the number of years since 0 B.C.E | Time | | -| Sale Day | time_sale_day | Sale day calculated as the number of days since January 1st, 1997 | Time | | -| Sale Quarter of Year | time_sale_quarter_of_year | Character encoding of quarter of year (Q1 - Q4) | Time | | -| Sale Month of Year | time_sale_month_of_year | Character encoding of month of year (Jan - Dec) | Time | | -| Sale Day of Year | time_sale_day_of_year | Numeric encoding of day of year (1 - 365) | Time | | -| Sale Day of Month | time_sale_day_of_month | Numeric encoding of day of month (1 - 31) | Time | | -| Sale Day of Week | time_sale_day_of_week | Numeric encoding of day of week (1 - 7) | Time | | -| Sale After COVID-19 | time_sale_post_covid | Indicator for whether sale occurred after COVID-19 was widely publicized (around March 15, 2020) | Time | | +| Feature Name | Variable Name | Description | Category | Possible Values | 
+|:----------------------------------------------------------------------------|:------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|:---------------------------------------------------------------------| +| Percent Population Age, Under 19 Years Old | acs5_percent_age_children | Percent of the people 17 years or younger | ACS5 | | +| Percent Population Age, Over 65 Years Old | acs5_percent_age_senior | Percent of the people 65 years or older | ACS5 | | +| Median Population Age | acs5_median_age_total | Median age for whole population | ACS5 | | +| Percent Households Family, Married | acs5_percent_household_family_married | Percent of households that are family, married | ACS5 | | +| Percent Households Nonfamily, Living Alone | acs5_percent_household_nonfamily_alone | Percent of households that are non-family, alone (single) | ACS5 | | +| Percent Population Education, High School Degree | acs5_percent_education_high_school | Percent of people older than 25 who attained a high school degree | ACS5 | | +| Percent Population Education, Bachelor Degree | acs5_percent_education_bachelor | Percent of people older than 25 who attained a bachelor’s degree | ACS5 | | +| Percent Population Education, Graduate Degree | acs5_percent_education_graduate | Percent of people older than 25 who attained a graduate degree | ACS5 | | +| Percent Population Income, Below Poverty Level | acs5_percent_income_below_poverty_level | Percent of people above the poverty level in the last 12 months | ACS5 | | +| Median Income, Household in Past Year | acs5_median_income_household_past_year | Median income per household in the past 12 months | ACS5 | | +| Median Income, Per Capita in Past Year | acs5_median_income_per_capita_past_year | Median income per capita in the past 12 months | ACS5 | | +| Percent Population Income, 
Received SNAP in Past Year | acs5_percent_income_household_received_snap_past_year | Percent of households that received SNAP in the past 12 months | ACS5 | | +| Percent Population Employment, Unemployed | acs5_percent_employment_unemployed | Percent of people 16 years and older unemployed | ACS5 | | +| Median Occupied Household, Total, Year Built | acs5_median_household_total_occupied_year_built | Median year built for all occupied households | ACS5 | | +| Median Occupied Household, Renter, Gross Rent | acs5_median_household_renter_occupied_gross_rent | Median gross rent for only renter-occupied units | ACS5 | | +| Percent Occupied Households, Owner | acs5_percent_household_owner_occupied | Percent of households that are owner-occupied | ACS5 | | +| Year Built | char_yrblt | Year the property was constructed | Characteristic | | +| Central Air Conditioning | char_air | Indicator for central air | Characteristic | Central A/C, No Central A/C | +| Apartments | char_apts | Number of apartments for class 211 and 212 properties | Characteristic | Two, Three, Four, Five, Six, None | +| Attic Finish | char_attic_fnsh | Attic finish | Characteristic | Living Area, Partial, None | +| Attic Type | char_attic_type | Attic type | Characteristic | Full, Partial, None | +| Bedrooms | char_beds | Number of bedrooms in the building | Characteristic | | +| Building Square Feet | char_bldg_sf | Square footage of the building, as measured from the exterior | Characteristic | | +| Basement Type | char_bsmt | Basement type | Characteristic | Full, Slab, Partial, Crawl | +| Basement Finish | char_bsmt_fin | Basement finish | Characteristic | Formal Rec Room, Apartment, Unfinished | +| Property Class | char_class | Card-level property type and/or use | Characteristic | | +| Exterior Wall Material | char_ext_wall | Exterior wall construction | Characteristic | Frame, Masonry, Frame + Masonry, Stucco | +| Full Baths | char_fbath | Number of full bathrooms | Characteristic | | +| 
Fireplaces | char_frpl | Number of fireplaces | Characteristic | | +| Garage 1 Attached | char_gar1_att | Indicator for garage attached | Characteristic | Yes, No | +| Garage 1 Ext. Wall Material | char_gar1_cnst | Garage exterior wall construction | Characteristic | Frame, Masonry, Frame + Masonry, Stucco | +| Garage 1 Size | char_gar1_size | Garage size (number of cars) | Characteristic | 1 cars, 1.5 cars, 2 cars, 2.5 cars, 3 cars, 3.5 cars, 0 cars, 4 cars | +| Half Baths | char_hbath | Number of half baths | Characteristic | | +| Land Square Feet | char_land_sf | Square footage of the land (not just the building) of the property | Characteristic | | +| Central Heating | char_heat | Interior heating type | Characteristic | Warm Air Furnace, Hot Water Steam, Electric Heater, None | +| Number of Commercial Units | char_ncu | Number of commercial units | Characteristic | | +| Porch | char_porch | Porch type | Characteristic | None, Frame Enclosed, Masonry Enclosed | +| Roof Material | char_roof_cnst | Roof material / construction | Characteristic | Shingle + Asphalt, Tar + Gravel, Slate, Shake, Tile, Other | +| Rooms | char_rooms | Number of total rooms in the building (excluding baths) | Characteristic | | +| Cathedral Ceiling | char_tp_dsgn | Deprecated | Characteristic | Yes, No | +| Type of Residence | char_type_resd | Type of residence | Characteristic | 1 Story, 2 Story, 3 Story +, Split Level, 1.5 Story, Missing | +| Recent Renovation | char_recent_renovation | Indicates whether or not a property was renovated within the last 3 years | Characteristic | | +| Longitude | loc_longitude | X coordinate in degrees (global longitude) | Location | | +| Latitude | loc_latitude | Y coordinate in degrees (global latitude) | Location | | +| Census Tract GEOID | loc_census_tract_geoid | 11-digit ACS/Census tract GEOID | Location | | +| First Street Factor | loc_env_flood_fs_factor | First Street flood factor The flood factor is a risk score, where 10 is the highest risk 
and 1 is the lowest risk | Location | | +| School Elementary District GEOID | loc_school_elementary_district_geoid | School district (elementary) GEOID | Location | | +| School Secondary District GEOID | loc_school_secondary_district_geoid | School district (secondary) GEOID | Location | | +| CMAP Walkability Score (No Transit) | loc_access_cmap_walk_nta_score | CMAP walkability score for a given PIN, excluding transit walkability | Location | | +| CMAP Walkability Total Score | loc_access_cmap_walk_total_score | CMAP walkability score for a given PIN, including transit walkability | Location | | +| Municipality Name | loc_tax_municipality_name | Taxing district name, as seen on Cook County tax bills | Location | | +| Township Code | meta_township_code | Cook County township code | Meta | | +| Neighborhood Code | meta_nbhd_code | Assessor neighborhood code | Meta | | +| Number of sales within previous N years of sale/lien date | meta_sale_count_past_n_years | Number of sales within previous N years of sale/lien date | Meta | | +| Property Tax Bill Aggregate Rate | other_tax_bill_rate | Tax bill rate for the taxing district containing a given PIN | Other | | +| School District (Elementary) GreatSchools Rating | other_school_district_elementary_avg_rating | Average GreatSchools rating of elementary schools within the district of a given PIN | Other | | +| School District (Secondary) GreatSchools Rating | other_school_district_secondary_avg_rating | Average GreatSchools rating of secondary schools within the district of a given PIN | Other | | +| Active Homeowner Exemption | ccao_is_active_exe_homeowner | Parcel has an active homeowner exemption | Other | | +| Corner Lot | ccao_is_corner_lot | Corner lot indicator | Other | | +| Number of Years Active Homeowner Exemption | ccao_n_years_exe_homeowner | Number of years parcel has had an active homeowner exemption | Other | | +| Standard Deviation Distance From Parcel Centroid to Vertices (Feet) | 
shp_parcel_centroid_dist_ft_sd | Standard deviation of the distance from each major parcel vertex to the parcel centroid | Parcel Shape | | +| Standard Deviation Parcel Edge Length (Feet) | shp_parcel_edge_len_ft_sd | Standard deviation of the edge length between parcel vertices | Parcel Shape | | +| Standard Deviation Parcel Interior Angle (Degrees) | shp_parcel_interior_angle_sd | Standard deviation of the interior angles of the parcel polygon | Parcel Shape | | +| Ratio of Parcel Area to Minimum Rotated Bounding Rectangle | shp_parcel_mrr_area_ratio | Ratio of the parcel’s area to the area of its \[minimum rotated bounding rectangle\]( | Parcel Shape | | +| Ratio of Parcel Minimum Rotated Bounding Rectangle Longest to Shortest Side | shp_parcel_mrr_side_ratio | Ratio of the longest to the shortest side of the parcel’s \[minimum rotated bounding rectangle\]( | Parcel Shape | | +| Number of Parcel Vertices | shp_parcel_num_vertices | The number of vertices of the parcel | Parcel Shape | | +| Number of PINs in Half Mile | prox_num_pin_in_half_mile | Number of PINs within half mile | Proximity | | +| Number of Bus Stops in Half Mile | prox_num_bus_stop_in_half_mile | Number of bus stops within half mile | Proximity | | +| Number of Foreclosures Per 1000 PINs (Past 5 Years) | prox_num_foreclosure_per_1000_pin_past_5_years | Number of foreclosures per 1000 PINs, within half mile (past 5 years) | Proximity | | +| Average School Rating in Half Mile | prox_avg_school_rating_in_half_mile | Average school rating of schools within half mile | Proximity | | +| Total Airport Noise DNL | prox_airport_dnl_total | Estimated DNL for a PIN, assuming a baseline DNL of 50 (“quiet suburban”) and adding predicted noise from O’Hare and Midway airports to that baseline | Proximity | | +| Nearest Bike Trail Distance (Feet) | prox_nearest_bike_trail_dist_ft | Nearest bike trail distance (feet) | Proximity | | +| Nearest Cemetery Distance (Feet) | prox_nearest_cemetery_dist_ft | Nearest 
cemetery distance (feet) | Proximity | | +| Nearest CTA Route Distance (Feet) | prox_nearest_cta_route_dist_ft | Nearest CTA route distance (feet) | Proximity | | +| Nearest CTA Stop Distance (Feet) | prox_nearest_cta_stop_dist_ft | Nearest CTA stop distance (feet) | Proximity | | +| Nearest Hospital Distance (Feet) | prox_nearest_hospital_dist_ft | Nearest hospital distance (feet) | Proximity | | +| Lake Michigan Distance (Feet) | prox_lake_michigan_dist_ft | Distance to Lake Michigan shoreline (feet) | Proximity | | +| Nearest Metra Route Distance (Feet) | prox_nearest_metra_route_dist_ft | Nearest Metra route distance (feet) | Proximity | | +| Nearest Metra Stop Distance (Feet) | prox_nearest_metra_stop_dist_ft | Nearest Metra stop distance (feet) | Proximity | | +| Nearest Park Distance (Feet) | prox_nearest_park_dist_ft | Nearest park distance (feet) | Proximity | | +| Nearest Railroad Distance (Feet) | prox_nearest_railroad_dist_ft | Nearest railroad distance (feet) | Proximity | | +| Nearest University Distance (Feet) | prox_nearest_university_dist_ft | Nearest university distance (feet) | Proximity | | +| Nearest Vacant Land Parcel Distance (Feet) | prox_nearest_vacant_land_dist_ft | Nearest vacant land (class 100) parcel distance (feet) | Proximity | | +| Nearest Water Distance (Feet) | prox_nearest_water_dist_ft | Nearest water distance (feet) | Proximity | | +| Nearest Golf Course Distance (Feet) | prox_nearest_golf_course_dist_ft | Nearest golf course distance (feet) | Proximity | | +| Nearest Highway Distance (Feet) | prox_nearest_road_highway_dist_ft | Distance to nearest highway road | Proximity | | +| Nearest Arterial Road Distance (Feet) | prox_nearest_road_arterial_dist_ft | Distance to nearest arterial road | Proximity | | +| Nearest Collector Road Distance (Feet) | prox_nearest_road_collector_dist_ft | Distance to nearest collector road | Proximity | | +| Average Daily Traffic Count on Nearest Highway | prox_nearest_road_highway_daily_traffic | 
Daily traffic of nearest highway road | Proximity | | +| Average Daily Traffic Count on Nearest Arterial Road | prox_nearest_road_arterial_daily_traffic | Daily traffic of nearest arterial road | Proximity | | +| Average Daily Traffic Count on Nearest Collector Road | prox_nearest_road_collector_daily_traffic | Daily traffic of nearest collector road | Proximity | | +| Nearest New Construction (Feet) | prox_nearest_new_construction_dist_ft | Nearest new construction distance (feet) | Proximity | | +| Nearest Major Stadium (Feet) | prox_nearest_stadium_dist_ft | Nearest stadium distance (feet) | Proximity | | +| Sale Year | time_sale_year | Sale year calculated as the number of years since 0 B.C.E | Time | | +| Sale Day | time_sale_day | Sale day calculated as the number of days since January 1st, 1997 | Time | | +| Sale Quarter of Year | time_sale_quarter_of_year | Character encoding of quarter of year (Q1 - Q4) | Time | | +| Sale Month of Year | time_sale_month_of_year | Character encoding of month of year (Jan - Dec) | Time | | +| Sale Day of Year | time_sale_day_of_year | Numeric encoding of day of year (1 - 365) | Time | | +| Sale Day of Month | time_sale_day_of_month | Numeric encoding of day of month (1 - 31) | Time | | +| Sale Day of Week | time_sale_day_of_week | Numeric encoding of day of week (1 - 7) | Time | | +| Sale After COVID-19 | time_sale_post_covid | Indicator for whether sale occurred after COVID-19 was widely publicized (around March 15, 2020) | Time | | #### Data Sources diff --git a/docs/data-dict.csv b/docs/data-dict.csv index e602aeb6..3b1be660 100644 --- a/docs/data-dict.csv +++ b/docs/data-dict.csv @@ -2,8 +2,6 @@ feature_name,variable_name,description,category,possible_values "Percent Population Age, Under 19 Years Old",acs5_percent_age_children,Percent of the people 17 years or younger,ACS5, "Percent Population Age, Over 65 Years Old",acs5_percent_age_senior,Percent of the people 65 years or older,ACS5, Median Population 
Age,acs5_median_age_total,Median age for whole population,ACS5, -"Percent Population Mobility, In Same House 1 Year Ago",acs5_percent_mobility_no_move,Percent of people (older than 1 year) who have not moved in the past 12 months,ACS5, -"Percent Population Mobility, Moved From Other State in Past Year",acs5_percent_mobility_moved_from_other_state,Percent of people (older than 1 year) who moved from another state in the past 12 months,ACS5, "Percent Households Family, Married",acs5_percent_household_family_married,"Percent of households that are family, married",ACS5, "Percent Households Nonfamily, Living Alone",acs5_percent_household_nonfamily_alone,"Percent of households that are non-family, alone (single)",ACS5, "Percent Population Education, High School Degree",acs5_percent_education_high_school,Percent of people older than 25 who attained a high school degree,ACS5, @@ -17,8 +15,6 @@ Median Population Age,acs5_median_age_total,Median age for whole population,ACS5 "Median Occupied Household, Total, Year Built",acs5_median_household_total_occupied_year_built,Median year built for all occupied households,ACS5, "Median Occupied Household, Renter, Gross Rent",acs5_median_household_renter_occupied_gross_rent,Median gross rent for only renter-occupied units,ACS5, "Percent Occupied Households, Owner",acs5_percent_household_owner_occupied,Percent of households that are owner-occupied,ACS5, -"Percent Occupied Households, Total, One or More Selected Conditions",acs5_percent_household_total_occupied_w_sel_cond,Percent of occupied households with selected conditions,ACS5, -"Percent Population Mobility, Moved From Within Same County in Past Year",acs5_percent_mobility_moved_in_county,Percent of people (older than 1 year) who moved in county in the past 12 months,ACS5, Year Built,char_yrblt,Year the property was constructed,Characteristic, Central Air Conditioning,char_air,Indicator for central air,Characteristic,"Central A/C, No Central A/C" Apartments,char_apts,Number of 
apartments for class 211 and 212 properties,Characteristic,"Two, Three, Four, Five, Six, None" @@ -49,7 +45,6 @@ Longitude,loc_longitude,X coordinate in degrees (global longitude),Location, Latitude,loc_latitude,Y coordinate in degrees (global latitude),Location, Census Tract GEOID,loc_census_tract_geoid,11-digit ACS/Census tract GEOID,Location, First Street Factor,loc_env_flood_fs_factor,"First Street flood factor The flood factor is a risk score, where 10 is the highest risk and 1 is the lowest risk",Location, -Airport Noise DNL,loc_env_airport_noise_dnl,"O'Hare and Midway noise, measured as DNL",Location, School Elementary District GEOID,loc_school_elementary_district_geoid,School district (elementary) GEOID,Location, School Secondary District GEOID,loc_school_secondary_district_geoid,School district (secondary) GEOID,Location, CMAP Walkability Score (No Transit),loc_access_cmap_walk_nta_score,"CMAP walkability score for a given PIN, excluding transit walkability",Location, @@ -64,11 +59,15 @@ School District (Secondary) GreatSchools Rating,other_school_district_secondary_ Active Homeowner Exemption,ccao_is_active_exe_homeowner,Parcel has an active homeowner exemption,Other, Corner Lot,ccao_is_corner_lot,Corner lot indicator,Other, Number of Years Active Homeowner Exemption,ccao_n_years_exe_homeowner,Number of years parcel has had an active homeowner exemption,Other, +Standard Deviation Distance From Parcel Centroid to Vertices (Feet),shp_parcel_centroid_dist_ft_sd,Standard deviation of the distance from each major parcel vertex to the parcel centroid,Parcel Shape, +Standard Deviation Parcel Edge Length (Feet),shp_parcel_edge_len_ft_sd,Standard deviation of the edge length between parcel vertices,Parcel Shape, +Standard Deviation Parcel Interior Angle (Degrees),shp_parcel_interior_angle_sd,Standard deviation of the interior angles of the parcel polygon,Parcel Shape, +Ratio of Parcel Area to Minimum Rotated Bounding Rectangle,shp_parcel_mrr_area_ratio,Ratio of 
the parcel's area to the area of its [minimum rotated bounding rectangle](https://en,Parcel Shape, +Ratio of Parcel Minimum Rotated Bounding Rectangle Longest to Shortest Side,shp_parcel_mrr_side_ratio,Ratio of the longest to the shortest side of the parcel's [minimum rotated bounding rectangle](https://en,Parcel Shape, +Number of Parcel Vertices,shp_parcel_num_vertices,The number of vertices of the parcel,Parcel Shape, Number of PINs in Half Mile,prox_num_pin_in_half_mile,Number of PINs within half mile,Proximity, Number of Bus Stops in Half Mile,prox_num_bus_stop_in_half_mile,Number of bus stops within half mile,Proximity, Number of Foreclosures Per 1000 PINs (Past 5 Years),prox_num_foreclosure_per_1000_pin_past_5_years,"Number of foreclosures per 1000 PINs, within half mile (past 5 years)",Proximity, -Number of Schools in Half Mile,prox_num_school_in_half_mile,Number of schools (any kind) within half mile,Proximity, -Number of Schools with Rating in Half Mile,prox_num_school_with_rating_in_half_mile,Number of schools (any kind) within half mile,Proximity, Average School Rating in Half Mile,prox_avg_school_rating_in_half_mile,Average school rating of schools within half mile,Proximity, Total Airport Noise DNL,prox_airport_dnl_total,"Estimated DNL for a PIN, assuming a baseline DNL of 50 (""quiet suburban"") and adding predicted noise from O'Hare and Midway airports to that baseline",Proximity, Nearest Bike Trail Distance (Feet),prox_nearest_bike_trail_dist_ft,Nearest bike trail distance (feet),Proximity, @@ -77,16 +76,22 @@ Nearest CTA Route Distance (Feet),prox_nearest_cta_route_dist_ft,Nearest CTA rou Nearest CTA Stop Distance (Feet),prox_nearest_cta_stop_dist_ft,Nearest CTA stop distance (feet),Proximity, Nearest Hospital Distance (Feet),prox_nearest_hospital_dist_ft,Nearest hospital distance (feet),Proximity, Lake Michigan Distance (Feet),prox_lake_michigan_dist_ft,Distance to Lake Michigan shoreline (feet),Proximity, -Nearest Major Road Distance 
(Feet),prox_nearest_major_road_dist_ft,Nearest major road distance (feet),Proximity, Nearest Metra Route Distance (Feet),prox_nearest_metra_route_dist_ft,Nearest Metra route distance (feet),Proximity, Nearest Metra Stop Distance (Feet),prox_nearest_metra_stop_dist_ft,Nearest Metra stop distance (feet),Proximity, Nearest Park Distance (Feet),prox_nearest_park_dist_ft,Nearest park distance (feet),Proximity, Nearest Railroad Distance (Feet),prox_nearest_railroad_dist_ft,Nearest railroad distance (feet),Proximity, -Nearest Secondary Road Distance (Feet),prox_nearest_secondary_road_dist_ft,Nearest secondary road distance (feet),Proximity, Nearest University Distance (Feet),prox_nearest_university_dist_ft,Nearest university distance (feet),Proximity, Nearest Vacant Land Parcel Distance (Feet),prox_nearest_vacant_land_dist_ft,Nearest vacant land (class 100) parcel distance (feet),Proximity, Nearest Water Distance (Feet),prox_nearest_water_dist_ft,Nearest water distance (feet),Proximity, Nearest Golf Course Distance (Feet),prox_nearest_golf_course_dist_ft,Nearest golf course distance (feet),Proximity, +Nearest Highway Distance (Feet),prox_nearest_road_highway_dist_ft,Distance to nearest highway road,Proximity, +Nearest Arterial Road Distance (Feet),prox_nearest_road_arterial_dist_ft,Distance to nearest arterial road,Proximity, +Nearest Collector Road Distance (Feet),prox_nearest_road_collector_dist_ft,Distance to nearest collector road,Proximity, +Average Daily Traffic Count on Nearest Highway,prox_nearest_road_highway_daily_traffic,Daily traffic of nearest highway road,Proximity, +Average Daily Traffic Count on Nearest Arterial Road,prox_nearest_road_arterial_daily_traffic,Daily traffic of nearest arterial road,Proximity, +Average Daily Traffic Count on Nearest Collector Road,prox_nearest_road_collector_daily_traffic,Daily traffic of nearest collector road,Proximity, +Nearest New Construction (Feet),prox_nearest_new_construction_dist_ft,Nearest new construction distance 
(feet),Proximity, +Nearest Major Stadium (Feet),prox_nearest_stadium_dist_ft,Nearest stadium distance (feet),Proximity, Sale Year,time_sale_year,Sale year calculated as the number of years since 0 B.C.E,Time, Sale Day,time_sale_day,"Sale day calculated as the number of days since January 1st, 1997",Time, Sale Quarter of Year,time_sale_quarter_of_year,Character encoding of quarter of year (Q1 - Q4),Time, diff --git a/renv.lock b/renv.lock index 6d914d51..28f04401 100644 --- a/renv.lock +++ b/renv.lock @@ -295,11 +295,11 @@ "Version": "1.3.0", "Source": "GitHub", "RemoteType": "github", + "RemoteHost": "api.github.com", "RemoteUsername": "ccao-data", "RemoteRepo": "ccao", "RemoteRef": "master", - "RemoteSha": "6445f79e6b4207a174c22d7a139511cf8e2516b6", - "RemoteHost": "api.github.com", + "RemoteSha": "8b6f53e14c1732fcec5f6982fbc4bfb32f45f194", "Requirements": [ "R", "assessr", @@ -308,7 +308,7 @@ "rlang", "tidyr" ], - "Hash": "d452fba08dff15c8379f18aa03af084e" + "Hash": "1663306aa228ded9892f07d65ec20db3" }, "class": { "Package": "class", From ffbc73d16eda35ed2abfaa3df04ccdb8732475d3 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 10 Jan 2025 17:26:33 +0000 Subject: [PATCH 06/11] Remove corner lot indicator from README and data dict --- README.md | 1 - docs/data-dict.csv | 1 - 2 files changed, 2 deletions(-) diff --git a/README.md b/README.md index b70c9c80..9d637ffd 100644 --- a/README.md +++ b/README.md @@ -432,7 +432,6 @@ For a machine-readable version of this data dictionary, see | School District (Elementary) GreatSchools Rating | other_school_district_elementary_avg_rating | Average GreatSchools rating of elementary schools within the district of a given PIN | Other | | | School District (Secondary) GreatSchools Rating | other_school_district_secondary_avg_rating | Average GreatSchools rating of secondary schools within the district of a given PIN | Other | | | Active Homeowner Exemption | ccao_is_active_exe_homeowner | Parcel has an active homeowner 
exemption | Other | | -| Corner Lot | ccao_is_corner_lot | Corner lot indicator | Other | | | Number of Years Active Homeowner Exemption | ccao_n_years_exe_homeowner | Number of years parcel has had an active homeowner exemption | Other | | | Standard Deviation Distance From Parcel Centroid to Vertices (Feet) | shp_parcel_centroid_dist_ft_sd | Standard deviation of the distance from each major parcel vertex to the parcel centroid | Parcel Shape | | | Standard Deviation Parcel Edge Length (Feet) | shp_parcel_edge_len_ft_sd | Standard deviation of the edge length between parcel vertices | Parcel Shape | | diff --git a/docs/data-dict.csv b/docs/data-dict.csv index 3b1be660..f0b1d6be 100644 --- a/docs/data-dict.csv +++ b/docs/data-dict.csv @@ -57,7 +57,6 @@ Property Tax Bill Aggregate Rate,other_tax_bill_rate,Tax bill rate for the taxin School District (Elementary) GreatSchools Rating,other_school_district_elementary_avg_rating,Average GreatSchools rating of elementary schools within the district of a given PIN,Other, School District (Secondary) GreatSchools Rating,other_school_district_secondary_avg_rating,Average GreatSchools rating of secondary schools within the district of a given PIN,Other, Active Homeowner Exemption,ccao_is_active_exe_homeowner,Parcel has an active homeowner exemption,Other, -Corner Lot,ccao_is_corner_lot,Corner lot indicator,Other, Number of Years Active Homeowner Exemption,ccao_n_years_exe_homeowner,Number of years parcel has had an active homeowner exemption,Other, Standard Deviation Distance From Parcel Centroid to Vertices (Feet),shp_parcel_centroid_dist_ft_sd,Standard deviation of the distance from each major parcel vertex to the parcel centroid,Parcel Shape, Standard Deviation Parcel Edge Length (Feet),shp_parcel_edge_len_ft_sd,Standard deviation of the edge length between parcel vertices,Parcel Shape, From 2e300ec7e127a58b690706a07b04364199f6c670 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 10 Jan 2025 20:22:03 +0000 Subject: 
[PATCH 07/11] Fix descriptions for parcel shape characteristics in docs and data dict --- README.md | 4 ++-- docs/data-dict.csv | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9d637ffd..162ddddf 100644 --- a/README.md +++ b/README.md @@ -436,8 +436,8 @@ For a machine-readable version of this data dictionary, see | Standard Deviation Distance From Parcel Centroid to Vertices (Feet) | shp_parcel_centroid_dist_ft_sd | Standard deviation of the distance from each major parcel vertex to the parcel centroid | Parcel Shape | | | Standard Deviation Parcel Edge Length (Feet) | shp_parcel_edge_len_ft_sd | Standard deviation of the edge length between parcel vertices | Parcel Shape | | | Standard Deviation Parcel Interior Angle (Degrees) | shp_parcel_interior_angle_sd | Standard deviation of the interior angles of the parcel polygon | Parcel Shape | | -| Ratio of Parcel Area to Minimum Rotated Bounding Rectangle | shp_parcel_mrr_area_ratio | Ratio of the parcel’s area to the area of its \[minimum rotated bounding rectangle\]( | Parcel Shape | | -| Ratio of Parcel Minimum Rotated Bounding Rectangle Longest to Shortest Side | shp_parcel_mrr_side_ratio | Ratio of the longest to the shortest side of the parcel’s \[minimum rotated bounding rectangle\]( | Parcel Shape | | +| Ratio of Parcel Area to Minimum Rotated Bounding Rectangle | shp_parcel_mrr_area_ratio | Ratio of the parcel’s area to the area of its minimum rotated bounding rectangle | Parcel Shape | | +| Ratio of Parcel Minimum Rotated Bounding Rectangle Longest to Shortest Side | shp_parcel_mrr_side_ratio | Ratio of the longest to the shortest side of the parcel’s minimum rotated bounding rectangle | Parcel Shape | | | Number of Parcel Vertices | shp_parcel_num_vertices | The number of vertices of the parcel | Parcel Shape | | | Number of PINs in Half Mile | prox_num_pin_in_half_mile | Number of PINs within half mile | Proximity | | | Number of Bus Stops in Half Mile | 
prox_num_bus_stop_in_half_mile | Number of bus stops within half mile | Proximity | | diff --git a/docs/data-dict.csv b/docs/data-dict.csv index f0b1d6be..cbd3b869 100644 --- a/docs/data-dict.csv +++ b/docs/data-dict.csv @@ -61,8 +61,8 @@ Number of Years Active Homeowner Exemption,ccao_n_years_exe_homeowner,Number of Standard Deviation Distance From Parcel Centroid to Vertices (Feet),shp_parcel_centroid_dist_ft_sd,Standard deviation of the distance from each major parcel vertex to the parcel centroid,Parcel Shape, Standard Deviation Parcel Edge Length (Feet),shp_parcel_edge_len_ft_sd,Standard deviation of the edge length between parcel vertices,Parcel Shape, Standard Deviation Parcel Interior Angle (Degrees),shp_parcel_interior_angle_sd,Standard deviation of the interior angles of the parcel polygon,Parcel Shape, -Ratio of Parcel Area to Minimum Rotated Bounding Rectangle,shp_parcel_mrr_area_ratio,Ratio of the parcel's area to the area of its [minimum rotated bounding rectangle](https://en,Parcel Shape, -Ratio of Parcel Minimum Rotated Bounding Rectangle Longest to Shortest Side,shp_parcel_mrr_side_ratio,Ratio of the longest to the shortest side of the parcel's [minimum rotated bounding rectangle](https://en,Parcel Shape, +Ratio of Parcel Area to Minimum Rotated Bounding Rectangle,shp_parcel_mrr_area_ratio,Ratio of the parcel's area to the area of its minimum rotated bounding rectangle,Parcel Shape, +Ratio of Parcel Minimum Rotated Bounding Rectangle Longest to Shortest Side,shp_parcel_mrr_side_ratio,Ratio of the longest to the shortest side of the parcel's minimum rotated bounding rectangle,Parcel Shape, Number of Parcel Vertices,shp_parcel_num_vertices,The number of vertices of the parcel,Parcel Shape, Number of PINs in Half Mile,prox_num_pin_in_half_mile,Number of PINs within half mile,Proximity, Number of Bus Stops in Half Mile,prox_num_bus_stop_in_half_mile,Number of bus stops within half mile,Proximity, From 95db04c1906298505c0b8256f7cdd8eaa9e68874 Mon Sep 17 
00:00:00 2001 From: Jean Cochrane Date: Fri, 10 Jan 2025 22:03:10 +0000 Subject: [PATCH 08/11] Set up tmate session in pre-commit workflow to debug cache problems --- .github/workflows/pre-commit.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 53059fd8..224b499e 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -14,3 +14,7 @@ jobs: - name: Run pre-commit checks uses: ccao-data/actions/pre-commit@main + + - name: Setup tmate session + if: always() + uses: mxschmitt/action-tmate@v3 From ad157a1d46dc4da069de5a9b2825f5bfed61e6f3 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 10 Jan 2025 22:22:40 +0000 Subject: [PATCH 09/11] Revert "Set up tmate session in pre-commit workflow to debug cache problems" This reverts commit 95db04c1906298505c0b8256f7cdd8eaa9e68874. --- .github/workflows/pre-commit.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 224b499e..53059fd8 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -14,7 +14,3 @@ jobs: - name: Run pre-commit checks uses: ccao-data/actions/pre-commit@main - - - name: Setup tmate session - if: always() - uses: mxschmitt/action-tmate@v3 From 87ba713d8736ad3f9e8a7f4e313eecd8ff320be8 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 10 Jan 2025 22:23:06 +0000 Subject: [PATCH 10/11] Pin to branch of pre-commit action to test improved R caching --- .github/workflows/pre-commit.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 53059fd8..f729c09e 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -13,4 +13,4 @@ jobs: uses: actions/checkout@v4 - name: Run pre-commit checks - uses: ccao-data/actions/pre-commit@main + uses: 
ccao-data/actions/pre-commit@jeancochrane/cache-additional-renv-dependencies-in-pre-commit From 7a885041b4c14d2572f03ea20000e64bd27356df Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 10 Jan 2025 22:37:18 +0000 Subject: [PATCH 11/11] Revert "Pin to branch of pre-commit action to test improved R caching" This reverts commit 87ba713d8736ad3f9e8a7f4e313eecd8ff320be8. --- .github/workflows/pre-commit.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index f729c09e..53059fd8 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -13,4 +13,4 @@ jobs: uses: actions/checkout@v4 - name: Run pre-commit checks - uses: ccao-data/actions/pre-commit@jeancochrane/cache-additional-renv-dependencies-in-pre-commit + uses: ccao-data/actions/pre-commit@main