kuriwaki · kuriwaki · Oct 26, 2024 · Oct 8, 2024 · Oct 26, 2024 · Oct 26, 2024
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -2,9 +2,9 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [main, master]
+    branches: [main]
   pull_request:
-    branches: [main, master]
+    branches: [main, dev]
   release:
     types: [published]
   workflow_dispatch:

diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,5 @@ docs/
 
 
 vignettes/synth_cache
+
+KPEcd2008.csv
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -26,7 +26,6 @@ Imports:
     magrittr,
     fs,
     rlang,
-    Formula,
     dplyr (>= 1.1.0),
     tidyr,
     haven (>= 2.1.0),

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,8 @@
 # cccesMRPprep 0.1.13
 
 * Add 2022, 2024, 2006, and 2012 daily kos cd data
+* Fix minor error where pct values in cd_info from 2006-2012, 2022-2024 were not using two-party voteshare
+* cd_info now distinguishes between total votes including third party (`presvotes_total`) and D+R votes (`presvotes_DR`)
 
 # cccesMRPprep 0.1.12
 

diff --git a/R/cces_std-for-acs.R b/R/cces_std-for-acs.R
@@ -146,12 +146,12 @@ ccc_std_demographics <- function(tbl,
   # hispanic conversion
   if (wh_as_hisp && ("hispanic" %in% colnames(tbl_mod))) {
     tbl_mod <- tbl_mod %>%
-      mutate(race = replace(race, race_cces_chr == "White" & hispanic == 1, race_cces_to_acs$race[3]))
+      mutate(race = replace(race, race_cces_chr == "White" & .data$hispanic == 1, race_cces_to_acs$race[3]))
   }
 
   if (bh_as_hisp && ("hispanic" %in% colnames(tbl_mod))) {
     tbl_mod <- tbl_mod %>%
-      mutate(race = replace(race, race_cces_chr == "Black" & hispanic == 1, race_cces_to_acs$race[3]))
+      mutate(race = replace(race, race_cces_chr == "Black" & .data$hispanic == 1, race_cces_to_acs$race[3]))
   }
 
   if ((!is.null(wh_as_hisp) | !is.null(bh_as_hisp)) &
@@ -164,7 +164,7 @@ ccc_std_demographics <- function(tbl,
            matches("weight"),
            matches("(state|st|cd|dist)"),
            matches("gender"),
-           female,
+           matches("^female$"),
            matches("pid3$"),
            matches("age"),
            matches("educ"),

diff --git a/R/datadoc_cd-info-long.R b/R/datadoc_cd-info-long.R
@@ -16,8 +16,9 @@
 #'    \item{`party`, `candidate`}{Define the presidential candidate that corresponds to the `elec`
 #'     (which may not be the same as `lines`). For example, `lines = 2012, cd = AL-01` combined with
 #'      `elec = 2008` represents the 2008 election results in the newly redistricted (2012) AL-01 geography}
-#'    \item{`pct`}{are the two party voteshares of the candidate}
-#'    \item{`presvotes_total`}{Is the total number of votes for President in that CD}
+#'    \item{`pct`}{The two-party voteshare of the candidate}
+#'    \item{`presvotes_total`}{The total number of votes for President in that CD}
+#'    \item{`presvotes_DR`}{The total number of votes for the Democratic and Republican candidates for President in that CD}
 #'    }
 #'
 #' @examples

diff --git a/R/datadoc_cd-info.R b/R/datadoc_cd-info.R
@@ -5,6 +5,7 @@
 #' district-level. We include one such data for congressional districts. All data
 #' is collected by The Downballot.
 #'
+#' @name cd_info
 #' @details `cd_info_2008` is data on boundaries used in 2006, 2008, and 2010;
 #' `cd_info_2012` is data on boundaries used in 2012 and 2014; `cd_info_2016`
 #' uses 2016 boundaries; `cd_info_2018` is data on 2018 boundaries;
@@ -28,17 +29,19 @@
 #' @format Each `cd_info_20**` is a dataframe with the `r nrow(cd_info_2018)` Congressional
 #'  Districts, one row per cd.
 #'  \describe{
-#'    \item{`year`}{The year for the district line. A congressional district's
-#'    actual geography can change year to year, and significantly so in different
-#'    redistricting cycles. Lines try to get the contemporaneous district map,
-#'    so that cd_info_2016 uses 2016 maps and cd_info_2020 uses 2020 maps.
-#'    However, this work relies on the hard work of assembling precinct results by Daily Kos.}
+#'    \item{`year`}{The year for the district map. A congressional district's
+#'    actual geography can change year to year. Lines represent the contemporaneous
+#'    district geography,
+#'    so that `cd_info_2016` uses 2016 maps and `cd_info_2020` uses 2020 maps.
+#'    Corresponds to `lines` in `cd_info_long`.
+#'    This work relies on the hard work of assembling precinct results by Daily Kos.}
 #'    \item{`cd`}{District code. The formatting corresponds to the CCES cumulative
 #'    coding of \code{cd}: a two-letter abbreviation for the state followed by
 #'    a dash, and the district number padded with zeros to the left to be of length
 #'    2. At-large districts like Delaware are given a "-01" for the district number. See `to_cd()`}
 #'    \item{`presvotes_total`}{In presidential years, the total number of votes cast for
-#'     the office of President that year. Taken from Daily Kos estimates from precinct results.}
+#'     the office of President that year. }
+#'    \item{`presvotes_DR`}{Same as `presvotes_total`, but counting only the votes for the Democratic and Republican candidates}
 #'    \item{`pct_trump`, `pct_romney`, `pct_mccain`}{The two-party voteshare of Republican
 #'    presidential candidates in that district for the given year. E.g. the
 #'    \code{pct_mccain} data for `cd_info_2018` represents the percent
@@ -53,7 +56,7 @@
 #'    have the same largest place.}
 #'  }
 #'
-#' @seealso cd_info_long
+#' @seealso `cd_info_long`
 #'
 #' @source
 #'   The Downballot (formerly Daily Kos Elections), \url{https://www.the-downballot.com/p/data}
@@ -64,6 +67,9 @@
 #'   Daily Kos, "2008 results for districts used in **2006, 2008, 2010**"
 #'   \url{https://bit.ly/4entUrV}
 #'
+#'   Kiernan Park-Egan, "U.S. Presidential Election Results by Congressional District, 1952 to 2020"
+#'   \url{https://bit.ly/4fk6UKx} (source of the 2008 voteshare and vote totals in `cd_info_2008`)
+#'
 #'   Daily Kos, "2008, 2012 results for districts used in **2012, 2014**"
 #'   \url{https://bit.ly/3N4PDZK}
 #'
@@ -83,40 +89,44 @@
 #'
 #'   Pennsylvania 2016 CD names are named by Shiro Kuriwaki and Lara Putnam.
 #'
-#'   Also see Cha, Kuriwaki, and Snyder, 2021,
-#'    "Candidates in American General Elections", https://doi.org/10.7910/DVN/DGDRDT,
-#'    Harvard Dataverse.
+#'   Also see Cha, Kuriwaki, and Snyder, 2024,
+#'    "Candidates in American General Elections", \url{https://doi.org/10.7910/DVN/DGDRDT},
+#'    Harvard Dataverse, for congressional candidates' (rather than presidential candidates') vote totals.
 #'
 #'
 #' @examples
 #' head(cd_info_2018)
 #' head(elec_NY)
-"cd_info_2018"
+NULL
 
-#' @rdname cd_info_2018
+#' @rdname cd_info
 #' @format NULL
 "cd_info_2008"
 
-#' @rdname cd_info_2018
+#' @rdname cd_info
 #' @format NULL
 "cd_info_2012"
 
-#' @rdname cd_info_2018
+#' @rdname cd_info
 #' @format NULL
 "cd_info_2016"
 
-#' @rdname cd_info_2018
+#' @rdname cd_info
+#' @format NULL
+"cd_info_2018"
+
+#' @rdname cd_info
 #' @format NULL
 "cd_info_2020"
 
-#' @rdname cd_info_2018
+#' @rdname cd_info
 #' @format NULL
 "cd_info_2022"
 
-#' @rdname cd_info_2018
+#' @rdname cd_info
 #' @format NULL
 "cd_info_2024"
 
-#' @rdname cd_info_2018
+#' @rdname cd_info
 #' @format NULL
 "elec_NY"
diff --git a/R/district-coding.R b/R/district-coding.R
@@ -4,7 +4,7 @@
 #'@param state A vector of state names, preferably abbreviations.
 #' If it is numeric, the function will assume they are FIPS codes
 #' and translate them accordingly. If they have full names like "California"
-#' instead of "CA", it will trnslate that too.
+#' instead of "CA", it will translate that too. But you cannot mix different types.
 #'@param num A vector of district codes
 #'
 #'@importFrom stringr str_pad
@@ -29,12 +29,12 @@ to_cd <- function(state, num) {
 
   # State
   if (inherits(state, "haven_labelled") | is.numeric(state)) {
-    fips_to_st <- deframe(select(states_key, st_fips, st))
+    fips_to_st <- deframe(select(states_key, .data$st_fips, .data$st))
     state <- recode(as.numeric(state), !!!fips_to_st)
   }
 
-  if (all(state %in% c(states_key$state))) {
-    state_to_st <- deframe(select(states_key, state, st))
+  if (all(state %in% c(ccesMRPprep::states_key$state))) {
+    state_to_st <- deframe(select(states_key, .data$state, .data$st))
     state <- recode(state, !!!state_to_st)
   }
 

diff --git a/README.Rmd b/README.Rmd
@@ -54,14 +54,13 @@ See the overview vignette (`vignette("overview")`) from a illustrative workflow.
 
 ## Data Sources 
 
-Function-specific pages will detail the documentation used in each function. Here is a manual compilaiton:
+Function-specific pages will detail the documentation used in each function. Here is a manual compilation:
 
 | Information | Source | Citation and URL (if public) | 
 | :--- |  :--- |  :--- |
 | CCES Covariates | Cumulative CCES | Shiro Kuriwaki, "Cumulative CCES Common Content".  https://doi.org/10.7910/DVN/II2DB6 |
 | CCES Outcomes | Each Year's CCES | Stephen Ansolabehere, Sam Luks, and Brian Schaffner. "CCES Common Content" (varies by year).   https://cces.gov.harvard.edu/ |
 | Poststratification | Census Bureau ACS | American Community Survey. Extracted via [tidycensus package](https://github.com/walkerke/tidycensus). See [ACS vignette](https://www.shirokuriwaki.com/ccesMRPprep/articles/acs.html) |
-| District-level Contestedness and Incumbency | Collected mainly by Jim Snyder | |
 | CD-level Presidential Voteshare | Daily Kos | Daily Kos, [The ultimate Daily Kos Elections guide to all of our data sets](https://www.dailykos.com/stories/2018/2/21/1742660/-The-ultimate-Daily-Kos-Elections-guide-to-all-of-our-data-sets#1) |
 | State-level Presidential Voteshare | MEDSL |  MIT Election Data and Science Lab, 2017, "U.S. President 1976–2016".  https://doi.org/10.7910/DVN/42MVDX |
 
@@ -71,7 +70,6 @@ Function-specific pages will detail the documentation used in each function. Her
 - [kuriwaki/rcces](https://github.com/kuriwaki/rcces) has another set of CCES related functions, but these
 are either my own personal functions in development (not for production), or specific to 
 non-MRP projects.
-- [kuriwaki/CCES_district-opinion](https://github.com/kuriwaki/CCES_district-opinion) is a private package that uses (among others) this package to process large CCES data for MRP at scale. 
 
 
 ## Support

diff --git a/README.md b/README.md
@@ -82,25 +82,21 @@ workflow.
 ## Data Sources
 
 Function-specific pages will detail the documentation used in each
-function. Here is a manual compilaiton:
+function. Here is a manual compilation:
 
-| Information                                 | Source                         | Citation and URL (if public)                                                                                                                                                                      |
-|:--------------------------------------------|:-------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| CCES Covariates                             | Cumulative CCES                | Shiro Kuriwaki, “Cumulative CCES Common Content”. <https://doi.org/10.7910/DVN/II2DB6>                                                                                                            |
-| CCES Outcomes                               | Each Year’s CCES               | Stephen Ansolabehere, Sam Luks, and Brian Schaffner. “CCES Common Content” (varies by year). <https://cces.gov.harvard.edu/>                                                                      |
-| Poststratification                          | Census Bureau ACS              | American Community Survey. Extracted via [tidycensus package](https://github.com/walkerke/tidycensus). See [ACS vignette](https://www.shirokuriwaki.com/ccesMRPprep/articles/acs.html)            |
-| District-level Contestedness and Incumbency | Collected mainly by Jim Snyder |                                                                                                                                                                                                   |
-| CD-level Presidential Voteshare             | Daily Kos                      | Daily Kos, [The ultimate Daily Kos Elections guide to all of our data sets](https://www.dailykos.com/stories/2018/2/21/1742660/-The-ultimate-Daily-Kos-Elections-guide-to-all-of-our-data-sets#1) |
-| State-level Presidential Voteshare          | MEDSL                          | MIT Election Data and Science Lab, 2017, “U.S. President 1976–2016”. <https://doi.org/10.7910/DVN/42MVDX>                                                                                         |
+| Information                        | Source            | Citation and URL (if public)                                                                                                                                                                      |
+|:-----------------------------------|:------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| CCES Covariates                    | Cumulative CCES   | Shiro Kuriwaki, “Cumulative CCES Common Content”. <https://doi.org/10.7910/DVN/II2DB6>                                                                                                            |
+| CCES Outcomes                      | Each Year’s CCES  | Stephen Ansolabehere, Sam Luks, and Brian Schaffner. “CCES Common Content” (varies by year). <https://cces.gov.harvard.edu/>                                                                      |
+| Poststratification                 | Census Bureau ACS | American Community Survey. Extracted via [tidycensus package](https://github.com/walkerke/tidycensus). See [ACS vignette](https://www.shirokuriwaki.com/ccesMRPprep/articles/acs.html)            |
+| CD-level Presidential Voteshare    | Daily Kos         | Daily Kos, [The ultimate Daily Kos Elections guide to all of our data sets](https://www.dailykos.com/stories/2018/2/21/1742660/-The-ultimate-Daily-Kos-Elections-guide-to-all-of-our-data-sets#1) |
+| State-level Presidential Voteshare | MEDSL             | MIT Election Data and Science Lab, 2017, “U.S. President 1976–2016”. <https://doi.org/10.7910/DVN/42MVDX>                                                                                         |
 
 ## Related Packages
 
 - [kuriwaki/rcces](https://github.com/kuriwaki/rcces) has another set of
   CCES related functions, but these are either my own personal functions
   in development (not for production), or specific to non-MRP projects.
-- [kuriwaki/CCES_district-opinion](https://github.com/kuriwaki/CCES_district-opinion)
-  is a private package that uses (among others) this package to process
-  large CCES data for MRP at scale.
 
 ## Support
 

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -30,7 +30,7 @@ reference:
   - ccc_samp
   - cc18_samp
   - cc18_NY
-  - cd_info_2018
+  - cd_info
   - cd_info_long
   - elec_NY
   - questions_samp

diff --git a/data-raw/create_cd-info-long.R b/data-raw/create_cd-info-long.R
@@ -79,22 +79,25 @@ D_pct <- R_pct |>
 # same for Ns
 Ns <-
   cd_info_all |>
-  select(lines = year, cd, matches("total")) |>
+  select(lines = year, cd, matches("presvotes_")) |>
   pivot_longer(
-    matches("presvotes_total"),
+    matches("presvotes_"),
     names_prefix = "presvotes_",
-    values_to = "presvotes_total", values_drop_na = TRUE) |>
+    values_drop_na = TRUE) |>
   mutate(elec = case_when(
-    name == "total20" ~ 2020,
-    lines %in% c(2008, 2012, 2016, 2020) ~ lines,
-    lines == 2010 ~ 2008,
-    lines == 2014 ~ 2012,
-    lines == 2018 ~ 2016,
-    lines == 2022 ~ 2020,
+    name %in% c("total_20", "DR_20") ~ 2020,
+    name %in% c("total_16", "DR_16") ~ 2016,
+    name %in% c("total", "DR") & lines %in% c(2008, 2010) ~ 2008,
+    name %in% c("total", "DR") & lines %in% c(2012, 2014) ~ 2012,
+    name %in% c("total", "DR") & lines == 2022 ~ 2020,
   ),
   .after = cd
   ) |>
-  select(-name)
+  mutate(name = str_remove(name, "_.*")) |>
+  pivot_wider(id_cols = c(elec, lines, cd),
+              names_from = name,
+              values_from = value,
+              names_prefix = "presvotes_")
 
 cd_info_long <- bind_rows(D_pct, R_pct) |>
   tidylog::left_join(Ns, by = c("lines", "cd", "elec")) |>

diff --git a/data-raw/create_dist-level-data_2006_2012.R b/data-raw/create_dist-level-data_2006_2012.R
@@ -11,40 +11,44 @@ url_2012 <- "https://docs.google.com/spreadsheets/d/1xn6nCNM97oFDZ4M-HQgoUT3X4pa
 # Read data from Google Sheets
 
 # 2008
-cd_names_2008 <- read_sheet(url_2008, range = "A2:E", col_names = TRUE, sheet = 1) |>
+cd_names_2008 <- read_sheet(url_2008, range = "A2:A", col_names = TRUE, sheet = 1) |>
   mutate(year = 2008) |>
   select(year,
-         cd = CD,
-         mccain = McCain)
+         cd = CD)
+
+# https://yalemaps.maps.arcgis.com/home/item.html?id=35e8e9aa89b34a3a8a036b1be7ad6607
+voting_info_2008 <- read_csv("data-raw/KPEcd2008.csv") |>
+  filter(STATENAME != "District Of Columbia") |>
+  transmute(cd = to_cd(STATENAME, DISTRICT),
+            pct_mccain = GOP_VOT / (GOP_VOT + DEM_VOT),
+            presvotes_DR = GOP_VOT + DEM_VOT,
+            presvotes_total = TOT_VOT)
 
-voting_info_2008 <- read_sheet(url_2008,range = "A2:I", col_names = TRUE, sheet = 2) |>
-  select(cd = CD,
-      #   pct_mccain = 'McCain%', # replaced by McCain sheet one
-         presvotes_total = "Total")
 
 # 2012
 cd_names_2012 <- read_sheet(url_2012, range = "A2:G", col_names = TRUE, sheet = 1) |>
   mutate(year = 2012) |>
   select(year, cd = CD)
 
+# 2008 CA missing
 voting_info_2012 <- read_sheet(url_2012, range = "A2:O", col_names = TRUE, sheet = 2) |>
-  select(cd = CD,
-         presvotes_total = 7,
-         pct_romney = 'Romney%',
-         pct_mccain = 'McCain%')
+  transmute(
+    cd = CD,
+    pct_romney = Romney / (`Obama...5` + Romney),
+    presvotes_DR = `Obama...5` + Romney,
+    presvotes_total = `Total...7`)
 
 # Join data from page 1 and page 2 for each dataset
 cd_info_2008 <- cd_names_2008 |>
-  left_join(voting_info_2008, by = "cd") |>
   mutate(cd = str_replace(cd, "-AL$", "-01")) |>
-  mutate(pct_mccain = mccain * 0.01) |>
-  select(!mccain) |>
-  relocate(year, cd, pct_mccain, presvotes_total)
+  left_join(voting_info_2008, by = "cd") |>
+  relocate(year, cd, pct_mccain)
 
 cd_info_2012 <- cd_names_2012 |>
   left_join(voting_info_2012, by = "cd") |>
   mutate(cd = str_replace(cd, "-AL$", "-01"))
 
+
 # Save the data
 usethis::use_data(cd_info_2012, overwrite = TRUE)
 usethis::use_data(cd_info_2008, overwrite = TRUE)