-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from kuriwaki/add-kos
Add kos and cd_info_long
- Loading branch information
Showing
15 changed files
with
471 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,18 @@ | ||
Package: ccesMRPprep | ||
Type: Package | ||
Title: Functions and Data to Prepare CCES data for MRP | ||
Version: 0.1.12 | ||
Version: 0.1.13 | ||
Authors@R: | ||
c(person(given = "Shiro", | ||
family = "Kuriwaki", | ||
role = c("aut", "cre", "cph"), | ||
email = "[email protected]", | ||
comment = c(ORCID = "0000-0002-5687-2647")), | ||
person(given = "Daily Kos Elections", | ||
role = "dtc")) | ||
role = "dtc"), | ||
person(given = "Miranda", | ||
family = "Selin", | ||
role = "ctb")) | ||
Description: This provides data loading, processing, and formatting functions for | ||
a particular task: using CCES data for Multilevel Regression Post-stratification. | ||
Model fitting and visualization of MRP itself is handled elsewhere. This package | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
# ccesMRPprep 0.1.13 | ||
|
||
* Add 2022, 2024, 2006, and 2012 Daily Kos CD data | ||
|
||
# ccesMRPprep 0.1.12 | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#' Congressional District level information - long version | ||
#' | ||
#' `cd_info_long` provides a "long" version of the yearly `cd_info_20**` datasets. | ||
#' | ||
#' | ||
#' @format `cd_info_long` is a dataframe with `r nrow(cd_info_long)` rows, | ||
#' covering the maps of `r length(unique(cd_info_long$lines))` election years | ||
#' (`r paste0(unique(cd_info_long$lines), collapse = ', ')`) for each of the 435 congressional districts. | ||
#' \describe{ | ||
#' \item{`lines`}{Is the year corresponding to the geography (district line). For example, `lines = 2008` and `cd = "AL-01"` | ||
#' indicates that the row is representing AL-01's geography as used in the 2008 election.} | ||
#' \item{`cd`}{Is the CD corresponding to the year of the geography (district line). Note that districts can change drastically | ||
#' by redistricting; a state's "first congressional district" from one `lines` can cover a different area | ||
#' than the same first congressional district for another.} | ||
#' \item{`elec`}{Is the year of the election for the presidential election data that follows} | ||
#' \item{`party`, `candidate`}{Define the presidential candidate that corresponds to the `elec` | ||
#' (which may not be the same as `lines`). For example, `lines = 2012, cd = AL-01` combined with | ||
#' `elec = 2008` represents the 2008 election results in the newly redistricted (2012) AL-01 geography} | ||
#' \item{`pct`}{Is the two-party vote share of the candidate} | ||
#' \item{`presvotes_total`}{Is the total number of votes for President in that CD} | ||
#' } | ||
#' | ||
#' @examples | ||
#' library(dplyr) | ||
#' | ||
#' # get only data for proximate years | ||
#' cd_info_long |> filter((elec == lines) | (elec + 2 == lines)) | ||
#' | ||
#' # this subset returns exactly 2 * 435 districts per cycle: | ||
#' cd_info_long |> filter((elec == lines) | (elec + 2 == lines)) |> count(lines, party) | ||
#' | ||
#' # this will show where the districts lines changed between 2022 and 2024 | ||
#' # (same election, same candidate, different map) | ||
#' cd_info_long |> | ||
#' filter(lines %in% c(2022, 2024), elec == 2020, candidate == "biden") |> | ||
#' arrange(cd, lines) | ||
#' | ||
"cd_info_long" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,70 +1,122 @@ | ||
#' Congressional District level information by Daily Kos | ||
#' Congressional District level information by The Downballot | ||
#' | ||
#' | ||
#' Some of the most consequential variables to include in MRP are at the | ||
#' Some of the most consequential variables to include in MRP are measured at the | ||
#' district-level. We include one such data for congressional districts. All data | ||
#' is collected by Daily Kos. `cd_info_2018` is data on 2018 boundaries, `cd_info_2016` | ||
#' uses 2016 boundaries and `cd_info_2020` uses 2020 (but with place descriptions | ||
#' currently at 2016). | ||
#' is collected by The Downballot. | ||
#' | ||
#' @details `cd_info_2008` is data on boundaries used in 2006, 2008, and 2010; | ||
#' `cd_info_2012` is data on boundaries used in 2012 and 2014; `cd_info_2016` | ||
#' uses 2016 boundaries; `cd_info_2018` is data on 2018 boundaries; | ||
#' `cd_info_2020` uses 2020 boundaries; `cd_info_2022` is | ||
#' data on 2022 boundaries; `cd_info_2024` uses 2024 boundaries. | ||
#' | ||
#' District lines change before and after each decennial Census, e.g. 2010 vs. 2012 and | ||
#' 2020 vs. 2022. There is also change in district lines due to court interventions. | ||
#' | ||
#' * Between the 2022 and 2024 data, 4 states changed their | ||
#' districts: AL, GA, LA, and NC. In these changes, AL-02, GA-06, and LA-06 became | ||
#' majority-minority districts; the NC state supreme court plan in 2022 expired; and | ||
#' the NY court struck down the initial 2022 NY plan. | ||
#' * Between the 2018 and 2020 data, NC changed their districts | ||
#' * Between the 2016 and 2018 data, PA changed their districts | ||
#' * Between the 2012-14 and 2016 data, FL, NC, VA changed their districts | ||
#' | ||
#' These can be seen by, for example, the following code: | ||
#' `cd_info_2022 |> left_join(cd_info_2024, by = "cd") |> select(cd, matches("trump")) |> mutate(diff = abs(pct_trump - pct_trump20))` | ||
#' | ||
#' @format Each `cd_info_20**` is a dataframe with the `r nrow(cd_info_2018)` Congressional | ||
#' Districts, one row per cd. | ||
#' \describe{ | ||
#' \item{year}{The year for the district line. A congressional district's | ||
#' \item{`year`}{The year for the district line. A congressional district's | ||
#' actual geography can change year to year, and significantly so in different | ||
#' redistricting cycles. Lines try to get the contemporaneous district map, | ||
#' so that cd_info_2016 uses 2016 maps and cd_info_2020 uses 2020 maps. | ||
#' However, this work relies on the hard work of assembling precinct results by Daily Kos.} | ||
#' \item{cd}{District code. The formatting corresponds to the CCES cumulative | ||
#' \item{`cd`}{District code. The formatting corresponds to the CCES cumulative | ||
#' coding of \code{cd}: a two-letter abbreviation for the state followed by | ||
#' a dash, and the district number padded with zeros to the left to be of length | ||
#' 2. At-large districts like Delaware are given a "-01" for the district number.} | ||
#' \item{presvotes_total}{In presidential years, the total number of votes cast for | ||
#' 2. At-large districts like Delaware are given a "-01" for the district number. See `to_cd()`} | ||
#' \item{`presvotes_total`}{In presidential years, the total number of votes cast for | ||
#' the office of President that year. Taken from Daily Kos estimates from precinct results.} | ||
#' \item{pct_trump, pct_romney, pct_mccain}{The two-party voteshare of Republican | ||
#' \item{`pct_trump`, `pct_romney`, `pct_mccain`}{The two-party voteshare of Republican | ||
#' presidential candidates in that district for the given year. E.g. the | ||
#' \code{pct_mccain} data when \code{cd_year == 2018} represents the percent | ||
#' of the vote by McCain in 2008 for that district _under 2018 lines._ | ||
#' The Trump value is for 2016 for `cd_info_2018` and ``cd_info_2020` but not | ||
#' for 2020 where we use Trump's 2020 vote against Biden and denote as | ||
#' `pct_trump16` the 2016 result.} | ||
#' \item{dailykos_name}{The unique descriptive name for the district code in | ||
#' 2018 given by Daily Kos. Some edits are made for changing district. See | ||
#' \code{pct_mccain} data for `cd_info_2018` represents the percent | ||
#' of the vote by McCain in 2008 for that district _under 2018 lines._\cr | ||
#' `pct_trump` denotes the 2016 election for `cd_info_2018` and `cd_info_2016`.\cr | ||
#' `pct_trump` denotes the 2020 election for `cd_info_2020`, `cd_info_2022`, and `cd_info_2024`.\cr | ||
#' `pct_trump16` denotes the 2016 result for `cd_info_2020`.} | ||
#' \item{`dailykos_name`}{The unique descriptive name for the district code in | ||
#' 2018 given by Daily Kos (later renamed to The Downballot). Some edits are made for changing district. See | ||
#' Source for full citation.} | ||
#' \item{largest_place}{The largest place in the district code in 2018 given by Daily Kos. Multiple districts may | ||
#' have the largest place.} | ||
#' \item{`largest_place`}{The largest place in the district code in 2018 given by Daily Kos. Multiple districts may | ||
#' have the same largest place.} | ||
#' } | ||
#' | ||
#' @seealso cd_info_long | ||
#' | ||
#' @source | ||
#' The Downballot (formerly Daily Kos Elections), \url{https://www.the-downballot.com/p/data} | ||
#' | ||
#' The Daily Kos Elections naming guide to the nation's congressional districts. | ||
#' \url{https://bit.ly/2XsFI5W} | ||
#' | ||
#' Daily Kos, "2008, 2012, & 2016 results for districts used in 2018." | ||
#' \url{https://bit.ly/3DRhPcj} | ||
|
||
#' Daily Kos Elections, "2012, 2016 & 2020 presidential election results for congressional districts in 2020" | ||
#' \url{https://bit.ly/3bXtAPB} | ||
#' Daily Kos, "2008 results for districts used in **2006, 2008, 2010**" | ||
#' \url{https://bit.ly/4entUrV} | ||
#' | ||
#' Daily Kos, "2008, 2012 results for districts used in **2012, 2014**" | ||
#' \url{https://bit.ly/3N4PDZK} | ||
#' | ||
#' Daily Kos, "2008, 2012, & 2016 results for districts used in **2018**." \url{https://bit.ly/3bXtAPB} | ||
#' | ||
#' Daily Kos, "2012, 2016 & 2020 presidential election results for congressional districts in **2020**", \url{https://bit.ly/3DRhPcj} | ||
#' | ||
#' Daily Kos, 2020 presidential election results by later congressional districts: | ||
#' | ||
#' * __2022__ congressional districts: \url{https://bit.ly/4gLYnBK} | ||
#' * __2024__ congressional districts: \url{https://bit.ly/47KTvZw} | ||
#' | ||
#' Daily Kos, congressional district geography and most populous places: | ||
#' | ||
#' * 119th Congress: \url{https://bit.ly/geography_119}\cr | ||
#' * 118th Congress: \url{https://bit.ly/geography_118} | ||
#' | ||
#' Pennsylvania 2016 CD names are named by Shiro Kuriwaki and Lara Putnam. | ||
#' | ||
#' Also see Cha, Jeremiah; Kuriwaki, Shiro; Snyder, James M. Jr., 2021, | ||
#' Also see Cha, Kuriwaki, and Snyder, 2021, | ||
#' "Candidates in American General Elections", https://doi.org/10.7910/DVN/DGDRDT, | ||
#' Harvard Dataverse. | ||
#' | ||
#' @importFrom tibble tibble | ||
#' | ||
#' @examples | ||
#' head(cd_info_2018) | ||
#' head(elec_NY) | ||
"cd_info_2018" | ||
|
||
#' @rdname cd_info_2018 | ||
#' @format NULL | ||
"cd_info_2008" | ||
|
||
#' @rdname cd_info_2018 | ||
#' @format NULL | ||
"cd_info_2012" | ||
|
||
#' @rdname cd_info_2018 | ||
#' @format NULL | ||
"cd_info_2016" | ||
|
||
#' @rdname cd_info_2018 | ||
#' @format NULL | ||
"cd_info_2020" | ||
|
||
#' @rdname cd_info_2018 | ||
#' @format NULL | ||
"cd_info_2022" | ||
|
||
#' @rdname cd_info_2018 | ||
#' @format NULL | ||
"cd_info_2024" | ||
|
||
#' @rdname cd_info_2018 | ||
#' @format NULL | ||
"elec_NY" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
library(tidyverse) | ||
|
||
# Party label ("D" or "R") for each presidential candidate, keyed by the | ||
# lower-case surname used in the `pct_*` column names. | ||
pres_party <- tibble::tibble( | ||
  name = c("harris", "trump", "biden", "clinton", "obama", "romney", "mccain"), | ||
  party = c("D", "R", "D", "D", "D", "R", "R") | ||
) | ||
|
||
# The two major-party candidates of each presidential election, one row per | ||
# (candidate, election year) pair, listed from 2024 back to 2008. | ||
pres_names_long <- tibble( | ||
  name = c( | ||
    "harris", "trump", | ||
    "biden", "trump", | ||
    "trump", "clinton", | ||
    "obama", "romney", | ||
    "obama", "mccain" | ||
  ), | ||
  elec = rep(c(2024, 2020, 2016, 2012, 2008), each = 2) | ||
) | ||
|
||
# One row per election year, with the Democratic and Republican candidate | ||
# names spread into `party_D` and `party_R`. | ||
pres_names_wide <- pres_names_long |> | ||
  # Join on an explicit key rather than whatever columns happen to be shared; | ||
  # this also silences the natural-join message. | ||
  left_join(pres_party, by = "name") |> | ||
  pivot_wider( | ||
    id_cols = elec, | ||
    names_from = party, | ||
    names_prefix = "party_", | ||
    values_from = name | ||
  ) | ||
|
||
# Stack every yearly district table. The 2008 and 2012 tables are included a | ||
# second time with `year` overwritten to 2010 and 2014 respectively. | ||
cd_info_all <- list( | ||
  cd_info_2008, | ||
  cd_info_2008 |> mutate(year = 2010), | ||
  cd_info_2012, | ||
  cd_info_2012 |> mutate(year = 2014), | ||
  cd_info_2016, | ||
  cd_info_2018, | ||
  cd_info_2020, | ||
  cd_info_2022, | ||
  cd_info_2024 | ||
) |> | ||
  bind_rows() | ||
|
||
# Republican vote shares in long form: one row per district line year (`lines`), | ||
# district (`cd`) and `pct_*` column, with missing values dropped. | ||
R_pct <- cd_info_all |> | ||
  select(lines = year, cd, starts_with("pct_")) |> | ||
  pivot_longer( | ||
    matches("pct_"), | ||
    names_prefix = "pct_", | ||
    values_to = "pct", | ||
    values_drop_na = TRUE | ||
  ) |> | ||
  mutate( | ||
    # Election year implied by the candidate column. An unsuffixed `trump` | ||
    # depends on the map year: 2016 on lines up to 2018, 2020 on 2020-2022 lines. | ||
    # NOTE(review): an unsuffixed `trump` on 2024 lines matches no branch and | ||
    # yields NA -- confirm cd_info_2024 only carries suffixed columns. | ||
    elec = case_when( | ||
      name == "mccain" ~ 2008, | ||
      name == "romney" ~ 2012, | ||
      name == "trump16" ~ 2016, | ||
      name == "trump20" ~ 2020, | ||
      name == "trump" & lines <= 2018 ~ 2016, | ||
      name == "trump" & between(lines, 2020, 2022) ~ 2020 | ||
    ), | ||
    .after = cd | ||
  ) |> | ||
  # Kept as a separate step so `.after = cd` above only moves `elec`. | ||
  mutate( | ||
    name = str_remove(name, "(16|20)"), | ||
    party = "R" | ||
  ) | ||
|
||
|
||
# Democratic rows derived from the Republican ones: same lines/cd/elec, the | ||
# Democratic candidate for that election, and the complement of the R share. | ||
D_pct <- R_pct |> | ||
  left_join(pres_names_wide, by = "elec") |> | ||
  mutate( | ||
    party = "D", | ||
    name = party_D, | ||
    pct = 1 - pct | ||
  ) |> | ||
  select(lines, cd, elec, party, name, pct) | ||
|
||
# Total presidential votes in long form, tagged with the election year (`elec`) | ||
# they belong to so they can be joined onto the vote-share rows. | ||
Ns <- cd_info_all |> | ||
  select(lines = year, cd, matches("total")) |> | ||
  pivot_longer( | ||
    matches("presvotes_total"), | ||
    names_prefix = "presvotes_", | ||
    values_to = "presvotes_total", | ||
    values_drop_na = TRUE | ||
  ) |> | ||
  mutate( | ||
    # A `total20` column is always the 2020 election. Otherwise presidential-year | ||
    # lines map to themselves and the listed off-cycle lines map to the | ||
    # presidential election two years earlier. | ||
    # NOTE(review): an unsuffixed total on 2024 lines matches no branch (NA). | ||
    elec = case_when( | ||
      name == "total20" ~ 2020, | ||
      lines %in% c(2008, 2012, 2016, 2020) ~ lines, | ||
      lines %in% c(2010, 2014, 2018, 2022) ~ lines - 2 | ||
    ), | ||
    .after = cd | ||
  ) |> | ||
  select(!name) | ||
|
||
# Combine both parties, attach vote totals where lines/cd/elec match, and sort. | ||
cd_info_long <- bind_rows(D_pct, R_pct) |> | ||
  rename(candidate = name) |> | ||
  tidylog::left_join(Ns, by = c("lines", "cd", "elec")) |> | ||
  arrange(lines, elec, cd, party) | ||
|
||
# Store the result as package data, replacing any existing copy. | ||
usethis::use_data(cd_info_long, overwrite = TRUE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
library(tidyverse) | ||
library(googlesheets4) | ||
|
||
# Authenticate with Google Sheets | ||
# gs4_auth() | ||
|
||
# Google Sheets workbooks read below: `url_2008` feeds the 2008 tables and | ||
# `url_2012` feeds the 2012 tables. | ||
url_2008 <- "https://docs.google.com/spreadsheets/d/1l7W130tPRF6dQ4JoqlQJSJexnEg4rZct-7kwDXqgoLE/edit?gid=429358610#gid=429358610" | ||
url_2012 <- "https://docs.google.com/spreadsheets/d/1xn6nCNM97oFDZ4M-HQgoUT3X4paOiSDsRMSuxbaOBdg/edit?gid=0#gid=0" | ||
|
||
# Read data from Google Sheets | ||
|
||
# 2008 | ||
# Sheet 1: district code and the McCain column, stamped with year = 2008. | ||
cd_names_2008 <- url_2008 |> | ||
  read_sheet(sheet = 1, range = "A2:E", col_names = TRUE) |> | ||
  select(cd = CD, mccain = McCain) |> | ||
  mutate(year = 2008, .before = 1) | ||
|
||
# Sheet 2: total presidential votes per district. | ||
voting_info_2008 <- url_2008 |> | ||
  read_sheet(sheet = 2, range = "A2:I", col_names = TRUE) |> | ||
  select( | ||
    cd = CD, | ||
    # pct_mccain = 'McCain%', # replaced by McCain sheet one | ||
    presvotes_total = "Total" | ||
  ) | ||
|
||
# 2012 | ||
# Sheet 1: district codes only, stamped with year = 2012. | ||
cd_names_2012 <- url_2012 |> | ||
  read_sheet(sheet = 1, range = "A2:G", col_names = TRUE) |> | ||
  select(cd = CD) |> | ||
  mutate(year = 2012, .before = 1) | ||
|
||
# Sheet 2: vote totals plus the Romney and McCain share columns. | ||
voting_info_2012 <- url_2012 |> | ||
  read_sheet(sheet = 2, range = "A2:O", col_names = TRUE) |> | ||
  select( | ||
    cd = CD, | ||
    # NOTE(review): the total is taken by position (7th column of the range), | ||
    # so this breaks silently if the sheet layout changes -- confirm. | ||
    presvotes_total = 7, | ||
    pct_romney = 'Romney%', | ||
    pct_mccain = 'McCain%' | ||
  ) | ||
|
||
# Merge the sheet-1 and sheet-2 tables of each year on `cd`, then rewrite | ||
# at-large codes ending in "-AL" to end in "-01". | ||
cd_info_2008 <- cd_names_2008 |> | ||
  left_join(voting_info_2008, by = "cd") |> | ||
  mutate( | ||
    cd = str_replace(cd, "-AL$", "-01"), | ||
    # `mccain` is rescaled by 0.01 into `pct_mccain`. | ||
    pct_mccain = mccain * 0.01 | ||
  ) |> | ||
  # Drops the raw `mccain` column and fixes the final column order. | ||
  select(year, cd, pct_mccain, presvotes_total) | ||
|
||
cd_info_2012 <- cd_names_2012 |> | ||
  left_join(voting_info_2012, by = "cd") |> | ||
  mutate(cd = str_replace(cd, "-AL$", "-01")) | ||
|
||
# Store both tables as package data, replacing any existing copies. | ||
usethis::use_data(cd_info_2012, overwrite = TRUE) | ||
usethis::use_data(cd_info_2008, overwrite = TRUE) |
Oops, something went wrong.