change default_reason -> missing_reason

khusmann · Mar 6, 2024 · 7409c18 · 7409c18
1 parent 72fd7cb
commit 7409c18
Show file tree

Hide file tree

Showing 5 changed files with 34 additions and 22 deletions.
diff --git a/R/coalesce_channels.R b/R/coalesce_channels.R
@@ -7,30 +7,30 @@
 #' `coalesce_channels()` takes care of both situations. In the case where
 #' there is both a value and missing reason, it will choose which to keep based
 #' on the `keep` parameter. In case where no value or missing reason exists, it
-#' will fill the missing reason with the `default_reason` parameter.
+#' will fill the missing reason with the `missing_reason` parameter.
 #'
 #' Mutations can also create new value columns without companion missing reason
 #' columns. In that case, a new missing reason will be created and filled with
-#' `default_reason` wherever there are missing values in the value column. (
+#' `missing_reason` wherever there are missing values in the value column. (
 #' This behavior can also be used to stub missing reason columns for value-only
 #' data frames)
 #'
 #' @param x A data frame
 #' @param keep When a variable has both a value and missing reason, choose which
 #' to keep. (A properly formed deinterlaced data frame has values OR missing
 #' reasons)
-#' @param default_reason When a variable is missing a value and a missing
-#' reason, the default missing reason to fill in.
+#' @param missing_reason When a variable is missing a value and a missing
+#' reason, the missing reason to fill in.
 #'
 #' @return A deinterlaced tibble.
 #'
 #' @export
 coalesce_channels <- function(
   x,
-  default_reason = getOption("interlacer.default_missing_reason"),
+  missing_reason = getOption("interlacer.default_missing_reason"),
   keep = c("values", "missing")
 ) {
-  default_reason <- factor(default_reason %||% "UNKNOWN_REASON")
+  missing_reason <- factor(missing_reason %||% "UNKNOWN_REASON")
   keep <- match.arg(keep)
 
   for (missing_name in missing_names(x)) {
@@ -53,7 +53,7 @@ coalesce_channels <- function(
     missing_name <- to_missing_name(value_name)
 
     missing_values <- x[[missing_name]] %||%
-      if_else(is.na(values), default_reason, NA)
+      if_else(is.na(values), missing_reason, NA)
 
     # Ensure missing reason column is always a factor
     if (!is.factor((missing_values))) {
@@ -66,14 +66,14 @@ coalesce_channels <- function(
       new_missing_values <- case_when(
         !is.na(values) ~ NA,
         !is.na(missing_values) ~ missing_values,
-        T ~ default_reason
+        T ~ missing_reason
       )
     } else {
       new_values <- if_else(
         !is.na(values) & !is.na(missing_values), NA, values
       )
       new_missing_values <- if_else(
-        is.na(values) & is.na(missing_values), default_reason, missing_values
+        is.na(values) & is.na(missing_values), missing_reason, missing_values
       )
     }
 

diff --git a/man/coalesce_channels.Rd b/man/coalesce_channels.Rd
diff --git a/vignettes/coded-data.Rmd b/vignettes/coded-data.Rmd
@@ -181,7 +181,7 @@ df_decoded_deinterlaced |>
     age_next_year = age + 1,
     .after = person_id
   ) |>
-  coalesce_channels(default_reason = "AGE_UNAVAILABLE")
+  coalesce_channels(missing_reason = "AGE_UNAVAILABLE")
 ```
 
 

diff --git a/vignettes/mutations.Rmd b/vignettes/mutations.Rmd
@@ -112,7 +112,7 @@ interlacer provides an easier way via `coalesce_channels()`:
 
 `coalesce_channels()` should be run every time you mutate something in
 a deinterlaced data frame. It accepts two arguments `keep`, and
-`default_reason`. It fixes both possible problem cases as follows:
+`missing_reason`. It fixes both possible problem cases as follows:
 
 Case 1: BOTH a value and a missing reason exists
 
@@ -121,7 +121,7 @@ Case 1: BOTH a value and a missing reason exists
 
 Case 2: NEITHER a value nor a missing reason exists
 
-- Fill in the missing reason with `default_reason`
+- Fill in the missing reason with `missing_reason`
 
 These rules allow us to mutate our deinterlaced variables without needing to
 specify BOTH the values and missing reason actions -- we only need to think
@@ -146,7 +146,7 @@ df |>
       NA
     )
   ) |>
-  coalesce_channels(default_reason = "TECHNICAL_ERROR")
+  coalesce_channels(missing_reason = "TECHNICAL_ERROR")
 ```
 
 
@@ -162,7 +162,7 @@ df |>
     person_type = if_else(age < 18, "CHILD", "ADULT"),
     .after = person_id
   ) |>
-    coalesce_channels(default_reason = "AGE_UNAVAILABLE")
+    coalesce_channels(missing_reason = "AGE_UNAVAILABLE")
 ```
 
 ## Joining columns
@@ -183,7 +183,7 @@ conditions <- tribble(
 df |>
   left_join(conditions, by = join_by(person_id)) |>
   relocate(condition, .after = person_id) |>
-  coalesce_channels(default_reason = "LEFT_STUDY")
+  coalesce_channels(missing_reason = "LEFT_STUDY")
 ```
 
 Deinterlaced data frames can be joined as well, but you need to include
@@ -202,7 +202,7 @@ conditions <- tribble(
 df |>
   left_join(conditions, by = join_by(person_id, .person_id.)) |>
   relocate(condition, .after = person_id) |>
-  coalesce_channels(default_reason = "LEFT_STUDY")
+  coalesce_channels(missing_reason = "LEFT_STUDY")
 ```
 
 Use caution when your keys have missing reasons though:
@@ -228,10 +228,10 @@ df_right <- tribble(
 )
 
 left_join(df_left, df_right, by = join_by(a, .a.)) |>
-  coalesce_channels(default_reason = "REASON_3")
+  coalesce_channels(missing_reason = "REASON_3")
 ```
 
-When keys when missing reasons, it will join on missing reasons as well as
+When keys have missing reasons, missing reasons will be matched as well as
 values! Before you get mad at interlacer though, note how this situation echoes
 a similar situation with missing values in regular data frames:
 

diff --git a/vignettes/other-approaches.Rmd b/vignettes/other-approaches.Rmd
@@ -73,6 +73,12 @@ df_spss |>
     n = n(),
     .by = favorite_color_missing_reasons
   )
+
+df_spss |>
+  mutate(
+    age_next_year = if_else(is.na(age), NA, age + 1),
+    .after = person_id
+  )
 ```
 
 It's a little bit of an improvement to working with raw coded values, because
@@ -89,8 +95,8 @@ This creates a lot more type gymnastics and potential errors when you're
 manipulating them.
 
 Reason 2: Keeping interlaced columns, even when the missing values are labelled,
-means aggregations are not protected. If you forget to take out your missing
-values, you get incorrect results:
+means aggregations and other math operations are not protected. If you forget
+to take out your missing values, you get incorrect results / corrupted data:
 
 ```{r}
 df_spss |>
@@ -104,6 +110,12 @@ df_spss |>
     n = n(),
     .by = favorite_color_missing_reasons
   )
+
+df_spss |>
+  mutate(
+    age_next_year = age + 1,
+    .after = person_id
+  )
 ```