remove internal funcs from export, remove data_name arg from functions, reorg internal funcs.
MSKCC-Epi-Bio · Dec 21, 2023 · 0238995 · 0238995
1 parent 9eb8f82
commit 0238995
Show file tree

Hide file tree

Showing 19 changed files with 194 additions and 204 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
-export(.clean_and_check_cols)
 export(.mutations_gene_binary)
 export(.sum_alts_in_pathway)
 export(add_pathways)
@@ -26,9 +25,6 @@ export(reformat_fusion)
 export(rename_columns)
 export(reset_gnomer_palette)
 export(resolve_alias)
-export(sanitize_cna_input)
-export(sanitize_fusion_input)
-export(sanitize_mutation_input)
 export(set_gnomer_palette)
 export(show_col)
 export(specify_impact_panels)

diff --git a/R/add-pathways.R b/R/add-pathways.R
@@ -44,7 +44,7 @@ add_pathways <- function(gene_binary,
 
   all_path <- gnomeR::pathways
   all_path_names <- names(all_path)
-  .check_required_cols(gene_binary, "sample_id", "gene_binary")
+  .check_required_cols(gene_binary, "sample_id")
 
   # * Deprecated Arguments (will remove this in the future) ----
 

diff --git a/R/create-gene-binary.R b/R/create-gene-binary.R
@@ -149,20 +149,17 @@ create_gene_binary <- function(samples = NULL,
   mutation <- switch(!is.null(mutation),
                      .clean_and_check_cols(
                        df_to_check = mutation,
-                       required_cols = c("sample_id", "hugo_symbol"),
-                       data_name = "mutation"))
+                       required_cols = c("sample_id", "hugo_symbol")))
 
   fusion <- switch(!is.null(fusion),
                    .clean_and_check_cols(
                      df_to_check = fusion,
-                     required_cols = c("sample_id", "site_1_hugo_symbol", "site_2_hugo_symbol"),
-                     data_name = "fusion"))
+                     required_cols = c("sample_id", "site_1_hugo_symbol", "site_2_hugo_symbol")))
 
   cna <- switch(!is.null(cna),
                 .clean_and_check_cols(
                   df_to_check = cna,
-                  required_cols = c("hugo_symbol", "sample_id", "alteration"),
-                  data_name = "cna"))
+                  required_cols = c("hugo_symbol", "sample_id", "alteration")))
 
   #  Make Final Sample List ----------------------------------------------------
 
@@ -187,22 +184,22 @@ create_gene_binary <- function(samples = NULL,
   # Sanitize Data and Filter to Final Samples List  --------
 
   mutation <- switch(!is.null(mutation),
-    sanitize_mutation_input(
+    .sanitize_mutation_input(
       mutation = mutation,
       samples_final = samples_final,
       include_silent = include_silent
     )
   )
 
   fusion <- switch(!is.null(fusion),
-    sanitize_fusion_input(
+    .sanitize_fusion_input(
       fusion,
       samples_final = samples_final)
   )
 
   cna <- switch(!is.null(cna),
     {
-      sanitize_cna_input(
+      .sanitize_cna_input(
         cna,
         samples_final = samples_final)
     }

diff --git a/R/plotting-functions.R b/R/plotting-functions.R
@@ -122,6 +122,21 @@ ggvartype <- function(mutation) {
 
 }
 
+# #' Utility Function to Extract SNV
+# #'
+# #' @param x string
+# #' @param n number of characters from right
+# #'
+# #' @return string
+# #' @noRd
+# #' @examples
+# #' substrRight("Hello", 2)
+# #'
+# substrRight <- function(x, n) {
+#   x <- as.character(x)
+#   substr(x, nchar(x) - n + 1, nchar(x))
+# }
+
 
 # ggsnvclass <- function(mutation) {
 #

diff --git a/R/resolve-gene-aliases.R b/R/resolve-gene-aliases.R
@@ -50,7 +50,7 @@ recode_alias <- function(genomic_df, alias_table = "impact", supress_warnings =
       },
 
     "data.frame" = {
-      .check_required_cols(alias_table, "hugo_symbol", "alias")
+      .check_required_cols(alias_table, "hugo_symbol")
       alias_table
     })
 

diff --git a/R/sanitize-data.R b/R/sanitize-data.R
@@ -1,67 +1,21 @@
-#' Checks genomic input file columns to ensure column names are correct
-#'
-#' @param df_to_check Raw maf dataframe containing alteration data
-#' @param required_cols A character specifying names of columns to check
-#' @param data_name Optionally specify how the data set should be called in error message.
-#' Default is NULL and will call it a generic name.
-#' @return a corrected maf file or an error if problems with maf
-#' @keywords internal
-#' @export
-#'
-#' @examples
-#' .clean_and_check_cols(mutation = gnomeR::mutations, data_name = "mutation")
-#'
-.clean_and_check_cols <- function(df_to_check,
-                                 required_cols = c("sample_id", "hugo_symbol"),
-                                 data_name = NULL)  {
-
-  mutation <- rename_columns(df_to_check)
-  column_names <- colnames(df_to_check)
-
-  # Check required columns & data types ------------------------------------------
-  # I hate data_name
-  .check_required_cols(df_to_check, required_cols, data_name)
-
-  # If factor????
-  # Maybe String Trim on all required columns
-
-  # Make sure sample ID and hugo are character
-  df_to_check <- df_to_check %>%
-    mutate(across(all_of(required_cols), ~as.character(.x)))
 
-  return(df_to_check)
-
-}
 
 #' Checks MAF input to ensure column names are correct and renamed genes are corrected
 #'
 #' @param mutation Raw maf dataframe containing alteration data
 #' @param include_silent Silent mutations will be removed if FALSE (default). Variant classification column is needed.
-#' @param ... other arguments passed from create_gene_binary() (recode.aliases).
 #' @return a corrected maf file or an error if problems with maf
 #' @keywords internal
-#' @export
-#'
-#' @examples
-#' sanitize_mutation_input(mutation = gnomeR::mutations, include_silent = FALSE)
-#'
-sanitize_mutation_input <- function(mutation, include_silent, samples_final, ...) {
 
-  # adding this again so this function can still be used on it's own
-  # CHANGE TO RENAME ONLY
-  mutation = clean_and_check_cols(
-    df_to_check = mutation,
-    required_cols = c("sample_id", "hugo_symbol"),
-    data_name = "mutation"
-  )
+.sanitize_mutation_input <- function(mutation, include_silent, samples_final = NULL) {
 
   column_names <- colnames(mutation)
 
   # Filter to final sample list ---------
   # * I don't think this can be NULL so maybe can remove the `if` check for NULL.
   if (!is.null(samples_final)){
     mutation <- mutation %>%
-      filter(sample_id %in% samples_final)
+      filter(.data$sample_id %in% samples_final)
   }
 
   # if include_silent FALSE, check for variant classification column -----
@@ -131,29 +85,16 @@ sanitize_mutation_input <- function(mutation, include_silent, samples_final, ...
 #' Check fusion data frame to ensure columns are correct
 #'
 #' @param fusion a fusion data frame
-#' @param ... other arguments passed from create_gene_binary()
-#'
 #' @return a checked data frame
 #' @keywords internal
-#' @export
-#' @examples
-#' fus <- sanitize_fusion_input(fusion = gnomeR::sv)
-#'
-sanitize_fusion_input <- function(fusion, samples_final)  {
 
-  # Check required columns & data types ------------------------------------------
-  # adding this again so this function can still be used on it's own
-  fusion = clean_and_check_cols(
-    df_to_check = fusion,
-    required_cols = c("sample_id", "site_1_hugo_symbol", "site_2_hugo_symbol"),
-    data_name = "fusion"
-  )
+.sanitize_fusion_input <- function(fusion, samples_final = NULL)  {
 
   # Filter to final sample list ---------
   # * I don't think this can be NULL so maybe can remove the `if` check for NULL.
   if (!is.null(samples_final)){
     fusion <- fusion %>%
-      filter(sample_id %in% samples_final)
+      filter(.data$sample_id %in% samples_final)
   }
 
   return(fusion)
@@ -164,30 +105,16 @@ sanitize_fusion_input <- function(fusion, samples_final)  {
 #' Check CNA data frame to ensure columns are correct
 #'
 #' @param cna a cna data frame
-#' @param ... other arguments passed from create_gene_binary()
-#'
 #' @return a checked data frame
 #' @keywords internal
-#' @export
-#' @examples
-#'
-#' cna <- sanitize_cna_input(cna = cna)
-#'
-sanitize_cna_input <- function(cna, samples_final, ...)  {
 
-  # Check required columns & data types ------------------------------------------
-  # adding this again so this function can still be used on it's own
-  cna = clean_and_check_cols(
-    df_to_check = cna,
-    required_cols = c("hugo_symbol", "sample_id", "alteration"),
-    data_name = "cna"
-  )
+.sanitize_cna_input <- function(cna, samples_final = NULL)  {
 
   # Filter to final sample list ---------
   # * I don't think this can be NULL so maybe can remove the `if` check for NULL.
   if (!is.null(samples_final)){
     cna <- cna %>%
-      filter(sample_id %in% samples_final)
+      filter(.data$sample_id %in% samples_final)
   }
 
   # Make sure hugo & alteration is character and recode

diff --git a/R/subset-by-frequency.R b/R/subset-by-frequency.R
@@ -36,7 +36,7 @@ subset_by_frequency <- function(gene_binary, t = .1, other_vars = NULL) {
     cli::cli_abort("{.code gene_binary} must be a data.frame")
   }
 
-  .check_required_cols(gene_binary, "sample_id", "gene_binary")
+  .check_required_cols(gene_binary, "sample_id")
 
   # Other Vars - Capture Other Columns to Retain -----------------------------------
 

diff --git a/R/subset-by-panel.R b/R/subset-by-panel.R
@@ -41,7 +41,7 @@ subset_by_panel <- function(gene_binary, panel_id = NULL, other_vars = NULL){
     cli::cli_abort("{.code panel_id} must not be NULL")
   }
 
-  .check_required_cols(gene_binary, "sample_id", "gene_binary")
+  .check_required_cols(gene_binary, "sample_id")
 
   if (!(panel_id %in% c(gene_panels$gene_panel))){
     cli::cli_abort("The panel {panel_id} is not an available panel. See `gnomeR::gene_panels()` for the names of available panels.")

diff --git a/R/summarize-by-gene.R b/R/summarize-by-gene.R
@@ -30,7 +30,7 @@ summarize_by_gene <- function(gene_binary, other_vars = NULL) {
     cli::cli_abort("{.code gene_binary} must be a data.frame with sample ids")
   }
 
-  .check_required_cols(gene_binary, "sample_id", "gene_binary")
+  .check_required_cols(gene_binary, "sample_id")
 
   # check for repeat samples
   if(any(table(gene_binary$sample_id) > 1)) {

diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R
@@ -52,7 +52,7 @@ tbl_genomic <- function(gene_binary,
     stop("`gene_binary=` argument must be a tibble or data frame.", call. = FALSE)
   }
 
-  .check_required_cols(gene_binary, "sample_id", "gene_binary")
+  .check_required_cols(gene_binary, "sample_id")
 
   if("sample_id" %in% names(gene_binary)) {
     if(any(table(gene_binary$sample_id) > 1)) {

diff --git a/R/utils.R b/R/utils.R
@@ -1,3 +1,7 @@
+
+# Basic Data Cleaning -----------------------------------------------------
+
+
 #' Rename columns from API results to work with gnomeR functions
 #'
 #' @param df_to_check a data frame to check and recode names as needed
@@ -43,22 +47,59 @@ rename_columns <- function(df_to_check) {
 }
 
 
-
-#' Utility Function to Extract SNV
+#' Check a Data Frame for Required Columns
 #'
-#' @param x string
-#' @param n number of characters from right
+#' @param data A data frame to check
+#' @param required_cols A character specifying names of columns to check
+#' @return If data set doesn't have required columns it will return an error message.
+#' If it does have required columns, nothing will be returned
+#' @keywords internal
+
+.check_required_cols <- function(data, required_cols) {
+
+  # Capture the expression the caller passed as `data`, as text, for use in the error message
+  data_name <- deparse(substitute(data))
+
+  column_names <- colnames(data)
+  which_missing <- required_cols[which(!(required_cols %in% column_names))]  # required names absent from `data`
+
+  if(length(which_missing) > 0) {
+    cli::cli_abort("The following required columns are missing in your {.field {data_name}} data: {.var {which_missing}}")
+  }
+}
+
+#' Checks genomic input file columns to ensure column names are correct
 #'
-#' @return string
-#' @noRd
+#' @param df_to_check Raw maf dataframe containing alteration data
+#' @param required_cols A character specifying names of columns to check
+#' @return a corrected maf file or an error if problems with maf
+#' @keywords internal
 #' @examples
-#' substrRight("Hello", 2)
+#' gnomeR:::.clean_and_check_cols(df_to_check = gnomeR::mutations)
 #'
-substrRight <- function(x, n) {
-  x <- as.character(x)
-  substr(x, nchar(x) - n + 1, nchar(x))
+.clean_and_check_cols <- function(df_to_check,
+                                  required_cols = c("sample_id", "hugo_symbol"))  {
+
+  df_to_check <- rename_columns(df_to_check)
+  column_names <- colnames(df_to_check)  # NOTE(review): not referenced again in this function
+
+  # Check required columns & data types ------------------------------------------
+  .check_required_cols(df_to_check,
+                       required_cols = required_cols)
+
+  # Coerce every column listed in required_cols to character
+  df_to_check <- df_to_check %>%
+    mutate(across(all_of(required_cols), ~as.character(.x)))
+
+  return(df_to_check)
+
 }
 
+
+
+# CNA Recode -----------------------------------------------------
+
+
 #' Internal function to recode numeric CNA alteration values to factor values
 #'
 #' @param alteration_vector a vector of CNA alterations coded with any of the
@@ -128,7 +169,7 @@ recode_cna <- function(alteration_vector){
     return(recoded_alterations)
   }
 
-
+# Binary Matrix Processing  -----------------------------------------------------
 
 
 #' Create binary data.frames depending on type of mutation data
@@ -171,29 +212,7 @@ recode_cna <- function(alteration_vector){
     ungroup()
 }
 
-
-#' Check a Data Frame for Required Columns
-#'
-#' @param data A data frame to check
-#' @param required_cols A character specifying names of columns to check
-#' @param data_name Optionally specify how the data set should be called in error message.
-#' Default is NULL and will call it a generic name.
-#' @return If data set doesn't have required columns it will return an error message.
-#' If it does have required columns, nothing will be returned
-#' @keywords internal
-
-.check_required_cols <- function(data, required_cols, data_name = NULL) {
-
-  data_name <- data_name %||% ""
-  column_names <- colnames(data)
-  which_missing <- required_cols[which(!(required_cols %in% column_names))]
-
-  if(length(which_missing) > 0) {
-    cli::cli_abort("The following required columns are missing in your {data_name} data: {.field {which_missing}}")
-  }
-
-}
-
+# Small Misc Utils  -----------------------------------------------------
 
 #' Add descriptive endings to hugo symbol names that do not have one already
 #'