Skip to content

Commit

Permalink
remove internal funcs from export, remove data_name arg from function…
Browse files Browse the repository at this point in the history
…s, reorg internal funcs.
  • Loading branch information
karissawhiting committed Dec 21, 2023
1 parent 9eb8f82 commit 0238995
Show file tree
Hide file tree
Showing 19 changed files with 194 additions and 204 deletions.
4 changes: 0 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(.clean_and_check_cols)
export(.mutations_gene_binary)
export(.sum_alts_in_pathway)
export(add_pathways)
Expand All @@ -26,9 +25,6 @@ export(reformat_fusion)
export(rename_columns)
export(reset_gnomer_palette)
export(resolve_alias)
export(sanitize_cna_input)
export(sanitize_fusion_input)
export(sanitize_mutation_input)
export(set_gnomer_palette)
export(show_col)
export(specify_impact_panels)
Expand Down
2 changes: 1 addition & 1 deletion R/add-pathways.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ add_pathways <- function(gene_binary,

all_path <- gnomeR::pathways
all_path_names <- names(all_path)
.check_required_cols(gene_binary, "sample_id", "gene_binary")
.check_required_cols(gene_binary, "sample_id")

# * Deprecated Arguments (will remove this in the future) ----

Expand Down
15 changes: 6 additions & 9 deletions R/create-gene-binary.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,20 +149,17 @@ create_gene_binary <- function(samples = NULL,
mutation <- switch(!is.null(mutation),
.clean_and_check_cols(
df_to_check = mutation,
required_cols = c("sample_id", "hugo_symbol"),
data_name = "mutation"))
required_cols = c("sample_id", "hugo_symbol")))

fusion <- switch(!is.null(fusion),
.clean_and_check_cols(
df_to_check = fusion,
required_cols = c("sample_id", "site_1_hugo_symbol", "site_2_hugo_symbol"),
data_name = "fusion"))
required_cols = c("sample_id", "site_1_hugo_symbol", "site_2_hugo_symbol")))

cna <- switch(!is.null(cna),
.clean_and_check_cols(
df_to_check = cna,
required_cols = c("hugo_symbol", "sample_id", "alteration"),
data_name = "cna"))
required_cols = c("hugo_symbol", "sample_id", "alteration")))

# Make Final Sample List ----------------------------------------------------

Expand All @@ -187,22 +184,22 @@ create_gene_binary <- function(samples = NULL,
# Sanitize Data and Filter to Final Samples List --------

mutation <- switch(!is.null(mutation),
sanitize_mutation_input(
.sanitize_mutation_input(
mutation = mutation,
samples_final = samples_final,
include_silent = include_silent
)
)

fusion <- switch(!is.null(fusion),
sanitize_fusion_input(
.sanitize_fusion_input(
fusion,
samples_final = samples_final)
)

cna <- switch(!is.null(cna),
{
sanitize_cna_input(
.sanitize_cna_input(
cna,
samples_final = samples_final)
}
Expand Down
15 changes: 15 additions & 0 deletions R/plotting-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,21 @@ ggvartype <- function(mutation) {

}

#' #' Utility Function to Extract SNV
#' #'
#' #' @param x string
#' #' @param n number of characters from right
#' #'
#' #' @return string
#' #' @noRd
#' #' @examples
#' #' substrRight("Hello", 2)
#' #'
#' substrRight <- function(x, n) {
#' x <- as.character(x)
#' substr(x, nchar(x) - n + 1, nchar(x))
#' }


# ggsnvclass <- function(mutation) {
#
Expand Down
2 changes: 1 addition & 1 deletion R/resolve-gene-aliases.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ recode_alias <- function(genomic_df, alias_table = "impact", supress_warnings =
},

"data.frame" = {
.check_required_cols(alias_table, "hugo_symbol", "alias")
.check_required_cols(alias_table, "hugo_symbol")
alias_table
})

Expand Down
85 changes: 6 additions & 79 deletions R/sanitize-data.R
Original file line number Diff line number Diff line change
@@ -1,67 +1,21 @@
#' Checks genomic input file columns to ensure column names are correct
#'
#' Renames columns with `rename_columns()`, verifies that every column in
#' `required_cols` is present, and coerces those columns to character.
#'
#' @param df_to_check Raw maf dataframe containing alteration data
#' @param required_cols A character specifying names of columns to check
#' @param data_name Optionally specify how the data set should be called in error message.
#' Default is NULL and will call it a generic name.
#' @return a corrected maf file or an error if problems with maf
#' @keywords internal
#' @export
#'
#' @examples
#' .clean_and_check_cols(df_to_check = gnomeR::mutations, data_name = "mutation")
#'
.clean_and_check_cols <- function(df_to_check,
                                  required_cols = c("sample_id", "hugo_symbol"),
                                  data_name = NULL) {

  # Rename first and keep the result: the required-column check and the
  # character coercion below must both see the renamed columns.
  df_to_check <- rename_columns(df_to_check)

  # Check required columns & data types ------------------------------------------
  .check_required_cols(df_to_check, required_cols, data_name)

  # If factor????
  # Maybe String Trim on all required columns

  # Make sure the required columns (e.g. sample ID and hugo) are character
  df_to_check <- df_to_check %>%
    mutate(across(all_of(required_cols), ~as.character(.x)))

  return(df_to_check)

}

#' Checks MAF input to ensure column names are correct and renamed genes are corrected
#'
#' @param mutation Raw maf dataframe containing alteration data
#' @param include_silent Silent mutations will be removed if FALSE (default). Variant classification column is needed.
#' @param ... other arguments passed from create_gene_binary() (recode.aliases).
#' @return a corrected maf file or an error if problems with maf
#' @keywords internal
#' @export
#'
#' @examples
#' sanitize_mutation_input(mutation = gnomeR::mutations, include_silent = FALSE)
#'
sanitize_mutation_input <- function(mutation, include_silent, samples_final, ...) {

# adding this again so this function can still be used on its own
# CHANGE TO RENAME ONLY
mutation = clean_and_check_cols(
df_to_check = mutation,
required_cols = c("sample_id", "hugo_symbol"),
data_name = "mutation"
)
.sanitize_mutation_input <- function(mutation, include_silent, samples_final = NULL) {

column_names <- colnames(mutation)

# Filter to final sample list ---------
# * I don't think this can be NULL so maybe can remove the `if` check for NULL.
if (!is.null(samples_final)){
mutation <- mutation %>%
filter(sample_id %in% samples_final)
filter(.data$sample_id %in% samples_final)
}

# if include_silent FALSE, check for variant classification column -----
Expand Down Expand Up @@ -131,29 +85,16 @@ sanitize_mutation_input <- function(mutation, include_silent, samples_final, ...
#' Check fusion data frame to ensure columns are correct
#'
#' @param fusion a fusion data frame
#' @param ... other arguments passed from create_gene_binary()
#'
#' @return a checked data frame
#' @keywords internal
#' @export
#' @examples
#' fus <- sanitize_fusion_input(fusion = gnomeR::sv)
#'
sanitize_fusion_input <- function(fusion, samples_final) {

# Check required columns & data types ------------------------------------------
# adding this again so this function can still be used on its own
fusion = clean_and_check_cols(
df_to_check = fusion,
required_cols = c("sample_id", "site_1_hugo_symbol", "site_2_hugo_symbol"),
data_name = "fusion"
)
.sanitize_fusion_input <- function(fusion, samples_final = NULL) {

# Filter to final sample list ---------
# * I don't think this can be NULL so maybe can remove the `if` check for NULL.
if (!is.null(samples_final)){
fusion <- fusion %>%
filter(sample_id %in% samples_final)
filter(.data$sample_id %in% samples_final)
}

return(fusion)
Expand All @@ -164,30 +105,16 @@ sanitize_fusion_input <- function(fusion, samples_final) {
#' Check CNA data frame to ensure columns are correct
#'
#' @param cna a cna data frame
#' @param ... other arguments passed from create_gene_binary()
#'
#' @return a checked data frame
#' @keywords internal
#' @export
#' @examples
#'
#' cna <- sanitize_cna_input(cna = cna)
#'
sanitize_cna_input <- function(cna, samples_final, ...) {

# Check required columns & data types ------------------------------------------
# adding this again so this function can still be used on its own
cna = clean_and_check_cols(
df_to_check = cna,
required_cols = c("hugo_symbol", "sample_id", "alteration"),
data_name = "cna"
)
.sanitize_cna_input <- function(cna, samples_final = NULL) {

# Filter to final sample list ---------
# * I don't think this can be NULL so maybe can remove the `if` check for NULL.
if (!is.null(samples_final)){
cna <- cna %>%
filter(sample_id %in% samples_final)
filter(.data$sample_id %in% samples_final)
}

# Make sure hugo & alteration is character and recode
Expand Down
2 changes: 1 addition & 1 deletion R/subset-by-frequency.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ subset_by_frequency <- function(gene_binary, t = .1, other_vars = NULL) {
cli::cli_abort("{.code gene_binary} must be a data.frame")
}

.check_required_cols(gene_binary, "sample_id", "gene_binary")
.check_required_cols(gene_binary, "sample_id")

# Other Vars - Capture Other Columns to Retain -----------------------------------

Expand Down
2 changes: 1 addition & 1 deletion R/subset-by-panel.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ subset_by_panel <- function(gene_binary, panel_id = NULL, other_vars = NULL){
cli::cli_abort("{.code panel_id} must not be NULL")
}

.check_required_cols(gene_binary, "sample_id", "gene_binary")
.check_required_cols(gene_binary, "sample_id")

if (!(panel_id %in% c(gene_panels$gene_panel))){
cli::cli_abort("The panel {panel_id} is not an available panel. See `gnomeR::gene_panels()` for the names of available panels.")
Expand Down
2 changes: 1 addition & 1 deletion R/summarize-by-gene.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ summarize_by_gene <- function(gene_binary, other_vars = NULL) {
cli::cli_abort("{.code gene_binary} must be a data.frame with sample ids")
}

.check_required_cols(gene_binary, "sample_id", "gene_binary")
.check_required_cols(gene_binary, "sample_id")

# check for repeat samples
if(any(table(gene_binary$sample_id) > 1)) {
Expand Down
2 changes: 1 addition & 1 deletion R/tbl_genomic.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ tbl_genomic <- function(gene_binary,
stop("`gene_binary=` argument must be a tibble or data frame.", call. = FALSE)
}

.check_required_cols(gene_binary, "sample_id", "gene_binary")
.check_required_cols(gene_binary, "sample_id")

if("sample_id" %in% names(gene_binary)) {
if(any(table(gene_binary$sample_id) > 1)) {
Expand Down
87 changes: 53 additions & 34 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@

# Basic Data Cleaning -----------------------------------------------------


#' Rename columns from API results to work with gnomeR functions
#'
#' @param df_to_check a data frame to check and recode names as needed
Expand Down Expand Up @@ -43,22 +47,59 @@ rename_columns <- function(df_to_check) {
}



#' Utility Function to Extract SNV
#' Check a Data Frame for Required Columns
#'
#' @param x string
#' @param n number of characters from right
#' @param data A data frame to check
#' @param required_cols A character specifying names of columns to check
#' @return If data set doesn't have required columns it will return an error message.
#' If it does have required columns, nothing will be returned
#' @keywords internal

.check_required_cols <- function(data, required_cols) {

  # Recover the expression the caller passed as `data` so the error message can
  # name it. deparse() splits long expressions into several strings, so they
  # are collapsed into a single label; otherwise cli would render the pieces as
  # a comma-separated list.
  data_name <- paste(deparse(substitute(data)), collapse = " ")

  # Required columns that are absent from `data` (order of `required_cols` kept)
  column_names <- colnames(data)
  which_missing <- required_cols[!(required_cols %in% column_names)]

  # Abort listing every missing column; when nothing is missing the function
  # falls through and returns NULL invisibly.
  if (length(which_missing) > 0) {
    cli::cli_abort("The following required columns are missing in your {.field {data_name}} data: {.var {which_missing}}")
  }
}

#' Checks genomic input file columns to ensure column names are correct
#'
#' @return string
#' @noRd
#' @param df_to_check Raw maf dataframe containing alteration data
#' @param required_cols A character specifying names of columns to check
#' @return a corrected maf file or an error if problems with maf
#' @keywords internal
#' @examples
#' substrRight("Hello", 2)
#' gnomeR:::.clean_and_check_cols(df_to_check = gnomeR::mutations)
#'
substrRight <- function(x, n) {
x <- as.character(x)
substr(x, nchar(x) - n + 1, nchar(x))
.clean_and_check_cols <- function(df_to_check,
                                  required_cols = c("sample_id", "hugo_symbol")) {

  # Standardize column names before anything else so the checks below operate
  # on the renamed columns.
  df_to_check <- rename_columns(df_to_check)

  # Check required columns & data types ------------------------------------------
  # Aborts if any of `required_cols` is absent.
  # NOTE(review): .check_required_cols() labels the data from the expression it
  # is called with, so errors raised here will refer to "df_to_check" - confirm
  # that is the intended wording for users.
  .check_required_cols(df_to_check,
                       required_cols = required_cols)

  # Make sure the required columns (e.g. sample ID and hugo) are character
  df_to_check <- df_to_check %>%
    mutate(across(all_of(required_cols), ~as.character(.x)))

  return(df_to_check)

}



# CNA Recode -----------------------------------------------------


#' Internal function to recode numeric CNA alteration values to factor values
#'
#' @param alteration_vector a vector of CNA alterations coded with any of the
Expand Down Expand Up @@ -128,7 +169,7 @@ recode_cna <- function(alteration_vector){
return(recoded_alterations)
}


# Binary Matrix Processing -----------------------------------------------------


#' Create binary data.frames depending on type of mutation data
Expand Down Expand Up @@ -171,29 +212,7 @@ recode_cna <- function(alteration_vector){
ungroup()
}


#' Verify That a Data Frame Contains a Set of Columns
#'
#' @param data A data frame whose column names are inspected
#' @param required_cols A character vector of column names that must be present
#' @param data_name Optional label used for the data set in the error message.
#' When NULL (the default) an empty label is used.
#' @return Aborts with an error listing the absent columns when any entry of
#' `required_cols` is not a column of `data`. Otherwise nothing is returned.
#' @keywords internal

.check_required_cols <- function(data, required_cols, data_name = NULL) {

  # Fall back to an empty label when the caller did not name the data set
  if (is.null(data_name)) {
    data_name <- ""
  }

  # Keep only the required names that are not among the columns of `data`
  is_present <- required_cols %in% colnames(data)
  which_missing <- required_cols[!is_present]

  if (length(which_missing) > 0) {
    cli::cli_abort("The following required columns are missing in your {data_name} data: {.field {which_missing}}")
  }

}

# Small Misc Utils -----------------------------------------------------

#' Add descriptive endings to hugo symbol names that do not have one already
#'
Expand Down
Loading

0 comments on commit 0238995

Please sign in to comment.