Skip to content

Commit

Permalink
updates to summarize_by_patient, tests, and general error messages fo…
Browse files Browse the repository at this point in the history
…r the package
  • Loading branch information
karissawhiting committed Aug 16, 2024
1 parent c4cee52 commit ce5ba7e
Show file tree
Hide file tree
Showing 12 changed files with 129 additions and 62 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ LazyData: true
URL: https://github.com/MSKCC-Epi-Bio/gnomeR,
https://mskcc-epi-bio.github.io/gnomeR/
BugReports: https://github.com/MSKCC-Epi-Bio/gnomeR/issues
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Depends: R (>= 3.6)
biocViews:
ComplexHeatmap,
Expand Down
37 changes: 23 additions & 14 deletions R/summarize-by-patient.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
#' samples = samples, mutation = mutations, cna = cna,
#' mut_type = "somatic_only",
#' include_silent = FALSE,
#' specify_panel = "IMPACT341"
#' ) %>%
#' summarize_by_patient()
#' specify_panel = "IMPACT341")
#'
#' gene_binary$patient_id = extract_patient_id(gene_binary$sample_id)
#'
#' summarize_by_patient(gene_binary)
#'
summarize_by_patient <- function(gene_binary, other_vars = NULL) {

Expand All @@ -36,7 +38,15 @@ summarize_by_patient <- function(gene_binary, other_vars = NULL) {
cli::cli_abort("{.code gene_binary} must be a data.frame with sample ids")
}

.check_required_cols(gene_binary, "sample_id")
# !!! I think we should allow sample ID as input but not require it
# .check_required_cols(
# gene_binary,
# c("sample_id"))

.check_required_cols(
gene_binary,
c("patient_id"),
add_to_message = c(i = "To extract patient IDs from IMPACT sample IDs (e.g. `P-XXXXXX-TXX-IMX`), use {.code gnomeR::extract_patient_id(data$sample_id)}"))

# Other Vars - Capture Other Columns to Retain -----------------------------------

Expand All @@ -49,12 +59,14 @@ summarize_by_patient <- function(gene_binary, other_vars = NULL) {

# Create Sample Index -----------------------------------------------------


sample_index <- gene_binary %>%
select("sample_id") %>%
select("patient_id") %>%
mutate(sample_index = paste0("samp", 1:nrow(gene_binary)))

# data frame of only alterations
alt_only <- as.data.frame(select(gene_binary, -"sample_id", -any_of(other_vars)))

alt_only <- as.data.frame(select(gene_binary, -"patient_id", -any_of("sample_id"), -any_of(other_vars)))

row.names(alt_only) <- sample_index$sample_index

Expand Down Expand Up @@ -102,12 +114,10 @@ summarize_by_patient <- function(gene_binary, other_vars = NULL) {
left_join(sample_index, ., by = "sample_index") %>%
select(-c("sample_index")) %>%
# identify patients
mutate(patient_id = gnomeR::extract_patient_id(.data$sample_id)) %>%
# determine number of samples per patient
group_by(.data$patient_id) %>%
mutate(n_samples = n()) %>%
ungroup() %>%
select(-.data$sample_id)
ungroup()

# summarize genomic information across patients
# separate patients w/ only 1 sample vs multiple samples to improve run time
Expand All @@ -131,20 +141,19 @@ summarize_by_patient <- function(gene_binary, other_vars = NULL) {

simp_gene_binary_pt <- bind_rows(simp_gene_binary_pt_single,
simp_gene_binary_pt_multiple) %>%
select(-.data$n_samples)
select(-"n_samples")
} else {
simp_gene_binary_pt <- simp_gene_binary_pt_single %>%
select(-.data$n_samples)
select(-"n_samples")
}


# !!!! Discuss this
simp_gene_binary <- left_join(simp_gene_binary_pt,
gene_binary %>%
mutate(patient_id = gnomeR::extract_patient_id(.data$sample_id)) %>%
select(any_of(c("patient_id", other_vars))) %>%
distinct(),
by = "patient_id") %>%
select(.data$patient_id, everything())
select("patient_id", everything())

return(simp_gene_binary)

Expand Down
4 changes: 3 additions & 1 deletion R/utils-gene-binary.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

}

#' Check for silent mutations
#'
#' @param mutation Raw maf dataframe containing alteration data
#' @param include_silent Silent mutations will be removed if FALSE (default). Variant classification column is needed.
#' @return a corrected maf file or an error if problems with maf
Expand All @@ -34,7 +36,7 @@

#' Check for fusions in maf file
#'
#' @param mutation
#' @param mutation data frame of mutations (e.g. MAF)
#' @return a data frame if no fusions found
#' @keywords internal
.check_for_fus_in_mut <- function(mutation) {
Expand Down
10 changes: 8 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
#'
#' @param data A data frame to check
#' @param required_cols A character specifying names of columns to check
#' @param add_to_message a vector (preferably named) of text to add to the error message for specific cases
#' @return If data set doesn't have required columns it will return an error message.
#' If it does have required columns, nothing will be returned
#' @keywords internal

.check_required_cols <- function(data, required_cols) {
.check_required_cols <- function(data, required_cols, add_to_message = NULL) {

# Get the name of the data object
data_name <- deparse(substitute(data))
Expand All @@ -21,7 +22,12 @@
which_missing <- required_cols[which(!(required_cols %in% column_names))]

if(length(which_missing) > 0) {
cli::cli_abort("The following required columns are missing in your {.field {data_name}} data: {.var {which_missing}}")
message <-
c("Can't find required columns:", set_names(c(which_missing), "x"))

add_to_message <- add_to_message %||% ""
message <- c(message, add_to_message)
cli::cli_abort(message)
}
}

Expand Down
3 changes: 3 additions & 0 deletions man/dot-check_for_fus_in_mut.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/dot-check_for_silent.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/dot-check_required_cols.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions man/summarize_by_patient.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified tests/testthat/Rplots.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/testthat/test-recode-alias.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ test_that("missing columns of interest", {
"MYC",
"BCL1")

expect_error(recode_alias(genomic_df, alias_table = alias_table), "The following *")
expect_error(recode_alias(genomic_df, alias_table = alias_table), "Can't find required*")

})

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-reformat_fusion.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

# data checks ---------------------------
test_that("required columns are included & is data.frame", {
expect_error(reformat_fusion(gnomeR::sv_long %>% select(-fusion)), "The following*")
expect_error(reformat_fusion(gnomeR::sv_long %>% select(-fusion)), "Can't find*")
expect_error(reformat_fusion(gnomeR::sv_long$hugo_symbol), "`fusion`*")
})

Expand Down
Loading

0 comments on commit ce5ba7e

Please sign in to comment.