diff --git a/NAMESPACE b/NAMESPACE index 35a6d39..d1a7d3b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,6 +47,7 @@ importFrom(SummarizedExperiment,"colData<-") importFrom(SummarizedExperiment,colData) importFrom(VGAM,dpareto) importFrom(assertthat,assert_that) +importFrom(assertthat,is.count) importFrom(assertthat,is.flag) importFrom(assertthat,is.string) importFrom(cubature,adaptIntegrate) diff --git a/NEWS.md b/NEWS.md index daaf102..cb00d51 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ ## NEW FEATURES * Added ```getContigDoublets()``` experimental function to identify TCR and BCR doublets as a preprocessing step to ```combineExpression()``` +* Added **proportion** argument to ```clonalCompare()``` so that when set to FALSE, the comparison will be based on raw clone counts instead of proportions. ## UNDERLYING CHANGES * Fixed issue with single chain output for ```clonalLength()``` diff --git a/R/clonalCompare.R b/R/clonalCompare.R index e81aa00..b5d21e0 100644 --- a/R/clonalCompare.R +++ b/R/clonalCompare.R @@ -1,113 +1,145 @@ -#' Demonstrate the difference in clonal proportion between clones +#' Demonstrate the difference in clonal proportions / counts between clones #' -#' This function produces an alluvial or area graph of the proportion of -#' the indicated clones for all or selected samples (using the -#' **samples** parameter). Individual clones can be selected -#' using the **clones** parameter with the specific sequence of -#' interest or using the **top.clones** parameter with the top -#' n clones by proportion to be visualized. +#' This function produces an alluvial or area graph of the proportion or +#' count composition of +#' the indicated clones for all or selected samples (using the +#' \strong{samples} parameter). 
Individual clones can be selected +#' using the \strong{clones} parameter with the specific sequence of +#' interest or using the \strong{top.clones} parameter with the top +#' n clones by proportion / counts to be visualized. #' #' @examples #' #Making combined contig data -#' combined <- combineTCR(contig_list, -#' samples = c("P17B", "P17L", "P18B", "P18L", +#' combined <- combineTCR(contig_list, +#' samples = c("P17B", "P17L", "P18B", "P18L", #' "P19B","P19L", "P20B", "P20L")) -#' clonalCompare(combined, -#' top.clones = 5, -#' samples = c("P17B", "P17L"), +#' clonalCompare(combined, +#' top.clones = 5, +#' samples = c("P17B", "P17L"), #' cloneCall="aa") #' -#' @param input.data The product of [combineTCR()], -#' [combineBCR()], or [combineExpression()]. -#' @param cloneCall How to call the clone - VDJC gene (**gene**), -#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**), -#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable +#' @param input.data The product of \code{\link{combineTCR}}, +#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. +#' @param cloneCall How to call the clone - VDJC gene (\strong{gene}), +#' CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), +#' VDJC gene + CDR3 nucleotide (\strong{strict}) or a custom variable #' in the data -#' @param chain indicate if both or a specific chain should be used - +#' @param chain indicate if both or a specific chain should be used - #' e.g. "both", "TRA", "TRG", "IGH", "IGL" #' @param samples The specific samples to isolate for visualization. #' @param clones The specific clonal sequences of interest #' @param top.clones The top number of clonal sequences per group. 
#' (e.g., top.clones = 5) -#' @param highlight.clones Clonal sequences to highlight, if present, +#' @param highlight.clones Clonal sequences to highlight, if present, #' all other clones returned will be grey #' @param relabel.clones Simplify the legend of the graph by returning #' clones that are numerically indexed #' @param group.by If using a single-cell object, the column header -#' to group the new list. **NULL** will return the active +#' to group the new list. \strong{NULL} will return the active #' identity or cluster #' @param order.by A vector of specific plotting order or "alphanumeric" #' to plot groups in order -#' @param graph The type of graph produced, either **"alluvial"** -#' or **"area"** +#' @param graph The type of graph produced, either \strong{"alluvial"} +#' or \strong{"area"} +#' @param proportion If \strong{TRUE}, the proportion of the total sequencing +#' reads will be used for the y-axis. If \strong{FALSE}, the raw count +#' will be used #' @param exportTable Returns the data frame used for forming the graph -#' @param palette Colors to use in visualization - input any -#' [hcl.pals][grDevices::hcl.pals] +#' @param palette Colors to use in visualization - input any +#' \link[grDevices]{hcl.pals} + #' @import ggplot2 #' @importFrom stringr str_sort #' #' @export #' @concept Visualizing_Clones -#' @return ggplot of the proportion of total sequencing read of +#' @return ggplot of the proportion of total sequencing read of #' selecting clones -clonalCompare <- function(input.data, - cloneCall = "strict", - chain = "both", - samples = NULL, - clones = NULL, +clonalCompare <- function(input.data, + cloneCall = "strict", + chain = "both", + samples = NULL, + clones = NULL, top.clones = NULL, highlight.clones = NULL, relabel.clones = FALSE, group.by = NULL, order.by = NULL, - graph = "alluvial", - exportTable = FALSE, - palette = "inferno"){ - + graph = "alluvial", + proportion = TRUE, + exportTable = FALSE, + palette = "inferno") { + + 
assert_that( + isListOfNonEmptyDataFrames(input.data) || + is_seurat_or_se_object(input.data), + is.string(cloneCall), + is.string(chain), chain %in% c("both", "TRA", "TRG", "IGH", "IGL"), + is.null(samples) || is.character(samples), + is.null(clones) || is.character(clones), + is.null(top.clones) || is.count(top.clones), + is.null(highlight.clones) || is.character(highlight.clones), + is.flag(relabel.clones), + is.null(group.by) || is.string(group.by), + is.null(order.by) || is.character(order.by), + is.string(graph), graph %in% c("alluvial", "area"), + is.flag(proportion), + is.flag(exportTable), + is.string(palette) + ) + #Tie goes to indicated clones over top clones - if(!is.null(top.clones) & !is.null(clones)) { + if(!is.null(top.clones) && !is.null(clones)) { top.clones <- NULL } - input.data <- .data.wrangle(input.data, - group.by, - .theCall(input.data, cloneCall, check.df = FALSE), + input.data <- .data.wrangle(input.data, + group.by, + .theCall(input.data, cloneCall, check.df = FALSE), chain) cloneCall <- .theCall(input.data, cloneCall) - + sco <- is_seurat_object(input.data) | is_se_object(input.data) - if(!is.null(group.by) & !sco) { + if(!is.null(group.by) && !sco) { input.data <- .groupList(input.data, group.by) } - - Con.df <- NULL - - #Loop through the list to get a proportional summary - for (i in seq_along(input.data)) { - tbl <- as.data.frame(table(input.data[[i]][,cloneCall])) - tbl[,2] <- tbl[,2]/sum(tbl[,2]) - colnames(tbl) <- c("clones", "Proportion") - tbl$Sample <- names(input.data[i]) - Con.df <- rbind.data.frame(Con.df, tbl) - } - - #Filtering steps + + compareColname <- ifelse(proportion, "Proportion", "Count") + normalizer <- ifelse(proportion, sum, length) + + Con.df <- input.data %>% + purrr::imap(function(df, columnNames) { + tbl <- as.data.frame(table(df[, cloneCall])) + if (proportion) { + tbl[, 2] <- tbl[, 2] / normalizer(tbl[, 2]) + } + colnames(tbl) <- c("clones", compareColname) + tbl$Sample <- columnNames + tbl + }) %>% + 
dplyr::bind_rows() + + #Filtering steps if (!is.null(samples)) { - Con.df <- Con.df[Con.df$Sample %in% samples,] + Con.df <- Con.df[Con.df$Sample %in% samples,] } if (!is.null(clones)) { - Con.df <- Con.df[Con.df$clones %in% clones,] + Con.df <- Con.df[Con.df$clones %in% clones,] } else if (!is.null(top.clones)) { top <- Con.df %>% - group_by(Con.df[,3]) %>% - slice_max(n = top.clones, order_by = Proportion, with_ties = FALSE) - Con.df <- Con.df[Con.df$clones %in% top$clones,] + group_by(Sample) %>% + slice_max( + n = top.clones, + order_by = !!sym(compareColname), + with_ties = FALSE + ) + Con.df <- Con.df[Con.df$clones %in% top$clones,] } if (nrow(Con.df) < length(unique(Con.df$Sample))) { - stop("Please reasses the filtering strategies here, there are not - enough clones to examine.") + stop("Please reasses the filtering strategies here, there are not + enough clones to examine.") } #Clones relabeling - clones.returned <- as.vector(unique(Con.df[order(Con.df[,"Proportion"], decreasing = TRUE),"clones"])) + clones.returned <- as.vector(unique(Con.df[order(Con.df[, compareColname], decreasing = TRUE),"clones"])) if (relabel.clones) { new.clones <- paste0("Clone: ", seq_len(length(clones.returned))) names(new.clones) <- clones.returned @@ -115,34 +147,34 @@ clonalCompare <- function(input.data, if(!is.null(highlight.clones)) { highlight.clones <- unname(new.clones[which(names(new.clones) %in% highlight.clones)]) } - Con.df[,"original.clones"] <- Con.df[,"clones"] - Con.df[,"clones"] <- new.clones[as.vector(Con.df[,"clones"])] - Con.df[,"clones"] <- factor(Con.df[,"clones"], - levels = str_sort(unique(Con.df[,"clones"]), numeric = TRUE)) - clones.returned <- as.vector(unique(Con.df[,"clones"])) - } - if (exportTable == TRUE) { - return(Con.df) + Con.df[,"original.clones"] <- Con.df[, "clones"] + Con.df[,"clones"] <- new.clones[as.vector(Con.df[, "clones"])] + Con.df[,"clones"] <- factor(Con.df[, "clones"], + levels = str_sort(unique(Con.df[, "clones"]), numeric = 
TRUE)) + clones.returned <- as.vector(unique(Con.df[, "clones"])) } - + if(!is.null(order.by)) { Con.df <- .ordering.function(vector = order.by, - group.by = "Sample", + group.by = "Sample", data.frame = Con.df) } - - + + if (exportTable) { + return(Con.df) + } + #Plotting Functions - plot <- ggplot(Con.df, aes(x = Sample, - fill = clones, + plot <- ggplot(Con.df, aes(x = Sample, + fill = clones, group = clones, - stratum = clones, - alluvium = clones, - y = Proportion, + stratum = clones, + alluvium = clones, + y = !!sym(compareColname), label = clones)) + theme_classic() + - theme(axis.title.x = element_blank(), - legend.text=element_text(size=rel(0.5)), + theme(axis.title.x = element_blank(), + legend.text=element_text(size=rel(0.5)), legend.key.size = unit(0.5,"line")) if (graph == "alluvial") { plot <- plot + geom_stratum() + geom_flow(stat = "alluvium") @@ -150,7 +182,7 @@ clonalCompare <- function(input.data, plot <- plot + geom_area(aes(group = clones), color = "black") } - + #Highlighting specific clones if (!is.null(highlight.clones)) { clone.colors <- rep("grey", length(clones.returned)) diff --git a/R/scRepertoire-package.R b/R/scRepertoire-package.R index ea79e49..2c99d9f 100644 --- a/R/scRepertoire-package.R +++ b/R/scRepertoire-package.R @@ -5,5 +5,6 @@ #' @importFrom lifecycle deprecated #' @importFrom Rcpp sourceCpp #' @useDynLib scRepertoire, .registration = TRUE +#' @importFrom assertthat assert_that is.count is.flag is.string ## usethis namespace: end NULL diff --git a/R/typecheck.R b/R/typecheck.R index e9b86bc..bed534b 100644 --- a/R/typecheck.R +++ b/R/typecheck.R @@ -35,3 +35,9 @@ is_named_numeric <- function(obj) { assertthat::on_failure(is_named_numeric) <- function(call, env) { paste0(deparse(call$obj), " is not a named numeric vector") } + +# functions + +assertthat::on_failure(`%in%`) <- function(call, env) { + paste0(deparse(call$x), " is not in ", deparse(call$table)) +} diff --git a/man/clonalCompare.Rd b/man/clonalCompare.Rd index 
2781018..85e074c 100644 --- a/man/clonalCompare.Rd +++ b/man/clonalCompare.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/clonalCompare.R \name{clonalCompare} \alias{clonalCompare} -\title{Demonstrate the difference in clonal proportion between clones} +\title{Demonstrate the difference in clonal proportions / counts between clones} \usage{ clonalCompare( input.data, @@ -16,13 +16,14 @@ clonalCompare( group.by = NULL, order.by = NULL, graph = "alluvial", + proportion = TRUE, exportTable = FALSE, palette = "inferno" ) } \arguments{ -\item{input.data}{The product of \code{\link[=combineTCR]{combineTCR()}}, -\code{\link[=combineBCR]{combineBCR()}}, or \code{\link[=combineExpression]{combineExpression()}}.} +\item{input.data}{The product of \code{\link{combineTCR}}, +\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} \item{cloneCall}{How to call the clone - VDJC gene (\strong{gene}), CDR3 nucleotide (\strong{nt}), CDR3 amino acid (\strong{aa}), @@ -55,31 +56,36 @@ to plot groups in order} \item{graph}{The type of graph produced, either \strong{"alluvial"} or \strong{"area"}} +\item{proportion}{If \strong{TRUE}, the proportion of the total sequencing +reads will be used for the y-axis. If \strong{FALSE}, the raw count +will be used} + \item{exportTable}{Returns the data frame used for forming the graph} \item{palette}{Colors to use in visualization - input any -\link[grDevices:palettes]{hcl.pals}} +\link[grDevices]{hcl.pals}} } \value{ ggplot of the proportion of total sequencing read of selecting clones } \description{ -This function produces an alluvial or area graph of the proportion of +This function produces an alluvial or area graph of the proportion or +count composition of the indicated clones for all or selected samples (using the \strong{samples} parameter). 
Individual clones can be selected using the \strong{clones} parameter with the specific sequence of interest or using the \strong{top.clones} parameter with the top -n clones by proportion to be visualized. +n clones by proportion / counts to be visualized. } \examples{ #Making combined contig data -combined <- combineTCR(contig_list, - samples = c("P17B", "P17L", "P18B", "P18L", +combined <- combineTCR(contig_list, + samples = c("P17B", "P17L", "P18B", "P18L", "P19B","P19L", "P20B", "P20L")) -clonalCompare(combined, - top.clones = 5, - samples = c("P17B", "P17L"), +clonalCompare(combined, + top.clones = 5, + samples = c("P17B", "P17L"), cloneCall="aa") } diff --git a/tests/testthat/test-clonalCompare.R b/tests/testthat/test-clonalCompare.R index 517fa3f..4428ccc 100644 --- a/tests/testthat/test-clonalCompare.R +++ b/tests/testthat/test-clonalCompare.R @@ -49,3 +49,57 @@ test_that("clonalCompare works", { ) }) + +test_that("clonalCompare works with exportTable and prop FALSE", { + + combined <- getCombined() + + getClonalCompareRes <- function(prop) { + clonalCompare( + combined, + top.clones = 10, + highlight.clones = c("CVVSDNTGGFKTIF_CASSVRRERANTGELFF", "NA_CASSVRRERANTGELFF"), + relabel.clones = TRUE, + samples = c("P17B", "P17L"), + cloneCall = "aa", + graph = "alluvial", + exportTable = TRUE, + proportion = prop + ) + } + + countCompareRes <- getClonalCompareRes(prop = FALSE) + propCompareRes <- getClonalCompareRes(prop = TRUE) + + expect_identical( + countCompareRes %>% dplyr::select(-Count), + propCompareRes %>% dplyr::select(-Proportion) + ) + + fullJoined <- suppressMessages(getClonalCompareRes(FALSE) %>% + dplyr::full_join(getClonalCompareRes(TRUE)) + ) + + expect_setequal( + colnames(fullJoined), + c("clones", "Count", "original.clones", "Proportion", "Sample") + ) + + expect_identical( + fullJoined %>% + dplyr::mutate(propToCountScaleFactor = Count / Proportion) %>% + dplyr::group_by(Sample) %>% + dplyr::summarise( + areAllFactorsEqual = 
var(propToCountScaleFactor) < 1e-10 + ) %>% + dplyr::pull(areAllFactorsEqual), + rep(TRUE, length(unique(fullJoined$Sample))) + ) + + countPropFactor <- fullJoined$Count / fullJoined$Proportion + + expect_identical( + as.integer(fullJoined$Count - fullJoined$Proportion * countPropFactor), + integer(nrow(fullJoined)) + ) +}) diff --git a/vignettes/.gitignore b/vignettes/.gitignore index 097b241..fec9a60 100644 --- a/vignettes/.gitignore +++ b/vignettes/.gitignore @@ -1,2 +1,3 @@ *.html *.R +