diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index fde02ff5..6695bbd6 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -20,9 +20,7 @@ jobs: config: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index ae2558f5..d39625f1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ .Rhistory .Rhistory local_tests.R +.RData diff --git a/DESCRIPTION b/DESCRIPTION index b08eccde..bd69f2d3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,8 @@ RoxygenNote: 7.2.3 biocViews: Software, ImmunoOncology, SingleCell, Classification, Annotation, Sequencing Depends: ggplot2, - R (>= 4.0) + R (>= 4.0), + Seurat Imports: stringdist, dplyr, @@ -33,7 +34,7 @@ Imports: tidygraph, SeuratObject, stats, - Seurat + Rcpp Suggests: knitr, rmarkdown, @@ -41,8 +42,11 @@ Suggests: circlize, scales, scater, + spelling, testthat (>= 3.0.0), - spelling + vdiffr VignetteBuilder: knitr Config/testthat/edition: 3 Language: en-US +LinkingTo: + Rcpp diff --git a/NAMESPACE b/NAMESPACE index 4955b2f9..bfd24fb5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ export(vizGenes) exportClasses(StartracOut) import(dplyr) import(ggplot2) +importFrom(Rcpp,sourceCpp) importFrom(SeuratObject,Embeddings) importFrom(SeuratObject,Idents) importFrom(SingleCellExperiment,colData) @@ -97,3 +98,4 @@ importFrom(utils,combn) importFrom(utils,head) importFrom(vegan,diversity) importFrom(vegan,estimateR) +useDynLib(scRepertoire, .registration = TRUE) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 00000000..7b828278 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,7 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 
10BE3573-1514-4C36-9D1C-5A225CD40393 + +rcpp_hello_world <- function() { + invisible(.Call(`_scRepertoire_rcpp_hello_world`)) +} + diff --git a/R/combineContigs.R b/R/combineContigs.R index 09f60ea9..83837a9a 100644 --- a/R/combineContigs.R +++ b/R/combineContigs.R @@ -13,20 +13,25 @@ data1_lines <- c("TCR1", "cdr3", "cdr3_nt") data2_lines <- c("TCR2", "cdr3", "cdr3_nt") CT_lines <- c("CTgene", "CTnt", "CTaa", "CTstrict") -utils::globalVariables(c("heavy_lines", "light_lines", "l_lines", "k_lines", - "h_lines", "tcr1_lines", "tcr2_lines", "data1_lines", - "data2_lines", "CT_lines")) +utils::globalVariables(c( + "heavy_lines", "light_lines", "l_lines", "k_lines", "h_lines", "tcr1_lines", + "tcr2_lines", "data1_lines", "data2_lines", "CT_lines" +)) -#' Combining the list of T Cell Receptor contigs +#' @title Combining the list of T Cell Receptor contigs #' -#' This function consolidates a list of TCR sequencing results to the level of -#' the individual cell barcodes. Using the samples and ID parameters, the -#' function will add the strings as prefixes to prevent issues with repeated -#' barcodes. The resulting new barcodes will need to match the Seurat or SCE -#' object in order to use, \code{\link{combineExpression}}. Several -#' levels of filtering exist - remove or filterMulti are parameters that -#' control how the function deals with barcodes with multiple chains -#' recovered. +#' @description This function consolidates a list of TCR sequencing results to +#' the level of the individual cell barcodes. Using the samples and ID +#' parameters, the function will add the strings as prefixes to prevent issues +#' with repeated barcodes. The resulting new barcodes will need to match the +#' Seurat or SCE object in order to use, \code{\link{combineExpression}}. +#' Several levels of filtering exist - `removeNA`, `removeMulti`, or +#' `filterMulti` are parameters that control how the function deals with +#' barcodes with multiple chains recovered. 
+#' +#' @details For single-sample TCR sequencing experiments, where the input is +#' just a single data.frame, the function will add the consolidated information +#' columns after the existing raw clonotype data #' #' @examples #' combineTCR(contig_list, @@ -44,6 +49,7 @@ utils::globalVariables(c("heavy_lines", "light_lines", "l_lines", "k_lines", #' @import dplyr #' @export #' @return List of clonotypes for individual cell barcodes +#' combineTCR <- function(df, samples = NULL, ID = NULL, @@ -83,10 +89,10 @@ combineTCR <- function(df, } else { out <- df } - for (i in seq_along(out)) { + for (i in seq_along(out)) { # ideally the nested code could be in a function for a better development/testing experience data2 <- out[[i]] data2 <- makeGenes(cellType = "T", data2) - unique_df <- unique(data2$barcode) + unique_df <- unique(data2$barcode) # could potentially display % here Con.df <- data.frame(matrix(NA, length(unique_df), 7)) colnames(Con.df) <- c("barcode",tcr1_lines, tcr2_lines) Con.df$barcode <- unique_df @@ -104,22 +110,24 @@ combineTCR <- function(df, } final[[i]] <- data3 } - names <- NULL + name_vector <- character(length(samples)) for (i in seq_along(samples)) { - if (!is.null(samples) & !is.null(ID)) { - c <- paste(samples[i], "_", ID[i], sep="") - } else if (!is.null(samples) & is.null(ID)) { - c <- paste(samples[i], sep="") - } - names <- c(names, c) + if (!is.null(samples) & !is.null(ID)) { + curr <- paste(samples[i], "_", ID[i], sep="") + } else if (!is.null(samples) & is.null(ID)) { + curr <- paste(samples[i], sep="") + } + name_vector[i] <- curr } - names(final) <- names + names(final) <- name_vector for (i in seq_along(final)){ - final[[i]]<-final[[i]][!duplicated(final[[i]]$barcode),] - final[[i]]<-final[[i]][rowSums(is.na(final[[i]])) < 10, ]} - if (removeNA == TRUE) { final <- removingNA(final)} - if (removeMulti == TRUE) { final <- removingMulti(final) } - return(final) } + final[[i]]<-final[[i]][!duplicated(final[[i]]$barcode),] + 
final[[i]]<-final[[i]][rowSums(is.na(final[[i]])) < 10, ] + } + if (removeNA) { final <- removingNA(final)} + if (removeMulti) { final <- removingMulti(final) } + final +} #' Combining the list of B Cell Receptor contigs #' @@ -133,8 +141,8 @@ combineTCR <- function(df, #' and the corresponding v-gene. This index automatically calculates #' the Levenshtein distance between sequences with the same V gene and will #' index sequences with <= 0.15 normalized Levenshtein distance with the same -#' ID. After which, clonotype clusters are called using the igraph -#' component() function. Clonotype that are clustered across multiple +#' ID. After which, clonotype clusters are called using the +#' `igraph:: component()` function. Clonotype that are clustered across multiple #' sequences will then be labeled with "LD" in the CTstrict header. #' #' @examples @@ -188,9 +196,9 @@ combineBCR <- function(df, } } if (!is.null(samples)) { - out <- modifyBarcodes(df, samples, ID) + out <- modifyBarcodes(df, samples, ID) } else { - out <- df + out <- df } for (i in seq_along(out)) { data2 <- data.frame(out[[i]]) @@ -226,7 +234,7 @@ combineBCR <- function(df, if (!is.null(sample) & !is.null(ID)) { final[[i]]<- final[[i]][, c("barcode", "sample", "ID", heavy_lines[c(1,2,3)], light_lines[c(1,2,3)], CT_lines)] - } + } else if (!is.null(sample) & is.null(ID)) { final[[i]]<- final[[i]][, c("barcode", "sample", heavy_lines[c(1,2,3)], light_lines[c(1,2,3)], CT_lines)] @@ -234,18 +242,19 @@ combineBCR <- function(df, } names <- NULL for (i in seq_along(samples)) { - if (!is.null(samples) & !is.null(ID)) { - c <- paste(samples[i], "_", ID[i], sep="") - } else if (!is.null(samples) & is.null(ID)) { - c <- paste(samples[i], sep="") - } - names <- c(names, c)} + if (!is.null(samples) & !is.null(ID)) { + c <- paste(samples[i], "_", ID[i], sep="") + } else if (!is.null(samples) & is.null(ID)) { + c <- paste(samples[i], sep="") + } + names <- c(names, c) + } names(final) <- names for (i in 
seq_along(final)) { final[[i]] <- final[[i]][!duplicated(final[[i]]$barcode),] final[[i]]<-final[[i]][rowSums(is.na(final[[i]])) < 10, ]} - if (removeNA == TRUE) { final <- removingNA(final) } - if (removeMulti == TRUE) { final <- removingMulti(final) } + if (removeNA) { final <- removingNA(final) } + if (removeMulti) { final <- removingMulti(final) } return(final) } diff --git a/R/contig_list.R b/R/contig_list.R deleted file mode 100644 index 31570298..00000000 --- a/R/contig_list.R +++ /dev/null @@ -1,6 +0,0 @@ -#' A data set of T cell contigs as a list outputed from the -#' filter_contig_annotation files. -#' @docType data -#' @name contig_list -#' -NULL diff --git a/R/data.R b/R/data.R new file mode 100644 index 00000000..022400b3 --- /dev/null +++ b/R/data.R @@ -0,0 +1,64 @@ +#' A data set of T cell contigs as a list outputed from the +#' filter_contig_annotation files. +#' @docType data +#' @name contig_list +#' +NULL + +#' A seurat object of 100 single T cells derived +#' from 3 clear cell renal carcinoma patients. +#' +#' @description The object is compatible with `contig_list` and the TCR +#' sequencing data can be added with `combineExpression`. 
+#' +#' @name screp_example +#' @docType data +#' +NULL + +#' Processed subset of `contig_list` +#' +#' @description A list of 6 dataframes of T cell contigs outputted from the +#' `filtered_contig_annotation` files, but subsetted to about 92 valid T cells +#' which correspond to the same barcodes found in `screp_example` +#' +#' @usage data("combined_mini_contig_list") +#' +#' @format An R `list` of `data.frame` objects +#' +#' @docType data +#' +#' @seealso \code{\link{contig_list}} +#' +"combined_mini_contig_list" + +# # Code used for creating the combined_mini_contig_list: + +# library(hash, usethis) +# +#data("contig_list", "screp_example") + +#combined_mini_contig_list <- combineTCR( +# contig_list, +# samples = c("PY", "PY", "PX", "PX", "PZ","PZ"), +# ID = c("P", "T", "P", "T", "P", "T") +#) +#all_barcodes <- names(screp_example@active.ident) +#barcode_set <- hash::hash(all_barcodes, all_barcodes) # a worse version of a set +#col_names <- names(combined_mini_contig_list[[1]]) + +#for (i in seq_along(combined_mini_contig_list)) { +# curr_df <- setNames( +# data.frame(replicate(length(col_names), character(0))), col_names +# ) +# len <- 0 +# for (j in seq_along(combined_mini_contig_list[[i]][[1]])) { +# if (is.null(barcode_set[[combined_mini_contig_list[[i]][[1]][[j]]]])) { +# next +# } +# len <- len + 1 +# curr_df[len, ] <- combined_mini_contig_list[[i]][j, ] +# } +# combined_mini_contig_list[[i]] <- curr_df +#} +#usethis::use_data(combined_mini_contig_list) diff --git a/R/processing.R b/R/processing.R index 93a2e2ba..4e72aa89 100644 --- a/R/processing.R +++ b/R/processing.R @@ -9,12 +9,16 @@ #' stripBarcode(contig_list[[1]], column = 1, connector = "_", num_connects = 1) #' @export #' @return list with the suffixes of the barcodes removed. 
-stripBarcode <- function(contigs, column = 1, connector = "_", - num_connects = 3) { - count <- as.data.frame(t(data.frame(strsplit(contigs[,column], - paste("['", connector, "']", sep="")), - stringsAsFactors = FALSE)), - stringsAsFactors = FALSE)[num_connects] +stripBarcode <- function(contigs, column = 1, connector = "_", num_connects = 3) +{ + count <- as.data.frame( + t(data.frame( + strsplit(contigs[,column], paste("['", connector, "']", sep="")), + stringsAsFactors = FALSE + )), + stringsAsFactors = FALSE + )[num_connects] + contigs[,column] <- count return(contigs) } diff --git a/R/scRepertoire-package.R b/R/scRepertoire-package.R new file mode 100644 index 00000000..ce705cf9 --- /dev/null +++ b/R/scRepertoire-package.R @@ -0,0 +1,8 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom Rcpp sourceCpp +#' @useDynLib scRepertoire, .registration = TRUE +## usethis namespace: end +NULL diff --git a/R/screp_example.R b/R/screp_example.R deleted file mode 100644 index c00afd42..00000000 --- a/R/screp_example.R +++ /dev/null @@ -1,7 +0,0 @@ -#' A seurat object of 1000 single T cells derived -#' from 3 clear cell renal carcinoma patients. -#' @name screp_example -#' @docType data -#' -#' -NULL diff --git a/R/seuratFunctions.R b/R/seuratFunctions.R index ff5fe49b..aa16385e 100644 --- a/R/seuratFunctions.R +++ b/R/seuratFunctions.R @@ -42,7 +42,7 @@ #' clonotype information #' @param addLabel This will add a label to the frequency header, allowing #' the user to try multiple group.by variables or recalculate frequencies after -#' subseting the data. +#' subsetting the data. 
#' @importFrom dplyr bind_rows %>% summarise #' @importFrom rlang %||% #' @importFrom SummarizedExperiment colData<- colData @@ -50,17 +50,21 @@ #' @return seurat or SingleCellExperiment object with attached clonotype #' information #' - -combineExpression <- function(df, - sc, - cloneCall="strict", - chain = "both", - group.by="none", - proportion = TRUE, - filterNA = FALSE, - cloneTypes=c(Rare = 1e-4, Small = 0.001, - Medium = 0.01, Large = 0.1, Hyperexpanded = 1), - addLabel = FALSE) { +combineExpression <- function( + df, + sc, + cloneCall ="strict", + chain = "both", + group.by ="none", + proportion = TRUE, + filterNA = FALSE, + cloneTypes = c( + Rare = 1e-4,Small = 0.001,Medium = 0.01,Large = 0.1,Hyperexpanded = 1 + ), + addLabel = FALSE +) { + call_time <- Sys.time() + options( dplyr.summarise.inform = FALSE ) cloneTypes <- c(None = 0, cloneTypes) df <- checkList(df) @@ -71,7 +75,7 @@ combineExpression <- function(df, if (group.by == "none" | !is.null(group.by)) { for (i in seq_along(df)) { if (chain != "both") { - df[[i]] <- off.the.chain(df[[i]], chain, cloneCall) + df[[i]] <- off.the.chain(df[[i]], chain, cloneCall) } data <- data.frame(df[[i]], stringsAsFactors = FALSE) data2 <- unique(data[,c("barcode", cloneCall)]) @@ -80,8 +84,8 @@ combineExpression <- function(df, data2 <- data2 %>% group_by(data2[,cloneCall]) %>% summarise(Frequency = n()/nrow(data2)) } else { - data2 <- data2 %>% group_by(data2[,cloneCall]) %>% - summarise(Frequency = n()) + data2 <- data2 %>% group_by(data2[,cloneCall]) %>% + summarise(Frequency = n()) } colnames(data2)[1] <- cloneCall data <- merge(data, data2, by = cloneCall, all = TRUE) @@ -143,13 +147,12 @@ combineExpression <- function(df, } warn_str <- "< 1% of barcodes match: Ensure the barcodes in - the Seurat object match the - barcodes in the combined immune receptor list from - scRepertoire - most common issue is the addition of the - prefixes corresponding to `samples` and 'ID' in the combineTCR/BCR() - functions" + 
the Seurat object match the barcodes in the combined immune receptor + list from scRepertoire - most common issue is the addition of the + prefixes corresponding to `samples` and 'ID' in the combineTCR/BCR() + functions" - if (inherits(x=sc, what ="Seurat")) { + if (is_seurat_object(sc)) { if (length(which(rownames(PreMeta) %in% rownames(sc[[]])))/length(rownames(sc[[]])) < 0.01) { warning(warn_str) @@ -166,8 +169,14 @@ combineExpression <- function(df, } if (filterNA) { sc <- filteringNA(sc) } sc$cloneType <- factor(sc$cloneType, levels = rev(names(cloneTypes))) + + if(is_seurat_object(sc)) { + sc@commands[["combineExpression"]] <- make_screp_seurat_cmd( + call_time, sc@active.assay + ) + } sc -} +} # Qile: I think the barcode column added to the metadata is redundant? Since it matches the row names? #' Highlighting specific clonotypes in Seurat #' @@ -312,7 +321,8 @@ alluvialClonotypes <- function(sc, plot <- plot + facet_wrap(.~lodes[,facet], scales="free_y") } else if (length(facet) == 0) { plot <- plot } plot <- plot + geom_text(stat = ggalluvial::StatStratum, infer.label = FALSE, reverse = TRUE, size = 2) - return(plot)} + return(plot) +} #' Visualize the number of single cells with clonotype frequencies by cluster diff --git a/R/seurat_command.R b/R/seurat_command.R new file mode 100644 index 00000000..fa2e3206 --- /dev/null +++ b/R/seurat_command.R @@ -0,0 +1,92 @@ +# script of slightly modified versions of the seurat command script in Seurat +# commands should be added to the seurat@command attribute if the seurat object +# is modified. 
+ +# function needed for make_screp_seurat_cmd +seurat_extractfield <- function(string, field = 1, delim = "_") { + fields <- as.numeric( + x = unlist(x = strsplit(x = as.character(x = field), split = ",")) + ) + if (length(x = fields) == 1) { + return(strsplit(x = string, split = delim)[[1]][field]) + } + return(paste( + strsplit(x = string, split = delim)[[1]][fields], collapse = delim + )) +} + +# seurat's command adding but if a param is a dataframe or list of dataframes, +# completely omits them to save memory. +make_screp_seurat_cmd <- function(call_time, assay) { + + if (as.character(x = sys.calls()[[1]])[1] == "do.call") { + call_string <- deparse(expr = sys.calls()[[1]]) + command_name <- as.character(x = sys.calls()[[1]])[2] + } else { + command_name <- as.character( + x = deparse(expr = sys.calls()[[sys.nframe() - 1]]) + ) + command_name <- gsub( + pattern = "\\.Seurat", + replacement = "", + x = command_name + ) + call_string <- command_name + command_name <- seurat_extractfield( + string = command_name, + field = 1, + delim = "\\(" + ) + } + + argnames <- names(x = formals(fun = sys.function(which = sys.parent(n = 1)))) + argnames <- grep( + pattern = "object", + x = argnames, + invert = TRUE, + value = TRUE + ) + argnames <- grep( + pattern = "anchorset", + x = argnames, + invert = TRUE, + value = TRUE + ) + argnames <- grep( + pattern = "\\.\\.\\.", + x = argnames, + invert = TRUE, + value = TRUE + ) + + params <- list() + p.env <- parent.frame(n = 1) + argnames <- intersect(x = argnames, y = ls(name = p.env)) + for (arg in argnames) { + param_value <- get(x = arg, envir = p.env) + if (is_seurat_object(param_value) || is_df_or_list_of_df(param_value)) { + next + } + params[[arg]] <- param_value + } + + command_name <- sub( + pattern = "[\\.]+$", + replacement = "", + x = command_name, + perl = TRUE + ) + command_name <- sub( + pattern = "\\.\\.", replacement = "\\.", x = command_name, perl = TRUE + ) + + # return the command object + methods::new( + 
Class = 'SeuratCommand', + name = command_name, + params = params, + time.stamp = call_time, + call.string = call_string, + assay.used = assay + ) +} diff --git a/R/startrac.R b/R/startrac.R index fe2ebe48..03acf3df 100644 --- a/R/startrac.R +++ b/R/startrac.R @@ -85,9 +85,9 @@ StartracDiversity <- function(sc, guides(fill="none") + theme(axis.title.x = element_blank()) } - if (exportTable == TRUE) { + if (exportTable) { return(indices) - } + } return(plot) } @@ -96,8 +96,7 @@ StartracDiversity <- function(sc, #' The Startrac Class #' -#' The Startrac object store the data for tcr-based T cell dynamics analyis. The slots contained -#' in Startrac object are listed below: +#' The Startrac object store the data for tcr-based T cell dynamics analysis. #' @slot aid character. aid of the object, used for identification of the object. #' For example, patient id. default: "AID" #' @slot cell.data data.frame. Each line for a cell, and these columns as @@ -111,7 +110,7 @@ StartracDiversity <- function(sc, #' @slot pIndex.migr data.frame. Each line for a cluster; pairwise migration #' index between the two locations indicated in the column name. #' @slot pIndex.tran data.frame. Each line for a cluster; pairwise transition -#' index betwwen the two major clusters indicated by the row name and column name. +#' index between the two major clusters indicated by the row name and column name. #' @slot cluster.sig.data data.frame. Each line for a cluster; contains the #' p values of cluster indices. #' @slot pIndex.sig.migr data.frame. 
Each line for a cluster; contains the @@ -129,7 +128,7 @@ StartracDiversity <- function(sc, #' @name Startrac #' @rdname Startrac #' @aliases Startrac-class -#' @return method definition for runing startrac +#' @return method definition for running startrac Startrac <- setClass("Startrac", slots = c(aid = "character", cell.data = "data.frame", @@ -430,13 +429,21 @@ setMethod("getSig", signature = "Startrac", definition = Startrac.getSig) #' The StartracOUt Class #' #' Object store the result of Startrac.run: -#' @slot proj character. identification of the object. For example, patient id. default: "AID" -#' @slot cluster.data data.frame. Each line for a cluster; contain the cluster level indexes information -#' @slot pIndex.migr data.frame. Each line for a cluster; pairwise migration index between the two locations indicated in the column name. -#' @slot pIndex.tran data.frame. Each line for a cluster; pairwise transition index betwwen the two major clusters indicated by the row name and column name. -#' @slot cluster.sig.data data.frame. Each line for a cluster; contains the p values of cluster indices. -#' @slot pIndex.sig.migr data.frame. Each line for a cluster; contains the p values of pairwise migration indices. -#' @slot pIndex.sig.tran data.frame. Each line for a cluster; contains the p values of pairwise transition indices. +#' @slot proj character. identification of the object. For example, patient id. +#' default: "AID" +#' @slot cluster.data data.frame. Each line for a cluster; contain the cluster +#' level indexes information +#' @slot pIndex.migr data.frame. Each line for a cluster; pairwise migration +#' index between the two locations indicated in the column name. +#' @slot pIndex.tran data.frame. Each line for a cluster; pairwise transition +#' index between the two major clusters indicated by the row name and column +#' name. +#' @slot cluster.sig.data data.frame. Each line for a cluster; contains the p +#' values of cluster indices. 
+#' @slot pIndex.sig.migr data.frame. Each line for a cluster; contains the p +#' values of pairwise migration indices. +#' @slot pIndex.sig.tran data.frame. Each line for a cluster; contains the p +#' values of pairwise transition indices. #' @slot objects list. other objects #' @name StartracOut #' @rdname StartracOut @@ -527,7 +534,7 @@ mcol.entropy <- function(x) return(H) } -#' warpper function for Startrac analysis +#' wrapper function for Startrac analysis #' @importFrom reshape2 dcast #' @importFrom plyr ldply adply llply #' @importFrom parallel makeCluster stopCluster @@ -537,7 +544,7 @@ mcol.entropy <- function(x) #' @param proj character. String used to annotate the project. #' @param cores integer. number of core to be used. default: NULL. #' @param n.perm integer. number of permutation will be performed. If NULL, no permutation. (default: NULL) -#' @param verbose logical. wheter return intermediate result (some Startrac objects) +#' @param verbose logical. whether return intermediate result (some Startrac objects) #' @details run the Startrac pipeline #' @keywords internal #' @return an list contains data.frame elements "cluster.data","pIndex.migr" and "pIndex.tran" diff --git a/R/utils.R b/R/utils.R index 568b634d..d5056256 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,4 +1,10 @@ +# readability functions "%!in%" <- Negate("%in%") +is_seurat_object <- function(obj) inherits(obj, "Seurat") +is_se_object <- function(obj) inherits(obj, "SummarizedExperiment") +is_seurat_or_se_object <- function(obj) { + is_seurat_object(obj) || is_se_object(obj) +} #Use to shuffle between chains off.the.chain <- function(dat, chain, cloneCall) { @@ -39,38 +45,54 @@ groupList <- function(df, group.by) { return(df) } -#Ensure df is in list format +# Ensure df is in list format checkList <- function(df) { - df <- if(is(df)[1] != "list") list(df) else df - return(df) + df <- tryCatch( + { + if (is(df)[1] != "list") { + df <- list(df) + } + df + }, + error = function(e) { + 
stop( + "Please ensure that the input consists of at least one dataframe" + ) + } + ) + df } +#checkList <- function(df) { +# df <- if(is(df)[1] != "list") list(df) else df +# return(df) +#} + checkContigs <- function(df) { - df <- lapply(seq_len(length(df)), function(x) { - df[[x]] <- if(is(df[[x]])[1] != "data.frame") as.data.frame(df[[x]]) else df[[x]] - df[[x]][df[[x]] == ""] <- NA - df[[x]] - }) + df <- lapply(seq_len(length(df)), function(x) { + df[[x]] <- if(!is.data.frame(df[[x]])) as.data.frame(df[[x]]) else df[[x]] + df[[x]][df[[x]] == ""] <- NA + df[[x]] + }) + df } #' @importFrom dplyr bind_rows bound.input.return <- function(df) { - if (inherits(x=df, what ="Seurat") | inherits(x=df, what ="SummarizedExperiment")) { - df <- grabMeta(df) - } else { - df <- bind_rows(df, .id = "element.names") - } - return(df) + if (is_seurat_or_se_object(df)) { + return(grabMeta(df)) + } + bind_rows(df, .id = "element.names") } list.input.return <- function(df, split.by) { - if (inherits(x=df, what ="Seurat") | inherits(x=df, what ="SummarizedExperiment")) { - if(is.null(split.by)){ - split.by <- "cluster" - } - df <- expression2List(df, split.by) - } - return(df) + if (is_seurat_or_se_object(df)) { + if(is.null(split.by)){ + split.by <- "cluster" + } + df <- expression2List(df, split.by) + } + df } #Get UMAP or other coordinates @@ -79,9 +101,9 @@ get.coord <- function(sc, reduction) { if (is.null(reduction)) { reduction <- "pca" } - if (inherits(x=sc, what ="Seurat")) { + if (is_seurat_object(sc)) { coord <- sc@reductions[[reduction]]@cell.embeddings - } else if (inherits(x=sc, what ="SummarizedExperiment")) { + } else if (is_se_object(sc)) { coord <- reducedDim(sc, reduction) } return(coord) @@ -89,21 +111,22 @@ get.coord <- function(sc, reduction) { #This is to check the single-cell expression object checkSingleObject <- function(sc) { - if (!inherits(x=sc, what ="Seurat") && - !inherits(x=sc, what ="SummarizedExperiment")){ + if (!is_seurat_or_se_object(sc)){ 
stop("Object indicated is not of class 'Seurat' or 'SummarizedExperiment', make sure you are using - the correct data.") } + the correct data.") } +} #This is to grab the meta data from a seurat or SCE object #' @importFrom SingleCellExperiment colData #' @importFrom SeuratObject Idents grabMeta <- function(sc) { - if (inherits(x=sc, what ="Seurat")) { + if (is_seurat_object(sc)) { meta <- data.frame(sc[[]], slot(sc, "active.ident")) colnames(meta)[length(meta)] <- "ident" - } else if (inherits(x=sc, what ="SummarizedExperiment")){ + + } else if (is_se_object(sc)){ meta <- data.frame(colData(sc)) rownames(meta) <- sc@colData@rownames clu <- which(colnames(meta) == "ident") @@ -250,16 +273,15 @@ jaccardIndex <- function(df, length, cloneCall, coef_matrix) { df.i_unique <- df.i[!duplicated(df.i[,cloneCall]),] for (j in seq_along(length)){ if (i >= j){ next } - else { - df.j <- df[[j]] - df.j <- df.j[,c("barcode",cloneCall)] - df.j_unique <- df.j[!duplicated(df.j[,cloneCall]),] - overlap <- length(intersect(df.i_unique[,cloneCall], - df.j_unique[,cloneCall])) - coef_matrix[i,j] <- - overlap/(sum(length(df.i_unique[,cloneCall]), - length(df.j_unique[,cloneCall]))-overlap) - } + + df.j <- df[[j]] + df.j <- df.j[,c("barcode",cloneCall)] + df.j_unique <- df.j[!duplicated(df.j[,cloneCall]),] + overlap <- length(intersect(df.i_unique[,cloneCall], + df.j_unique[,cloneCall])) + coef_matrix[i,j] <- + overlap/(sum(length(df.i_unique[,cloneCall]), + length(df.j_unique[,cloneCall]))-overlap) } } return(coef_matrix) @@ -272,14 +294,12 @@ rawIndex <- function(df, length, cloneCall, coef_matrix) { df.i_unique <- df.i[!duplicated(df.i[,cloneCall]),] for (j in seq_along(length)){ if (i >= j){ next } - else { - df.j <- df[[j]] - df.j <- df.j[,c("barcode",cloneCall)] - df.j_unique <- df.j[!duplicated(df.j[,cloneCall]),] - overlap <- length(intersect(df.i_unique[,cloneCall], - df.j_unique[,cloneCall])) - coef_matrix[i,j] <- overlap - } + df.j <- df[[j]] + df.j <- 
df.j[,c("barcode",cloneCall)] + df.j_unique <- df.j[!duplicated(df.j[,cloneCall]),] + overlap <- length(intersect(df.i_unique[,cloneCall], + df.j_unique[,cloneCall])) + coef_matrix[i,j] <- overlap } } return(coef_matrix) @@ -337,26 +357,32 @@ parseTCR <- function(Con.df, unique_df, data2) { barcode.i <- Con.df$barcode[y] location.i <- which(barcode.i == data2$barcode) for (z in seq_along(location.i)) { - where.chain <- data2[location.i[z],"chain"] - - if (where.chain == "TRA" | where.chain == "TRG") { - if(is.na(Con.df[y,"TCR1"])) { - Con.df[y,tcr1_lines] <- data2[location.i[z],data1_lines] - } else { - Con.df[y,tcr1_lines] <- paste(Con.df[y, tcr1_lines], - data2[location.i[z],data1_lines],sep=";") - } - } else if (where.chain == "TRB" | where.chain == "TRD") { - if(is.na(Con.df[y,"TCR2"])) { - Con.df[y,tcr2_lines] <- data2[location.i[z],data2_lines] - } else { - Con.df[y,tcr2_lines] <- paste(Con.df[y, tcr2_lines], - data2[location.i[z],data2_lines],sep=";") + where.chain <- data2[location.i[z],"chain"] + + if (where.chain == "TRA" | where.chain == "TRG") { + if (is.na(Con.df[y,"TCR1"])) { + Con.df[y,tcr1_lines] <- data2[location.i[z],data1_lines] + } else { + Con.df[y,tcr1_lines] <- paste( + Con.df[y, tcr1_lines], + data2[location.i[z],data1_lines], + sep=";" + ) + } + } else if (where.chain == "TRB" | where.chain == "TRD") { + if (is.na(Con.df[y,"TCR2"])) { + Con.df[y,tcr2_lines] <- data2[location.i[z],data2_lines] + } else { + Con.df[y,tcr2_lines] <- paste( + Con.df[y, tcr2_lines], + data2[location.i[z],data2_lines], + sep=";" + ) + } } - } } } - return(Con.df) + Con.df } #Assigning positions for BCR contig data @@ -404,7 +430,7 @@ parseBCR <- function (Con.df, unique_df, data2) { lengthDF <- function(df, cloneCall, chain, group, c1, c2){ Con.df <- NULL names <- names(df) - if (chain == "both") { + if (identical(chain, "both")) { for (i in seq_along(df)) { length <- nchar(gsub("_", "", df[[i]][,cloneCall])) val <- df[[i]][,cloneCall] @@ -417,7 +443,7 @@ lengthDF 
<- function(df, cloneCall, chain, group, c1, c2){ data <- na.omit(data.frame(length, val, names[i])) colnames(data) <- c("length", "CT", "values") Con.df<- rbind.data.frame(Con.df, data) }} - } else if (chain != "both") { + } else { for (x in seq_along(df)) { df[[x]] <- off.the.chain(df[[x]], chain, cloneCall) strings <- df[[x]][,cloneCall] @@ -438,7 +464,8 @@ lengthDF <- function(df, cloneCall, chain, group, c1, c2){ data <- subset(data, CT != "NA" & CT != "") Con.df<- rbind.data.frame(Con.df, data) }} } -return(Con.df)} + return(Con.df) +} #General combination of nucleotide, aa, and gene sequences for T/B cells assignCT <- function(cellType, Con.df) { @@ -448,11 +475,12 @@ assignCT <- function(cellType, Con.df) { Con.df$CTaa <- paste(Con.df$cdr3_aa1, Con.df$cdr3_aa2, sep="_") Con.df$CTstrict <- paste(Con.df$TCR1, Con.df$cdr3_nt1, Con.df$TCR2, Con.df$cdr3_nt2, sep="_") - } else { + } else { # assume cellType = B Con.df$CTgene <- paste(Con.df$IGH, Con.df$IGLC, sep="_") Con.df$CTnt <- paste(Con.df$cdr3_nt1, Con.df$cdr3_nt2, sep="_") - Con.df$CTaa <- paste(Con.df$cdr3_aa1, Con.df$cdr3_aa2, sep="_") } -return(Con.df) + Con.df$CTaa <- paste(Con.df$cdr3_aa1, Con.df$cdr3_aa2, sep="_") + } + return(Con.df) } @@ -467,7 +495,7 @@ makeGenes <- function(cellType, data2, chain1, chain2) { mutate(TCR2 = ifelse(chain %in% c("TRB", "TRD"), str_c(str_replace_na(v_gene), str_replace_na(d_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."), NA)) } - else { + else { # assume BCR (`c("B")`) heavy <- data2[data2$chain == "IGH",] %>% mutate(IGHct = str_c(str_replace_na(v_gene), str_replace_na(d_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = ".")) kappa <- data2[data2$chain == "IGK",] %>% @@ -476,7 +504,7 @@ makeGenes <- function(cellType, data2, chain1, chain2) { mutate(IGLct = str_c(str_replace_na(v_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = ".")) data2 <- bind_rows(heavy, kappa, lambda) } - return(data2) + data2 } short.check <- 
function(df, cloneCall) { @@ -515,3 +543,16 @@ select.gene <- function(df, chain, gene, label) { return(df) } +# check if object is a dataframe or list of dataframes +is_df_or_list_of_df <- function(x) { + if (is.data.frame(x)) { + return(TRUE) + } else if (is.list(x)) { + if (length(x) == 0) { + return(FALSE) + } + return(all(sapply(x, is.data.frame))) + } else { + return(FALSE) + } +} diff --git a/R/viz.R b/R/viz.R index 953eed94..25024cec 100644 --- a/R/viz.R +++ b/R/viz.R @@ -37,8 +37,10 @@ quantContig <- function(df, split.by = NULL, order = TRUE, exportTable = FALSE) { - if (length(group.by) > 1) { stop("Only one item in the group.by variable can - be listed.") } + + if (length(group.by) > 1) { + stop("Only one item in the group.by variable can be listed.") + } df <- list.input.return(df, split.by) cloneCall <- theCall(cloneCall) df <- checkBlanks(df, cloneCall) @@ -59,7 +61,7 @@ quantContig <- function(df, location <- which(colnames(df[[i]]) == group.by) Con.df[i,4] <- df[[i]][1,location] } col <- length(unique(Con.df[,group.by])) - if (scale == TRUE) { y <- "scaled" + if (scale) { y <- "scaled" Con.df$scaled <- Con.df$contigs/Con.df$total*100 ylab <- "Percent of Unique Clonotype" @@ -81,7 +83,19 @@ quantContig <- function(df, ylab <- "Percent of Unique Clonotype" } else { y <- "contigs" ylab <- "Unique Clonotypes" } } - if (exportTable == TRUE) { return(Con.df) } + + if (exportTable) { + if (length(df) > 1) { + return(Con.df) + } + + # if a single sample, remove the "values" column if NA + if (is.na(Con.df[[2]])) { + Con.df[[2]] <- NULL + } + return(Con.df) + } + if(order & is.null(group.by)) { Con.df[,x] <- factor(Con.df[,x], levels = Con.df[,x]) } @@ -92,8 +106,18 @@ quantContig <- function(df, stat_summary(fun=mean, geom="bar", color="black", lwd=0.25)+ theme_classic() + xlab("Samples") + ylab(ylab) + scale_fill_manual(values = colorblind_vector(col)) - return(plot) } - + + # if it is a single run, remove x axis labels if sample name missing + if 
((length(df) == 1) && identical(names(df), NA_character_)) { + plot <- plot + + ggplot2::theme( + axis.title.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank() + ) + } + return(plot) +} #' Demonstrate the relative abundance of clonotypes by group or sample. #' @@ -418,19 +442,25 @@ compareClonotypes <- function(df, #' to group the new list. NULL will return clusters. #' @param graph graph either proportion or raw clonotype count #' @param exportTable Returns the data frame used for forming the graph. +#' @param seed the integer seed to set for the random variation of point coords. #' #' @import ggplot2 #' #' @export #' @return ggplot of the relative clonotype numbers -scatterClonotype <- function(df, cloneCall ="strict", - x.axis = NULL, y.axis = NULL, - chain = "both", - dot.size = "total", - split.by = NULL, - graph = "proportion", - exportTable = FALSE) { +scatterClonotype <- function( + df, cloneCall ="strict", + x.axis = NULL, y.axis = NULL, + chain = "both", + dot.size = "total", + split.by = NULL, + graph = "proportion", + exportTable = FALSE, + seed = NULL +) { + if (!is.null(seed)) {set.seed(seed)} + df <- list.input.return(df, split.by) cloneCall <- theCall(cloneCall) axes <- which(names(df) %in% c(x.axis, y.axis, dot.size)) @@ -531,52 +561,56 @@ scatterClonotype <- function(df, cloneCall ="strict", #' @export #' @return ggplot dendrogram of the clone size distribution -clonesizeDistribution <- function(df, cloneCall ="strict", - chain = "both", - method = "ward.D2", - threshold = 1, - group.by = NULL, - split.by = NULL, - exportTable = FALSE) { - df <- list.input.return(df, split.by) - cloneCall <- theCall(cloneCall) - df <- checkBlanks(df, cloneCall) - if(!is.null(group.by)) { - df <- groupList(df, group.by) - } - data <- bind_rows(df) - unique_df <- unique(data[,cloneCall]) - Con.df <- data.frame(matrix(NA, length(unique_df), length(df))) - Con.df <- data.frame(unique_df, Con.df, stringsAsFactors = FALSE) - 
colnames(Con.df)[1] <- "clonotype" - for (i in seq_along(df)) { - if (chain != "both") { +clonesizeDistribution <- function( + df, + cloneCall ="strict", + chain = "both", + method = "ward.D2", + threshold = 1, + group.by = NULL, + split.by = NULL, + exportTable = FALSE +) { + df <- list.input.return(df, split.by) + cloneCall <- theCall(cloneCall) + df <- checkBlanks(df, cloneCall) + if(!is.null(group.by)) { + df <- groupList(df, group.by) + } + data <- bind_rows(df) + unique_df <- unique(data[,cloneCall]) + Con.df <- data.frame(matrix(NA, length(unique_df), length(df))) + Con.df <- data.frame(unique_df, Con.df, stringsAsFactors = FALSE) + colnames(Con.df)[1] <- "clonotype" + for (i in seq_along(df)) { + if (chain != "both") { df[[i]] <- off.the.chain(df[[i]], chain, cloneCall) - } - data <- df[[i]] - data <- data.frame(table(data[,cloneCall]), - stringsAsFactors = FALSE) - colnames(data) <- c(cloneCall, "Freq") - for (y in seq_along(unique_df)){ - clonotype.y <- Con.df$clonotype[y] - location.y <- which(clonotype.y == data[,cloneCall]) - Con.df[y,i+1] <- data[location.y[1],"Freq"] } } - colnames(Con.df)[2:(length(df)+1)] <- names(df) - Con.df[is.na(Con.df)] <- 0 - list <- list() - for (i in seq_along(df)) { - list[[i]] <- Con.df[,i+1] - list[[i]] <- suppressWarnings(fdiscgammagpd(list[[i]], useq = threshold)) - } - names(list) <- names(df) - grid <- 0:10000 - distances <- get_distances(list, grid, modelType="Spliced") - hclust <- hclust(as.dist(distances), method = method) - hcd <- as.dendrogram(hclust) - plot <- plot(hcd) - if (exportTable == TRUE) { return(distances) } - return(plot) + data <- df[[i]] + data <- data.frame(table(data[,cloneCall]), + stringsAsFactors = FALSE) + colnames(data) <- c(cloneCall, "Freq") + for (y in seq_along(unique_df)){ + clonotype.y <- Con.df$clonotype[y] + location.y <- which(clonotype.y == data[,cloneCall]) + Con.df[y,i+1] <- data[location.y[1],"Freq"] + } + } + colnames(Con.df)[2:(length(df)+1)] <- names(df) + 
Con.df[is.na(Con.df)] <- 0 + list <- list() + for (i in seq_along(df)) { + list[[i]] <- Con.df[,i+1] + list[[i]] <- suppressWarnings(fdiscgammagpd(list[[i]], useq = threshold)) + } + names(list) <- names(df) + grid <- 0:10000 + distances <- get_distances(list, grid, modelType="Spliced") + hclust <- hclust(as.dist(distances), method = method) + hcd <- as.dendrogram(hclust) + plot <- plot(hcd) + if (exportTable) { return(distances) } + return(plot) } #This is the basic color palette for the package @@ -638,7 +672,7 @@ makingLodes <- function(meta2, color, alpha, facet, set.axes) { #' gene segments such as V, D, J, or C. #' @param order Categorical variable to organize the x-axis, either "gene" or "variance" #' @param scale Converts the individual count of genes to proportion using the total -#' respective reprtoire size +#' respective repertoire size #' @param group.by The column header used for grouping. #' @param split.by If using a single-cell object, the column header #' to group the new list. NULL will return clusters. diff --git a/README.md b/README.md index 70fc39b3..3d1b1cf2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Single-cell sequencing is an emerging technology in the field of immunology and oncology that allows researchers to couple RNA quantification and other modalities, like immune cell receptor profiling at the level of an individual cell. A number of workflows and software packages have been created to process and analyze single-cell transcriptomic data. These packages allow users to take the vast dimensionality of the data generated in single-cell-based experiments and distill the data into novel insights. Unlike the transcriptomic field, there is a lack of options for software that allow for single-cell immune receptor profiling. 
Enabling users to easily combine RNA and immune profiling, scRepertoire was built to process data derived from the 10x Genomics Chromium Immune Profiling for both T-cell receptor (TCR) and immunoglobulin (Ig) enrichment workflows and subsequently interacts with the popular Seurat R package. ### Applying Deep Learning to VDJ data -scRepertoire is compatible and integrated with the R packages [Trex](https://github.com/ncborcherding/Trex) for deep-learning-based autencoding of the T cell receptor and [Ibex](https://github.com/ncborcherding/Ibex) for the B cell receptor. +scRepertoire is compatible and integrated with the R packages [Trex](https://github.com/ncborcherding/Trex) for deep-learning-based autoencoding of the T cell receptor and [Ibex](https://github.com/ncborcherding/Ibex) for the B cell receptor. ### Wrapper Functions scRepertoire v1.0.2 has the functionality of the [powerTCR](https://github.com/hillarykoch/powerTCR) approach to comparing clone size distribution, [please cite](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006571) the manuscript if using the ```clonesizeDistribution()``` function. In addition, we recently added the [Startrac](https://github.com/Japrin/STARTRAC) clonotype metrics, if using the ```StartracDiversity()``` please read and cite [the accompanying article](https://www.nature.com/articles/s41586-018-0694-x). @@ -44,7 +44,7 @@ BiocManager::install("scRepertoire") ### Getting Data -Unfortunately, Github limits the size of individual files. In order to access the seurat object paired with scRepetoire please download the .rda from [here](https://drive.google.com/file/d/1Iv6t2BScpnLLrFWaWFUGwne3XzRAwMOc/view?usp=share_link). +Unfortunately, Github limits the size of individual files. In order to access the seurat object paired with scRepertoire please download the .rda from [here](https://drive.google.com/file/d/1Iv6t2BScpnLLrFWaWFUGwne3XzRAwMOc/view?usp=share_link). 
### Learning To Use scRepertoire diff --git a/data/combined_mini_contig_list.rda b/data/combined_mini_contig_list.rda new file mode 100644 index 00000000..4cd0b7bc Binary files /dev/null and b/data/combined_mini_contig_list.rda differ diff --git a/data/screp_example.rda b/data/screp_example.rda index 4dd65f08..550e9fe2 100644 Binary files a/data/screp_example.rda and b/data/screp_example.rda differ diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 00000000..472966aa --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,18 @@ +citHeader("To cite scRepertoire in publications use:") + +citEntry( + entry = "Article", + title = "scRepertoire: An R-based toolkit for single-cell immune receptor analysis", + author = personList( + as.person("Nicholas Borcherding"), + as.person("Nicholas L Bormann"), + as.person("Gloria Kraus") + ), + journal = "F1000Research", + volume = "9", + year = "2022", + publisher = "Faculty of 1000 Ltd", + doi = "10.12688/f1000research.22139.2", + url = "https://doi.org/10.12688/f1000research.22139.2", + textVersion = "Borcherding N, Bormann NL and Kraus G. scRepertoire: An R-based toolkit for single-cell immune receptor analysis [version 2; peer review: 2 approved]. 
F1000Research 2020, 9:47 (https://doi.org/10.12688/f1000research.22139.2)" +) diff --git a/inst/WORDLIST b/inst/WORDLIST index ab652fd4..e33e95f3 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -65,11 +65,8 @@ aa airr al alluvialClonotypes -analyis -autencoding barcode barcodes -betwwen changeNames circlize clonalNetwork @@ -134,15 +131,10 @@ rda registerDoParallel removeMulti removeNA -reprtoire -runing -scRepetoire scater seurat startrac stripBarcode -subseted -subseting subtype sys tcr @@ -152,5 +144,8 @@ transcriptomic tsv unqiue unreturned -warpper -wheter +CMD +Codecov +coords +autoencoding +screp diff --git a/man/Startrac.Rd b/man/Startrac.Rd index 92a933d0..2f354b11 100644 --- a/man/Startrac.Rd +++ b/man/Startrac.Rd @@ -6,11 +6,10 @@ \alias{Startrac-class} \title{The Startrac Class} \value{ -method definition for runing startrac +method definition for running startrac } \description{ -The Startrac object store the data for tcr-based T cell dynamics analyis. The slots contained -in Startrac object are listed below: +The Startrac object store the data for tcr-based T cell dynamics analysis. } \section{Slots}{ @@ -34,7 +33,7 @@ level indexes information} index between the two locations indicated in the column name.} \item{\code{pIndex.tran}}{data.frame. Each line for a cluster; pairwise transition -index betwwen the two major clusters indicated by the row name and column name.} +index between the two major clusters indicated by the row name and column name.} \item{\code{cluster.sig.data}}{data.frame. 
Each line for a cluster; contains the p values of cluster indices.} diff --git a/man/Startrac.run.Rd b/man/Startrac.run.Rd index b66e479f..8bd184c0 100644 --- a/man/Startrac.run.Rd +++ b/man/Startrac.run.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/startrac.R \name{Startrac.run} \alias{Startrac.run} -\title{warpper function for Startrac analysis} +\title{wrapper function for Startrac analysis} \usage{ Startrac.run( cell.data, @@ -21,13 +21,13 @@ Startrac.run( \item{n.perm}{integer. number of permutation will be performed. If NULL, no permutation. (default: NULL)} -\item{verbose}{logical. wheter return intermediate result (some Startrac objects)} +\item{verbose}{logical. whether return intermediate result (some Startrac objects)} } \value{ an list contains data.frame elements "cluster.data","pIndex.migr" and "pIndex.tran" } \description{ -warpper function for Startrac analysis +wrapper function for Startrac analysis } \details{ run the Startrac pipeline diff --git a/man/StartracOut.Rd b/man/StartracOut.Rd index bacd0e5b..0e8378d5 100644 --- a/man/StartracOut.Rd +++ b/man/StartracOut.Rd @@ -11,19 +11,27 @@ Object store the result of Startrac.run: \section{Slots}{ \describe{ -\item{\code{proj}}{character. identification of the object. For example, patient id. default: "AID"} +\item{\code{proj}}{character. identification of the object. For example, patient id. +default: "AID"} -\item{\code{cluster.data}}{data.frame. Each line for a cluster; contain the cluster level indexes information} +\item{\code{cluster.data}}{data.frame. Each line for a cluster; contain the cluster +level indexes information} -\item{\code{pIndex.migr}}{data.frame. Each line for a cluster; pairwise migration index between the two locations indicated in the column name.} +\item{\code{pIndex.migr}}{data.frame. Each line for a cluster; pairwise migration +index between the two locations indicated in the column name.} -\item{\code{pIndex.tran}}{data.frame. 
Each line for a cluster; pairwise transition index betwwen the two major clusters indicated by the row name and column name.} +\item{\code{pIndex.tran}}{data.frame. Each line for a cluster; pairwise transition +index between the two major clusters indicated by the row name and column +name.} -\item{\code{cluster.sig.data}}{data.frame. Each line for a cluster; contains the p values of cluster indices.} +\item{\code{cluster.sig.data}}{data.frame. Each line for a cluster; contains the p +values of cluster indices.} -\item{\code{pIndex.sig.migr}}{data.frame. Each line for a cluster; contains the p values of pairwise migration indices.} +\item{\code{pIndex.sig.migr}}{data.frame. Each line for a cluster; contains the p +values of pairwise migration indices.} -\item{\code{pIndex.sig.tran}}{data.frame. Each line for a cluster; contains the p values of pairwise transition indices.} +\item{\code{pIndex.sig.tran}}{data.frame. Each line for a cluster; contains the p +values of pairwise transition indices.} \item{\code{objects}}{list. other objects} }} diff --git a/man/combineBCR.Rd b/man/combineBCR.Rd index d73e9a75..ac7474ea 100644 --- a/man/combineBCR.Rd +++ b/man/combineBCR.Rd @@ -49,8 +49,8 @@ combineBCR produces a column CTstrict of an index of nucleotide sequence and the corresponding v-gene. This index automatically calculates the Levenshtein distance between sequences with the same V gene and will index sequences with <= 0.15 normalized Levenshtein distance with the same -ID. After which, clonotype clusters are called using the igraph -component() function. Clonotype that are clustered across multiple +ID. After which, clonotype clusters are called using the +`igraph:: component()` function. Clonotype that are clustered across multiple sequences will then be labeled with "LD" in the CTstrict header. 
} \examples{ diff --git a/man/combineExpression.Rd b/man/combineExpression.Rd index a18350a3..d43f56e3 100644 --- a/man/combineExpression.Rd +++ b/man/combineExpression.Rd @@ -44,7 +44,7 @@ clonotype information} \item{addLabel}{This will add a label to the frequency header, allowing the user to try multiple group.by variables or recalculate frequencies after -subseting the data.} +subsetting the data.} } \value{ seurat or SingleCellExperiment object with attached clonotype diff --git a/man/combineTCR.Rd b/man/combineTCR.Rd index b893136d..f6ca727a 100644 --- a/man/combineTCR.Rd +++ b/man/combineTCR.Rd @@ -31,14 +31,19 @@ corresponding chains with the highest expression for a single barcode.} List of clonotypes for individual cell barcodes } \description{ -This function consolidates a list of TCR sequencing results to the level of -the individual cell barcodes. Using the samples and ID parameters, the -function will add the strings as prefixes to prevent issues with repeated -barcodes. The resulting new barcodes will need to match the Seurat or SCE -object in order to use, \code{\link{combineExpression}}. Several -levels of filtering exist - remove or filterMulti are parameters that -control how the function deals with barcodes with multiple chains -recovered. +This function consolidates a list of TCR sequencing results to +the level of the individual cell barcodes. Using the samples and ID +parameters, the function will add the strings as prefixes to prevent issues +with repeated barcodes. The resulting new barcodes will need to match the +Seurat or SCE object in order to use, \code{\link{combineExpression}}. +Several levels of filtering exist - `removeNA`, `removeMulti`, or +`filterMulti` are parameters that control how the function deals with +barcodes with multiple chains recovered. 
+} +\details{ +For single-sample TCR sequencing experiments, where the input is +just a single data.frame, the function will add the consolidated information +columns after the existing raw clonotype data } \examples{ combineTCR(contig_list, diff --git a/man/combined_mini_contig_list.Rd b/man/combined_mini_contig_list.Rd new file mode 100644 index 00000000..4f01dff9 --- /dev/null +++ b/man/combined_mini_contig_list.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{combined_mini_contig_list} +\alias{combined_mini_contig_list} +\title{Processed subset of `contig_list`} +\format{ +An R `list` of `data.frame` objects +} +\usage{ +data("combined_mini_contig_list") +} +\description{ +A list of 6 dataframes of T cell contigs outputted from the +`filtered_contig_annotation` files, but subsetted to about 92 valid T cells +which correspond to the same barcodes found in `screp_example` +} +\seealso{ +\code{\link{contig_list}} +} +\keyword{datasets} diff --git a/man/contig_list.Rd b/man/contig_list.Rd index 97a2c118..b2440c3f 100644 --- a/man/contig_list.Rd +++ b/man/contig_list.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/contig_list.R +% Please edit documentation in R/data.R \docType{data} \name{contig_list} \alias{contig_list} diff --git a/man/scRepertoire-package.Rd b/man/scRepertoire-package.Rd new file mode 100644 index 00000000..8e5ddd84 --- /dev/null +++ b/man/scRepertoire-package.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scRepertoire-package.R +\docType{package} +\name{scRepertoire-package} +\alias{scRepertoire} +\alias{scRepertoire-package} +\title{scRepertoire: A toolkit for single-cell immune receptor profiling} +\description{ +scRepertoire was built to process data derived from the 10x Genomics Chromium Immune Profiling for both T-cell receptor (TCR) and immunoglobulin (Ig) 
enrichment workflows and subsequently interacts with the popular Seurat and SingleCellExperiment R packages. It also allows for general analysis of single-cell clonotype information without the use of expression information. The package functions as a wrapper for Startrac and powerTCR R packages. +} +\author{ +\strong{Maintainer}: Nick Borcherding \email{ncborch@gmail.com} + +} +\keyword{internal} diff --git a/man/scatterClonotype.Rd b/man/scatterClonotype.Rd index 62bd01f6..4208a509 100644 --- a/man/scatterClonotype.Rd +++ b/man/scatterClonotype.Rd @@ -13,7 +13,8 @@ scatterClonotype( dot.size = "total", split.by = NULL, graph = "proportion", - exportTable = FALSE + exportTable = FALSE, + seed = NULL ) } \arguments{ @@ -39,6 +40,8 @@ to group the new list. NULL will return clusters.} \item{graph}{graph either proportion or raw clonotype count} \item{exportTable}{Returns the data frame used for forming the graph.} + +\item{seed}{the integer seed to set for the random variation of point coords.} } \value{ ggplot of the relative clonotype numbers diff --git a/man/screp_example.Rd b/man/screp_example.Rd index c29ecd04..f2a058a0 100644 --- a/man/screp_example.Rd +++ b/man/screp_example.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/screp_example.R +% Please edit documentation in R/data.R \docType{data} \name{screp_example} \alias{screp_example} -\title{A seurat object of 1000 single T cells derived +\title{A seurat object of 100 single T cells derived from 3 clear cell renal carcinoma patients.} \description{ -A seurat object of 1000 single T cells derived -from 3 clear cell renal carcinoma patients. +The object is compatible with `contig_list` and the TCR +sequencing data can be added with `combineExpression`. 
} diff --git a/man/vizGenes.Rd b/man/vizGenes.Rd index 87696b58..24f359c4 100644 --- a/man/vizGenes.Rd +++ b/man/vizGenes.Rd @@ -34,7 +34,7 @@ gene segments such as V, D, J, or C.} \item{order}{Categorical variable to organize the x-axis, either "gene" or "variance"} \item{scale}{Converts the individual count of genes to proportion using the total -respective reprtoire size} +respective repertoire size} \item{group.by}{The column header used for grouping.} diff --git a/scRepertoire.Rproj b/scRepertoire.Rproj index 566bd78d..3d3340ee 100644 --- a/scRepertoire.Rproj +++ b/scRepertoire.Rproj @@ -5,7 +5,7 @@ SaveWorkspace: Default AlwaysSaveHistory: Default EnableCodeIndexing: Yes -UseSpacesForTab: Yes +UseSpacesForTab: No NumSpacesForTab: 4 Encoding: UTF-8 @@ -17,3 +17,4 @@ AutoAppendNewline: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageCheckArgs: --as-cran diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 00000000..22034c46 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +*.dll diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 00000000..0dc7fc4c --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,31 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// rcpp_hello_world +void rcpp_hello_world(); +RcppExport SEXP _scRepertoire_rcpp_hello_world() { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_hello_world(); + return R_NilValue; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_scRepertoire_rcpp_hello_world", (DL_FUNC) &_scRepertoire_rcpp_hello_world, 0}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_scRepertoire(DllInfo *dll) { + 
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/hello_world.cpp b/src/hello_world.cpp new file mode 100644 index 00000000..02307bba --- /dev/null +++ b/src/hello_world.cpp @@ -0,0 +1,6 @@ +#include + +// [[Rcpp::export]] +void rcpp_hello_world() { + Rcpp::Rcout << "hello, world!" << "\n"; +} diff --git a/tests/testthat/_snaps/viz/abundancecontig-scaled-plot.svg b/tests/testthat/_snaps/viz/abundancecontig-scaled-plot.svg new file mode 100644 index 00000000..94131b11 --- /dev/null +++ b/tests/testthat/_snaps/viz/abundancecontig-scaled-plot.svg @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +1000 +2000 + + + + + + + +1 +10 +100 +Abundance +Number of Clonotypes + +Samples + + + + + + +PY_P +PY_T +PX_P +PX_T +PZ_P +PZ_T +abundanceContig_scaled_plot + + diff --git a/tests/testthat/_snaps/viz/clonesizedistribution-vignette-plot.svg b/tests/testthat/_snaps/viz/clonesizedistribution-vignette-plot.svg new file mode 100644 index 00000000..fb5f72c6 --- /dev/null +++ b/tests/testthat/_snaps/viz/clonesizedistribution-vignette-plot.svg @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + +0.00 +0.05 +0.10 +0.15 +0.20 +0.25 + + + + + + + + + + + + + +PY_P + + + + + + + + +PY_T + + + + + + + + + + + + +PX_T +PZ_T + + + + + + + + +PX_P +PZ_P + + diff --git a/tests/testthat/_snaps/viz/compareclonotypes-alluvial-plot.svg b/tests/testthat/_snaps/viz/compareclonotypes-alluvial-plot.svg new file mode 100644 index 00000000..419cbf5b --- /dev/null +++ b/tests/testthat/_snaps/viz/compareclonotypes-alluvial-plot.svg @@ -0,0 +1,141 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.05 +0.10 +0.15 + + + + + + + +PX_P +PX_T +Proportion + +Clonotypes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +CAASAKETSGSRLTF_CASSRILITSGSPLHF +CAASAKETSGSRLTF_NA +CAGDDRGSTLGRLYF_CASSLVEEDEQFF 
+CAMREGIGGYQKVTF_CSALPASGSDEQFF +CASLQNRDDKIIF_CASGAFGGIGRPYNSPLHF +CATVPNSGGYQKVTF_CASSRPPGGRVSDTQYF +CAVHPRDSSYKLIF_CASSLTVGSAETQYF +CAVRVRMDSSYKLIF_CASSPPDAAYNEQFF +NA_CASGAFGGIGRPYNSPLHF +NA_CASSLTVGSAETQYF +NA_CASSRILITSGSPLHF +CAAGTNAGGTSYGKLTF_CASSPGHDTEAFF +CAESLTDSWGKLQF_CASSPPSNDEQYF +CAESSPGGYNKLIF_CASSWNTGELFF +CAMSYGGSQGNLIF_CASSRDPTSDSTDTQYF +CLPYTQGGSEKLVF_CASRFLGDSPLHF +NA_CASSLVEEDEQFF +compareClonotypes_alluvial_plot + + diff --git a/tests/testthat/_snaps/viz/compareclonotypes-area-plot.svg b/tests/testthat/_snaps/viz/compareclonotypes-area-plot.svg new file mode 100644 index 00000000..8d1f931c --- /dev/null +++ b/tests/testthat/_snaps/viz/compareclonotypes-area-plot.svg @@ -0,0 +1,95 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.05 +0.10 +0.15 + + + + + + + +PX_P +PX_T +Proportion + +Clonotypes + + + + + + + + + + + +CAASAKETSGSRLTF_CASSRILITSGSPLHF +CAASAKETSGSRLTF_NA +CAGDDRGSTLGRLYF_CASSLVEEDEQFF +CAMREGIGGYQKVTF_CSALPASGSDEQFF +CASLQNRDDKIIF_CASGAFGGIGRPYNSPLHF +CATVPNSGGYQKVTF_CASSRPPGGRVSDTQYF +CAVHPRDSSYKLIF_CASSLTVGSAETQYF +CAVRVRMDSSYKLIF_CASSPPDAAYNEQFF +NA_CASGAFGGIGRPYNSPLHF +NA_CASSLTVGSAETQYF +NA_CASSRILITSGSPLHF +compareClonotypes_area_plot + + diff --git a/tests/testthat/_snaps/viz/lengthcontig-both-chain-plot.svg b/tests/testthat/_snaps/viz/lengthcontig-both-chain-plot.svg new file mode 100644 index 00000000..c7b08c07 --- /dev/null +++ b/tests/testthat/_snaps/viz/lengthcontig-both-chain-plot.svg @@ -0,0 +1,393 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +200 +400 +600 + + + + + + + + + + + + + + + + + + + + + + +7 +12 +17 +22 +27 +32 +37 +42 +47 +52 +57 +62 +67 +72 +77 +87 +92 +Length +Number of CDR3 (AA) + +Samples + + + + + + +PY_P +PY_T +PX_P +PX_T +PZ_P +PZ_T +lengthContig_both_chain_plot + + diff --git a/tests/testthat/_snaps/viz/quantcontig-scaled-plot.svg b/tests/testthat/_snaps/viz/quantcontig-scaled-plot.svg new file mode 100644 index 00000000..f72bada6 --- /dev/null +++ b/tests/testthat/_snaps/viz/quantcontig-scaled-plot.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +20 +40 +60 +80 + + + + + + + + + + + + +PY_P +PY_T +PX_P +PX_T +PZ_P +PZ_T +Samples +Percent of Unique Clonotype + +Samples + + + + + + +PY_P +PY_T +PX_P +PX_T +PZ_P +PZ_T +quantContig_scaled_plot + + diff --git a/tests/testthat/_snaps/viz/quantcontig-single-sample-plot.svg b/tests/testthat/_snaps/viz/quantcontig-single-sample-plot.svg new file mode 100644 index 00000000..4145234a --- /dev/null +++ b/tests/testthat/_snaps/viz/quantcontig-single-sample-plot.svg @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0 +1000 +2000 + + + + +Unique Clonotypes +quantContig_single_sample_plot + + diff --git a/tests/testthat/_snaps/viz/scatterclonotype-vignette-plot.svg b/tests/testthat/_snaps/viz/scatterclonotype-vignette-plot.svg new file mode 100644 index 00000000..3402bd43 --- /dev/null +++ b/tests/testthat/_snaps/viz/scatterclonotype-vignette-plot.svg @@ -0,0 +1,3841 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.01 +0.02 +0.03 + + + + + + + + + + + +0.000 +0.005 +0.010 +0.015 +0.020 +0.025 +PY_P +PY_T + +Total n + + + +30 +60 +90 + +class + + + + + +PY_P.multiplet +PY_P.singlet +PY_T.multiplet +PY_T.singlet +dual.expanded +scatterClonotype_vignette_plot + + diff --git a/tests/testthat/_snaps/viz/vizgenes-bar-vignette-plot.svg b/tests/testthat/_snaps/viz/vizgenes-bar-vignette-plot.svg new file mode 100644 index 00000000..a319f7cc --- /dev/null +++ b/tests/testthat/_snaps/viz/vizgenes-bar-vignette-plot.svg @@ -0,0 +1,686 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +PX + + + + + + + + + + +PY + + + + + + + + + + +PZ + + + +TRBV20-1 +TRBV28 +TRBV2 +TRBV5-1 +TRBV6-5 +TRBV24-1 +TRBV19 +TRBV27 +TRBV7-9 +TRBV10-1 +TRBV12-4 +TRBV5-6 +TRBV4-1 +TRBV5-4 +TRBV3-1 +TRBV18 +TRBV11-1 +TRBV9 +TRBV7-2 +TRBV12-3 +TRBV7-3 +TRBV29-1 +TRBV11-2 +TRBV6-3 +TRBV7-8 +TRBV4-3 +TRBV30 +TRBV6-4 +TRBV6-1 +TRBV6-6 +TRBV10-3 +TRBV13 +TRBV14 +TRBV15 +TRBV5-5 +TRBV7-6 +TRBV11-3 +TRBV4-2 +TRBV25-1 +TRBV5-8 +TRBV21-1 +TRBV12-5 +TRBV7-7 +TRBV10-2 +TRBV16 +TRBV7-4 +TRBV5-3 + +0.0 +0.1 +0.2 + + + + +0.0 +0.1 +0.2 + + + + +0.0 +0.1 +0.2 + + + +vizGenes_bar_vignette_plot + + diff --git a/tests/testthat/_snaps/viz/vizgenes-heatmap-vignette-plot.svg b/tests/testthat/_snaps/viz/vizgenes-heatmap-vignette-plot.svg new file mode 100644 index 00000000..c385183f --- /dev/null +++ b/tests/testthat/_snaps/viz/vizgenes-heatmap-vignette-plot.svg @@ -0,0 +1,1186 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +TRBJ1-1 +TRBJ1-2 +TRBJ1-3 +TRBJ1-4 
+TRBJ1-5 +TRBJ1-6 +TRBJ2-1 +TRBJ2-2 +TRBJ2-3 +TRBJ2-4 +TRBJ2-5 +TRBJ2-6 +TRBJ2-7 + + + + + + + + + + + + + + +TRBV10-1 +TRBV10-2 +TRBV10-3 +TRBV11-1 +TRBV11-2 +TRBV11-3 +TRBV12-3 +TRBV12-4 +TRBV12-5 +TRBV13 +TRBV14 +TRBV15 +TRBV16 +TRBV18 +TRBV19 +TRBV2 +TRBV20-1 +TRBV21-1 +TRBV24-1 +TRBV25-1 +TRBV27 +TRBV28 +TRBV29-1 +TRBV3-1 +TRBV30 +TRBV4-1 +TRBV4-2 +TRBV4-3 +TRBV5-1 +TRBV5-3 +TRBV5-4 +TRBV5-5 +TRBV5-6 +TRBV5-8 +TRBV6-1 +TRBV6-3 +TRBV6-4 +TRBV6-5 +TRBV6-6 +TRBV7-2 +TRBV7-3 +TRBV7-6 +TRBV7-7 +TRBV7-8 +TRBV7-9 +TRBV9 + + +0.02 +0.04 +0.06 +Scaled Values + + + + + + +vizGenes_heatmap_vignette_plot + + diff --git a/tests/testthat/helper-testing_functions.R b/tests/testthat/helper-testing_functions.R new file mode 100644 index 00000000..55f04651 --- /dev/null +++ b/tests/testthat/helper-testing_functions.R @@ -0,0 +1,3 @@ +getdata <- function(dir, name) { + readRDS(paste("testdata/", dir, "/", name, ".rds", sep = "")) +} diff --git a/tests/testthat/helper-vdiffr.R b/tests/testthat/helper-vdiffr.R new file mode 100644 index 00000000..eb4b8ea0 --- /dev/null +++ b/tests/testthat/helper-vdiffr.R @@ -0,0 +1,15 @@ +# By default, if vdiffr is not installed, all visual tests are skipped unless +# VDIFFR_RUN_TESTS is explicitly set to "true", which should be the case only on +# a GitHub Actions CI runner with stable version of R. + +if (requireNamespace("vdiffr", quietly = TRUE) && utils::packageVersion('testthat') >= '3.0.3') { + expect_doppelganger <- vdiffr::expect_doppelganger +} else { + # If vdiffr is not available and visual tests are explicitly required, raise error. + if (identical(Sys.getenv("VDIFFR_RUN_TESTS"), "true")) { + rlang::abort("vdiffr is not installed") + } + + # Otherwise, assign a dummy function + expect_doppelganger <- function(...) 
skip("vdiffr is not installed.") +} diff --git a/tests/testthat/test-combineContigs.R b/tests/testthat/test-combineContigs.R new file mode 100644 index 00000000..4ef32f8a --- /dev/null +++ b/tests/testthat/test-combineContigs.R @@ -0,0 +1,25 @@ +# test script for combineContigs.R - testcases are NOT comprehensive! + +test_that("combineTCR works", { + data("contig_list") + + trial1 <- combineTCR( + df = lapply(contig_list[1:3], head), + samples = c("PY", "PY", "PX"), + ID = c("P", "T", "P") + ) + expected1 <- getdata("combineContigs", "combineTCR_list_expected") + + expect_identical(trial1, expected1) + + trial2 <- combineTCR( + head(contig_list[[1]]), samples = "PY", ID = "P" + )[[1]] + expected2 <- expected1[[1]] + + expect_identical(trial2, expected2) +}) + +# TODO combineTCR (need more edge cases, different args, errors, etc.) +# TODO combineBCR +# TODO lvCompare diff --git a/tests/testthat/test-seuratFunctions.R b/tests/testthat/test-seuratFunctions.R new file mode 100644 index 00000000..4cb7625d --- /dev/null +++ b/tests/testthat/test-seuratFunctions.R @@ -0,0 +1,20 @@ +# test script for seuratFunctions.R - testcases are NOT comprehensive!
+ +test_that("combineExpression works with seurat objects", { + data("combined_mini_contig_list", "screp_example") + combined_test <- combineExpression(combined_mini_contig_list, screp_example) + + expect_length(combined_test@meta.data, 15) + expect_equal(combined_test@meta.data[, 1:8], screp_example@meta.data[, 1:8]) + expect_equal( + combined_test@meta.data[, 9:15], + getdata("seuratFunctions", "combineExpression_new_metadata") + ) +}) + +# TODO more testcases for combineExpression, especially with SCE objects +# TODO highlightClonotypes +# TODO alluvialClonotypes +# TODO occupiedscRepertoire +# TODO clonalOverlay +# TODO createHTOContigList diff --git a/tests/testthat/test-seurat_command.R b/tests/testthat/test-seurat_command.R new file mode 100644 index 00000000..f70d0667 --- /dev/null +++ b/tests/testthat/test-seurat_command.R @@ -0,0 +1,31 @@ +test_that("make_screp_seurat_cmd works", { + data("screp_example", "combined_mini_contig_list") + + test_obj <- combineExpression(combined_mini_contig_list, screp_example) + expect_true(!is.null(test_obj@commands[["combineExpression"]])) + + test_obj <- test_obj@commands[["combineExpression"]] + + expect_identical(test_obj@name, "combineExpression") + expect_identical(class(test_obj@time.stamp), c("POSIXct", "POSIXt")) + expect_identical(test_obj@assay.used, "integrated") + expect_identical( + test_obj@call.string, + "combineExpression(combined_mini_contig_list, screp_example)" + ) + expect_equal( + test_obj@params, + list( + cloneCall = "CTstrict", chain = "both", group.by = "none", + proportion = TRUE, filterNA = FALSE, + cloneTypes = c( + `None ( < X <= 0)` = 0, `Rare (0 < X <= 1e-04)` = 1e-04, + `Small (1e-04 < X <= 0.001)` = 0.001, + `Medium (0.001 < X <= 0.01)` = 0.01, + `Large (0.01 < X <= 0.1)` = 0.1, + `Hyperexpanded (0.1 < X <= 1)` = 1 + ), + addLabel = FALSE + ) + ) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index d08471ae..b3d158b7 100644 --- a/tests/testthat/test-utils.R +++
b/tests/testthat/test-utils.R @@ -1,10 +1,139 @@ +# test script for utils.R - testcases are NOT comprehensive! + test_that("'%!in%' works", { - vector <- c(1, 2, 3, 4, 5) + v <- c(1, 2, 3, 4, 5) - expect_true(0 %!in% vector) - expect_true(6 %!in% vector) - expect_false(3 %!in% vector) - expect_false(5 %!in% vector) + expect_true(0 %!in% v) + expect_true(6 %!in% v) + expect_false(3 %!in% v) + expect_false(5 %!in% v) expect_true(1 %!in% NULL) expect_true(list(1) %!in% NA) }) + +# TODO off.the.chain +# TODO checkBlanks +# TODO groupList + +test_that("checkList works", { + data("contig_list") + expect_identical(checkList(contig_list), contig_list) + expect_identical(checkList(contig_list[[1]]), list(contig_list[[1]])) + expect_identical(checkList(contig_list[[1]])[[1]], contig_list[[1]]) + # TODO: the stop message is untested - unclear which input triggers it. Perhaps data.table(contig_list)? But then it shouldn't fail. +}) + +test_that("checkContigs works", { + input <- list( + df1 = data.frame(a = c("x", "", "z"), b = c("1", "2", "3")), + df2 = data.frame(c = c("foo", "bar", ""), d = c("", "spam", "eggs")) + ) + expected <- checkContigs(input) + expect_true(is.list(expected)) + expect_true(is.data.frame(expected[[1]])) + expect_equal(expected[[1]]$a, c("x", NA, "z")) + expect_equal(expected[[1]]$b, c("1", "2", "3")) + expect_true(is.data.frame(expected[[2]])) + expect_equal(expected[[2]]$c, c("foo", "bar", NA)) + expect_equal(expected[[2]]$d, c(NA, "spam", "eggs")) +}) + +# TODO bound.input.return +# TODO get.coord +# TODO checkSingleObject +# TODO grabMeta + +# Test cases for modifyBarcodes +test_that("Modifying barcodes without ID works correctly", { + samples <- c("sample1", "sample2") + modified_data <- modifyBarcodes( + df = getdata("utils", "df_list"), samples = samples, ID = NULL + ) + + expected_modified_data <- list( + data.frame( + barcode = c("sample1_A", "sample1_B", "sample1_C"), + value = c(10, 20, 30) + ), + data.frame( + barcode = c("sample2_X", "sample2_Y", "sample2_Z"), + value
= c(100, 200, 300) + ) + ) + + expect_identical(modified_data, expected_modified_data) +}) + +test_that("Modifying barcodes with ID works correctly", { + samples <- c("sample3", "sample4") + ID <- c("id1", "id2") + modified_data <- modifyBarcodes( + df = getdata("utils", "df_list"), samples = samples, ID = ID + ) + + expected_modified_data <- list( + data.frame( + barcode = c("sample3_id1_A", "sample3_id1_B", "sample3_id1_C"), + value = c(10, 20, 30) + ), + data.frame( + barcode = c("sample4_id2_X", "sample4_id2_Y", "sample4_id2_Z"), + value = c(100, 200, 300) + ) + ) + + expect_identical(modified_data, expected_modified_data) +}) + +# TODO removingNA +# TODO removingMulti + +test_that("filteringMulti works", { + expect_identical( + filteringMulti(head(contig_list[[1]])), + getdata("utils", "filteringMulti_expected") + ) +}) + +# TODO filteringNA +# TODO diversityCall +# TODO parseContigs +# TODO morisitaIndex +# TODO jaccardIndex +# TODO rawIndex +# TODO overlapIndex +# TODO quiet +# TODO theCall +# TODO parseTCR +# TODO parseBCR +# TODO lengthDF +# TODO assignCT + +test_that("makeGenes works for cellType T", { + expect_identical( + makeGenes("T", getdata("utils", "makeGenes_T_input")), + getdata("utils", "makeGenes_T_expected") + ) +}) + +# TODO makeGenes (cellType B) +# TODO short.check +# TODO select.gene + +test_that("Check if object is a dataframe or list of dataframes", { + df <- data.frame(x = 1:5, y = letters[1:5]) + list_of_dfs <- list( + data.frame(a = 1:3, b = letters[1:3]), + data.frame(x = 4:6, y = letters[4:6]) + ) + mixed_list <- list( + data.frame(a = 1:3, b = letters[1:3]), + "not a dataframe" + ) + + expect_true(is_df_or_list_of_df(df)) + expect_true(is_df_or_list_of_df(list_of_dfs)) + expect_false(is_df_or_list_of_df(mixed_list)) + expect_false(is_df_or_list_of_df(list())) + expect_false(is_df_or_list_of_df(c(1, 2, 3, 4, 5))) +}) diff --git a/tests/testthat/test-viz.R b/tests/testthat/test-viz.R new file mode 100644 index
00000000..1ec5f91c --- /dev/null +++ b/tests/testthat/test-viz.R @@ -0,0 +1,133 @@ +# test script for viz.R - testcases are NOT comprehensive! +# TODO all functions need to be tested for a single sample - however, many don't +# really work properly for single samples + +# testdata: (assumes combineTCR works) +combined <- combineTCR( + contig_list, + samples = c("PY", "PY", "PX", "PX", "PZ","PZ"), + ID = c("P", "T", "P", "T", "P", "T") +) + +single_contig <- combineTCR(contig_list[[1]]) + +single_contig_with_sample <- combineTCR( + contig_list[[1]], samples = "PX", ID = "P" +) +# TODO test more cases with single_contig + +test_that("quantContig works", { + expect_doppelganger( + "quantContig_scaled_plot", quantContig(combined, scale = TRUE) + ) + expect_equal( + quantContig(combined, scale = TRUE, exportTable = TRUE), + data.frame( + "contigs" = c(2712L, 1585L, 823L, 918L, 1143L, 768L), + "values" = c("PY_P", "PY_T", "PX_P", "PX_T", "PZ_P", "PZ_T"), + "total" = c(3208L, 3119L, 1068L, 1678L, 1434L, 2768L), + "scaled" = c( + 84.5386533665835, 50.8175697338891, 77.059925093633, + 54.7079856972586, 79.7071129707113, 27.7456647398844 + ) + ) + ) + + expect_doppelganger( + "quantContig_single_sample_plot", quantContig(single_contig) + ) + expect_identical( + quantContig(single_contig, exportTable = TRUE), + data.frame("contigs" = 2712L, "total" = 3208L) + ) +}) + +test_that("abundanceContig works", { + expect_doppelganger( + "abundanceContig_scaled_plot", abundanceContig(combined, scale = FALSE) + ) +}) + +test_that("lengthContig works", { + expect_doppelganger( + "lengthContig_both_chain_plot", lengthContig(combined, chain = "both") + ) +}) + +test_that("compareClonotypes works", { + expect_doppelganger( + "compareClonotypes_alluvial_plot", + compareClonotypes( + combined, + numbers = 10, + samples = c("PX_P", "PX_T"), + cloneCall="aa", + graph = "alluvial" + ) + ) + + expect_doppelganger( + "compareClonotypes_area_plot", + compareClonotypes( + combined, + numbers = 10, 
+ samples = c("PX_P", "PX_T"), + cloneCall="aa", + graph = "area" + ) + ) +}) + +test_that("scatterClonotype works", { + expect_doppelganger( + "scatterClonotype_vignette_plot", + scatterClonotype( + combined, + cloneCall ="gene", + x.axis = "PY_P", + y.axis = "PY_T", + dot.size = "total", + graph = "proportion", + seed = 42 + ) + ) + + # TODO test the exportTable arg +}) + +# something in `clonesizeDistribution` prints "NULL" to the terminal +test_that("clonesizeDistribution works", { + expect_doppelganger( + "clonesizeDistribution_vignette_plot", + clonesizeDistribution(combined, cloneCall = "gene+nt", method="ward.D2") + ) +}) + +# TODO makingLodes + +test_that("vizGenes works", { + expect_doppelganger( + "vizGenes_bar_vignette_plot", + vizGenes( + combined, + gene = "V", + chain = "TRB", + plot = "bar", + order = "variance", + scale = TRUE + ) + ) + + expect_doppelganger( + "vizGenes_heatmap_vignette_plot", + vizGenes( + combined[c(1,3,5)], + gene = "V", + chain = "TRB", + y.axis = "J", + plot = "heatmap", + scale = TRUE, + order = "gene" + ) + ) +}) diff --git a/tests/testthat/testdata/combineContigs/combineTCR_list_expected.rds b/tests/testthat/testdata/combineContigs/combineTCR_list_expected.rds new file mode 100644 index 00000000..5d08d24f Binary files /dev/null and b/tests/testthat/testdata/combineContigs/combineTCR_list_expected.rds differ diff --git a/tests/testthat/testdata/seuratFunctions/combineExpression_new_metadata.rds b/tests/testthat/testdata/seuratFunctions/combineExpression_new_metadata.rds new file mode 100644 index 00000000..a40e4ea4 Binary files /dev/null and b/tests/testthat/testdata/seuratFunctions/combineExpression_new_metadata.rds differ diff --git a/tests/testthat/testdata/utils/df_list.rds b/tests/testthat/testdata/utils/df_list.rds new file mode 100644 index 00000000..f1a726cd Binary files /dev/null and b/tests/testthat/testdata/utils/df_list.rds differ diff --git a/tests/testthat/testdata/utils/filteringMulti_expected.rds 
b/tests/testthat/testdata/utils/filteringMulti_expected.rds new file mode 100644 index 00000000..02814263 Binary files /dev/null and b/tests/testthat/testdata/utils/filteringMulti_expected.rds differ diff --git a/tests/testthat/testdata/utils/makeGenes_T_expected.rds b/tests/testthat/testdata/utils/makeGenes_T_expected.rds new file mode 100644 index 00000000..b5cbc1e4 Binary files /dev/null and b/tests/testthat/testdata/utils/makeGenes_T_expected.rds differ diff --git a/tests/testthat/testdata/utils/makeGenes_T_input.rds b/tests/testthat/testdata/utils/makeGenes_T_input.rds new file mode 100644 index 00000000..c69bd91b Binary files /dev/null and b/tests/testthat/testdata/utils/makeGenes_T_input.rds differ diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd index 10590c62..0337b346 100644 --- a/vignettes/vignette.Rmd +++ b/vignettes/vignette.Rmd @@ -16,22 +16,21 @@ vignette: > --- ```{r, echo=FALSE, results="hide", message=FALSE} -knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) +knitr::opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE) # to pass R CMD check, packages can be installed in the knitted environment if (!require("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } -library(BiocManager) +suppressPackageStartupMessages(invisible(base::library(BiocManager))) quiet_bioc_load <- function(...) { - pkgs <- list(...) - for (pkg in pkgs) { - if (!require(pkg, character.only = TRUE, quietly = TRUE)) { - BiocManager::install(pkg) - } - suppressPackageStartupMessages(invisible(library(pkg, character.only = TRUE))) - } + pkgs <- list(...) 
+ for (pkg in pkgs) { + if (base::require(pkg, character.only = TRUE, quietly = TRUE)) {next} + BiocManager::install(pkg) + suppressPackageStartupMessages(invisible(base::library(pkg, character.only = TRUE))) + } } quiet_bioc_load("BiocStyle", "scater") @@ -760,7 +759,7 @@ sub_combined <- clusterTCR(combined[[2]], From the excellent work by Lei Zhang, et al in [Lineage tracking reveals dynamic relationships of T cells in colorectal cancer](https://www.nature.com/articles/s41586-018-0694-x), the authors introduce new methods for looking at clonotypes by cellular origins and cluster identification. Their [startrac](https://github.com/Japrin/STARTRAC) software has been incorporated into scRepertoire. If you are using the output of this specific function, please cite their excellent work. -In order to use the ```StartracDiversity()``` function, you will need to include the product of the ```combinedSeurat()``` function. The second requirement is a column header in the meta data of the Seurat object that has tissue of origin. In the example data,**type** corresponds to the column "Type", which includes the "P" and "T" classifier. The indices can be subseted for a specific patient or examined overall using the **by** variable. Importantly, the function uses only the strict definition of clonotype of the VDJC genes and the CDR3 nucleotide sequence. +In order to use the ```StartracDiversity()``` function, you will need to include the product of the ```combinedSeurat()``` function. The second requirement is a column header in the meta data of the Seurat object that has tissue of origin. In the example data, **type** corresponds to the column "Type", which includes the "P" and "T" classifier. The indices can be subsetted for a specific patient or examined overall using the **by** variable. Importantly, the function uses only the strict definition of clonotype of the VDJC genes and the CDR3 nucleotide sequence. *The indices output includes:* + expa - Clonal Expansion