diff --git a/DESCRIPTION b/DESCRIPTION index fe05de59..bd69f2d3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ RoxygenNote: 7.2.3 biocViews: Software, ImmunoOncology, SingleCell, Classification, Annotation, Sequencing Depends: ggplot2, - R (>= 4.0) + R (>= 4.0), Seurat Imports: stringdist, diff --git a/R/data.R b/R/data.R index b0404de7..a3b676d1 100644 --- a/R/data.R +++ b/R/data.R @@ -5,10 +5,29 @@ #' NULL -#' A seurat object of 1000 single T cells derived +#' A seurat object of 100 single T cells derived #' from 3 clear cell renal carcinoma patients. +#' +#' @description The object is compatible with `contig_list` and the TCR +#' sequencing data can be added with `combineExpression`. +#' #' @name screp_example #' @docType data #' -#' NULL + +#' Processed subset of `contig_list` +#' +#' @description A list of 6 dataframes of T cell contigs outputted from the +#' `filtered_contig_annotation` files, but subsetted to about 92 valid T cells +#' which correspond to the same barcodes found in `screp_example` +#' +#' @usage data("combined_mini_contig_list") +#' +#' @format An R `list` of `data.frame` objects +#' +#' @docType data +#' +#' @seealso \code{\link{contig_list}} +#' +"combined_mini_contig_list" diff --git a/R/seuratFunctions.R b/R/seuratFunctions.R index e752c04e..08629d17 100644 --- a/R/seuratFunctions.R +++ b/R/seuratFunctions.R @@ -42,7 +42,7 @@ #' clonotype information #' @param addLabel This will add a label to the frequency header, allowing #' the user to try multiple group.by variables or recalculate frequencies after -#' subseting the data. +#' subsetting the data. 
#' @importFrom dplyr bind_rows %>% summarise #' @importFrom rlang %||% #' @importFrom SummarizedExperiment colData<- colData @@ -147,11 +147,10 @@ combineExpression <- function( } warn_str <- "< 1% of barcodes match: Ensure the barcodes in - the Seurat object match the - barcodes in the combined immune receptor list from - scRepertoire - most common issue is the addition of the - prefixes corresponding to `samples` and 'ID' in the combineTCR/BCR() - functions" + the Seurat object match the barcodes in the combined immune receptor + list from scRepertoire - most common issue is the addition of the + prefixes corresponding to `samples` and 'ID' in the combineTCR/BCR() + functions" if (is_seurat_object(sc)) { if (length(which(rownames(PreMeta) %in% diff --git a/R/startrac.R b/R/startrac.R index fe2ebe48..a5173c99 100644 --- a/R/startrac.R +++ b/R/startrac.R @@ -111,7 +111,7 @@ StartracDiversity <- function(sc, #' @slot pIndex.migr data.frame. Each line for a cluster; pairwise migration #' index between the two locations indicated in the column name. #' @slot pIndex.tran data.frame. Each line for a cluster; pairwise transition -#' index betwwen the two major clusters indicated by the row name and column name. +#' index between the two major clusters indicated by the row name and column name. #' @slot cluster.sig.data data.frame. Each line for a cluster; contains the #' p values of cluster indices. #' @slot pIndex.sig.migr data.frame. 
Each line for a cluster; contains the @@ -129,7 +129,7 @@ StartracDiversity <- function(sc, #' @name Startrac #' @rdname Startrac #' @aliases Startrac-class -#' @return method definition for runing startrac +#' @return method definition for running startrac Startrac <- setClass("Startrac", slots = c(aid = "character", cell.data = "data.frame", @@ -527,7 +527,7 @@ mcol.entropy <- function(x) return(H) } -#' warpper function for Startrac analysis +#' wrapper function for Startrac analysis #' @importFrom reshape2 dcast #' @importFrom plyr ldply adply llply #' @importFrom parallel makeCluster stopCluster @@ -537,7 +537,7 @@ mcol.entropy <- function(x) #' @param proj character. String used to annotate the project. #' @param cores integer. number of core to be used. default: NULL. #' @param n.perm integer. number of permutation will be performed. If NULL, no permutation. (default: NULL) -#' @param verbose logical. wheter return intermediate result (some Startrac objects) +#' @param verbose logical. whether return intermediate result (some Startrac objects) #' @details run the Startrac pipeline #' @keywords internal #' @return an list contains data.frame elements "cluster.data","pIndex.migr" and "pIndex.tran" diff --git a/R/viz.R b/R/viz.R index 762b0d2d..9dbf3302 100644 --- a/R/viz.R +++ b/R/viz.R @@ -668,7 +668,7 @@ makingLodes <- function(meta2, color, alpha, facet, set.axes) { #' gene segments such as V, D, J, or C. #' @param order Categorical variable to organize the x-axis, either "gene" or "variance" #' @param scale Converts the individual count of genes to proportion using the total -#' respective reprtoire size +#' respective repertoire size #' @param group.by The column header used for grouping. #' @param split.by If using a single-cell object, the column header #' to group the new list. NULL will return clusters. 
diff --git a/README.md b/README.md index 70fc39b3..3d1b1cf2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Single-cell sequencing is an emerging technology in the field of immunology and oncology that allows researchers to couple RNA quantification and other modalities, like immune cell receptor profiling at the level of an individual cell. A number of workflows and software packages have been created to process and analyze single-cell transcriptomic data. These packages allow users to take the vast dimensionality of the data generated in single-cell-based experiments and distill the data into novel insights. Unlike the transcriptomic field, there is a lack of options for software that allow for single-cell immune receptor profiling. Enabling users to easily combine RNA and immune profiling, scRepertoire was built to process data derived from the 10x Genomics Chromium Immune Profiling for both T-cell receptor (TCR) and immunoglobulin (Ig) enrichment workflows and subsequently interacts with the popular Seurat R package. ### Applying Deep Learning to VDJ data -scRepertoire is compatible and integrated with the R packages [Trex](https://github.com/ncborcherding/Trex) for deep-learning-based autencoding of the T cell receptor and [Ibex](https://github.com/ncborcherding/Ibex) for the B cell receptor. +scRepertoire is compatible and integrated with the R packages [Trex](https://github.com/ncborcherding/Trex) for deep-learning-based autoencoding of the T cell receptor and [Ibex](https://github.com/ncborcherding/Ibex) for the B cell receptor. ### Wrapper Functions scRepertoire v1.0.2 has the functionality of the [powerTCR](https://github.com/hillarykoch/powerTCR) approach to comparing clone size distribution, [please cite](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006571) the manuscript if using the ```clonesizeDistribution()``` function. 
In addition, we recently added the [Startrac](https://github.com/Japrin/STARTRAC) clonotype metrics, if using the ```StartracDiversity()``` please read and cite [the accompanying article](https://www.nature.com/articles/s41586-018-0694-x). @@ -44,7 +44,7 @@ BiocManager::install("scRepertoire") ### Getting Data -Unfortunately, Github limits the size of individual files. In order to access the seurat object paired with scRepetoire please download the .rda from [here](https://drive.google.com/file/d/1Iv6t2BScpnLLrFWaWFUGwne3XzRAwMOc/view?usp=share_link). +Unfortunately, Github limits the size of individual files. In order to access the seurat object paired with scRepertoire please download the .rda from [here](https://drive.google.com/file/d/1Iv6t2BScpnLLrFWaWFUGwne3XzRAwMOc/view?usp=share_link). ### Learning To Use scRepertoire diff --git a/data/combined_mini_contig_list.rda b/data/combined_mini_contig_list.rda new file mode 100644 index 00000000..4cd0b7bc Binary files /dev/null and b/data/combined_mini_contig_list.rda differ diff --git a/data/screp_example.rda b/data/screp_example.rda index 4dd65f08..550e9fe2 100644 Binary files a/data/screp_example.rda and b/data/screp_example.rda differ diff --git a/inst/CITATION b/inst/CITATION index aa30aab3..472966aa 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,13 +1,18 @@ citHeader("To cite scRepertoire in publications use:") citEntry( - entry = "Article", - title = "scRepertoire: An R-based toolkit for single-cell immune receptor analysis", - author = {Borcherding, Nicholas and Bormann, Nicholas L and Kraus, Gloria}, - journal = "F1000Research", - volume = 9, - year = 2022, - publisher = Faculty of 1000 Ltd - url = https://doi.org/10.12688/f1000research.22139.2, + entry = "Article", + title = "scRepertoire: An R-based toolkit for single-cell immune receptor analysis", + author = personList( + as.person("Nicholas Borcherding"), + as.person("Nicholas L Bormann"), + as.person("Gloria Kraus") + ), + journal = 
"F1000Research", + volume = "9", + year = "2022", + publisher = "Faculty of 1000 Ltd", + doi = "10.12688/f1000research.22139.2", + url = "https://doi.org/10.12688/f1000research.22139.2", textVersion = "Borcherding N, Bormann NL and Kraus G. scRepertoire: An R-based toolkit for single-cell immune receptor analysis [version 2; peer review: 2 approved]. F1000Research 2020, 9:47 (https://doi.org/10.12688/f1000research.22139.2)" ) diff --git a/inst/WORDLIST b/inst/WORDLIST index ab652fd4..2ad426fa 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -65,11 +65,8 @@ aa airr al alluvialClonotypes -analyis -autencoding barcode barcodes -betwwen changeNames circlize clonalNetwork @@ -134,15 +131,10 @@ rda registerDoParallel removeMulti removeNA -reprtoire -runing -scRepetoire scater seurat startrac stripBarcode -subseted -subseting subtype sys tcr @@ -152,5 +144,6 @@ transcriptomic tsv unqiue unreturned -warpper -wheter +CMD +Codecov +coords diff --git a/man/Startrac.Rd b/man/Startrac.Rd index 92a933d0..3783e76b 100644 --- a/man/Startrac.Rd +++ b/man/Startrac.Rd @@ -6,7 +6,7 @@ \alias{Startrac-class} \title{The Startrac Class} \value{ -method definition for runing startrac +method definition for running startrac } \description{ The Startrac object store the data for tcr-based T cell dynamics analyis. The slots contained @@ -34,7 +34,7 @@ level indexes information} index between the two locations indicated in the column name.} \item{\code{pIndex.tran}}{data.frame. Each line for a cluster; pairwise transition -index betwwen the two major clusters indicated by the row name and column name.} +index between the two major clusters indicated by the row name and column name.} \item{\code{cluster.sig.data}}{data.frame. 
Each line for a cluster; contains the p values of cluster indices.} diff --git a/man/Startrac.run.Rd b/man/Startrac.run.Rd index b66e479f..8bd184c0 100644 --- a/man/Startrac.run.Rd +++ b/man/Startrac.run.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/startrac.R \name{Startrac.run} \alias{Startrac.run} -\title{warpper function for Startrac analysis} +\title{wrapper function for Startrac analysis} \usage{ Startrac.run( cell.data, @@ -21,13 +21,13 @@ Startrac.run( \item{n.perm}{integer. number of permutation will be performed. If NULL, no permutation. (default: NULL)} -\item{verbose}{logical. wheter return intermediate result (some Startrac objects)} +\item{verbose}{logical. whether return intermediate result (some Startrac objects)} } \value{ an list contains data.frame elements "cluster.data","pIndex.migr" and "pIndex.tran" } \description{ -warpper function for Startrac analysis +wrapper function for Startrac analysis } \details{ run the Startrac pipeline diff --git a/man/combined_mini_contig_list.Rd b/man/combined_mini_contig_list.Rd new file mode 100644 index 00000000..4f01dff9 --- /dev/null +++ b/man/combined_mini_contig_list.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{combined_mini_contig_list} +\alias{combined_mini_contig_list} +\title{Processed subset of `contig_list`} +\format{ +An R `list` of `data.frame` objects +} +\usage{ +data("combined_mini_contig_list") +} +\description{ +A list of 6 dataframes of T cell contigs outputted from the +`filtered_contig_annotation` files, but subsetted to about 92 valid T cells +which correspond to the same barcodes found in `screp_example` +} +\seealso{ +\code{\link{contig_list}} +} +\keyword{datasets} diff --git a/man/screp_example.Rd b/man/screp_example.Rd index 41ec8ed8..f2a058a0 100644 --- a/man/screp_example.Rd +++ b/man/screp_example.Rd @@ -3,9 +3,9 @@ \docType{data} \name{screp_example} \alias{screp_example} -\title{A seurat 
object of 1000 single T cells derived +\title{A seurat object of 100 single T cells derived from 3 clear cell renal carcinoma patients.} \description{ -A seurat object of 1000 single T cells derived -from 3 clear cell renal carcinoma patients. +The object is compatible with `contig_list` and the TCR +sequencing data can be added with `combineExpression`. } diff --git a/man/vizGenes.Rd b/man/vizGenes.Rd index 87696b58..24f359c4 100644 --- a/man/vizGenes.Rd +++ b/man/vizGenes.Rd @@ -34,7 +34,7 @@ gene segments such as V, D, J, or C.} \item{order}{Categorical variable to organize the x-axis, either "gene" or "variance"} \item{scale}{Converts the individual count of genes to proportion using the total -respective reprtoire size} +respective repertoire size} \item{group.by}{The column header used for grouping.} diff --git a/scRepertoire.Rproj b/scRepertoire.Rproj index f5d9c6c8..75941c0e 100644 --- a/scRepertoire.Rproj +++ b/scRepertoire.Rproj @@ -17,3 +17,4 @@ AutoAppendNewline: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageCheckArgs: --as-cran --use-valgrind diff --git a/tests/testthat/test-viz.R b/tests/testthat/test-viz.R index 9d4650ea..414c74a2 100644 --- a/tests/testthat/test-viz.R +++ b/tests/testthat/test-viz.R @@ -11,9 +11,10 @@ combined <- combineTCR( single_contig <- combineTCR(contig_list[[1]]) -single_contig_with__sample <- combineTCR( +single_contig_with_sample <- combineTCR( contig_list[[1]], samples = "PX", ID = "P" ) +# TODO test more cases with single_contig test_that("quantContig works", { expect_doppelganger( diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd index 10590c62..0337b346 100644 --- a/vignettes/vignette.Rmd +++ b/vignettes/vignette.Rmd @@ -16,22 +16,21 @@ vignette: > --- ```{r, echo=FALSE, results="hide", message=FALSE} -knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) +knitr::opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE) # to pass 
R CMD check, packages can be installed in the knitted environment if (!require("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } -library(BiocManager) +suppressPackageStartupMessages(invisible(base::library(BiocManager))) quiet_bioc_load <- function(...) { - pkgs <- list(...) - for (pkg in pkgs) { - if (!require(pkg, character.only = TRUE, quietly = TRUE)) { - BiocManager::install(pkg) - } - suppressPackageStartupMessages(invisible(library(pkg, character.only = TRUE))) - } + pkgs <- list(...) + for (pkg in pkgs) { + if (base::require(pkg, character.only = TRUE, quietly = TRUE)) {next} + BiocManager::install(pkg) + suppressPackageStartupMessages(invisible(base::library(pkg, character.only = TRUE))) + } } quiet_bioc_load("BiocStyle", "scater") @@ -760,7 +759,7 @@ sub_combined <- clusterTCR(combined[[2]], From the excellent work by Lei Zhang, et al in [Lineage tracking reveals dynamic relationships of T cells in colorectal cancer](https://www.nature.com/articles/s41586-018-0694-x), the authors introduce new methods for looking at clonotypes by cellular origins and cluster identification. Their [startrac](https://github.com/Japrin/STARTRAC) software has been incorporated into scRepertoire. If you are using the output of this specific function, please cite their excellent work. -In order to use the ```StartracDiversity()``` function, you will need to include the product of the ```combinedSeurat()``` function. The second requirement is a column header in the meta data of the Seurat object that has tissue of origin. In the example data,**type** corresponds to the column "Type", which includes the "P" and "T" classifier. The indices can be subseted for a specific patient or examined overall using the **by** variable. Importantly, the function uses only the strict definition of clonotype of the VDJC genes and the CDR3 nucleotide sequence. 
+In order to use the ```StartracDiversity()``` function, you will need to include the product of the ```combinedSeurat()``` function. The second requirement is a column header in the meta data of the Seurat object that has tissue of origin. In the example data, **type** corresponds to the column "Type", which includes the "P" and "T" classifiers. The indices can be subsetted for a specific patient or examined overall using the **by** variable. Importantly, the function uses only the strict definition of clonotype of the VDJC genes and the CDR3 nucleotide sequence. *The indices output includes:* + expa - Clonal Expansion