From 568edc1db8f11a513565a7662cc1274de87995d9 Mon Sep 17 00:00:00 2001 From: theHumanBorch Date: Fri, 5 Apr 2024 07:48:39 -0500 Subject: [PATCH] Update Ibex/Trex vignette --- vignettes/articles/Ibex.Rmd | 66 ++++++++++++++++++++++++++++++++++++- vignettes/articles/Trex.Rmd | 65 +++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 2 deletions(-) diff --git a/vignettes/articles/Ibex.Rmd b/vignettes/articles/Ibex.Rmd index 04608087..64b63d85 100644 --- a/vignettes/articles/Ibex.Rmd +++ b/vignettes/articles/Ibex.Rmd @@ -59,7 +59,71 @@ devtools::install_github("ncborcherding/Ibex") ## The Data Set -To show the multiple options of Ibex, the example data is derived from [this manuscript](https://pubmed.ncbi.nlm.nih.gov/33891889/), multimodal single-cell characterization of COVID19-associated multisystem inflammatory syndrome in children. The data example built into the package (**ibex_example**) is derived from randomly sampling cells from Patient 1. +To show the multiple options of Ibex, the example data is derived from [this manuscript](https://pubmed.ncbi.nlm.nih.gov/33891889/), multimodal single-cell characterization of COVID19-associated multisystem inflammatory syndrome in children. + +### Formation of the Single-cell Object + +Here is the basic workflow that was used to make the single-cell object to use in the vignette. Notice there is a removal of BCR-related RNA features (using the **Ibex** function ```quietBCRgenes()```). As we are going to combine multimodal data, both the GEX and CITE probes may cause bias in the weighted output. 
+ + ```{r, eval=FALSE, tidy = FALSE} +################################## +#scRNA/ADT loading and processing +################################# +tmp <- Read10X("~/data/GSM5073055_P1.1_filtered_feature_bc_matrix") + +MIS.sample <- CreateSeuratObject(counts = tmp$`Gene Expression`) +rownames(tmp$`Antibody Capture`) <- stringr::str_remove_all(rownames(tmp$`Antibody Capture`), "anti_human_") +rownames(tmp$`Antibody Capture`) <- stringr::str_remove_all(rownames(tmp$`Antibody Capture`), "anti_mousehuman_") +rownames(tmp$`Antibody Capture`) <- substr(rownames(tmp$`Antibody Capture`), 6, nchar(rownames(tmp$`Antibody Capture`))) + +adt_assay <- CreateAssayObject(counts = tmp$`Antibody Capture`) + + +MIS.sample[["ADT"]] <- adt_assay +MIS.sample <- subset(MIS.sample, subset = nFeature_RNA > 100) +MIS.sample <- RenameCells(object = MIS.sample , new.names = paste0("MIS.sample_", rownames(MIS.sample[[]]))) +MIS.sample[["mito.genes"]] <- PercentageFeatureSet(MIS.sample, pattern = "^MT-") + +#Filtering step +standev <- sd(log(MIS.sample$nFeature_RNA))*2.5 #upper cutoff: mean plus 2.5 SD of log(nFeature_RNA) +mean <- mean(log(MIS.sample$nFeature_RNA)) +cut <- round(exp(standev+mean)) +MIS.sample <- subset(MIS.sample, subset = mito.genes < 10 & nFeature_RNA < cut) + +#Processing and Adding Contig Info +contigs <- read.csv("~/data/GSM5073091_PBMC_P1.1_MIS-C_Severe_BCR_filtered_contig_annotations.csv.gz") +clones <- combineBCR(contigs, samples = "MIS.sample", removeNA = TRUE) +MIS.sample <- combineExpression(clones, MIS.sample, cloneCall="aa") + +#Subset only B cells, i.e. cells with a recovered BCR (non-NA CTaa) +MIS.sample$BCR.recoverd <- "No" +MIS.sample$BCR.recoverd[!is.na(MIS.sample$CTaa)] <- "Yes" +MIS.sample <- subset(MIS.sample, BCR.recoverd == "Yes") + +#Processing RNA +DefaultAssay(MIS.sample) <- 'RNA' +MIS.sample <- NormalizeData(MIS.sample) %>% FindVariableFeatures() %>% + quietBCRgenes() %>% ScaleData() %>% RunPCA(verbose = FALSE) + +#Processing ADT +DefaultAssay(MIS.sample) <- 'ADT' +VariableFeatures(MIS.sample) 
<- rownames(MIS.sample[["ADT"]]) +MIS.sample <- NormalizeData(MIS.sample, normalization.method = 'CLR', margin = 2) %>% + ScaleData() %>% RunPCA(reduction.name = 'apca') + +################################### +#Making Example Data Set for Ibex +################################# +meta <- MIS.sample[[]] +meta <- meta[sample(nrow(meta), nrow(meta)*0.33),] +ibex_example <- subset(MIS.sample, cells = rownames(meta)) +save(ibex_example, file = "ibex_example.rda", compress = "xz") +``` + +### Loading the Data Object + +For the purpose of the vignette, we will load the full object. The data example built into the package (**ibex_example**) is derived from randomly sampling cells from Patient 1 (see above). ```{r tidy = FALSE} SeuratObj <- readRDS(url("https://www.borch.dev/uploads/data/Ibex_FullExample.rds")) diff --git a/vignettes/articles/Trex.Rmd b/vignettes/articles/Trex.Rmd index 1843c26f..e1520d74 100644 --- a/vignettes/articles/Trex.Rmd +++ b/vignettes/articles/Trex.Rmd @@ -60,7 +60,70 @@ devtools::install_github("ncborcherding/Trex") ## The Data Set -To show the multiple options of Trex, the example data is derived from [GSE167118](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE167118), a cohort of CITE-seq data derived from severe COVID-19 patients. More information is available in the [corresponding manuscript](https://pubmed.ncbi.nlm.nih.gov/33622974/). The data example built into the package (**trex_example**) is derived from randomly sampling T cells from patient 17. +To show the multiple options of Trex, the example data is derived from [GSE167118](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE167118), a cohort of CITE-seq data derived from severe COVID-19 patients. More information is available in the [corresponding manuscript](https://pubmed.ncbi.nlm.nih.gov/33622974/). + +### Formation of the Single-cell Object + +Here is the basic workflow that was used to make the single-cell object to use in the vignette. 
Notice there is a removal of TCR-related CITE-seq data and RNA features (using the **Trex** function ```quietTCRgenes()```). As we are going to combine multimodal data, both the GEX and CITE probes may cause bias in the weighted output. + +```{r, eval=FALSE, tidy=FALSE} +################################## +#scRNA/ADT loading and processing +################################# +tmp <- Read10X("~/Patient17/filtered_feature_bc_matrix") + +Pt17 <- CreateSeuratObject(counts = tmp$`Gene Expression`) + +#Removing TCR-specific antibody (keeps ADT rows 1:37) +adt_assay <- CreateAssayObject(counts = tmp$`Antibody Capture`[1:37,]) +Pt17[["ADT"]] <- adt_assay +Pt17 <- subset(Pt17, subset = nFeature_RNA > 100) +Pt17 <- RenameCells(object = Pt17 , new.names = paste0("Pt17_", rownames(Pt17[[]]))) +Pt17[["mito.genes"]] <- PercentageFeatureSet(Pt17, pattern = "^MT-") + +#Filtering step +standev <- sd(log(Pt17$nFeature_RNA))*2.5 #upper cutoff: mean plus 2.5 SD of log(nFeature_RNA) +mean <- mean(log(Pt17$nFeature_RNA)) +cut <- round(exp(standev+mean)) +Pt17 <- subset(Pt17, subset = mito.genes < 10 & nFeature_RNA < cut) + +#Processing RNA +DefaultAssay(Pt17) <- 'RNA' +Pt17 <- NormalizeData(Pt17) %>% + FindVariableFeatures() %>% + quietTCRgenes() %>% + ScaleData() %>% + RunPCA(verbose = FALSE) + +#Processing ADT +DefaultAssay(Pt17) <- 'ADT' +VariableFeatures(Pt17) <- rownames(Pt17[["ADT"]]) +Pt17 <- NormalizeData(Pt17, normalization.method = 'CLR', margin = 2) %>% + ScaleData() %>% + RunPCA(reduction.name = 'apca') + + +################################## +#Processing and Adding Contig Info +################################## + +contigs <- read.csv("~/Patient17/filtered_contig_annotations.csv") +clones <- combineTCR(contigs, samples = "Pt17", cells = "T-AB", filterMulti = TRUE, removeNA = TRUE) +Pt17 <- combineExpression(clones, Pt17, cloneCall="aa") +saveRDS(Pt17, file = "Trex_FullExample.rds") + +################################### +#Making Example Data Set for Trex +################################# +meta <- 
Pt17[[]] +meta <- meta[sample(nrow(meta), nrow(meta)*0.1),] +trex_example <- subset(Pt17, cells = rownames(meta)) +save(trex_example, file = "trex_example.rda", compress = "xz") +``` + +### Loading the Data Object + +For the purpose of the vignette, we will load the full object. The data example built into the package (**trex_example**) is derived from randomly sampling T cells from patient 17 (see above workflow). ```{r tidy = FALSE} SeuratObj <- readRDS(url("https://www.borch.dev/uploads/data/Trex_FullExample.rds"))