Skip to content

Latest commit

 

History

History
1068 lines (690 loc) · 42.3 KB

Bald-scRNAseq-HNSCC.R.md

File metadata and controls

1068 lines (690 loc) · 42.3 KB

Analyse single cell RNA seqeuncing and CITE sequencing on human head and neck cancer

File name: Bald-scRNAseq-HNSCC.R.ipynb

Date created: 13-May-2021

Programmer: Lun-Hsien Chang

dir.C <- "C:"

dir.R.packages <- "C:/Program Files/R/R-4.0.3/library" #"C:/Program Files/R/R-4.0.2/library"

library(dplyr, lib.loc = dir.R.packages)
Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
dir.Bald.scRNA.202002.analysis <- "E:/backup-genomeinfo/share/analysis/Bald_-_RNASeq/scRNA_Feb2020_CITE_VDJ/GEX_CiteSeq/analysis-results"
# merged_seurat <- readRDS(file = file.path(dir.Bald.scRNA.202002.analysis,"merged_seurat.rds"))

GEX1 <- readRDS(file = file.path(dir.Bald.scRNA.202002.analysis,"Bald-HNSCC-scRNAseq-CITEseq_filtered_ADT-quantiledGEX1.rds"))
GEX2 <- readRDS(file = file.path(dir.Bald.scRNA.202002.analysis,"Bald-HNSCC-scRNAseq-CITEseq_filtered_ADT-quantiledGEX2.rds")) 
GEX3 <- readRDS(file = file.path(dir.Bald.scRNA.202002.analysis,"Bald-HNSCC-scRNAseq-CITEseq_filtered_ADT-quantiledGEX3.rds")) 
GEX4 <- readRDS(file = file.path(dir.Bald.scRNA.202002.analysis,"Bald-HNSCC-scRNAseq-CITEseq_filtered_ADT-quantiledGEX4.rds")) # class(GEX4)

# Edit meta data
GEX1@meta.data$condition <- "unstimulated"
GEX2@meta.data$condition <- "unstimulated"
GEX3@meta.data$condition <- "stimulated"
GEX4@meta.data$condition <- "stimulated"
Loading required package: SeuratObject

Warning message:
"package 'SeuratObject' was built under R version 4.0.5"
# Merge all samples of the same tissue type
merged_seurat <- merge( x=GEX1
                        ,y=c(GEX2, GEX3, GEX4))
Warning message in CheckDuplicateCellNames(object.list = objects):
"Some cell names are duplicated across objects provided. Renaming to enforce unique cell names."
#---------------------------------------------------------------------------------------------------------------
# Cluster cells on the basis of their scRNA-seq profiles
## reference [Using Seurat with multimodal data](https://satijalab.org/seurat/articles/multimodal_vignette.html)
#---------------------------------------------------------------------------------------------------------------

# View the keys for all keyed components (assays, dimensional reduction, spatial images) of a Seurat object using the Key functio
cat("Keys to Seurat elements",Seurat::Key(merged_seurat))
#    RNA    ADT 
# "rna_" "adt_" 

# DefaultAssay(merged_seurat) # "RNA"
merged_seurat <- Seurat::NormalizeData(object = merged_seurat
                                       ,assay="RNA"
                                       ,verbose=FALSE) %>%
  Seurat::FindVariableFeatures(verbose=FALSE) %>%
  Seurat::ScaleData(verbose=FALSE) %>%
  Seurat::RunPCA(verbose=FALSE)

# Change default R plot size
options(repr.plot.width = 4*5, repr.plot.height = 3*5)

# Clustering and dimension (PCs) selection
# Check dimensions
## A dimension is a PC. Choose PC23 as the break point. Take the more conservative PC.
Seurat::ElbowPlot(object = merged_seurat
                  ,ndims = 30
                  ,reduction = "pca")
Keys to Seurat elements rna_ adt_

png

merged_seurat <- Seurat::FindNeighbors(object = merged_seurat, dims=1:23, verbose=FALSE)

merged_seurat <- Seurat::FindClusters(object = merged_seurat, resolution=c(0.5, 0.9), verbose=FALSE)

merged_seurat <- Seurat::RunTSNE(merged_seurat, assay="RNA",dims=1:23, verbose=FALSE) 

merged_seurat <- Seurat::RunUMAP(merged_seurat, assay="RNA", dims=1:23, verbose=FALSE)
Warning message:
"The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session"
expansion.factor <- 5
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 3*expansion.factor)
cat("Visualising clustering by t-SNE")
Seurat::DimPlot(merged_seurat, reduction = "tsne", label = TRUE, pt.size=2, label.size=10)
Visualising clustering by t-SNE

png

cat("Visualising clustering by UMAP")
Seurat::DimPlot(merged_seurat, reduction = "umap", label = TRUE, pt.size=2, label.size=10)
Visualising clustering by UMAP

png

#-------------------------
# Normalise assay ADT
#-------------------------
merged_seurat <- Seurat::NormalizeData(object = merged_seurat
                                       , normalization.method="CLR"
                                       , assay="ADT"
                                       , margin=2 # normalise data across features (margin=1) or cells (margin=2)
                                       , verbose=FALSE
                                       ) %>%
  Seurat::ScaleData(verbose=FALSE) %>%
  Seurat::RunPCA(reduction.name='adtpca',verbose=FALSE)
Warning message:
"Cannot add objects with duplicate keys (offending key: PC_), setting key to 'adtpca_'"
#-------------------------------------------
# Visualize multiple modalities side-by-side
#-------------------------------------------
# Find out the prefixes of different assays
## Features (i.e., genes, protein) can be referred as assayKey_featureName
Seurat::Key(merged_seurat[["RNA"]]) # [1] "rna_"
Seurat::Key(merged_seurat[["ADT"]]) # [1] "adt_"

# Compare gene expression between conditions and clusters
genes.selected <- c("CD226","PDCD1","CD28","ENTPD1","CD96","TIGIT") # ,"EOMES"
genes.selected %in% rownames(merged_seurat) # Should be all TRUE
genes.selected.RNA <- paste0(Seurat::Key(merged_seurat[["RNA"]]), genes.selected) # Include only genes that can be found in the rownames() # ,"CD279" ,"CD39"

# protein feature names
protein.selected <- rownames(merged_seurat@assays$ADT)
protein.selected.ADT <- paste0(Seurat::Key(merged_seurat[["ADT"]]), protein.selected)

'rna_'

'adt_'

<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
# Compare gene expression of all genes between conditions
cat("Compare gene expression of all genes between conditions stimulated and unstimulated")
dittoSeq::dittoDimPlot(object=merged_seurat
                       ,var = "condition"
                       ,reduction.use = "umap"
                       ,assay = "RNA"
                      ,size=2)
Compare gene expression of all genes between conditions stimulated and unstimulated

png

# Plot expression of CD226 at protein and RNA levels
## gene name is specified as assayName_featureName (e.g., rna_CD226, adtcd226_CD226)
featurePlot.1.rnaCD226 <- Seurat::FeaturePlot(object = merged_seurat
                                              ,features = genes.selected.RNA[1]
                                              , cols = c("lightgrey", "darkgreen")) + ggplot2::ggtitle("CD226 RNA")

featurePlot.1.adtCD226 <- Seurat::FeaturePlot(object = merged_seurat
                                              ,features = protein.selected.ADT[1]
                                              , cols = c("lightgrey", "darkgreen")) + ggplot2::ggtitle("CD226 protein")
# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 2*expansion.factor)

featurePlot.1.adtCD226 | featurePlot.1.rnaCD226

png

#------------------------------------------------
# Expression of single gene between conditions in assay RNA
#------------------------------------------------
## Arrange multiple violin plots into a grid using cowplot::plot_grid()
cowplot::plot_grid(
     dittoSeq::dittoPlot(object=merged_seurat
                    ,var=genes.selected[1] # var argument cannot take assayName_featureName
                    ,assay = "RNA"
                    ,group.by = "condition"
                    ,plots = c("vlnplot", "jitter"))
    ,dittoSeq::dittoPlot(object=merged_seurat
                    ,var=protein.selected[1]
                    ,assay = "ADT"
                    ,group.by = "condition"
                    ,plots = c("vlnplot", "jitter"))
    ,ncol=2)

png

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 6*expansion.factor)

# Plot RNA and protein side by side
cat("Violin plots for expression levels of RNA and protein side by side")
## Arrange multiple violin plots into a grid using cowplot::plot_grid()
cowplot::plot_grid(
  # CD226 RNA and protein
   Seurat::VlnPlot(object=merged_seurat, features = genes.selected.RNA[1])
  ,Seurat::VlnPlot(object=merged_seurat, features = protein.selected.ADT[1])
  # PDCD1 RNA and protein CD279-PD1-TotalSeqC
  ,Seurat::VlnPlot(object=merged_seurat, features = genes.selected.RNA[2])
  ,Seurat::VlnPlot(object=merged_seurat, features = protein.selected.ADT[2])
  # CD28 RNA and protein
  ,Seurat::VlnPlot(object=merged_seurat, features = genes.selected.RNA[3])
  ,Seurat::VlnPlot(object=merged_seurat, features = protein.selected.ADT[3])
  ,ncol=2)
Violin plots for expression levels of RNA and protein side by side

png

# Plot RNA and protein side by side
cat("Violin plots for expression levels of RNA and protein side by side")
## Arrange multiple violin plots into a grid using cowplot::plot_grid()
cowplot::plot_grid(
  # ENTPD1 RNA and protein CD39-TotalSeqC
   Seurat::VlnPlot(object=merged_seurat, features = genes.selected.RNA[4])
  ,Seurat::VlnPlot(object=merged_seurat, features = protein.selected.ADT[4])
   # CD 96 RNA and CD96TACTILE-TotalSeqC protein 
  ,Seurat::VlnPlot(object=merged_seurat, features = genes.selected.RNA[5])
  ,Seurat::VlnPlot(object=merged_seurat, features = protein.selected.ADT[5])
   # TIGIT RNA and protein TIGITVSTM3-TotalSeqC
  ,Seurat::VlnPlot(object=merged_seurat, features = genes.selected.RNA[6])
  ,Seurat::VlnPlot(object=merged_seurat, features = protein.selected.ADT[6]) 
  ,ncol=2)
Violin plots for expression levels of RNA and protein side by side

png

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 3*expansion.factor)
cat("Violin plots for RNA gene expression between conditions stimulated and unstimulated")
dittoSeq::multi_dittoPlot(object= merged_seurat
                          , assay="RNA"
                          , var= genes.selected # c(genes.selected.RNA, genes.selected.ADT) 
                          , group.by = "condition"
                          , vlnplot.lineweight = 0.3
                          , jitter.size = 1
                          ,legend.show = FALSE
                          ,xlab = "Conditions"
                          ,ylab = "Expression") # CD226, TIGIT have higher expression in stim than unsti; EOMES
Violin plots for RNA gene expression between conditions stimulated and unstimulated

png

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 6*expansion.factor)

# CD226, CDPD1 gene and protein expression between conditions
cat("tSNE plots for expression of RNA and protien between conditions stimulated and unstimulated")
Seurat::FeaturePlot(object=merged_seurat
                    , features = c( genes.selected.RNA[1], protein.selected.ADT[1]
                                   ,genes.selected.RNA[2], protein.selected.ADT[2])
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    , split.by = "condition"
                    ) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
tSNE plots for expression of RNA and protien between conditions stimulated and unstimulated

png

# CD28, ENTPD1 gene and protein expression between conditions
cat("tSNE plots for expression of RNA and protien between conditions stimulated and unstimulated")
Seurat::FeaturePlot(object=merged_seurat
                    , features = c( genes.selected.RNA[3], protein.selected.ADT[3]
                                   ,genes.selected.RNA[4], protein.selected.ADT[4])
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    , split.by = "condition"
                    ) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
tSNE plots for expression of RNA and protien between conditions stimulated and unstimulated

png

# CD96, TIGIT gene and protein expression between conditions
cat("tSNE plots for expression of RNA and protien between conditions stimulated and unstimulated")
Seurat::FeaturePlot(object=merged_seurat
                    , features = c( genes.selected.RNA[5], protein.selected.ADT[5]
                                   ,genes.selected.RNA[6], protein.selected.ADT[6])
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    , split.by = "condition"
                    ) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
tSNE plots for expression of RNA and protien between conditions stimulated and unstimulated

png

Identify potential marker genes for each cluster

This type of analysis is typically recommended for when evaluating a single sample group/condition. With the FindAllMarkers() function we are comparing each cluster against all other clusters to identify potential marker genes. The cells in each cluster are treated as replicates, and essentially a differential expression analysis is performed with some statistical test.

NOTE: The default is a Wilcoxon Rank Sum test, but there are other options available.

marker_ident_function1.png

# Find markers for every cluster compared to all remaining cells, report only the positive ones
cat("Identify differentially expressed genes for each cluster")
markers <- Seurat::FindAllMarkers(object=merged_seurat
                                  ,min.pct=0.25
                                  ,only.pos = TRUE
                                  ,logfc.threshold = 0.25
                                  ,verbose=FALSE)
Identify differentially expressed genes for each cluster

Identify gene markers that are conserved between the groups

Since we have samples representing different conditions in our dataset, our best option is to find conserved markers. This function internally separates out cells by sample group/condition, and then performs differential gene expression testing for a single specified cluster against all other clusters (or a second cluster, if specified). Gene-level p-values are computed for each condition and then combined across groups using meta-analysis methods from the MetaDE R package.

marker_ident_function2.png

cluster0_conserved_markers <- Seurat::FindConservedMarkers(merged_seurat
                                                           ,ident.1 = 0
                                                           ,grouping.var = "condition"
                                                           ,only.pos = TRUE
                                                           ,logfc.threshold = 0.25)
head(cluster0_conserved_markers)
Testing group unstimulated: (0) vs (10, 4, 7, 8, 6, 9, 5, 12, 13, 3, 14, 15, 1, 2, 11)

Testing group stimulated: (0) vs (2, 7, 3, 5, 1, 6, 10, 11, 4, 13, 9, 14, 15, 12, 8)
A data.frame: 6 × 12
unstimulated_p_valunstimulated_avg_log2FCunstimulated_pct.1unstimulated_pct.2unstimulated_p_val_adjstimulated_p_valstimulated_avg_log2FCstimulated_pct.1stimulated_pct.2stimulated_p_val_adjmax_pvalminimump_p_val
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
GZMK5.659935e-2651.7414230.8250.2969.224562e-2611.522696e-711.91093460.7320.2662.481690e-671.522696e-711.131987e-264
CMC18.986527e-1631.5093200.6180.2421.464624e-1583.240914e-481.30443520.6140.2435.282042e-443.240914e-481.797305e-162
CST79.525578e-1541.0743600.9510.7701.552479e-1491.327403e-340.92684260.9500.8372.163401e-301.327403e-341.905116e-153
EOMES1.408120e-1511.3000990.5380.1752.294955e-1475.301662e-371.21121080.4230.1408.640648e-335.301662e-372.816241e-151
RPS15A1.118059e-1220.4628110.9990.9971.822213e-1183.793634e-330.51461041.0000.9966.182865e-293.793634e-332.236118e-122
DKK31.390575e-1081.2185040.3290.0722.266358e-1041.375378e-531.24237140.3090.0552.241592e-491.375378e-532.781149e-108
merged_seurat_0.1 <- Seurat::FindClusters(merged_seurat, resolution = 0.1, verbose=FALSE)
merged_seurat_0.2 <- Seurat::FindClusters(merged_seurat, resolution = 0.2, verbose=FALSE)
merged_seurat_0.3 <- Seurat::FindClusters(merged_seurat, resolution = 0.3, verbose=FALSE)
merged_seurat_0.4 <- Seurat::FindClusters(merged_seurat, resolution = 0.4, verbose=FALSE)
merged_seurat_0.5 <- Seurat::FindClusters(merged_seurat, resolution = 0.5, verbose=FALSE)
merged_seurat_0.6 <- Seurat::FindClusters(merged_seurat, resolution = 0.6, verbose=FALSE)
merged_seurat_0.7 <- Seurat::FindClusters(merged_seurat, resolution = 0.7, verbose=FALSE)
merged_seurat_0.8 <- Seurat::FindClusters(merged_seurat, resolution = 0.8, verbose=FALSE)
merged_seurat_0.9 <- Seurat::FindClusters(merged_seurat, resolution = 0.9, verbose=FALSE)
merged_seurat_1.0 <- Seurat::FindClusters(merged_seurat, resolution = 1.0, verbose=FALSE)
merged_seurat_1.1 <- Seurat::FindClusters(merged_seurat, resolution = 1.1, verbose=FALSE)
merged_seurat_1.2 <- Seurat::FindClusters(merged_seurat, resolution = 1.2, verbose=FALSE)

p1 <- Seurat::DimPlot(merged_seurat_0.1, reduction = "tsne")+ ggplot2::ggtitle("resol_0.1")
p2 <- Seurat::DimPlot(merged_seurat_0.2, reduction = "tsne")+ ggplot2::ggtitle("resol_0.2")
p3 <- Seurat::DimPlot(merged_seurat_0.3, reduction = "tsne")+ ggplot2::ggtitle("resol_0.3")
p4 <- Seurat::DimPlot(merged_seurat_0.4, reduction = "tsne")+ ggplot2::ggtitle("resol_0.4")
p5 <- Seurat::DimPlot(merged_seurat_0.5, reduction = "tsne")+ ggplot2::ggtitle("resol_0.5")
p6 <- Seurat::DimPlot(merged_seurat_0.6, reduction = "tsne")+ ggplot2::ggtitle("resol_0.6")
p7 <- Seurat::DimPlot(merged_seurat_0.7, reduction = "tsne")+ ggplot2::ggtitle("resol_0.7")
p8 <- Seurat::DimPlot(merged_seurat_0.8, reduction = "tsne")+ ggplot2::ggtitle("resol_0.8")
p9 <- Seurat::DimPlot(merged_seurat_0.9, reduction = "tsne")+ ggplot2::ggtitle("resol_0.9")
p10 <- Seurat::DimPlot(merged_seurat_1.0, reduction = "tsne")+ ggplot2::ggtitle("resol_1.0")
p11 <- Seurat::DimPlot(merged_seurat_1.1, reduction = "tsne")+ ggplot2::ggtitle("resol_1.1")
p12 <- Seurat::DimPlot(merged_seurat_1.2, reduction = "tsne")+ ggplot2::ggtitle("resol_1.2")
# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 2.5*expansion.factor)

Seurat::CombinePlots(plots = list(p1, p2,p3,p4,p5,p6))
Warning message:
"CombinePlots is being deprecated. Plots should now be combined using the patchwork system."

png

Seurat::CombinePlots(plots = list(p7,p8,p9,p10,p11,p12))
Warning message:
"CombinePlots is being deprecated. Plots should now be combined using the patchwork system."

png

p1 <- Seurat::DimPlot(merged_seurat_0.1, reduction = "umap")+ ggplot2::ggtitle("resol_0.1")
p2 <- Seurat::DimPlot(merged_seurat_0.2, reduction = "umap")+ ggplot2::ggtitle("resol_0.2")
p3 <- Seurat::DimPlot(merged_seurat_0.3, reduction = "umap")+ ggplot2::ggtitle("resol_0.3")
p4 <- Seurat::DimPlot(merged_seurat_0.4, reduction = "umap")+ ggplot2::ggtitle("resol_0.4")
p5 <- Seurat::DimPlot(merged_seurat_0.5, reduction = "umap")+ ggplot2::ggtitle("resol_0.5")
p6 <- Seurat::DimPlot(merged_seurat_0.6, reduction = "umap")+ ggplot2::ggtitle("resol_0.6")
p7 <- Seurat::DimPlot(merged_seurat_0.7, reduction = "umap")+ ggplot2::ggtitle("resol_0.7")
p8 <- Seurat::DimPlot(merged_seurat_0.8, reduction = "umap")+ ggplot2::ggtitle("resol_0.8")
p9 <- Seurat::DimPlot(merged_seurat_0.9, reduction = "umap")+ ggplot2::ggtitle("resol_0.9")
p10 <- Seurat::DimPlot(merged_seurat_1.0, reduction = "umap")+ ggplot2::ggtitle("resol_1.0")
p11 <- Seurat::DimPlot(merged_seurat_1.1, reduction = "umap")+ ggplot2::ggtitle("resol_1.1")
p12 <- Seurat::DimPlot(merged_seurat_1.2, reduction = "umap")+ ggplot2::ggtitle("resol_1.2")
Seurat::CombinePlots(plots = list(p1, p2,p3,p4,p5,p6))
Warning message:
"CombinePlots is being deprecated. Plots should now be combined using the patchwork system."

png

Seurat::CombinePlots(plots = list(p7,p8,p9,p10,p11,p12))
Warning message:
"CombinePlots is being deprecated. Plots should now be combined using the patchwork system."

png

# Find markers for every cluster compared to all remaining cells, report only the positive ones
cat("Identify differentially expressed genes for each cluster")
markers_0.5 <- Seurat::FindAllMarkers(object= merged_seurat_0.5
                                  ,min.pct=0.25
                                  ,only.pos = TRUE
                                  ,logfc.threshold = 0.25
                                  ,verbose=FALSE)
head(markers_0.5)
Identify differentially expressed genes for each cluster
A data.frame: 6 × 7
p_valavg_log2FCpct.1pct.2p_val_adjclustergene
<dbl><dbl><dbl><dbl><dbl><fct><chr>
IL7R9.678977e-2681.36602220.8560.5191.577480e-2630IL7R
RPS121.887499e-2540.59626690.9980.9983.076247e-2500RPS12
RPL329.104873e-2370.54864330.9950.9981.483912e-2320RPL32
RPL102.111811e-2340.52731150.9950.9993.441830e-2300RPL10
RPS3A1.218826e-2250.58810000.9910.9961.986442e-2210RPS3A
RPL349.950714e-2230.59021060.9840.9951.621767e-2180RPL34
write.table(markers_0.5, "E:/backup-genomeinfo/share/analysis/Bald_-_RNASeq/scRNA_Feb2020_CITE_VDJ/GEX_CiteSeq/analysis-results/FindAllMarkers_resolution-0.5.txt"
            , sep = "\t", row.names = F)
top10 <- markers_0.5 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
head(top10)
A grouped_df: 6 × 7
p_valavg_log2FCpct.1pct.2p_val_adjclustergene
<dbl><dbl><dbl><dbl><dbl><fct><chr>
9.678977e-2681.36602220.8560.5191.577480e-2630IL7R
1.887499e-2540.59626690.9980.9983.076247e-2500RPS12
6.742727e-1641.02334870.9420.9181.098930e-1590PABPC1
5.589620e-1461.33564410.9820.9859.109963e-1420FTH1
4.438425e-1350.82329580.8500.6807.233745e-1310ZFP36L2
2.077494e-1000.70986260.8180.665 3.385900e-960ZFP36
Seurat::DoHeatmap( subset(merged_seurat_0.5, downsample = 100)
                  ,features = top10$gene
                  , size = 9
                  ,assay = "RNA")
Warning message in Seurat::DoHeatmap(subset(merged_seurat_0.5, downsample = 100), :
"The following features were omitted as they were not found in the scale.data slot for the RNA assay: SPOCK2, SPINT2, SAMD3, CLDND1, CST7, ADK, RPS12"

png

#---------------------------------------------------------------------------------------------------
# Plot expression of representative genes in CD8_C01-LEF1 cluster 
# reported in Supplementary table 5, Zhang et al 2018 paper
#---------------------------------------------------------------------------------------------------
genes.repre.CD8.cluster.1 <- c("CCR7","LEF1","SELL","TCF7","CD27","CD28","S1PR1")
genes.repre.CD8.cluster.1 %in% rownames(merged_seurat) # All TRUE

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 4*expansion.factor)

cat("Expression of representative genes reported in Zhang's CD8_C01-LEF1 cluster")
Seurat::FeaturePlot(object=merged_seurat
                    , features = genes.repre.CD8.cluster.1
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    #, split.by = "condition"
                    ) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
  7. TRUE
Expression of representative genes reported in Zhang's CD8_C01-LEF1 cluster

png

#---------------------------------------------------------------------------------------------------
# Plot expression of representative genes in CD8_C02-GPR183 cluster 
# reported in Supplementary table 5, Zhang et al 2018 paper
#---------------------------------------------------------------------------------------------------
genes.repre.CD8.cluster.2 <- c("CCR7","SELL","IL7R","CD27","CD28","PRF1","GZMA","CCL5","GPR183","S1PR1")
genes.repre.CD8.cluster.2 %in% rownames(merged_seurat) # All TRUE

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 5*expansion.factor, repr.plot.height = 4*expansion.factor)

cat("Expression of representative genes reported in Zhang's CD8_C02-GPR183 cluster")

Seurat::FeaturePlot(object=merged_seurat
                    , features = genes.repre.CD8.cluster.2
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    #, split.by = "condition"
) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
  7. TRUE
  8. TRUE
  9. TRUE
  10. TRUE
Expression of representative genes reported in Zhang's CD8_C02-GPR183 cluster

png

#---------------------------------------------------------------------------------------------------
# Plot expression of representative genes in CD8_C03-CX3CR1 cluster 
# reported in Supplementary table 5, Zhang et al 2018 paper
#---------------------------------------------------------------------------------------------------
genes.repre.CD8.cluster.3 <- c("KLRG1","CX3CR1","FCGR3A","FGFBP2","PRF1","GZMH","TBX21","EOMES","S1PR1","S1PR5")
genes.repre.CD8.cluster.3 %in% rownames(merged_seurat) # All TRUE

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 5*expansion.factor, repr.plot.height = 4*expansion.factor)

cat("Expression of representative genes reported in Zhang's CD8_C03-CX3CR1 cluster")

Seurat::FeaturePlot(object=merged_seurat
                    , features = genes.repre.CD8.cluster.3
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    #, split.by = "condition"
) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
  7. TRUE
  8. TRUE
  9. TRUE
  10. TRUE
Expression of representative genes reported in Zhang's CD8_C03-CX3CR1 cluster

png

#---------------------------------------------------------------------------------------------------
# Plot expression of representative genes in CD8_C04-GZMK cluster 
# reported in Supplementary table 5, Zhang et al 2018 paper
#---------------------------------------------------------------------------------------------------
genes.repre.CD8.cluster.4 <- c("GZMK","CXCR4","CXCR3","CD44")
genes.repre.CD8.cluster.4 %in% rownames(merged_seurat) # All TRUE

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 4*expansion.factor)

cat("Expression of representative genes reported in Zhang's CD8_C04-GZMK cluster")

Seurat::FeaturePlot(object=merged_seurat
                    , features = genes.repre.CD8.cluster.4
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    #, split.by = "condition"
) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
Expression of representative genes reported in Zhang's CD8_C04-GZMK cluster

png

#---------------------------------------------------------------------------------------------------
# Plot expression of representative genes in CD8_C05-CD6 cluster 
# reported in Supplementary table 5, Zhang et al 2018 paper
#---------------------------------------------------------------------------------------------------
# The gene NR4A1/2/3 is not found. Close genes are [1] "NR4A2" "NR4A3" "NR4A1"
genes.repre.CD8.cluster.5 <- c("CD6","XCL1","XCL2","MYADM","CAPG","RORA","NR4A1","NR4A2","NR4A3","CD69","ITGAE")
genes.repre.CD8.cluster.5 %in% rownames(merged_seurat) # All TRUE

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 5*expansion.factor, repr.plot.height = 4*expansion.factor)

cat("Expression of representative genes reported in Zhang's CD8_C05-CD6 cluster")

Seurat::FeaturePlot(object=merged_seurat
                    , features = genes.repre.CD8.cluster.5
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    #, split.by = "condition"
) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
  7. TRUE
  8. TRUE
  9. TRUE
  10. TRUE
  11. TRUE
Expression of representative genes reported in Zhang's CD8_C05-CD6 cluster

png

#---------------------------------------------------------------------------------------------------
# Plot expression of representative genes in CD8_C06-CD160 cluster 
# reported in Supplementary table 5, Zhang et al 2018 paper
#---------------------------------------------------------------------------------------------------
grep(pattern = "KLRC.*", x=rownames(merged_seurat), value = TRUE)
# The gene KLRC1/2/3 is not found. Close genes are [1] "KLRC4-KLRK1" "KLRC4" "KLRC3" "KLRC2" "KLRC1"  
# The gene NR4A1/2/3 is not found. Close genes are [1] "NR4A2" "NR4A3" "NR4A1"

genes.repre.CD8.cluster.6 <- c("CD160","KIR2DL4","TMIGD2","KLRC1","KLRC2","KLRC3","NR4A1","NR4A2","NR4A3","IKZF2","ENTPD1","CD69","ITGAE")
genes.repre.CD8.cluster.6 %in% rownames(merged_seurat) # All TRUE

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 4*expansion.factor, repr.plot.height = 4*expansion.factor)

cat("Expression of representative genes reported in Zhang's CD8_C06-CD160 cluster")

Seurat::FeaturePlot(object=merged_seurat
                    , features = genes.repre.CD8.cluster.6
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    #, split.by = "condition"
) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. 'KLRC4-KLRK1'
  2. 'KLRC4'
  3. 'KLRC3'
  4. 'KLRC2'
  5. 'KLRC1'
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
  7. TRUE
  8. TRUE
  9. TRUE
  10. TRUE
  11. TRUE
  12. TRUE
  13. TRUE
Expression of representative genes reported in Zhang's CD8_C06-CD160 cluster

png

#---------------------------------------------------------------------------------------------------
# Plot expression of representative genes in CD8_C07-LAYN cluster 
# reported in Supplementary table 5, Zhang et al 2018 paper
#---------------------------------------------------------------------------------------------------
genes.repre.CD8.cluster.7 <- c("HAVCR2","CXCL13","PDCD1","LAYN","TOX","IFNG","GZMB","MIR155HG","TNFRSF9","ITGAE")
genes.repre.CD8.cluster.7 %in% rownames(merged_seurat) # All TRUE

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 5*expansion.factor, repr.plot.height = 4*expansion.factor)

cat("Expression of representative genes reported in Zhang's CD8_C07-LAYN cluster")

Seurat::FeaturePlot(object=merged_seurat
                    , features = genes.repre.CD8.cluster.7
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    #, split.by = "condition"
) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
  7. TRUE
  8. TRUE
  9. TRUE
  10. TRUE
Expression of representative genes reported in Zhang's CD8_C07-LAYN cluster

png

#---------------------------------------------------------------------------------------------------
# Plot expression of representative genes in CD8_C08-SLC4A10 cluster 
# reported in Supplementary table 5, Zhang et al 2018 paper
#---------------------------------------------------------------------------------------------------
genes.repre.CD8.cluster.8 <- c("SLC4A10","KLRB1","ZBTB16","NCR3","RORC","RORA")
genes.repre.CD8.cluster.8 %in% rownames(merged_seurat) # All TRUE

# Chang R plot sizes
expansion.factor <- 4
options(repr.plot.width = 3*expansion.factor, repr.plot.height = 3.5*expansion.factor)

cat("Expression of representative genes reported in Zhang's CD8_C08-SLC4A10 cluster")

Seurat::FeaturePlot(object=merged_seurat
                    , features = genes.repre.CD8.cluster.8
                    , reduction = "tsne"
                    , label = TRUE
                    , label.size = 7.5
                    #, split.by = "condition"
) & ggplot2::theme(text = ggplot2::element_text(size = 25, face = "bold")) +
  ggplot2::theme(legend.text=ggplot2::element_text(size=25, color="black"))
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
Expression of representative genes reported in Zhang's CD8_C08-SLC4A10 cluster

png

Reference

Chapter 3 Heatmap Annotations

Changing R plot options in Jupyter

Changing R plot options in Jupyter

Using a new windows version of R in Jupyter notebooks

scRNAseq_Braun_etal_Immunity_Script.Rmd

Introduction to scRNA-seq integration

Using dittoSeq to visualize (sc)RNAseq data

Setup a Seurat object, add the RNA and protein data

scRNA-seq/lessons

How to solve Error: cannot allocate vector of size 1.2 Gb in R?