Skip to content

Latest commit

 

History

History
1866 lines (1448 loc) · 61 KB

EAE_IC.md

File metadata and controls

1866 lines (1448 loc) · 61 KB

Myelin insulation as a risk factor for axonal degeneration in autoimmune demyelinating disease. Analysis. Profiling of CD45+ immune cells from wt and hMBP SpC 4 days post-EAE onset by single-cell RNA sequencing

scRNAseq of CD45+ Immune cells WT vs hMBP in peak EAE

Initialization

Environment preparation

Document setup

# Chunk defaults for the whole document: hide messages and warnings in the
# rendered output and set error = FALSE for every chunk.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  error = FALSE
)

Attach required packages

#general packages 
library(tidyverse)
library(PCAtools)

#single cell packages 
library(SeuratDisk)
library(Seurat)
library(scDblFinder)
library(scater)

#graphics packages
library(patchwork)
library(scCustomize)
library(facefuns)
library(viridis)
library(Polychrome)

Define custom functions

#negated membership operator: TRUE for every element of x that does not occur in table
`%!in%` = function(x, table) !(x %in% table)

#function to calculate relative abundance of individual clusters to a sample
#
# seurat_object:  object whose @meta.data slot holds the per-cell annotations
# cluster_column: name (string) of the meta data column with the cluster / cell type labels
# split_by_:      name (string) of the meta data column that defines the groups to compare
#
# Returns a list with two data frames:
#   'freq':   one row per cluster x group with columns <cluster_column>, <split_by_> and Freq,
#             where Freq is the percentage of the group's cells that fall into the cluster
#   'log2fc': one row per cluster with columns <cluster_column> and log2FC, the log2 ratio of
#             Freq in the first over the second level of <split_by_> (further levels are ignored)
pTable = function(seurat_object,cluster_column,split_by_){
  meta = seurat_object@meta.data

  #cross-tabulate once: rows = clusters, columns = groups
  counts = table(meta[[cluster_column]], meta[[split_by_]])

  #divide every column by its total so that each group sums to 100 %, then convert the
  #table to long format (as.data.frame.table yields the columns Var1, Var2, Freq)
  x = data.frame(t(t(counts) / colSums(counts) * 100))
  names(x)[1:2] = c(cluster_column, split_by_)

  #the first two group levels define numerator and denominator of the fold change
  groups = levels(as.factor(x[[split_by_]]))
  if (length(groups) < 2) {
    warning("pTable: '", split_by_, "' has fewer than two groups; log2FC is NA",
            call. = FALSE)
  }
  freq_first = x$Freq[x[[split_by_]] == groups[1]]
  freq_second = x$Freq[x[[split_by_]] == groups[2]]

  y = data.frame(
    #the levels of the cluster_column e.g. celltypes are the output labels
    unique(x[[cluster_column]]),
    #log2 of the ratio of the frequencies in the two groups is the log2FC
    log2(freq_first / freq_second))
  names(y) = c(cluster_column,'log2FC')

  return(list('freq' = x,
              'log2fc' = y))

}

Define input and output directories

#all input and output locations are resolved relative to the current working directory;
#every path keeps its trailing '/' because file names are appended with paste0() later on
wd = getwd()

#showWarnings = FALSE: re-running the notebook must not warn about directories that already exist
indir = paste0(wd,'/scRNAdata/IC/')
dir.create(indir,recursive = TRUE,showWarnings = FALSE)

outdir = paste0(wd,'/Output/IC/')
dir.create(outdir,recursive = TRUE,showWarnings = FALSE)

figdir = paste0(wd,'/figures/IC/')
dir.create(figdir,recursive = TRUE,showWarnings = FALSE)

Reading in the raw count matrices output by CellRanger, annotating meta data, and merging the hMBP and WT counts into one object. Barcodes with 100 or fewer reads are discarded as empty droplets.

#tag
#each variable points to the file of the genotype it is named after
#(the file names carry the genotype: ..._MQ_WT_... and ..._MQ_hMBP_...)
filenameWT = paste0(indir,'GSM6914113_MQ_WT_EAE_IC_pre_m.h5')
filenameHMBP = paste0(indir,'GSM6914114_MQ_hMBP_EAE_IC_pre_m.h5')

#Reading in Raw 10X count files 
rawWT = Read10X_h5(filenameWT, use.names = TRUE, unique.features = TRUE)
rawHMBP = Read10X_h5(filenameHMBP, use.names = TRUE, unique.features = TRUE)

#Creating Seurat Objects and tagging every cell with its genotype
#NOTE(review): `min.genes` does not look like an argument of current CreateSeuratObject()
#(the feature filter is `min.features`), so it presumably has no effect here - confirm
sc.taggedWT = CreateSeuratObject(counts = rawWT, min.cells = 0, min.genes = 200, project = 'EAE_IC_WT')
sc.taggedWT$genotype = 'WT'
sc.taggedHMBP = CreateSeuratObject(counts = rawHMBP, min.cells = 0, min.genes = 200, project = 'EAE_IC_HMBP')
sc.taggedHMBP$genotype = 'hMBP'

#Merging objects
sc.tagged = merge(sc.taggedWT,sc.taggedHMBP)

#filtering empty droplets: keep barcodes with more than 100 counts
sc.tagged = sc.tagged %>% subset(nCount_RNA > 100)

#factoring sample metadata (WT first, so it is the reference level in plots and tables)
sc.tagged$sample = factor(sc.tagged$orig.ident,levels = c('EAE_IC_WT','EAE_IC_HMBP'))
sc.tagged$genotype = factor(sc.tagged$genotype, levels = c('WT','hMBP'))

#calculating mitochondrial genes: PercentageFeatureSet() returns a percentage, divide by 100
#to store a ratio; features are all genes whose name starts with 'mt'
sc.tagged$mitoRatio <- PercentageFeatureSet(object = sc.tagged,
                                            features = grep('^mt', rownames(sc.tagged), value = TRUE)
) / 100

SaveH5Seurat(sc.tagged,paste0(outdir,'IC.tagged.h5seurat'),overwrite = TRUE)

QC of raw data

#QC thresholds; drawn as reference lines in the QC plots and applied in the filtering step
cutoff.reads     = 1000   #counts per cell
cutoff.features  = 500    #detected genes per cell
cutoff.mitoRatio = 0.05   #ratio of mitochondrial counts

Density of Counts

Plotting densities of counts/cell by sample. The line represents the applied cutoff.

#tag

#per-sample density of counts per cell; the vertical line marks cutoff.reads
qc.counts.density = ggplot(
  data = sc.tagged@meta.data,
  mapping = aes(x = nCount_RNA, color = sample, fill = sample)
) +
  geom_density(alpha = 0.2) +
  geom_vline(xintercept = cutoff.reads) +
  scale_x_log10() +
  ylab("Cell density") +
  theme(legend.position = 'none')

#same values as violin with jittered cells and a box on top; the horizontal line marks cutoff.reads
qc.counts.violin = ggplot(
  data = sc.tagged@meta.data,
  mapping = aes(x = sample, y = nCount_RNA, fill = sample, color = sample)
) +
  geom_violin(scale = 'width', alpha = 0.2) +
  geom_jitter(size = 0.1, alpha = 0.5, color = 'black') +
  geom_boxplot(width = 0.2, outlier.size = 0, fill = 'white', color = 'black', coef = 0, outlier.color = NA) +
  geom_hline(yintercept = cutoff.reads) +
  scale_fill_discrete() +
  scale_y_log10() +
  labs(x = '', y = 'Read counts') +
  theme(legend.position = 'right')

#both panels side by side, tagged A and B
qc.counts.density + qc.counts.violin +
  plot_annotation(tag_levels = 'A')

Density of Genes

Plotting the density of detected genes/cell by sample. The line represents the applied cutoff.

#tag

#per-sample density of detected genes per cell; the vertical line marks cutoff.features
qc.features.density = ggplot(
  data = sc.tagged@meta.data,
  mapping = aes(x = nFeature_RNA, color = sample, fill = sample)
) +
  geom_density(alpha = 0.2) +
  geom_vline(xintercept = cutoff.features) +
  scale_x_log10() +
  ylab("Cell density") +
  theme(legend.position = 'none')

#same values as violin with jittered cells and a box on top; the horizontal line marks cutoff.features
qc.features.violin = ggplot(
  data = sc.tagged@meta.data,
  mapping = aes(x = sample, y = nFeature_RNA, fill = sample, color = sample)
) +
  geom_violin(scale = 'width', alpha = 0.2) +
  geom_jitter(size = 0.1, alpha = 0.5, color = 'black') +
  geom_boxplot(width = 0.2, outlier.size = 0, fill = 'white', color = 'black', coef = 0, outlier.color = NA) +
  geom_hline(yintercept = cutoff.features) +
  scale_fill_discrete() +
  scale_y_log10() +
  labs(x = '', y = 'Feature counts') +
  theme(legend.position = 'right')

#both panels side by side, tagged A and B
qc.features.density + qc.features.violin +
  plot_annotation(tag_levels = 'A')

#tag

#per-sample density of the mitochondrial ratio per cell; the vertical line marks cutoff.mitoRatio
qc.mitoRatio.density = sc.tagged@meta.data %>% 
  ggplot(aes(color=sample, x=mitoRatio, fill= sample)) + 
    geom_density(alpha = 0.2) + 
    scale_x_log10() + 
    ylab("Cell density") +
    geom_vline(xintercept = cutoff.mitoRatio) + 
    theme(legend.position = 'none')

#same values as violin with jittered cells and a box on top; the horizontal line marks cutoff.mitoRatio
qc.mitoRatio.violin = sc.tagged@meta.data %>% 
  ggplot(aes(x=sample, y=mitoRatio, fill= sample, color = sample)) + 
    geom_violin(scale = 'width', alpha = 0.2) + 
    geom_jitter(size = 0.1, alpha = 0.5, color = 'black') +
    scale_fill_discrete() + 
    scale_y_log10() + 
    geom_boxplot(width=0.2, outlier.size = 0, fill = 'white', color = 'black', coef = 0, outlier.color = NA) +
    geom_hline(yintercept = cutoff.mitoRatio) + 
    #the y axis shows mitoRatio, not a feature count
    ylab('Mitochondrial ratio') +
    xlab('') + 
    theme(legend.position = 'right')

#both panels side by side, tagged A and B
qc.mitoRatio.density + qc.mitoRatio.violin + 
  plot_annotation(
    tag_levels = 'A') 

Summary

Summary plotting read counts vs. number of genes, colored by ratio of mitochondrial genes.

#tag

# Genes detected vs. number of UMIs per cell, one panel per sample. Cells are shaded by
# mitoRatio up to the cutoff; values outside the colour limits are drawn in red.
qc.combined = ggplot(sc.tagged@meta.data, aes(x = nCount_RNA, y = nFeature_RNA)) +
  geom_jitter(aes(color = mitoRatio), size = 1) +
  stat_smooth(method = lm) +
  geom_vline(xintercept = cutoff.reads) +
  geom_hline(yintercept = cutoff.features) +
  scale_colour_gradient(low = "gray90", high = "black", limits = c(0, cutoff.mitoRatio), na.value = 'red') +
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(~sample) +
  theme_classic()
qc.combined + plot_annotation(tag_levels = 'A')

Filtering

Filtering for high-quality cells is done according to cutoffs stated above.

#tag

#keep only cells that pass all three QC thresholds at once
sc.filt = subset(
  sc.tagged,
  subset = nFeature_RNA > cutoff.features &
    nCount_RNA > cutoff.reads &
    mitoRatio < cutoff.mitoRatio
)

Dimensional reduction

Normalization

Count data is scaled to 10,000 counts/cell and log1p-transformed (Seurat's "LogNormalize").

#tag

#log-normalise to 10,000 counts per cell, then scale the normalised data
sc.norm = NormalizeData(sc.filt, normalization.method = "LogNormalize", scale.factor = 1e4)
sc.norm = ScaleData(sc.norm)

Variable genes

2000 most variable genes are selected for downstream analysis.

#tag

#select variable features with the default settings and plot them with labels
sc.norm = sc.norm %>% FindVariableFeatures()
VariableFeaturePlot_scCustom(sc.norm, label = TRUE)

PCA

Data is normalized and PCA calculated for 2000 most variable genes. Elbow plot shows %-variance explained by each PC. There is no clear shoulder. 30 PCs will be used for downstream analysis.

#tag

#PCA with default settings; the elbow plot shows the first 50 components
sc.norm = sc.norm %>% RunPCA()
ElbowPlot(sc.norm, ndims = 50)

UMAP

For visualization, UMAP is calculated using the first 30 PC’s and default parameters (n.neighbors = 30, min.dist = 0.3, seed.use = 42, etc.). Plotted below colored by sample.

#tag

#UMAP on the first 30 PCs; parameters are spelled out together with the fixed seed
sc.norm = RunUMAP(sc.norm,
                  dims = 1:30,
                  n.neighbors = 30,
                  min.dist = 0.3,
                  seed.use = 42)
DimPlot(sc.norm, group.by = 'sample', split.by = 'sample') & NoAxes()

Unsupervised clustering

Nearest-neighbor and shared nearest-neighbor graphs are constructed using the first 30 dimensions of the PCA and standard parameters (k = 20, nn.method = ‘annoy’, annoy.metric = ‘euclidean’, prune.SNN = 1/15, n.trees = 50, …). Cells are clustered by modularity optimization with the Louvain algorithm (Waltman and van Eck, 2013). Standard parameters are used (resolution = 0.8).

#tag

#neighbour graphs on the first 30 PCs; k.param is written out in full instead of relying on
#partial matching of `k`
sc.norm = sc.norm %>% FindNeighbors(dims = 1:30,k.param = 20, nn.method  = 'annoy', annoy.metric = 'euclidean', prune.SNN  = 1/15, n.trees = 50)
#algorithm = 1 selects the Louvain algorithm; resolution is left at its default
sc.norm = sc.norm %>% FindClusters(algorithm = 1)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 10710
Number of edges: 364263

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8374
Number of communities: 20
Elapsed time: 1 seconds
DimPlot_scCustom(sc.norm)&NoAxes()

Known markers

In order to assign cell types two approaches are used jointly: (1) Plotting of canonical immune cells markers (2) Calculating marker genes of individual clusters and manual search in databases with cell type annotations (proteinatlas.org, genecards.org)

CD45+ immune cells

Plotting Ptprc (CD45) expression shows low background from CD45- cells. Clusters 16 and 17 look like contamination.

#tag

#cluster map, reused as reference panel next to the marker plots below
clPlot = DimPlot_scCustom(sc.norm) & NoAxes()
#Ptprc: CD45+, the cells that don't express are background
ptprcPanels = list(FeaturePlot(sc.norm, features = c('Ptprc')), clPlot)
ptprcPanels[[1]] + ptprcPanels[[2]] + plot_layout(ncol = 2)

CD14+ Monocytes/Macrophages/Microglia

Plotting Cd14, C1qa, Aif1 (IBA1) and Trem2 shows a signal in both macrophages and microglia.

#tag

#myeloid markers next to the cluster map
wrap_plots(
  FeaturePlot(sc.norm, features = c('Cd14', 'C1qa', 'Aif1', 'Trem2')) & NoAxes()
) + clPlot

Plotting P2ry12 and Tmem119 as markers for microglia. Both are fairly specific to clusters 4, 5 and 9. C1qa and Cx3cr1 also have higher expression in these clusters.

#tag

#microglia markers next to the cluster map
wrap_plots(
  FeaturePlot(sc.norm, features = c('P2ry12', 'Tmem119', 'C1qa', 'Cx3cr1')) & NoAxes()
) + clPlot

Plotting Fcgr1 (CD64), Ccr2, Cxcr4 as markers for macrophages. They are all strongest in Clusters 0,1,2,3,13

#tag

#macrophages: Fcgr1 (CD64), Ccr2, Cxcr4
wrap_plots(
  FeaturePlot(sc.norm, features = c('Fcgr1', 'Ccr2', 'Cxcr4')) & NoAxes()
) + clPlot

CD3+ T-cells

Plotting Cd3g, Cd3d (CD3) and Trbc1, Trbc2 (TCR) shows that clusters 7,6,12 are T-Cells. (1) Cd4 (CD4) and Cd8a (CD8) are often not well detected in scRNAseq data but can see both populations present (2) NK cells are defined by absence of CD3 and e.g. Klrk1 (CD314), Klrb1 (CD161), Ncr1 (NKp46) we can see a population that is CD3-,CD314+,NKp46+ in one of the T-cell clusters

#tag

#T cells: Cd3g/Cd3d (CD3) and Trbc1/Trbc2 (TCR)
wrap_plots(
  FeaturePlot(sc.norm, features = c('Cd3g', 'Cd3d', 'Trbc1', 'Trbc2')) & NoAxes()
) + clPlot

CD19+ B-cells

Plotting Cd19 (CD19), Ms4a1 (CD20), Cd79a+Cd79b (CD79) shows B-cells in Cluster 18. Looks like a very small percentage, there might be some selection bias.

#tag

#B cells: Cd19, Ms4a1 (CD20), Cd79a/Cd79b (CD79)
wrap_plots(
  FeaturePlot(sc.norm, features = c('Cd19', 'Ms4a1', 'Cd79a', 'Cd79b')) & NoAxes()
) + clPlot

Conventional DCs

Plotting Itgax (CD11c) and H2-Aa, H2-Ab1, H2-D1 (MHC) as markers for conventional dendritic cells (DC). Neither is very specific; both are strongest in Cluster 10. Additionally, Xcr1 and Clec9a (CD370) are reported markers for DCs presenting to CD8+ T-cells, and these are also concentrated in Cluster 10.

#tag

#conventional DC markers next to the cluster map
wrap_plots(
  FeaturePlot(sc.norm, features = c('Itgax', 'H2-Aa', 'Xcr1', 'Clec9a')) & NoAxes()
) + clPlot

Siglec-H+ plasmacytoid DCs

Plotting Siglech (Siglec-H), which also marks Microglia and Bst2 (CD317) as Markers of Plasmacytoid DCs. Both are strongest in Cluster 14.

#tag
#plasmacytoid DC markers stacked in one column next to the cluster map
wrap_plots(
  FeaturePlot(sc.norm, features = c('Siglech', 'Bst2'), ncol = 1) & NoAxes()
) + clPlot

Granulocytes

Expression of Ngp (neutrophilic granule protein), S100a8 and Retnlg identifies Cluster 15 as granulocytes; these are neutrophils, which are important for EAE. Additional markers: Cxcr2 (the main chemokine receptor of neutrophils) and Ly6g.

#tag
#granulocyte markers next to the cluster map (NoAxes() only needs to be applied once)
wrap_plots(
  FeaturePlot(sc.norm, features = c('S100a8', 'Ngp', 'Retnlg')) & NoAxes()
) + clPlot

Identifying remaining clusters

Clusters 16, 11 and 17 still remain unclear. Checking by calculating unique cluster markers:

#tag
#strongly enriched positive markers of cluster 17 versus all other cells
FindMarkers(
  sc.norm,
  group.by = 'seurat_clusters',
  ident.1 = 17,
  logfc.threshold = 2,
  min.diff.pct = 0.7,
  only.pos = TRUE
)
                p_val avg_log2FC pct.1 pct.2     p_val_adj
Tm4sf1   0.000000e+00   2.805910 0.707 0.002  0.000000e+00
Id3      0.000000e+00   2.596465 0.747 0.016  0.000000e+00
Sparcl1  0.000000e+00   3.010900 0.813 0.003  0.000000e+00
Ptn      0.000000e+00   3.544829 0.787 0.008  0.000000e+00
Nedd4    0.000000e+00   2.546426 0.773 0.014  0.000000e+00
Igfbp7  5.368829e-211   4.577987 0.813 0.047 1.667183e-206
Tsc22d1 6.104530e-184   2.607984 0.813 0.052 1.895640e-179
Sparc    1.052539e-68   2.639328 0.987 0.220  3.268449e-64

Cluster 17 seems to be oligodendrocytes (Mag, Klk6, Ptgds).

#tag
#strongly enriched positive markers of cluster 16 versus all other cells
FindMarkers(
  sc.norm,
  group.by = 'seurat_clusters',
  ident.1 = 16,
  logfc.threshold = 2,
  min.diff.pct = 0.7,
  only.pos = TRUE
)
              p_val avg_log2FC pct.1 pct.2   p_val_adj
S100a9 0.000000e+00   8.055196 0.901 0.013 0.00000e+00
S100a8 9.247286e-81   7.028866 0.938 0.228 2.87156e-76
FeaturePlot(sc.norm, features = 'Cldn5')

Tm4sf1 points to endothelial cells for cluster 16, which can be confirmed by looking at Cldn5

One cluster remains unannotated. By calculating marker genes we find Fscn1 and Ccr7 among the top markers which are annotated in literature as highly specific markers for activation of dendritic cells.

#tag
#positive markers of cluster 11 (active identities) versus all other cells
FindMarkers(
  sc.norm,
  ident.1 = '11',
  logfc.threshold = 0.5,
  only.pos = TRUE,
  min.diff.pct = 0.7
)
                  p_val avg_log2FC pct.1 pct.2     p_val_adj
Fscn1      0.000000e+00   4.467155 0.905 0.058  0.000000e+00
Ramp3      0.000000e+00   2.848859 0.709 0.008  0.000000e+00
Ccr7       0.000000e+00   4.102384 0.921 0.007  0.000000e+00
Serpinb6b  0.000000e+00   3.729916 0.852 0.034  0.000000e+00
Traf1     1.047943e-240   2.608671 0.820 0.100 3.254177e-236
Cytip     6.252751e-192   2.987493 0.931 0.183 1.941667e-187
Tmem123   4.074869e-170   3.310465 0.942 0.241 1.265369e-165

Assigning preliminary cell type labels.

#tag
#keep the unsupervised cluster ids in `clusters`, then map every cluster to a preliminary
#cell type; the argument order is kept as is because it determines the order of the levels
sc.norm$clusters = sc.norm$seurat_clusters
Idents(sc.norm) = sc.norm$clusters
sc.norm = RenameIdents(
  sc.norm,
  '0' = 'Monocytes/Macrophages',
  '1' = 'Monocytes/Macrophages',
  '2' = 'Monocytes/Macrophages',
  '3' = 'Monocytes/Macrophages',
  '13' = 'Monocytes/Macrophages',
  '4' = 'Microglia',
  '5' = 'Microglia',
  '9' = 'Microglia',
  '6' = 'T-Cells',
  '7' = 'T-Cells',
  '12' = 'T-Cells',
  '8' = 'Cycling cells',
  '10' = 'Conventional DC',
  '11' = 'Activated DC',
  '14' = 'Plasmacytoid DC',
  '15' = 'Granulocytes',
  '16' = 'Endothelial cells',
  '17' = 'Glia cells',
  '18' = 'B-Cells'
)
#store the labels as meta data so they survive later changes of the active identity
sc.norm$celltypes = Idents(sc.norm)

Cell cycle

Cell cycle phase scoring (using an enrichment approach adapted from Tirosh et al. 2016) shows actively dividing cells only in Cluster 8, which is recapitulated by expression of the canonical proliferation markers Top2a and Mki67. This cluster shows markers for both T-cells and microglia, so we have to regress out the cell cycle phase to place each cell in the cluster of its cell type.

#table of cell cycle genes; the columns geneID and phase are used below
cell_cycle_genes = read.csv('Mus_musculus_CC.csv')

# Gene annotation table (columns gene_id and gene_name are used below)
annotations = read.csv('geneInfoEns105.csv')
# Get gene names for Ensembl IDs for each gene
cell_cycle_markers = left_join(cell_cycle_genes, annotations, by = c("geneID" = "gene_id"))

# Acquire the S phase genes
s_genes = cell_cycle_markers %>%
        dplyr::filter(phase == "S") %>%
        pull("gene_name")
        
# Acquire the G2M phase genes        
g2m_genes = cell_cycle_markers %>%
        dplyr::filter(phase == "G2/M") %>%
        pull("gene_name")
# Perform cell cycle scoring (provides the Phase, S.Score and G2M.Score columns used below)
sc.norm = CellCycleScoring(sc.norm,
                           g2m.features = g2m_genes,
                           s.features = s_genes)


# Visualize the UMAP, grouping by cell cycle phase
wrap_plots(DimPlot(sc.norm,group.by= "Phase", split.by = 'Phase',ncol = 1)&NoAxes()) + wrap_plots( FeaturePlot(sc.norm,features = c('Top2a','Mki67'), ncol = 1)&NoAxes()) 
# Re-scale all genes while regressing out both cell cycle scores
scCC <- ScaleData(sc.norm, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(sc.norm))

# Recompute PCA and UMAP on the cell-cycle-corrected data
scCC = scCC %>% 
        RunPCA %>% 
        RunUMAP(dims = 1:30,n.neighbors = 30,min.dist = 0.3, seed.use = 42)
# overwrite = TRUE belongs to SaveH5Seurat(), not to paste0(): inside paste0() it was appended
# to the file name ('scCC.h5seuratTRUE') and overwriting was never requested
SaveH5Seurat(scCC,paste0(outdir,'scCC.h5seurat'),overwrite = TRUE)

Now the cycling cells are closer to the microglia and T-cell clusters.

#tag
#Here we can see that 'cycling cells' cluster is now split mostly between Microglia and T-Cells
DimPlot_scCustom(scCC, label = TRUE, group.by = 'celltypes') +
  FeaturePlot(scCC, c('Top2a', 'Mki67'))

## Doublet & contamination removal

In order to improve the clustering we will also remove doublet cells using scDblFinder and filter out any clusters that are not immune cells. As scDblFinder relies on randomization, the clustering results downstream of it will not be 100% reproducible via this notebook: the total number of clusters and the assignment of cells to (unsupervised) clusters will vary slightly. Therefore we provide the code we used, and at the end of the snippet we load a list containing the filtered cell barcodes exactly as they were output when we ran the function, in order to generate the same figures as presented in the paper.

#tag

#scDblFinder works on SingleCellExperiment objects, so convert first
sce = as.SingleCellExperiment(scCC)
sce.dbl = scDblFinder(sce)

#2 x 2 overview: cell types and doublet score (top), counts and detected genes (bottom)
dblTopRow = plotUMAP(sce, colour_by = 'celltypes') +
  plotUMAP(sce.dbl, colour_by = 'scDblFinder.score')
dblBottomRow = plotUMAP(sce.dbl, colour_by = 'nCount_RNA') +
  plotUMAP(sce.dbl, colour_by = 'nFeature_RNA')
dblTopRow / dblBottomRow

#tag

#scDblFinder also assigns every cell a hard class (singlet or doublet)
plotUMAP(sce.dbl, colour_by = 'scDblFinder.class')

#and we can use this information to subset both the SCE and the Seurat object
sce.noDbl = sce.dbl[, sce.dbl$scDblFinder.class == 'singlet']
scCC.noDbl = scCC[, sce.dbl$scDblFinder.class == 'singlet']


#after throwing out doublets recompute dimensional reductions and clustering
scCC.noDblRE = scCC.noDbl %>%
  RunPCA() %>%
  RunUMAP(dims = 1:30, n.neighbors = 30, min.dist = 0.3, seed.use = 42) %>%
  FindNeighbors(dims = 1:30) %>%
  FindClusters(resolution = 1.1) %>%
  BuildClusterTree()
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9095
Number of edges: 309425

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7922
Number of communities: 21
Elapsed time: 1 seconds
#tag

#drop clusters 19 and 20 of the doublet-free re-clustering
scCC.noDblRE.clean = scCC.noDblRE[, scCC.noDblRE$seurat_clusters %!in% c(19, 20)]
#scCC.noDblRE.clean %>% colnames %>% 
#                       as.data.frame() %>% 
#                       write_csv(file = 'scCC.noDblRE.clean_barcodes.csv')

#the object built above is replaced by the cells listed in the stored barcode file;
#the barcodes sit in the column named '.'
filtered_bc = read_csv(file = 'scCC.noDblRE.clean_barcodes.csv')[['.']]
scCC.noDblRE.clean = scCC[, filtered_bc]

#recompute reductions and clustering on the cleaned set of cells
scCC.noDblRE.clean = scCC.noDblRE.clean %>%
  RunPCA() %>%
  RunUMAP(dims = 1:30, n.neighbors = 30, min.dist = 0.3, seed.use = 42) %>%
  FindNeighbors(dims = 1:30) %>%
  FindClusters(resolution = 0.8) %>%
  BuildClusterTree()
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8954
Number of edges: 307147

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8154
Number of communities: 16
Elapsed time: 1 seconds
subset(scCC.noDblRE.clean, celltypes %!in% levels(scCC.noDblRE.clean$celltypes)[9])
An object of class Seurat 
31053 features across 8953 samples within 1 assay 
Active assay: RNA (31053 features, 2000 variable features)
 2 dimensional reductions calculated: pca, umap
DimPlot_scCustom(scCC.noDblRE.clean, label = TRUE) + DimPlot_scCustom(scCC.noDblRE.clean, group.by = 'celltypes', label = TRUE)

Now we assign final cell type labels.

#tag

#keep the cluster ids of the final clustering, then map every cluster to its final cell type;
#the argument order is kept as is because it determines the order of the levels
scCC.noDblRE.clean$clusters = scCC.noDblRE.clean$seurat_clusters
Idents(scCC.noDblRE.clean) = scCC.noDblRE.clean$clusters
scCC.noDblRE.clean = RenameIdents(
  scCC.noDblRE.clean,
  '0' = 'Macrophages',
  '1' = 'Macrophages',
  '2' = 'Macrophages',
  '3' = 'Macrophages',
  '4' = 'Microglia',
  '5' = 'Microglia',
  '12' = 'Microglia',
  '14' = 'Microglia',
  '6' = 'T-Cells',
  '8' = 'T-Cells',
  '11' = 'T-Cells',
  '7' = 'Conventional DC',
  '9' = 'Activated DC',
  '10' = 'Plasmacytoid DC',
  '13' = 'Granulocytes',
  '15' = 'B-Cells'
)
#store the labels as meta data and show them on the UMAP
scCC.noDblRE.clean$celltypesNew = Idents(scCC.noDblRE.clean)
DimPlot_scCustom(scCC.noDblRE.clean, group.by = 'celltypesNew', label = TRUE)

#tag
#write the fully processed object; the argument is named `filename` in full (as in the other
#SaveH5Seurat() calls that name it) and outdir already ends with '/', so no separator is added
SaveH5Seurat(scCC.noDblRE.clean, filename = paste0(outdir,'MQ_EAE_processed.h5seurat'),overwrite = TRUE)

Subclustering analysis

  #load the processed object from the input directory and split off the two populations
  #that are subclustered below
  sc.IC = LoadH5Seurat(file = paste0(indir,'GSE222063_MQ_EAE_processed.h5seurat'))
  sc.mg = subset(sc.IC, celltypesNew == 'Microglia')
  sc.mac = subset(sc.IC, celltypesNew == 'Macrophages')

Macrophages

Integration & reanalysis of Macrophages

#one object per sample, so that each can be processed independently before integration
sc.split = SplitObject(sc.mac, split.by = 'sample')

# normalize and identify the 2000 most variable features (vst) for each dataset independently
sc.split = lapply(sc.split, function(obj) {
  obj %>%
    NormalizeData() %>%
    FindVariableFeatures(selection.method = "vst", nfeatures = 2000)
})

# select features that are repeatedly variable across datasets and use them as anchor features
features = SelectIntegrationFeatures(sc.split)
mac.anchors = FindIntegrationAnchors(sc.split, anchor.features = features)

# create integrated assay
mac.combined = IntegrateData(anchorset = mac.anchors)

# reanalyse integrated data: 30 PCs are computed, the first 15 are used downstream
DefaultAssay(mac.combined) = 'integrated'
mac.combined = ScaleData(mac.combined)
mac.combined = RunPCA(mac.combined, npcs = 30)
mac.combined = RunUMAP(mac.combined, dims = 1:15)
mac.combined = FindNeighbors(mac.combined, dims = 1:15)
mac.combined = FindClusters(mac.combined, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6102
Number of edges: 202931

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7706
Number of communities: 6
Elapsed time: 0 seconds

Identify contaminating cell types

#two marker genes per cell type found in the full data set; reused for the microglia check
genes_celltypes = list(
  genes_b      = c('Cd19', 'Ms4a1'),
  genes_gran   = c('S100a8', 'Retnlg'),
  genes_pdc    = c('Siglech', 'Bst2'),
  genes_cdcAct = c('Fscn1', 'Ccr7'),
  genes_cdc    = c('Itgax', 'Btla'),
  genes_t      = c('Cd3g', 'Trbc1'),
  genes_mg     = c('Tmem119', 'Cx3cr1'),
  gene_mac     = c('Arg1', 'Cd68')
)

#cluster map on top, one pair of feature plots per marker set below
mac.combined %>% DimPlot_scCustom(label = TRUE) / wrap_plots(
  lapply(genes_celltypes, function(marker_pair) {
    FeaturePlot_scCustom(mac.combined, marker_pair, colors_use = viridis_light_high) +
      theme(
        legend.position = 'right',
        legend.box = 'horizontal',
        legend.title = element_text(size = 4)
      )
  })
)

Based on marker genes for the individual cell types identified in our dataset no single cluster seems to contain a contamination.

Identification of specific macrophage cell states

Cell states were analyzed based on discussion and marker gene expression reported in Giladi et al. 2021. Here they state, that Ly6c+ monocytes (human: CD14hi-CD16lo) carry chemokine receptors that allow tissue infiltration and subsequent generation of different monocyte-derived cells. After infiltration they gain expression of MHCII-related genes. Monocytes expressing Ccr2 were identified as main drivers of EAE pathogenesis.

In their single-cell analysis they could identify a more fine-grained classification of different cell states:

  • One cluster showed expression of interferon responsive genes and labeled Ifi2+
  • Two subsets expressed Arg1, Apoc2 and C1qb and were designated as Arg1+ I and Arg1+ II
  • A cluster was characterized by expression of Nos2, Gpnmb, Arg1 and Fabp5 and designated Nos2+
  • One population that expressed inflammatory genes such as Saa3, Plac8 and Gbp2 was called Saa3+
  • A cluster expressing Cxcl9, Cxcl10 and Il1b was designated as Cxcl10+
#marker sets for the macrophage states compared against below, plus a general set
genes_mac = list(
  genes_general = c('Ly6c2', 'Ccr2','H2-Ab1','H2-D1','H2-Eb1','Top2a','Mki67'),
  genes_arg1 = c('Arg1','Ecm1','Tmem37','Apoc2','C1qb','Itgb5','Itga6','Anxa3','Ptgs1','C3ar1'),
  genes_arg2 = c('Cd14','Arg1','Thbs1','Mmp19','Klf2','Ecm1','Lmna'),
  genes_nos2 = c('Gpnmb','Fabp5','Nos2','Slc7a2','Slc7a11','Fth1','Cstb'),
  genes_ifit1 = c('Ifit1','Ifit2','Ifit3', 'Usp18','Irf7'),
  genes_saa3 = c('Saa3','Saal1','Tgm2','Gbp2','Upp1','Plac8','Clec5a'),
  genes_cxcl10 = c('Cxcl10','Cxcl9','Cxcl11','Socs1','Gbp2','Tgm2','Nod1','Upp1','Il1b','Smox'))

#one figure per marker set: cluster map in the top-left cell (A), feature plots in the
#remaining area (B); the stray trailing comma after colors_use has been removed so that no
#empty argument is passed to FeaturePlot_scCustom()
mac_states_plots = lapply(genes_mac,function(x){
                      wrap_plots(design = 'ABBB
                                           #BBB',
                        DimPlot_scCustom(seurat_object = mac.combined,
                                         group.by = 'seurat_clusters',
                                         label = TRUE,
                                         label.size = 10), 
                        FeaturePlot_scCustom(seurat_object = mac.combined,
                                             features = x,
                                             colors_use = viridis_light_high) & 
                        theme(legend.position = 'right',
                              legend.box = 'horizontal',
                              legend.title = element_text(size = 4)))})
#print every figure under a heading made from the name of its marker set
iwalk(mac_states_plots, ~ {
  cat('#### ', .y, '\n\n')
  
  print(.x)
  
  cat('\n\n')
  
})

genes_general

genes_arg1

genes_arg2

genes_nos2

genes_ifit1

genes_saa3

genes_cxcl10

While there is no perfect match of the markers, we can still see a good correspondence between our subclustering and the respective substate markers described by Giladi et al. It does not seem possible to retrieve the two individual Arg1 populations. According to these markers, our clusters correspond to: - 0: Arg1+ - 1: Arg1+ - 2: Nos2+ - 3: Cxcl10+ - 4: Saa3+ (by the markers other than Saa3) - 5: Ifi2+

rename subtypes

#keep the subcluster ids, then map every subcluster to its macrophage state
mac.combined$clusters = mac.combined$seurat_clusters
Idents(mac.combined) = mac.combined$clusters
mac.combined = RenameIdents(
  mac.combined,
  '0' = 'Arg1+',
  '1' = 'Arg1+',
  '2' = 'Nos2+',
  '3' = 'Cxcl10+',
  '4' = 'Saa3+', # (by other markers,)
  '5' = 'Ifi2+'
)
#store the state labels as meta data and save the object
mac.combined$macTypes = Idents(mac.combined)
SaveH5Seurat(mac.combined, filename = paste0(outdir,'mac.combined.h5seurat'), overwrite = TRUE)

Microglia

Integration of Microglia

#one object per sample, so that each can be processed independently before integration
sc.split = SplitObject(sc.mg, split.by = 'sample')

# normalize and identify the 2000 most variable features (vst) for each dataset independently
sc.split = lapply(sc.split, function(obj) {
  obj %>%
    NormalizeData() %>%
    FindVariableFeatures(selection.method = "vst", nfeatures = 2000)
})

# select features that are repeatedly variable across datasets and use them as anchor features
features = SelectIntegrationFeatures(sc.split)
mg.anchors = FindIntegrationAnchors(sc.split, anchor.features = features)

# this command creates an 'integrated' data assay
mg.combined = IntegrateData(anchorset = mg.anchors)

# reanalyse integrated data: 30 PCs are computed, the first 15 are used downstream
DefaultAssay(mg.combined) = 'integrated'
mg.combined = ScaleData(mg.combined)
mg.combined = RunPCA(mg.combined, npcs = 30)
mg.combined = RunUMAP(mg.combined, dims = 1:15)
mg.combined = FindNeighbors(mg.combined, dims = 1:15)
mg.combined = FindClusters(mg.combined, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1446
Number of edges: 55041

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7837
Number of communities: 7
Elapsed time: 0 seconds

check for contamination in Microglia sample

#cluster map on top, one pair of feature plots per cell type marker set below
mg.combined %>% DimPlot_scCustom(label = TRUE) / wrap_plots(
  lapply(genes_celltypes, function(marker_pair) {
    FeaturePlot_scCustom(
      seurat_object = mg.combined,
      features = marker_pair,
      colors_use = viridis_light_high
    ) +
      theme(
        legend.position = 'right',
        legend.box = 'horizontal',
        legend.title = element_text(size = 4)
      )
  })
)

#microglia markers, one feature plot per gene
markers_mg = c('Bhlhe41','Gpr34', 'Hexb', 'Olfml3', 'P2ry12','P2ry13','Sall1','Serpine2','Siglech','Sparc')
mg.combined %>% DimPlot_scCustom(label = TRUE) / wrap_plots(
  lapply(markers_mg, function(marker) {
    FeaturePlot_scCustom(
      seurat_object = mg.combined,
      features = marker,
      colors_use = viridis_light_high
    ) +
      theme(
        legend.position = 'right',
        legend.box = 'horizontal',
        legend.title = element_text(size = 4)
      )
  })
)

clean contaminating cells

Cluster 6 seems to be a contamination, as several markers from other cell types are expressed. Cluster 5 seems to be a contamination as well, as expression of several microglia markers is missing.

#remove subclusters 5 and 6, then rerun scaling, PCA, UMAP and clustering on the rest
mg.combined = subset(mg.combined, seurat_clusters %!in% c(5,6))
mg.combined = mg.combined %>%
  ScaleData() %>%
  RunPCA(npcs = 30) %>%
  RunUMAP(dims = 1:15) %>%
  FindNeighbors(dims = 1:15) %>%
  FindClusters(resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1382
Number of edges: 54150

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7769
Number of communities: 5
Elapsed time: 0 seconds

Identification of specific Microglia cell states

The following analysis of microglia subclusters is based on the findings by Jordao et al. 2019. - In their analysis all MG populations expressed Bhlhe41, Gpr34, Hexb, Olfml3, P2ry12, P2ry13, Sall1, Serpine2, Siglech, and Sparc. - But daMG clusters showed lower expression of P2ry12, Maf,Slc2a5 and higher expression of Ccl2, Cxcl10, Ly86,and Mki67,indicating a proliferative capacity of daMG alongside the production of chemokines. - From the paper by Jordao et al.:The most inflammatory disease-associated microglial subsets (daMG2, daMG3, and daMG4) strongly downregulated the core microglial genes P2ry12, Tmem119,and Selplg and up-regulated Ly86. Both P2RY12 and TMEM119 immunoreactivities were clearly down-regulated within the core of spinal cord lesions, whereas CD162 (encoded by Selplg) was only weakly reduced. By contrast, microglial MD-1 (encoded by Ly86) was strongly up-regulated in the lesions. Because of their transcriptional profile and their P2RY12loTMEM119loMD-1hi phenotype, we determined that only daMG2, daMG3, and daMG4 localized within the lesion sites.

#marker sets for the microglia states compared against below
genes_mg = list(
  genes_mg = c('Bhlhe41','Gpr34', 'Hexb', 'Olfml3', 'P2ry12','P2ry13','Sall1','Serpine2','Siglech','Sparc'),
  genes_daMG_Up = c('Ccl2', 'Cxcl10','Ly86','Mki67'),
  genes_daMG_Down = c('P2ry12','Tmem119', 'Selplg','Maf', 'Slc2a5'),
  genes_hMG = c('Gpr34','Siglech','P2ry12','Tmem119'),
  genes_repMG = c('Top2a','Mki67'),
  genes_daMG1 = c('Cd83','Ctsd','Cd81'),
  genes_daMG2 = c('Ctsb','Apoe','B2m','Cst7','Cd74','Ly86'),
  genes_daMG3 = c('Cxcl10','Tnf','Ccl4','Il1a','B2m','Ly86'),
  genes_daMG4 = c('Itm2b','Ctss','Ccl5','Naaa','Ly86'))
#one figure per marker set: cluster map in the top-left cell (A), feature plots in the
#remaining area (B); the stray trailing comma after colors_use has been removed so that no
#empty argument is passed to FeaturePlot_scCustom()
mg_states_plots = lapply(genes_mg,function(x){
                      wrap_plots(design = 'ABBB
                                           #BBB',
                        DimPlot_scCustom(seurat_object = mg.combined,
                                         group.by = 'seurat_clusters',
                                         label = TRUE,
                                         label.size = 10), 
                        FeaturePlot_scCustom(seurat_object = mg.combined,
                                             features = x,
                                             colors_use = viridis_light_high) & 
                        theme(legend.position = 'right',
                              legend.box = 'horizontal',
                              legend.title = element_text(size = 4)))})
#print every figure under a heading made from the name of its marker set
iwalk(mg_states_plots, ~ {
  cat('#### ', .y, '\n\n')
  
  print(.x)
  
  cat('\n\n')
  
})

genes_mg

genes_daMG_Up

genes_daMG_Down

genes_hMG

genes_repMG

genes_daMG1

genes_daMG2

genes_daMG3

genes_daMG4

  • All cells express most of the microglia markers
  • Clusters 1,2,4 express more daMG associated genes
  • Cluster 0 contains hMG (stronger expression of all genes downregulated in daMG and weaker expression of genes upregulated in daMG)
  • Cluster 3 contains microglia with an active cell cycle
  • Genes that mark daMG1 don’t show variation across our data set so daMG1 population is probably not captured
  • Cluster 2 is most similar to daMG2
  • Cluster 1 seems to be an intermediate between hMG and daMG2, but also has some daMG3 genes expressed
  • Cluster 4 is most similar to daMG3
  • the daMG4 signature doesn’t show as a clear subpopulation, might be not well captured here

assign mg type names

#keep the subcluster ids, then map every subcluster to its microglia state
mg.combined$clusters = mg.combined$seurat_clusters
Idents(mg.combined) = mg.combined$clusters
mg.combined = RenameIdents(
  mg.combined,
  '0' = 'hMG',
  '1' = 'hMG/daMG',
  '2' = 'daMG2',
  '3' = 'replicating MG',
  '4' = 'daMG3'
)
#store the state labels as meta data and save the object
mg.combined$mgTypes = Idents(mg.combined)
SaveH5Seurat(mg.combined, paste0(outdir,'mg.combined.h5seurat'), overwrite = TRUE)

Figures

The following blocks generate the figures as found in the paper.

Load datasets

Initialize figures

# Graphical parameters shared by the figure chunks below
cols <- c('#D4D4D4', '#CCF9E8')    # violin fill colours (used via scale_fill_discrete)
colsDots <- c('black', '#008000')  # point colours (used via scale_color_discrete)
jWidth <- 0.3                      # jitter.width for position_jitterdodge
dWidth <- 0.6                      # dodge.width for position_jitterdodge
pSize <- 0.2                       # point size
pAlpha <- 0.3                      # point transparency

# UMAP theme: no axis lines, ticks or tick labels; axis titles pushed to the
# start of each axis (hjust = 0)
theme_umap <- theme(
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  axis.text = element_blank(),
  axis.title.x = element_text(hjust = 0),
  axis.title.y = element_text(hjust = 0)
)

Overview

# Fix the sample order (WT first) and relabel the 'Monocytes' level
sc.IC$sample <- factor(sc.IC$sample, levels = c('EAE_IC_WT', 'EAE_IC_HMBP'))
sc.IC$celltypesNew <- recode(sc.IC$celltypesNew, Monocytes = 'Activated DC')

# Main panel: UMAP coloured by cell type, with a compact two-column legend
# placed inside the plot area.
#changed SingleDimPlot using trace('SingleDimPlot',edit = TRUE) with geom_point(..., stroke = 0) to get finer detail 
umapCelltypes <- DimPlot_scCustom(
  sc.IC,
  group.by = 'celltypesNew',
  label = FALSE,
  pt.size = 0.5
) &
  NoAxes() &
  theme(
    legend.position = c(0.45, 0.10),
    # legend.position = 'none',
    plot.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.6, 'lines'),
    plot.margin = margin(0, 0, 0, 0)
  ) &
  guides(color = guide_legend(ncol = 2, override.aes = list(size = 2)))

# Inset panel: one UMAP per sample, coloured by sample, without legend or title
umapSamples <- DimPlot(
  sc.IC,
  split.by = 'sample',
  group.by = 'sample',
  cols = colsDots,
  pt.size = 0.2
) &
  NoAxes() &
  theme(
    legend.position = 'none',
    plot.title = element_blank(),
    strip.text = element_text(size = 8)
  )

# Place the per-sample UMAPs into the upper right area of the main panel
umap <- umapCelltypes +
  inset_element(umapSamples, left = 0.4, bottom = 0.5, right = 1, top = 1)

ggsave(
  paste0(figdir, 'umap.svg'),
  plot = umap,
  dpi = 600,
  width = 9.22,
  height = 8.2,
  units = 'cm'
)
umap

Gene expression plots

# Shared theme for the split-violin panels: no panel background, x axis line,
# 8 pt text and black axis labels
VlnPlotTheme <- theme(
  panel.background = element_blank(),
  axis.line.x = element_line(),
  text = element_text(size = 8),
  axis.text = element_text(size = 8, color = 'black')
)

# Per-cell-type subsets of the full immune cell object
sc.t <- subset(sc.IC, celltypesNew == 'T-Cells')
sc.mac <- subset(sc.IC, celltypesNew == 'Macrophages')
sc.mg <- subset(sc.IC, celltypesNew == 'Microglia')

# Marker gene sets
# Protein/gene aliases: CD25 = Il2ra, CD69, CD71 = Tfrc,HLA-DR,Cd137 = Tnfrsf9, CD107a = Lamp1
# NOTE(review): the alias list above names Il2ra (CD25) but the vector below
# contains 'Il12a' - confirm which gene is intended.
tActMarker <- c('Il12a', 'Ifng', 'Spp1', 'Il17a', 'Cd44', 'Cd69', 'Ccl2')

mgActMarker <- c(
  'Hexb', 'Tmem119', 'P2ry12', 'Axl', 'Cybb', 'Cd68', 'H2-Aa',
  'Cd74', 'Cd86', 'Trem2', 'Tyrobp', 'Apoe', 'Itgax', 'Lgals3'
)

cytokines <- c('Ccl5', 'Ccl6', 'Cxcl10', 'Il6', 'Csf2', 'Tnf')

# Macrophage genes are plotted homeostatic first, then activation
macActGenes <- c('Ly6c2', 'Ccr2', 'Fn1', 'Cd44')
macHomGenes <- c('Fcgr1', 'Ms4a7', 'Mertk')
macGenes <- c(macHomGenes, macActGenes)

Microglia

# Microglia: split violins (WT vs hMBP) of the activation markers.
# FetchData() returns one column per gene for the requested cells; gather()
# stacks them into the long `key` (gene) / `value` (expression) columns that
# the plot maps to x and y.
dfMgWt <- FetchData(sc.mg, vars = mgActMarker, slot = 'data',
                    cells = colnames(sc.mg)[sc.mg$sample == 'EAE_IC_WT']) %>%
  gather() %>%
  mutate(genotype = 'WT')
dfMgHmbp <- FetchData(sc.mg, vars = mgActMarker, slot = 'data',
                      cells = colnames(sc.mg)[sc.mg$sample == 'EAE_IC_HMBP']) %>%
  gather() %>%
  mutate(genotype = 'hMBP')
dfMg <- rbind(dfMgWt, dfMgHmbp)

# `levels` is spelled out in full (the previous `level =` only worked through
# partial argument matching); it fixes the gene order on the x axis.
mgPlot <- ggplot(dfMg, aes(x = factor(key, levels = mgActMarker),
                           y = value,
                           fill = factor(genotype, levels = c('WT', 'hMBP')))) +
  geom_split_violin(scale = 'width') +
  # individual cells, jittered and dodged so they sit on their genotype's half
  geom_jitter(position = position_jitterdodge(
                jitter.width = jWidth,
                dodge.width = dWidth),
              size = pSize,
              alpha = pAlpha,
              stroke = 0,
              aes(color = factor(genotype, levels = c('WT', 'hMBP')))) +
  scale_fill_discrete(type = cols) +
  scale_color_discrete(type = colsDots) +
  ylim(0, 7) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = 'none',
        axis.line.y = element_line(),
        axis.title.y = element_text(hjust = 1)) +
  xlab('Microglia activation genes') +
  ylab('Log1p-normalized expression') +
  VlnPlotTheme

T-cells

# T cells: split violins (WT vs hMBP) of the activation markers.
# FetchData() returns one column per gene for the requested cells; gather()
# stacks them into the long `key` (gene) / `value` (expression) columns.
dfTWt <- FetchData(sc.t, vars = tActMarker, slot = 'data',
                   cells = colnames(sc.t)[sc.t$sample == 'EAE_IC_WT']) %>%
  gather() %>%
  mutate(genotype = 'WT')
dfTHmbp <- FetchData(sc.t, vars = tActMarker, slot = 'data',
                     cells = colnames(sc.t)[sc.t$sample == 'EAE_IC_HMBP']) %>%
  gather() %>%
  mutate(genotype = 'hMBP')
dfT <- rbind(dfTWt, dfTHmbp)

# `levels` is spelled out in full (the previous `level =` only worked through
# partial argument matching); it fixes the gene order on the x axis.
# All y axis elements are blanked in the theme below.
tPlot <- ggplot(dfT, aes(x = factor(key, levels = tActMarker),
                         y = value,
                         fill = factor(genotype, levels = c('WT', 'hMBP')))) +
  geom_split_violin(scale = 'width') +
  geom_jitter(position = position_jitterdodge(
                jitter.width = jWidth,
                dodge.width = dWidth),
              size = pSize,
              alpha = pAlpha,
              stroke = 0,
              aes(color = factor(genotype, levels = c('WT', 'hMBP')))) +
  scale_fill_discrete('Genotype', type = cols) +
  scale_color_discrete('Genotype', type = colsDots) +
  ylim(0, 7) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        axis.line.y.left = element_blank(),
        legend.position = 'none') +
  xlab('T-Cell activation genes') +
  VlnPlotTheme

Cytokines

# Cytokines: split violins (WT vs hMBP) across all cells of sc.IC.
# FetchData() returns one column per gene for the requested cells; gather()
# stacks them into the long `key` (gene) / `value` (expression) columns.
dfCkWt <- FetchData(sc.IC, vars = cytokines, slot = 'data',
                    cells = colnames(sc.IC)[sc.IC$sample == 'EAE_IC_WT']) %>%
  gather() %>%
  mutate(genotype = 'WT')
dfCkHmbp <- FetchData(sc.IC, vars = cytokines, slot = 'data',
                      cells = colnames(sc.IC)[sc.IC$sample == 'EAE_IC_HMBP']) %>%
  gather() %>%
  mutate(genotype = 'hMBP')
dfCk <- rbind(dfCkWt, dfCkHmbp)

# `levels` is spelled out in full (the previous `level =` only worked through
# partial argument matching); it fixes the gene order on the x axis.
# This is the only violin panel that shows its legend (legend.position = 'right').
ckPlot <- ggplot(dfCk, aes(x = factor(key, levels = cytokines),
                           y = value,
                           fill = factor(genotype, levels = c('WT', 'hMBP')))) +
  geom_split_violin(scale = 'width') +
  geom_jitter(position = position_jitterdodge(
                jitter.width = jWidth,
                dodge.width = dWidth),
              size = pSize,
              alpha = pAlpha,
              stroke = 0,
              aes(color = factor(genotype, levels = c('WT', 'hMBP')))) +
  scale_fill_discrete('Genotype', type = cols) +
  scale_color_discrete('Genotype', type = colsDots) +
  ylim(0, 7) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = 'right',
        axis.line.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank()) +
  xlab('Cytokines') +
  # as ln(counts / counts per cell * 10e4 + 1)
  # NOTE(review): 10e4 equals 1e5, while Seurat's default scale factor is 1e4 -
  # confirm against the NormalizeData call used for this object.
  ylab('Log1p-normalized expression') +
  VlnPlotTheme

Macrophages

# Macrophages: split violins (WT vs hMBP) of homeostatic and activation genes.
# FetchData() returns one column per gene for the requested cells; gather()
# stacks them into the long `key` (gene) / `value` (expression) columns.
dfMacWt <- FetchData(sc.mac, vars = macGenes, slot = 'data',
                     cells = colnames(sc.mac)[sc.mac$sample == 'EAE_IC_WT']) %>%
  gather() %>%
  mutate(genotype = 'WT')
dfMacHmbp <- FetchData(sc.mac, vars = macGenes, slot = 'data',
                       cells = colnames(sc.mac)[sc.mac$sample == 'EAE_IC_HMBP']) %>%
  gather() %>%
  mutate(genotype = 'hMBP')
dfMac <- rbind(dfMacWt, dfMacHmbp)

# `levels` is spelled out in full (the previous `level =` only worked through
# partial argument matching); it fixes the gene order on the x axis.
macPlot <- ggplot(dfMac, aes(x = factor(key, levels = macGenes),
                             y = value,
                             fill = factor(genotype, levels = c('WT', 'hMBP')))) +
  geom_split_violin(scale = 'width') +
  geom_jitter(position = position_jitterdodge(
                jitter.width = jWidth,
                dodge.width = dWidth),
              size = pSize,
              alpha = pAlpha,
              stroke = 0,
              aes(color = factor(genotype, levels = c('WT', 'hMBP')))) +
  scale_fill_discrete('Genotype', type = cols) +
  scale_color_discrete('Genotype', type = colsDots) +
  ylim(0, 7) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = 'none',
        axis.line.y = element_line(),
        axis.title.y = element_text(hjust = 0.85)) +
  xlab('Macrophage activation genes') +
  # as ln(counts / counts per cell * 10e4 + 1)
  # NOTE(review): 10e4 equals 1e5, while Seurat's default scale factor is 1e4 -
  # confirm against the NormalizeData call used for this object.
  ylab('Log1p-normalized expression') +
  VlnPlotTheme

Output Fig. X

# Microglia panel: write to SVG, then show it in the document
ggsave(
  paste0(figdir, 'mg.svg'),
  plot = mgPlot,
  dpi = 600,
  width = 10,
  height = 4,
  units = 'cm'
)
mgPlot

# Macrophage, T cell and cytokine panels side by side: write, then show
ggsave(
  paste0(figdir, 't_mac_ck.svg'),
  plot = macPlot + tPlot + ckPlot,
  dpi = 600,
  width = 18,
  height = 4,
  units = 'cm'
)

macPlot + tPlot + ckPlot

Extended Figures

QC

# Shared look of the QC violins: classic theme, no legend, 8 pt text
themeVln <- theme_classic() +
  theme(
    legend.position = 'none',
    text = element_text(size = 8)
  )

# Plot parameters for the QC panels.
# Note: this reassigns the global pSize (0.2 in the 'Initialize figures' chunk).
pSize <- 0.3
lwd <- 0.2
bpWidth <- 0.1
linetype <- 'dashed'

# Each panel below shows one per-cell QC metric of sc.tagged per genotype on a
# log10 y axis: violin, jittered cells, a narrow box without whiskers or
# outliers, and a dashed horizontal line at the corresponding cutoff.
# The scale title is spelled 'Genotype' in all three panels (it used to be
# 'Genotpye' in the counts and mitoRatio panels; the legend is hidden anyway).

# read counts per cell
qc.counts.violin <- sc.tagged@meta.data %>%
  ggplot(aes(x = genotype, y = nCount_RNA, fill = genotype, color = genotype)) +
    geom_violin(scale = 'width', lwd = lwd) +
    geom_jitter(size = pSize, alpha = pAlpha, stroke = 0) +
    scale_color_discrete('Genotype', type = colsDots) +
    scale_fill_discrete('Genotype', type = cols) +
    geom_boxplot(width = bpWidth, outlier.size = 0, coef = 0, outlier.color = NA, lwd = lwd) +
    scale_y_log10() +
    geom_hline(yintercept = cutoff.reads, linetype = linetype) +
    ylab('Read Counts') +
    xlab('') +
    themeVln

# detected features per cell
qc.features.violin <- sc.tagged@meta.data %>%
  ggplot(aes(x = genotype, y = nFeature_RNA, fill = genotype, color = genotype)) +
    geom_violin(scale = 'width', lwd = lwd) +
    geom_jitter(size = pSize, alpha = pAlpha, stroke = 0) +
    scale_color_discrete('Genotype', type = colsDots) +
    scale_fill_discrete('Genotype', type = cols) +
    geom_boxplot(width = bpWidth, outlier.size = 0, coef = 0, outlier.color = NA, lwd = lwd) +
    scale_y_log10() +
    geom_hline(yintercept = cutoff.features, linetype = linetype) +
    ylab('Feature Counts') +
    xlab('') +
    themeVln

# mitochondrial read ratio per cell
qc.mitoRatio.violin <- sc.tagged@meta.data %>%
  ggplot(aes(x = genotype, y = mitoRatio, fill = genotype, color = genotype)) +
    geom_violin(scale = 'width', lwd = lwd) +
    geom_jitter(size = pSize, alpha = pAlpha, stroke = 0) +
    scale_color_discrete('Genotype', type = colsDots) +
    scale_fill_discrete('Genotype', type = cols) +
    geom_boxplot(width = bpWidth, outlier.size = 0, coef = 0, outlier.color = NA, lwd = lwd) +
    scale_y_log10() +
    geom_hline(yintercept = cutoff.mitoRatio, linetype = linetype) +
    ylab('Mitochondrial Read Ratio') +
    xlab('') +
    themeVln

# stack the three panels vertically
qcplot <- qc.counts.violin / qc.features.violin / qc.mitoRatio.violin
qcplot

ggsave(paste0(figdir, 'qc_plots.svg'), qcplot, dpi = 600, width = 4, height = 12, units = 'cm')

PCA

Plotting the biplots for first 10 PCs in pairs. No obvious separation is visible based on sample identity.

#with PCAtools to do some deeper analyses 
# Scaled expression of the variable features is the PCA input. The accessor is
# used instead of reaching into the object's @assays$RNA@scale.data slot.
VarFeatNorm <- GetAssayData(sc.IC, assay = 'RNA', slot = 'scale.data')[VariableFeatures(sc.IC), ]

#shuffle colnames around to avoid overplotting in the biplot
# A single seed directly before sample() is sufficient: nothing else in this
# chunk draws random numbers.
x <- colnames(VarFeatNorm)
set.seed(42)
x <- sample(x)
VarFeatNorm <- VarFeatNorm[, x]
# metadata rows are put in the same shuffled cell order as the matrix columns
metaData <- sc.IC@meta.data[x, ]

#run PCA
p <- PCAtools::pca(VarFeatNorm, rank = 10, metadata = metaData)
colr <- c('grey', 'darkgreen')
# `cols` is not used by the pairsplot below; it re-states the fill palette
cols <- c('#D4D4D4', '#CCF9E8')

# pairwise biplots of the first 10 components, coloured by sample
pcaPlot <- pairsplot(p,
                     colby = 'sample',
                     components = getComponents(p, seq_len(10)),
                     gridlines.major = FALSE,
                     gridlines.minor = FALSE,
                     lab = NULL,
                     hline = 0,
                     vline = 0,
                     plotaxes = FALSE,
                     pointSize = 0.1,
                     colkey = colr) &
  theme(plot.title = element_text(size = 8))
pcaPlot

ggsave(paste0(figdir, 'pca_pairsplot.svg'), pcaPlot, dpi = 600, width = 27, height = 27, units = 'cm')

CellTypeMarkers

# Marker genes for the cell type plots (listed two per line)
CellTypeMarkers <- c(
  'Cd19', 'Ms4a1',
  'S100a8', 'Retnlg',
  'Siglech', 'Bst2',
  'Ccr7', 'Fscn1',
  'Itgax', 'Btla',
  'Cd3d', 'Trbc2',
  'Tmem119', 'Cx3cr1',
  'Arg1', 'Cd68'
)

# Dot plot of the markers, grouped by the cell type annotation
Idents(sc.IC) <- sc.IC$celltypesNew
celltypeDot <- DotPlot_scCustom(
  sc.IC,
  features = CellTypeMarkers,
  x_lab_rotate = TRUE,
  dot.scale = 5,
  flip_axes = TRUE,
  scale = TRUE
) +
  scale_color_viridis() &
  theme(
    # legend sits below/left of the panel; the bottom margin makes room for it
    legend.position = c(-0.12, -0.25),
    legend.direction = 'horizontal',
    legend.box = 'horizontal',
    legend.key.size = unit(0.6, 'lines'),
    text = element_text(size = 6),
    axis.text = element_text(size = 8),
    plot.margin = margin(0, 0, 40, 0)
  )
celltypeDot

ggsave(
  paste0(figdir, 'celltypes_dotplot.svg'),
  celltypeDot,
  dpi = 600,
  width = 11,
  height = 12.9,
  units = 'cm'
)

# Feature UMAPs of the same markers (reversed order), two per row
celltypeFeature <- FeaturePlot(
  sc.IC,
  rev(CellTypeMarkers),
  ncol = 2,
  pt.size = 0.1
) &
  NoAxes() &
  scale_color_viridis() &
  theme(
    legend.position = 'right',
    legend.key.size = unit(0.3, 'lines'),
    legend.text = element_text(size = 4),
    plot.title = element_text(size = 8)
  )
celltypeFeature

ggsave(
  paste0(figdir, 'celltypes_feature.svg'),
  celltypeFeature,
  dpi = 600,
  width = 7,
  height = 12.9,
  units = 'cm'
)

Subcluster analysis

Macrophages Fig X.

# Marker genes for the macrophage dot plots; one dot plot panel per gene set
genes_mac <- list(
  genes_general = c('Ly6c2', 'Ccr2', 'Top2a'),
  genes_arg1 = c('Arg1','Ecm1','C1qb'),
  genes_nos2 = c('Nos2','Gpnmb','Arg1'),
  genes_cxcl10 = c('Cxcl10','Cxcl9','Il1b'),
  genes_saa3 = c('Saa3','Plac8','Gbp2'),
  genes_ifit1 = c('Ifit1','Ifit2','Ifit3')
)

#give the order of the cluster levels 
# NOTE(review): 'Ifi2+' has to match the label stored in mac.combined$macTypes;
# the matching gene set above is Ifit1/Ifit2/Ifit3 - confirm the spelling where
# the labels are assigned.
levels_mac <- c("Arg1+","Nos2+","Cxcl10+","Saa3+","Ifi2+")

#set the cluster levels to the identity of the macrophage object
Idents(mac.combined) <- mac.combined$macTypes
levels(mac.combined) <- levels_mac
mac.combined$macTypes <- Idents(mac.combined)

#plot the umaps split by sample
mac.cellStateUmap <- DimPlot_scCustom(mac.combined, group.by = 'macTypes', label = TRUE, split.by = 'sample')

#style the umap to have tiny arrows instead of axes 
boundaries_mac <- list(x = c(-7,5), y = c(-5,5))
arr <- list(x = boundaries_mac$x[1], y = boundaries_mac$y[1], x_len = 1, y_len = 2, size = 1)

# Both panels get the same styling: shared axis limits plus two arrows that
# start at the lower-left corner (one along x, one along y).
for (panel in seq_len(2)) {
  mac.cellStateUmap[[panel]] <- mac.cellStateUmap[[panel]] +
    theme_umap +
    xlim(boundaries_mac$x) +
    ylim(boundaries_mac$y) +
    annotate("segment",
             x = arr$x, xend = arr$x + c(arr$x_len, 0),
             y = arr$y, yend = arr$y + c(0, arr$y_len),
             size = arr$size,
             arrow = arrow(type = "closed", length = unit(10, 'pt')))
}

#switch order of levels to have the plot read from top to bottom 
levels(mac.combined) <- rev(levels_mac)

# one dot plot per gene set, arranged in a single row
# NOTE(review): element_text(size = ) is given a grid unit here; ggplot2
# documents it as a plain number in pts - confirm this is intended.
mac.cellStateDot <- wrap_plots(
  lapply(genes_mac, function(x) {
    DotPlot_scCustom(mac.combined,
                     features = x,
                     x_lab_rotate = 90,
                     y_lab_rotate = 90,
                     colors_use = viridis_light_high) +
      theme(legend.position = 'right',
            legend.box = 'vertical',
            legend.key.size = unit(0.75,'line'),
            legend.text = element_text(size = unit(8,'pt')),
            legend.title = element_blank())
  }), ncol = length(genes_mac))

# relative abundance of each macrophage state per sample
mac.ptable <- pTable(mac.combined, 'macTypes', 'sample')

# stacked bar per sample, labelled with the truncated percentage of each state
mac.abundancePlot <- mac.ptable$freq %>%
  ggplot(aes(x = Freq,
             y = sample,
             fill = macTypes)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.background = element_blank()) +
  geom_text(aes(label = paste0(trunc(Freq),'%')),
            position = position_stack(vjust = 0.5),
            size = 2,
            angle = 90) +
  scale_fill_manual(values = palette36.colors(n=8) %>% unname())

# dot plots on top, UMAPs spanning two rows in the middle, abundance bar below
fig_mac_sub <- 
mac.cellStateDot/mac.cellStateUmap/mac.abundancePlot+plot_layout(design = 'A
                                                                           B
                                                                           B
                                                                           C')
# the figure is written once (the identical second ggsave call was removed)
ggsave(paste0(figdir,'fig_mac_sub.svg'), plot = fig_mac_sub, dpi = 600, width = 50, height = 25, units = 'cm')

fig_mac_sub

Microglia Fig X.

# Marker genes for the microglia dot plots; one dot plot panel per gene set
genes_MG <- list(
  genes_hMG = c('Gpr34','Siglech','P2ry12','Tmem119'),
  genes_daMG2 = c('Ctsb','Apoe','B2m','Cst7','Cd74','Ly86'),
  genes_daMG3 = c('Cxcl10','Tnf','Ccl4','Il1a','B2m','Ly86'),
  genes_repMG = c('Top2a','Mki67'))

#give the order of the cluster levels 
levels_mg <- c('hMG','hMG/daMG','daMG2','daMG3','replicating MG')

#set the cluster levels to the identity of the microglia object
Idents(mg.combined) <- mg.combined$mgTypes
levels(mg.combined) <- levels_mg
mg.combined$mgTypes <- Idents(mg.combined)


#generate umap of mg subtypes
# no group.by is given, so cells are grouped by the active identities set above
mg.cellStateUmap <- DimPlot_scCustom(mg.combined, label = TRUE, split.by = 'sample')

#style the umaps 
boundaries_mg <- list(x = c(-7.5,5), y = c(-4.5,6))
arr <- list(x = boundaries_mg$x[1], y = boundaries_mg$y[1], x_len = 1, y_len = 2, size = 1)

# Both panels get the same styling: shared axis limits plus two arrows that
# start at the lower-left corner (one along x, one along y).
for (panel in seq_len(2)) {
  mg.cellStateUmap[[panel]] <- mg.cellStateUmap[[panel]] +
    theme_umap +
    xlim(boundaries_mg$x) +
    ylim(boundaries_mg$y) +
    annotate("segment",
             x = arr$x, xend = arr$x + c(arr$x_len, 0),
             y = arr$y, yend = arr$y + c(0, arr$y_len),
             size = arr$size,
             arrow = arrow(type = "closed", length = unit(10, 'pt')))
}

#reverse cluster levels for dotplot to read from top to bottom 
levels(mg.combined) <- rev(levels_mg)

# One dot plot per gene set, arranged in a single row. Every panel already
# carries the legend theme, so the former second `+ theme(...)` on the combined
# patchwork (same settings, reaching only the last panel) was dropped.
# NOTE(review): element_text(size = ) is given a grid unit here; ggplot2
# documents it as a plain number in pts - confirm this is intended.
mg.cellStateDot <- wrap_plots(
  lapply(genes_MG, function(x) {
    DotPlot_scCustom(mg.combined,
                     features = x,
                     x_lab_rotate = 90,
                     y_lab_rotate = 90,
                     colors_use = viridis_light_high) +
      theme(legend.position = 'right',
            legend.box = 'vertical',
            legend.key.size = unit(0.75,'line'),
            legend.text = element_text(size = unit(8,'pt')),
            legend.title = element_blank())
  }), ncol = length(genes_MG))

# relative abundance of each microglia state per sample
mg.ptable <- pTable(mg.combined, 'mgTypes', 'sample')

# stacked bar per sample, labelled with the truncated percentage of each state
mg.abundancePlot <- mg.ptable$freq %>%
  ggplot(aes(x = Freq,
             y = sample,
             fill = mgTypes)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.background = element_blank()) +
  geom_text(aes(label = paste0(trunc(Freq),'%')),
            position = position_stack(vjust = 0.5),
            size = 2,
            angle = 90) +
  scale_fill_manual(values = palette36.colors(n=8) %>% unname())

# dot plots on top, UMAPs spanning two rows in the middle, abundance bar below
fig_mg_sub <- 
  mg.cellStateDot/
  mg.cellStateUmap/
  mg.abundancePlot + 
  plot_layout(design = 'A
                        B
                        B
                        C')
# the figure is written once (the identical second ggsave call was removed)
ggsave(paste0(figdir,'fig_mg_sub.svg'), plot = fig_mg_sub, dpi = 600, width = 50, height = 25, units = 'cm')

fig_mg_sub