-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathS01_QCFilt.R
executable file
·87 lines (63 loc) · 3.38 KB
/
S01_QCFilt.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
###### QC filtering and batch effect correction of immune cells ##
## author: Antonietta Salerno
## date: 22/12/2022
library("Seurat") # Run with v4
library("ggplot2")
# Every relative path used below (TEPA_plots/..., TEPA_results/...) is resolved
# against this machine-specific project directory.
setwd("~/Library/CloudStorage/OneDrive-UNSW/TEPA_project")
# `immune` is never assigned in this script: it must already exist in the
# session, or be reloaded from the previous step's output by uncommenting the
# line below.
#immune <- LoadSeuratRds("TEPA_results/S00_immune.Rds")
#### 1 - QC and filtering of immune cells ####
# Normalize with the Seurat defaults.
immune <- NormalizeData(immune)
# Per-cell percentage of counts coming from features whose name matches each
# pattern, stored as meta.data columns "percent.mt" and "percent.ribo"
# (col.name makes PercentageFeatureSet return the updated object).
# NOTE(review): "." in these patterns is a regex wildcard, not a literal
# character, so "^Rp." matches every feature starting with "Rp", not only
# Rps*/Rpl* -- confirm this is intended.
immune <- PercentageFeatureSet(immune, pattern = "^mt.", col.name = "percent.mt")
immune <- PercentageFeatureSet(immune, pattern = "^Rp.", col.name = "percent.ribo")
# Pre-filter QC scatter plots: total counts per cell (nCount_RNA) against
# percent.mt, percent.ribo and nFeature_RNA, laid out side by side in one PNG.
png("TEPA_plots/S01_preFilterQC_scatter.png", width = 6000, height = 2000, res = 300)
plot1 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "percent.mt", pt.size = 0.0005)
plot2 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "percent.ribo", pt.size = 0.0005)
plot3 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", pt.size = 0.0005)
# Explicit print(): plot objects are only rendered by top-level auto-printing,
# which does not happen when the script is run through source(), leaving an
# empty PNG.
print(plot1 + plot2 + plot3)
dev.off()
# Visualize the distribution of mitochondrial and ribosomal gene expression detected per cell
# The per-cell QC columns (orig.ident, percent.mt, percent.ribo) live in the
# Seurat object's meta.data, which must be handed to ggplot() as `data`;
# calling ggplot(aes(...)) alone passes the mapping in the data slot and fails.
# print() makes the plots render even when the script is run through source().
png("TEPA_plots/S01_preFilterQC_mito.png", height = 3000, width = 4200, res = 300)
print(
  ggplot(immune@meta.data, aes(color = orig.ident, x = percent.mt, fill = orig.ident)) +
    geom_density(alpha = 0.2) +
    scale_x_log10() +
    theme_classic() +
    geom_vline(xintercept = 25) # same value as the percent.mt cutoff in the filter below
)
dev.off()
png("TEPA_plots/S01_preFilterQC_ribo.png", height = 3000, width = 4200, res = 300)
print(
  ggplot(immune@meta.data, aes(color = orig.ident, x = percent.ribo, fill = orig.ident)) +
    geom_density(alpha = 0.2) +
    scale_x_log10() +
    theme_classic() +
    # NOTE(review): this guide line sits at 30 while the filter below keeps
    # percent.ribo < 20 -- confirm which value is intended and align them.
    geom_vline(xintercept = 30)
)
dev.off()
# Keep only the cells passing every QC cutoff, evaluated on the per-cell
# meta.data columns.
# NOTE(review): assuming integer UMI counts, nFeature_RNA can never exceed
# nCount_RNA, so nCount_RNA < 3000 makes the nFeature_RNA < 5500 bound
# redundant -- confirm 3000 is the intended count ceiling.
qc.pass <- immune@meta.data$nFeature_RNA > 200 &
  immune@meta.data$nFeature_RNA < 5500 &
  immune@meta.data$percent.mt < 25 &
  immune@meta.data$percent.ribo < 20 &
  immune@meta.data$nCount_RNA < 3000
immune <- subset(immune, cells = rownames(immune@meta.data)[which(qc.pass)]) # 12127 cells left
# Post-filter QC figures. Every plot is wrapped in print() because plot
# objects are only rendered by top-level auto-printing, which does not happen
# when the script is run through source() (the PNG would be left empty).

# Violin plots of detected genes, total counts and Mycn expression per cell.
png("TEPA_plots/S01_postFilterQC_violin.png", height = 3000, width = 4200, res = 300)
print(VlnPlot(immune, features = c("nFeature_RNA", "nCount_RNA", "Mycn"), ncol = 3, pt.size = 0.000005))
dev.off()

# Mycn against Ptprc (the gene encoding Cd45, per the output file name).
png("TEPA_plots/S01_postFilterQC_Mycn_Cd45.png", height = 3000, width = 4200, res = 300)
print(FeatureScatter(immune, feature1 = "Mycn", feature2 = "Ptprc", pt.size = 0.0005))
dev.off()

# Same three scatter plots as before filtering, now on the retained cells.
png("TEPA_plots/S01_postFilterQC_scatter.png", height = 3000, width = 4200, res = 300)
plot1 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "percent.mt", pt.size = 0.0005)
plot2 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "percent.ribo", pt.size = 0.0005)
plot3 <- FeatureScatter(immune, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", pt.size = 0.0005)
# We can spot two separate data clouds because of the bimodal distribution of the data: tumour vs immune cells
print(plot1 + plot2 + plot3)
dev.off()
#SaveSeuratRds(immune,"TEPA_results/S01_immuneFilt.Rds")
#### 2 - Batch effect correction ####
# NOTE(review): the reload path below has no "TEPA_results/" prefix, unlike the
# save call at the end of section 1 -- confirm the path before uncommenting.
#immune <- LoadSeuratRds("S01_immuneFilt.Rds")

# One Seurat object per level of the "condition" meta.data column.
samples.list <- SplitObject(immune, split.by = "condition")

# Each dataset (Treatment vs Control) is normalized independently, then its
# 2000 most variable features are selected with the "vst" method.
samples.list <- lapply(
  X = samples.list,
  FUN = function(x) {
    FindVariableFeatures(NormalizeData(x), selection.method = "vst", nfeatures = 2000)
  }
)

# Features shared across the datasets drive the anchor search; the anchors are
# then used to build the integrated object.
features <- SelectIntegrationFeatures(object.list = samples.list)
immune.anchors <- FindIntegrationAnchors(
  object.list = samples.list,
  anchor.features = features
)
immune.combined <- IntegrateData(anchorset = immune.anchors)
#SaveSeuratRds(immune.combined, "TEPA_results/S01_immuneInt.Rds")