-
Notifications
You must be signed in to change notification settings - Fork 0
/
cluster_2_tSNE.R
143 lines (91 loc) · 3.91 KB
/
cluster_2_tSNE.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# Author: Subinoy Biswas
## Load Seurat library
library(Seurat)
## Load other libraries needed for Seurat
library(dplyr)
library(Matrix)
library(ggpubr)
library(gplots)
library(RColorBrewer)
library(tibble)
# Subset the PBLs_TILs_HD_filt object down to the cells with identity 2.
# NOTE(review): PBLs_TILs_HD_filt is not created in this script; it must
# already exist in the session before this section is run.
cluster_2 <- SubsetData(
  object = PBLs_TILs_HD_filt,
  ident.use = 2,
  do.clean = TRUE,
  do.scale = TRUE
)
table(cluster_2@ident)

# Dense data.frame copy of the @data slot; its row names are treated as gene
# names by the filtering step that follows.
cluster_2_df <- as.data.frame(as.matrix(cluster_2@data))
#write.csv(cluster_2_df, file="lung_cluster_2.csv")
cluster_2_df

# NOTE(review): the slot name on this line was mangled in the retrieved copy
# of the file; restored as @meta.data -- confirm against the original script.
head(cluster_2@meta.data)

# Quick structural checks on the data.frame copy.
is.numeric(cluster_2_df)
str(cluster_2_df)
colnames(cluster_2_df)
dim(cluster_2_df)
# Keep only the rows (genes) whose summed value across all columns is > 0,
# then write the result to cluster_2_all_genes_expressed.csv.
#
# The row sum is computed on every column except the temporary "genes"
# column: rowSums() fails on a data.frame containing a character column.
# Row names are restored only after filter(), because filter() is not
# guaranteed to keep row names on every dplyr version.
filt_clust_2_df <- cluster_2_df %>%
  rownames_to_column(var = "genes") %>%
  mutate(gene_sum = rowSums(dplyr::select(., -genes))) %>%
  filter(gene_sum > 0) %>%
  column_to_rownames(var = "genes")

# Write the cluster 2 table (previously this referenced filt_clust_0_df,
# which is never defined in this script).
write.csv(filt_clust_2_df, file = "cluster_2_all_genes_expressed.csv")
dim(filt_clust_2_df)
# Normalise the subset and detect variable genes.
# NOTE(review): every `@slot` reference in this section was mangled in the
# retrieved copy of the file; @var.genes / @hvg.info are restored from the
# surrounding comments and usage -- confirm against the original script.
cluster_2 <- NormalizeData(cluster_2)
cluster_2 <- FindVariableGenes(cluster_2, num.bin = 10)
length(cluster_2@var.genes)
#[1] 206
str(cluster_2@var.genes)
slotNames(cluster_2)

# Select variable genes from cluster_2@hvg.info
selected.var.genes <- cluster_2@hvg.info[cluster_2@var.genes, ]
head(selected.var.genes)

# Order variable gene data by scaled gene dispersion
selected.var.genes <- selected.var.genes[
  order(selected.var.genes$gene.dispersion.scaled, decreasing = TRUE),
]
selected.var.genes
dim(selected.var.genes)
write.csv(selected.var.genes, file = "cluster_2_variable_genes.csv")

# Top 200 variable genes by scaled dispersion. The count now matches the
# variable name (it was 500) and the ranking column is given explicitly
# instead of relying on top_n()'s "last column" default.
top_200 <- selected.var.genes %>%
  rownames_to_column(var = "row_name") %>%
  top_n(200, gene.dispersion.scaled) %>%
  column_to_rownames(var = "row_name")
head(top_200)

#cluster_2.var.genes <- rownames(top_200)
# Take as many leading rows of @hvg.info as there are variable genes, rather
# than the hard-coded 206 recorded above.
# NOTE(review): this picks rows by position in @hvg.info, which may not be the
# same gene set as cluster_2@var.genes -- confirm which one is intended.
cluster_2.var.genes <- rownames(cluster_2@hvg.info)[
  seq_along(cluster_2@var.genes)
]
cluster_2.var.genes
# Mitochondrial genes are picked out by the "MT-" prefix of their row name.
cluster_2_mito.genes <- grep(
  pattern = "^MT-",
  x = rownames(x = cluster_2@data),
  value = TRUE
)
length(cluster_2_mito.genes)

# Per-column ratio of the mitochondrial-gene sum to the total sum (a fraction,
# not multiplied by 100).
# - Uses cluster_2_mito.genes defined above; the bare `mito.genes` used before
#   is never defined in this script.
# - drop = FALSE keeps a one-row matrix when only one gene matches, so
#   colSums() still works.
# NOTE(review): the slot name was mangled in the retrieved copy of the file;
# restored as @raw.data -- confirm against the original script.
cluster_2_percent.mito <-
  Matrix::colSums(cluster_2@raw.data[cluster_2_mito.genes, , drop = FALSE]) /
  Matrix::colSums(cluster_2@raw.data)
str(cluster_2_percent.mito)
summary(cluster_2_percent.mito)

# check out the meta data
head(cluster_2@meta.data)
dim(cluster_2@meta.data)

# add some more meta data: store the ratio computed above under the column
# name that ScaleData() later regresses on (previously this passed the
# undefined `percent.mito`).
cluster_2 <- AddMetaData(
  object = cluster_2,
  metadata = cluster_2_percent.mito,
  col.name = "cluster_2_percent.mito"
)
# Scale the selected genes while regressing out nGene and the mitochondrial
# ratio stored in the "cluster_2_percent.mito" meta data column.
cluster_2 <- ScaleData(
  cluster_2,
  genes.use = cluster_2.var.genes,
  vars.to.regress = c("nGene", "cluster_2_percent.mito")
)

# Run PCA.
# NOTE(review): the second argument was mangled in the retrieved copy of the
# file; restored as `pc.genes = cluster_2@var.genes` -- confirm. Also check
# that these genes are the ones scaled above via cluster_2.var.genes.
cluster_2 <- RunPCA(cluster_2, pc.genes = cluster_2@var.genes)

# Diagnostic plots for choosing how many PCs to carry forward.
PCElbowPlot(cluster_2)
PCHeatmap(cluster_2, pc.use = 1:10, cells.use = 500, do.balanced = TRUE)
PCAPlot(cluster_2, dim.1 = 1, dim.2 = 2)
# Run tSNE on the first 15 principal components
cluster_2 <- RunTSNE(
  cluster_2,
  reduction.use = "pca",
  dims.use = 1:15,
  do.fast = TRUE
)
TSNEPlot(cluster_2)

# Clustering on the same 15 PCs at several resolutions. The argument is
# spelled out as `resolution` instead of relying on partial matching of `res`.
cluster_2 <- FindClusters(
  cluster_2,
  reduction.type = "pca",
  dims.use = 1:15,
  resolution = c(0.3, 0.5, 0.7, 1)
)
# cluster_2 <- FindClusters(cluster_2,reduction.type="pca",dims.use=1:15,
# res=c(0.3,0.5, 0.7,1), save.SNN = T, force.recalc = T)

# Labelled tSNE plots grouped by three of the requested resolutions
# (resolution 0.3 is computed above but not plotted here).
TSNEPlot(cluster_2, do.label = TRUE, group.by = "res.0.5")
TSNEPlot(cluster_2, do.label = TRUE, group.by = "res.0.7")
TSNEPlot(cluster_2, do.label = TRUE, group.by = "res.1")
# Positive markers for every sub-cluster of cluster 2.
cluster_2.markers <- FindAllMarkers(
  object = cluster_2,
  only.pos = TRUE,
  min.pct = 0.25,
  thresh.use = 0.25
)
write.csv(cluster_2.markers, file = "cluster_2.markers_lung_genes_April_8.csv")

# Top 20 markers per cluster ranked by avg_logFC; ungroup() so the grouping
# does not leak into later operations on the result.
cluster_2.markers_20 <- cluster_2.markers %>%
  group_by(cluster) %>%
  top_n(20, avg_logFC) %>%
  ungroup()
write.csv(
  cluster_2.markers_20,
  file = "cluster_2.markers_20_lung_genes_April_8.csv"
)

# Top 10 markers per cluster ranked by avg_logFC, used for the heatmap.
cluster_2.markers_top10 <- cluster_2.markers %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC) %>%
  ungroup()

# ****************
# setting slim.col.label to TRUE will print just the cluster IDs instead of
# every cell name
# *****************
DoHeatmap(
  object = cluster_2,
  genes.use = cluster_2.markers_top10$gene,
  slim.col.label = TRUE,
  remove.key = TRUE
)

# Takes all the unique cell type specific genes. De-duplicate the `gene`
# column: unique() on the whole marker table only removes duplicated rows, so
# a gene reported for several clusters would still appear more than once.
cluster_2_cell_type_genes <- unique(cluster_2.markers$gene)