-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharchR.Rmd
189 lines (154 loc) · 6.38 KB
/
archR.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
---
title: "archR"
author: "Irzam Sarfraz"
date: "`r Sys.Date()`"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE for the chunks in this document.
knitr::opts_chunk$set(echo = TRUE)
```
## Workflow
For healthy:
- Process scRNA-seq data (normalization + LSI + clustering + cell-type identification using markers + UMAP)
```{r}
library(Seurat)
library(SingleCellExperiment)
library(singleCellTK)
# Healthy PBMC scRNA-seq: normalise with Seurat, compute an iterative-LSI
# embedding with optimizeLSI(), draw a UMAP with singleCellTK, and store the
# LSI embedding on the Seurat object as the "pca" reduction.
# NOTE(review): optimizeLSI() is not defined or attached in the visible part of
# this file; it has to be available in the session before this chunk runs.
scRNA_healthy_1 <- readRDS("/projectnb/paxlab/isarfraz/Data/GSM4138876_scRNA_PBMC_D4T1.rds")

# Build the Seurat object once and keep it. Previously it was re-created from
# the raw counts at the end of the chunk, which threw away the normalised data
# and the variable features that the projection chunk needs.
seurat_healthy_rna <- CreateSeuratObject(counts = scRNA_healthy_1, assay = "RNA")
seurat_healthy_rna <- NormalizeData(seurat_healthy_rna)
seurat_healthy_rna <- FindVariableFeatures(seurat_healthy_rna)
seurat_healthy_rna <- ScaleData(seurat_healthy_rna)
# seurat_healthy_rna <- RunSLSI(seurat_healthy_rna) # this won't work: designed specifically for ATAC data

# Iterative LSI using the fixed optimizeLSI code; the input matrix is the raw
# counts.
sce <- SingleCellExperiment(list(counts = scRNA_healthy_1))
healthy_rna_lsi <- optimizeLSI(inSCE = sce, mat = scRNA_healthy_1)
embeddings <- healthy_rna_lsi$iter3$lsiObj$matSVD
# NOTE(review): confirm that matNorm is the matrix that should be used here.
healthy_rna_lsi <- healthy_rna_lsi$iter3$matNorm
assay(sce, "lsi") <- healthy_rna_lsi
sce <- singleCellTK::runUMAP(
  inSCE = sce,
  useAssay = "lsi",
  useReducedDim = NULL,
  reducedDimName = "UMAP"
)
singleCellTK::plotUMAP(sce, reducedDimName = "UMAP")

# TODO: clustering.
# Attach the LSI embedding to the existing Seurat object. The assay is named
# explicitly so CreateDimReducObject() does not have to fall back to a default.
pca <- CreateDimReducObject(embeddings = embeddings, key = "PC_", assay = "RNA")
seurat_healthy_rna[["pca"]] <- pca
```
- Process scATAC-seq data (gene activity scores first + LSI + clustering + integration with RNA for cell types + UMAP)
- Do we need a combined UMAP for RNA and ATAC?
For MPAL:
- Do all the same steps as above,
- but also create a projection onto the healthy UMAP. This is not available in ArchR (https://github.com/GreenleafLab/ArchR/discussions/1147). Can we use Seurat here? Yes (https://satijalab.org/seurat/articles/integration_mapping.html), but the issue comes with LSI.
```{r}
# MPAL scRNA-seq: same processing as the healthy sample. Normalise with
# Seurat, compute an iterative-LSI embedding with optimizeLSI(), draw a UMAP
# with singleCellTK, and store the LSI embedding on the Seurat object as the
# "pca" reduction.
# NOTE(review): optimizeLSI() is not defined or attached in the visible part of
# this file; it has to be available in the session before this chunk runs.
scRNA_mpal_1 <- readRDS("/projectnb/paxlab/isarfraz/Data/GSM4138878_scRNA_MPAL1_T1.rds")

# Normalise and select variable features, mirroring the healthy chunk. The
# anchor-finding functions in the projection chunk work on normalised data and
# on each object's variable features, which a raw-count object does not have.
seurat_mpal_rna <- CreateSeuratObject(counts = scRNA_mpal_1, assay = "RNA")
seurat_mpal_rna <- NormalizeData(seurat_mpal_rna)
seurat_mpal_rna <- FindVariableFeatures(seurat_mpal_rna)
seurat_mpal_rna <- ScaleData(seurat_mpal_rna)

# Iterative LSI on the raw counts.
sce <- SingleCellExperiment(list(counts = scRNA_mpal_1))
mpal_rna_lsi <- optimizeLSI(inSCE = sce, mat = scRNA_mpal_1)
embeddings <- mpal_rna_lsi$iter3$lsiObj$matSVD
# NOTE(review): confirm that matNorm is the matrix that should be used here.
mpal_rna_lsi <- mpal_rna_lsi$iter3$matNorm
assay(sce, "lsi") <- mpal_rna_lsi
sce <- singleCellTK::runUMAP(
  inSCE = sce,
  useAssay = "lsi",
  useReducedDim = NULL,
  reducedDimName = "UMAP"
)
singleCellTK::plotUMAP(sce, reducedDimName = "UMAP")

# Attach the LSI embedding to the Seurat object. The assay is named explicitly
# so CreateDimReducObject() does not have to fall back to a default.
pca <- CreateDimReducObject(embeddings = embeddings, key = "PC_", assay = "RNA")
seurat_mpal_rna[["pca"]] <- pca
```
projection
```{r}
# Integrate the healthy and MPAL scRNA-seq Seurat objects, then map the MPAL
# cells onto the UMAP of the integrated reference. The `pancreas.*` names are
# kept as they were in this chunk so anything relying on them keeps working.
library(ggplot2)
library(cowplot)
library(patchwork)

# Label each dataset before integration so the `tech` column travels with the
# cells. The original chunk referred to `seurat_healthy_rna_1`, which is never
# created; the healthy object is `seurat_healthy_rna`. It also assigned the
# labels by position after integration, which depends on the cell order of the
# integrated object.
healthy_ref <- seurat_healthy_rna
healthy_ref$tech <- "healthy"
mpal_ref <- seurat_mpal_rna
mpal_ref$tech <- "mpal"

pancreas.anchors <- FindIntegrationAnchors(
  object.list = list(healthy_ref, mpal_ref),
  dims = 1:30
)
pancreas.integrated <- IntegrateData(anchorset = pancreas.anchors, dims = 1:30)

# Switch to the integrated assay. The variable features of this assay are
# automatically set during IntegrateData.
DefaultAssay(pancreas.integrated) <- "integrated"

# Run the standard workflow for visualization. UMAP is computed once with
# return.model = TRUE so the same model can be reused by MapQuery() below.
pancreas.integrated <- ScaleData(pancreas.integrated, verbose = FALSE)
pancreas.integrated <- RunPCA(pancreas.integrated, npcs = 30, verbose = FALSE)
pancreas.integrated <- RunUMAP(
  pancreas.integrated,
  reduction = "pca",
  dims = 1:30,
  return.model = TRUE,
  verbose = FALSE
)
p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "tech")
p1

# Map the MPAL cells (query) onto the integrated reference.
# NOTE(review): the query cells are also part of the integrated reference;
# confirm that this is the intended design for the projection.
pancreas.query <- seurat_mpal_rna
pancreas.anchors <- FindTransferAnchors(
  reference = pancreas.integrated,
  query = pancreas.query,
  dims = 1:30,
  reference.reduction = "pca"
)
predictions <- TransferData(
  anchorset = pancreas.anchors,
  refdata = pancreas.integrated$tech,
  dims = 1:30
)
pancreas.query <- AddMetaData(pancreas.query, metadata = predictions)
pancreas.query <- MapQuery(
  anchorset = pancreas.anchors,
  reference = pancreas.integrated,
  query = pancreas.query,
  refdata = list(tech = "tech"),
  reference.reduction = "pca",
  reduction.model = "umap"
)

# Side-by-side view: reference labels vs. labels transferred to the query.
p1 <- DimPlot(
  pancreas.integrated,
  reduction = "umap",
  group.by = "tech",
  label = TRUE,
  label.size = 3,
  repel = TRUE
) + NoLegend() + ggtitle("Reference annotations")
p2 <- DimPlot(
  pancreas.query,
  reduction = "ref.umap",
  group.by = "predicted.tech",
  label = TRUE,
  label.size = 3,
  repel = TRUE
) + NoLegend() + ggtitle("Query transferred labels")
p1 + p2
```
For ameya archR:
healthy
```{r}
```
mpal
```{r}
# ArchR scATAC-seq pipeline: Arrow files -> doublet scores -> project ->
# iterative LSI -> clusters -> UMAP -> peak set -> peak matrix ->
# peak-to-gene links.
# NOTE(review): this chunk sits under the "mpal" heading but reads the
# "healthy" fragments directory; confirm which sample set is intended.
library(ArchR)
# createArrowFiles() needs a genome annotation to be set first.
# NOTE(review): hg19 is assumed here (the project name "HemeTutorial" suggests
# this chunk follows the ArchR heme tutorial); confirm the fragments were
# aligned to hg19.
addArchRGenome("hg19")

fragment_dir <- "/projectnb/paxlab/isarfraz/Greenleaf_ATAC_Fragments/healthy/"
# The Arrow files and the relative "HemeTutorial" project directory are created
# relative to the working directory, so it is kept pointing at the fragments
# directory as before.
setwd(fragment_dir)

# Only pick up fragment files. Listing the whole directory would also return
# index files and, on a rerun, the Arrow files and output folders that ArchR
# itself writes here.
# NOTE(review): assumes fragment files end in ".tsv.gz"; adjust the pattern if
# they are named differently.
inputFiles <- list.files(
  fragment_dir,
  pattern = "\\.tsv\\.gz$",
  full.names = TRUE
)
stopifnot(length(inputFiles) > 0)

ArrowFiles <- createArrowFiles(
  inputFiles = inputFiles,
  sampleNames = basename(inputFiles), # same sample names as before (file names)
  filterTSS = 4, # Don't set this too high because you can always increase later
  filterFrags = 1000,
  addTileMat = TRUE,
  addGeneScoreMat = TRUE
)

# NOTE(review): doubScores is not used again in this chunk and no doublet
# filtering step follows; confirm whether one is intended.
doubScores <- addDoubletScores(
  input = ArrowFiles,
  k = 10, # Refers to how many cells near a "pseudo-doublet" to count.
  knnMethod = "UMAP", # Refers to the embedding to use for nearest neighbor search with doublet projection.
  LSIMethod = 1
)

projHeme1 <- ArchRProject(
  ArrowFiles = ArrowFiles,
  outputDirectory = "HemeTutorial",
  copyArrows = TRUE # This is recommended so that if you modify the Arrow files you have an original copy for later usage.
)

projHeme1 <- addIterativeLSI(
  ArchRProj = projHeme1,
  useMatrix = "TileMatrix",
  name = "IterativeLSI",
  iterations = 2,
  clusterParams = list( # See Seurat::FindClusters
    resolution = c(0.2),
    sampleCells = 10000,
    n.start = 10
  ),
  varFeatures = 25000,
  dimsToUse = 1:30
)

projHeme1 <- addClusters(
  input = projHeme1,
  reducedDims = "IterativeLSI",
  method = "Seurat",
  name = "Clusters",
  resolution = 0.8
)

projHeme1 <- addUMAP(
  ArchRProj = projHeme1,
  reducedDims = "IterativeLSI",
  name = "UMAP",
  nNeighbors = 30,
  minDist = 0.5,
  metric = "cosine"
)

projHeme1 <- addReproduciblePeakSet(
  ArchRProj = projHeme1,
  groupBy = "Clusters",
  peakMethod = "Tiles",
  method = "p"
)

# addPeak2GeneLinks() reads the PeakMatrix, which has to be built from the
# peak set first.
projHeme1 <- addPeakMatrix(ArchRProj = projHeme1)

# NOTE(review): addPeak2GeneLinks() uses "GeneIntegrationMatrix" as its gene
# matrix by default, and this chunk never creates one (no scRNA integration
# step). Add addGeneIntegrationMatrix() beforehand or pass a different
# `useMatrix`; confirm against the ArchR documentation.
projHeme1 <- addPeak2GeneLinks(
  ArchRProj = projHeme1,
  reducedDims = "IterativeLSI"
)
```