-
Notifications
You must be signed in to change notification settings - Fork 4
/
seurat-run-pca.R
executable file
·205 lines (185 loc) · 6.09 KB
/
seurat-run-pca.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/usr/bin/env Rscript
# Load optparse we need to check inputs
suppressPackageStartupMessages(require(optparse))
# Load common functions
suppressPackageStartupMessages(require(workflowscriptscommon))
# Command-line option definitions.
# Each long flag is read further down as `opt$<name>` with hyphens replaced by
# underscores (e.g. --input-object-file is read as opt$input_object_file).
option_list <- list(
  # ---- Input / output object and formats ----
  make_option(
    c("-i", "--input-object-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name of the input object, in the format given by --input-format."
  ),
  make_option(
    c("--input-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
  ),
  make_option(
    c("--output-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
  ),
  # ---- PCA inputs ----
  # The gene list is passed to RunPCA as `features`; the cell list replaces it
  # when --reverse-pca is set.
  make_option(
    c("-e", "--pc-genes"),
    action = "store",
    default = NULL,
    type = "character",
    help = "File with gene names (one per line) to use as features for the PCA. If omitted, RunPCA chooses the features."
  ),
  make_option(
    c("-c", "--pc-cells"),
    action = "store",
    default = NULL,
    type = "character",
    help = "File with cell names (one per line), used in place of the gene list when --reverse-pca is set."
  ),
  make_option(
    c("-p", "--pcs-compute"),
    action = "store",
    default = 50,
    type = "integer",
    help = "Total Number of PCs to compute and store (50 by default)."
  ),
  make_option(
    c("-r", "--reverse-pca"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "By default computes the PCA on the cell x gene matrix. Setting to true will compute it on gene x cell matrix."
  ),
  # ---- Output files ----
  make_option(
    c("-o", "--output-object-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which to store the resulting object, in the format given by --output-format."
  ),
  make_option(
    c("-b", "--output-embeddings-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which to store a csv-format embeddings table with PCs by cell."
  ),
  make_option(
    c("-l", "--output-loadings-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which to store a csv-format loadings table with PCs by gene."
  ),
  make_option(
    c("-s", "--output-stdev-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which to store PC stdev values (one per line)."
  ),
  # ---- RunPCA tuning ----
  # NOTE: this is a store_false flag defaulting to TRUE, so the parsed value is
  # TRUE unless the flag is given; it is forwarded as RunPCA's weight.by.var.
  make_option(
    c("--no-weight-by-var"),
    action = "store_false",
    default = TRUE,
    metavar = "Do not weight by variance of each PC",
    type = "logical",
    help = "Do not weight the cell embeddings by the variance of each PC (weights the gene loadings if rev.pca is TRUE)"
  ),
  make_option(
    c("--ndims-print"),
    action = "store",
    default = NULL,
    metavar = "Num of dims. print",
    type = "integer",
    help = "PCs to print genes for"
  ),
  make_option(
    c("--nfeatures-print"),
    action = "store",
    default = NULL,
    metavar = "N features print",
    type = "integer",
    help = "Number of genes to print for each PC"
  ),
  make_option(
    c("--reduction-key"),
    action = "store",
    default = "PC",
    metavar = "Reduction key",
    type = "character",
    help = "dimensional reduction key, specifies the string before the number for the dimension names. PC by default"
  ),
  make_option(
    c("--reduction-name"),
    action = "store",
    default = "pca",
    metavar = "Reduction name",
    type = "character",
    help = "dimensional reduction name, pca by default"
  )
)
# Parse the command line; the listed options must be supplied by the caller.
required_opts <- c(
  'input_object_file',
  'output_object_file',
  'output_embeddings_file',
  'output_loadings_file',
  'output_stdev_file'
)
opt <- wsc_parse_args(option_list, mandatory = required_opts)

# Check parameter values: the input object has to exist before anything else.
if (!file.exists(opt$input_object_file)) {
  stop(paste('File', opt$input_object_file, 'does not exist'))
}

# Read the lines of an optional list file.
#   path:   file path, or NULL when the option was not given
#   prefix: leading text of the error raised when the file is missing
# Returns NULL when `path` is NULL, otherwise the file's lines.
read_optional_list <- function(path, prefix) {
  if (is.null(path)) {
    return(NULL)
  }
  if (file.exists(path)) {
    return(readLines(path))
  }
  # call. = FALSE keeps the message free of this helper's call signature.
  stop(paste(prefix, path, 'does not exist'), call. = FALSE)
}

pc_genes <- read_optional_list(opt$pc_genes, 'Supplied genes file')
pc_cells <- read_optional_list(opt$pc_cells, 'Supplied cells file')
# Now we're happy with the arguments, load Seurat and do the work.
# library() is used rather than require() so that a missing package stops the
# script here instead of surfacing later as an unrelated error.
suppressPackageStartupMessages(library(Seurat))

# Format-specific packages are loaded independently of each other: a run that
# mixes loom and singlecellexperiment (one as input, the other as output)
# needs both, which an if / else-if chain would not provide.
requested_formats <- c(opt$input_format, opt$output_format)
if ("loom" %in% requested_formats) {
  suppressPackageStartupMessages(library(SeuratDisk))
}
if ("singlecellexperiment" %in% requested_formats) {
  suppressPackageStartupMessages(library(scater))
}
# Load the input object in the format requested on the command line
seurat_object <- read_seurat4_object(
  input_path = opt$input_object_file,
  format = opt$input_format
)

# Names handed to RunPCA as `features`: the cell list in reverse mode,
# otherwise the gene list. Either may be NULL when no file was supplied.
pca_features <- if (opt$reverse_pca) pc_cells else pc_genes

# Run the PCA; every tuning argument is forwarded from the parsed options.
# opt$no_weight_by_var is TRUE unless --no-weight-by-var was given.
pca_seurat_object <- RunPCA(
  seurat_object,
  features = pca_features,
  npcs = opt$pcs_compute,
  rev.pca = opt$reverse_pca,
  weight.by.var = opt$no_weight_by_var,
  ndims.print = opt$ndims_print,
  nfeatures.print = opt$nfeatures_print,
  reduction.key = opt$reduction_key,
  reduction.name = opt$reduction_name,
  verbose = FALSE
)
# Output to text-format components
# Review question: Do we need to revert this for the reverse PCA case?
# The reduction is fetched under the name it was stored with
# (--reduction-name), so a non-default name still finds it.
pca_reduction <- pca_seurat_object[[opt$reduction_name]]
write.csv(pca_reduction@cell.embeddings, file = opt$output_embeddings_file)
write.csv(pca_reduction@feature.loadings, file = opt$output_loadings_file)
# One standard deviation per line
writeLines(as.character(pca_reduction@stdev), con = opt$output_stdev_file)

# Print a short summary of the object that is about to be written, followed by
# the first rows of its cell metadata.
cat(
  c(
    '# Object summary',
    capture.output(print(pca_seurat_object)),
    '\n# Metadata sample',
    capture.output(head(pca_seurat_object@meta.data))
  ),
  sep = '\n'
)

# Output the result object in the requested format
write_seurat4_object(
  seurat_object = pca_seurat_object,
  output_path = opt$output_object_file,
  format = opt$output_format
)