-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnextflow_schema.json
459 lines (459 loc) · 27 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "[email protected]:bwh-bioinformatics-hub/nextflow-RNAseq.git/main/nextflow_schema.json",
"title": "pipeline parameters",
"description": "Nextflow pipeline for RNAseq analysis",
"type": "object",
"definitions": {
"input_output_options": {
"title": "Input/output options",
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and write results.",
"required": [
"input",
"input_type",
"outdir"
],
"properties": {
"input": {
"type": "string",
"fa_icon": "fas fa-copy",
"description": "Path to directory containing FASTQ/BAM files or a CSV file containing the absolute path to FASTQ/BAM files.",
"help_text": "There are two ways to supply input data to nf-core/circrna:\n\n1. Provide the path to the directory containing FASTQ or BAM files, with the appropriate wildcard glob pattern *e.g:*\n```bash\n--input \"/data/*_r{1,2}.fastq.gz\"\n```\n2. Provide a CSV file containing the absolute paths to FASTQ or BAM files *e.g:*\n\n| Sample_ID | Read1 | Read2 | Bam |\n|------------- |------------------------------- |------------------------------- |----- |\n| control_rep1 | /data/control_rep1_r1.fastq.gz | /data/control_rep1_r2.fastq.gz | NA |\n| control_rep2 | /data/control_rep2_r1.fastq.gz | /data/control_rep2_r2.fastq.gz | NA |\n| control_rep3 | /data/control_rep3_r1.fastq.gz | /data/control_rep3_r2.fastq.gz | NA |\n| lung_rep1 | /data/lung_rep1_r1.fastq.gz | /data/lung_rep1_r2.fastq.gz | NA |\n| lung_rep2 | /data/lung_rep2_r1.fastq.gz | /data/lung_rep2_r2.fastq.gz | NA |\n| lung_rep3 | /data/lung_rep3_r1.fastq.gz | /data/lung_rep3_r2.fastq.gz | NA |\n| melanoma_rep1 | /data/melanoma_rep1_r1.fastq.gz | /data/melanoma_rep1_r2.fastq.gz | NA |\n| melanoma_rep2 | /data/melanoma_rep2_r1.fastq.gz | /data/melanoma_rep2_r2.fastq.gz | NA |\n| melanoma_rep3 | /data/melanoma_rep3_r1.fastq.gz | /data/melanoma_rep3_r2.fastq.gz | NA |\n\n When supplying BAM files to the CSV file, set Read1 & Read2 columns to 'NA'."
},
"input_type": {
"type": "string",
"fa_icon": "fas fa-dna",
"description": "Input data type, 'fastq' or 'bam'.",
"enum": [
"fastq",
"bam"
]
},
"outdir": {
"type": "string",
"description": "The output directory where the results will be saved.",
"default": "./results",
"fa_icon": "fas fa-folder-open"
},
"phenotype": {
"type": "string",
"description": "Phenotype CSV file specifying the experimental design for DESeq2.",
"fa_icon": "fas fa-journal-whills",
"help_text": "The response variable containing the phenotype of primary interest in the experiment must have the column name condition. An example phenotype file is given below:\n\n| Sample_ID | condition | replicates |\n|---------|-----------|------------|\n| control_rep1 | control | 1 |\n| control_rep2 | control | 2 |\n| control_rep3 | control | 3 |\n| lung_rep1 | lung | 1 |\n| lung_rep2 | lung | 2 |\n| lung_rep3 | lung | 3 |\n| melanoma_rep1 | melanoma | 1 |\n| melanoma_rep2 | melanoma | 2 |\n| melanoma_rep3 | melanoma | 3 |\n\nThis will produce the DESeq2 design formula '~ replicates + condition' i.e all columns not named condition will be controlled for in the linear mixed model.",
"pattern": "\\.csv$"
}
}
},
"reference_genome_files": {
"title": "Reference genome files",
"type": "object",
"fa_icon": "fas fa-dna",
"description": "Reference genome files supplied to the workflow.",
"help_text": "The workflow has been configured to use iGenomes and is thus the recommended route for nf-core/circrna.\n\nAlternatively, the user must provide reference FASTA, GTF files at a minimum, miRbase mature.fa file for miRNA prediction and a species ID for differential expression analysis. ",
"properties": {
"genome": {
"type": "string",
"fa_icon": "fas fa-clone",
"description": "iGenome version to use.",
"help_text": "Required if --fasta, --gtf set to null."
},
"fasta": {
"type": "string",
"format": "file-path",
"mimetype": "text/plain",
"pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
"description": "Path to FASTA genome file.",
"help_text": "Must be provided if --genome null"
},
"gtf": {
"type": "string",
"fa_icon": "fas fa-address-book",
"description": "Path to reference GTF file.",
"help_text": "If left empty, the parameter `--genome` must be supplied and the reference GTF file will be automatically downloaded.\n```bash\n--gtf \"/reference/GRCh38.gtf\"\n```\n\n*N.B:* The pipleine has been developed using reference files, UCSC/ENSEMBL files have not been tested.",
"pattern": "\\.gtf$"
},
"mature": {
"type": "string",
"description": "Path to FASTA file with mature miRNAs.",
"help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.",
"fa_icon": "fas fa-wheelchair"
},
"species": {
"type": "string",
"fa_icon": "fas fa-dog",
"description": "String identifying species.",
"help_text": "Check conf/igenomes.config file & bin/ensemblDatabase_map.txt for inspiration.. "
},
"bowtie": {
"type": "string",
"fa_icon": "fas fa-bold",
"description": "Path to Bowtie index files.",
"help_text": "Automatically generated if set to null. Alternatively, provide the absolute path to Bowtie indices directory e.g:\n/data/reference_genome/BowtieIndex"
},
"bowtie2": {
"type": "string",
"fa_icon": "fas fa-bold",
"description": "Path to Bowtie2 index files.",
"help_text": "Automatically generated if left empty. Alternatively, provide the absolute path to the Bowtie2 indices directory e.g:\n/data/reference_genome/Bowtie2Index"
},
"bwa": {
"type": "string",
"fa_icon": "fas fa-bold",
"description": "Path to BWA index directory.",
"help_text": "Automatically generated if left empty. Alternatively, provide the absolute path to BWA indices directory e.g:\n/data/reference_genome/BWAIndex\n"
},
"fasta_fai": {
"type": "string",
"description": "Path to SAMtools index file.",
"fa_icon": "fab fa-stripe-s"
},
"segemehl": {
"type": "string",
"fa_icon": "fab fa-stripe-s",
"description": "Path to Segemehl Index file",
"help_text": "Automatically generated if set null. Alternatively, provide path to Segemehl index file."
},
"star": {
"type": "string",
"fa_icon": "far fa-star",
"description": "Path to STAR index directory.",
"help_text": "Automatically generated if left empty. Alternatively, provide the absolute path to STAR indices directory e.g:\n/data/reference_genome/STARIndex"
},
"igenomes_base": {
"type": "string",
"description": "Directory / URL base for iGenomes references.",
"default": "s3://ngi-igenomes/igenomes",
"fa_icon": "fas fa-cloud-download-alt",
"hidden": false
},
"igenomes_ignore": {
"type": "boolean",
"description": "Do not load the iGenomes reference config.",
"fa_icon": "fas fa-ban",
"hidden": false,
"help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
}
}
},
"star_general": {
"title": "STAR",
"type": "object",
"description": "Define parameters for STAR 2 pass mode",
"help_text": "STAR 2 pass mode is performed to identify novel splice sites in *all* samples. STAR takes the novel splice sites into account when performig re-alignment during the second pass. STAR is used for CIRCexplorer2, circRNA_finder & DCC",
"properties": {
"alignIntronMax": {
"type": "integer",
"default": 1000000,
"description": "The maximum intron length is set to 1,000,000",
"fa_icon": "fas fa-sliders-h"
},
"alignIntronMin": {
"type": "integer",
"default": 20,
"description": "The minimum intron length is set to 20. If the genomic gap is smaller than this value, it is considered as a deletion",
"fa_icon": "fas fa-sliders-h"
},
"alignMatesGapMax": {
"type": "integer",
"default": 1000000,
"description": "The maximum genomic distance between mates is 1,000,000",
"fa_icon": "fas fa-sliders-h"
},
"alignSJDBoverhangMin": {
"type": "integer",
"default": 1,
"description": "The number of minimum overhang for annotated junctions",
"fa_icon": "fas fa-sliders-h"
},
"alignSJoverhangMin": {
"type": "integer",
"default": 1,
"description": "The number of minimum overhang for unannotated junctions",
"fa_icon": "fas fa-sliders-h"
},
"alignSoftClipAtReferenceEnds": {
"type": "string",
"default": "No",
"description": "Allow the soft-clipping of the alignments past the end of chromosomes",
"fa_icon": "fas fa-sliders-h"
},
"alignTranscriptsPerReadNmax": {
"type": "integer",
"default": 10000,
"description": "Max number of different alignments per read to consider",
"fa_icon": "fas fa-sliders-h"
},
"chimJunctionOverhangMin": {
"type": "integer",
"default": 15,
"description": "Minimum overhang for a chimeric junction",
"fa_icon": "fas fa-sliders-h"
},
"chimScoreMin": {
"type": "integer",
"default": 15,
"description": "Minimum total (summed) score of the chimeric segments",
"fa_icon": "fas fa-sliders-h"
},
"chimScoreSeparation": {
"type": "integer",
"default": 15,
"description": "Minimum difference (separation) between the best chimeric score and the next one",
"fa_icon": "fas fa-sliders-h"
},
"chimSegmentMin": {
"type": "integer",
"default": 10,
"description": " Minimum length of chimeric segment length, if == 0, no chimeric output",
"fa_icon": "fas fa-sliders-h",
"help_text": "Do not set to 0, this will disable outputs compatible with circRNA quantification."
},
"genomeLoad": {
"type": "string",
"default": "NoSharedMemory",
"description": "Mode of shared memory usage for the genome files",
"fa_icon": "fas fa-sliders-h",
"help_text": "Users can select a variety of options depending on their resource configuration:\n\n1. `LoadAndKeep`: load genome into shared and keep it in memory after run\n2. `LoadAndRemove`: load genome into shared but remove it after run\n3. `LoadAndExit`: load genome into shared memory and exit, keeping the genome in memory for future runs\n4. `Remove`: do not map anything, just remove loaded genome from memory\n5. `NoSharedMemory`: do not use shared memory, each job will have its own private copy of the genome",
"enum": [
"LoadAndKeep",
"LoadAndRemove",
"LoadAndExit",
"Remove",
"NoSharedMemory"
]
},
"limitSjdbInsertNsj": {
"type": "integer",
"default": 1000000,
"description": "Maximum number of junction to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run",
"fa_icon": "fas fa-sliders-h"
},
"outFilterMatchNminOverLread": {
"type": "number",
"default": 0.33,
"description": "Alignment output if ratio of matched bases relative to read length is equal to or higher than this value",
"fa_icon": "fas fa-sliders-h",
"help_text": "Consider 75bp paired end reads with sum matched bases of 120bp. Matched bp is summmed over combined read length (120/150 = 0.8). Simply put, lowering this ratio reduces the number of required matches in reads.\n\n`outFilterMatchNminOverLread` is preferred over `outFilterMatchNmin` as it considers read length, suitable for experiments with varying read length."
},
"outFilterMismatchNoverLmax": {
"type": "number",
"default": 0.05,
"description": "Alignment output if ratio of mismatched bases relative to **mapped** read length is lower than value",
"fa_icon": "fas fa-sliders-h",
"help_text": "For example, for reads <20b no mismatches are allowed (1/19 = 0.0526), 20-39b: 1 mismatch, 40-59b 2 mismatches and so on. Simply put, increasing this value will allow for more mismatches in the mapped reads.\n\n`outFilterMismatchNoverLmax` is preferred over `outFilterMismatchNmax` as it considers the mapped read length, suitable for experiments with varying read length."
},
"outFilterMultimapNmax": {
"type": "integer",
"default": 20,
"description": "Max number of multiple alignments allowed for a read: if exceeded, the read is considered unmapped",
"fa_icon": "fas fa-sliders-h"
},
"outFilterMultimapScoreRange": {
"type": "integer",
"default": 1,
"description": " Score range below the maximum score for multimapping alignments",
"fa_icon": "fas fa-sliders-h"
},
"outFilterScoreMinOverLread": {
"type": "number",
"default": 0.33,
"description": "Alignment will be output only if its score relative to read length is higher than or equal to this value",
"fa_icon": "fas fa-sliders-h"
},
"outSJfilterOverhangMin": {
"type": "string",
"default": "15 15 15 15",
"description": "Minimum overhang length for novel splice junctions",
"fa_icon": "fas fa-sliders-h",
"help_text": "4 integers: minimum overhang length for splice junctions on both sides for:\n1. non-canonical motifs\n2. GT/AG and CT/AC motif\n3. GC/AG and CT/GC motif\n4. AT/AC and GT/AT motif\n\n-1 means no output for that motif"
},
"sjdbOverhang": {
"type": "integer",
"default": 100,
"description": "Option to specify the length of the donor/acceptor sequence on each side of the junctions used in constructing the splice junctions database",
"fa_icon": "fas fa-sliders-h",
"help_text": "By default the option is set to 100. However, we recommend setting a value depending on the read length: read/mate length - 1"
},
"sjdbScore": {
"type": "integer",
"default": 2,
"description": "Alignment score for alignmets that cross database junctions",
"fa_icon": "fas fa-sliders-h"
},
"winAnchorMultimapNmax": {
"type": "integer",
"default": 999,
"description": "The maximum number of loci anchors that are allowed to map. By default, the pipeline uses a large number 999 to switch this filter off.",
"fa_icon": "fas fa-sliders-h"
}
},
"fa_icon": "fas fa-star"
},
"generic_options": {
"title": "Generic options",
"type": "object",
"fa_icon": "fas fa-file-import",
"description": "Less common options for the pipeline, typically set in a config file.",
"help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
"properties": {
"help": {
"type": "boolean",
"description": "Display help text.",
"hidden": true,
"fa_icon": "fas fa-question-circle"
},
"publish_dir_mode": {
"type": "string",
"default": "copy",
"hidden": true,
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
]
},
"validate_params": {
"type": "boolean",
"description": "Boolean whether to validate parameters against the schema at runtime",
"default": true,
"fa_icon": "fas fa-check-square",
"hidden": true
},
"name": {
"type": "string",
"description": "Workflow name.",
"fa_icon": "fas fa-fingerprint",
"hidden": true,
"help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles."
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
"fa_icon": "fas fa-envelope",
"hidden": true,
"help_text": "An email address to send a summary email to when the pipeline is completed.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
},
"email_on_fail": {
"type": "string",
"description": "Email address for completion summary, only when pipeline fails.",
"fa_icon": "fas fa-exclamation-triangle",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
"hidden": true,
"help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful."
},
"plaintext_email": {
"type": "boolean",
"description": "Send plain-text email instead of HTML.",
"fa_icon": "fas fa-remove-format",
"hidden": true,
"help_text": "Set to receive plain-text e-mails instead of HTML formatted."
},
"max_multiqc_email_size": {
"type": "string",
"description": "File size limit when attaching MultiQC reports to summary emails.",
"default": "25.MB",
"fa_icon": "fas fa-file-upload",
"hidden": true,
"help_text": "If file generated by pipeline exceeds the threshold, it will not be attached."
},
"monochrome_logs": {
"type": "boolean",
"description": "Do not use coloured log outputs.",
"fa_icon": "fas fa-palette",
"hidden": true,
"help_text": "Set to disable colourful command line output and live life in monochrome."
},
"multiqc_config": {
"type": "string",
"description": "Custom config file to supply to MultiQC.",
"fa_icon": "fas fa-cog",
"hidden": true
},
"tracedir": {
"type": "string",
"description": "Directory to keep pipeline Nextflow logs and reports.",
"default": "${params.outdir}/pipeline_info",
"fa_icon": "fas fa-cogs",
"hidden": true
},
"show_hidden_params": {
"type": "boolean",
"fa_icon": "far fa-eye-slash",
"description": "Show all params when using `--help`",
"hidden": true,
"help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
}
}
},
"max_job_request_options": {
"title": "Max job request options",
"type": "object",
"fa_icon": "fab fa-acquisitions-incorporated",
"description": "Set the top limit for requested resources for any single job.",
"help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
"properties": {
"max_cpus": {
"type": "integer",
"description": "Maximum number of CPUs that can be requested for any single job.",
"default": 16,
"fa_icon": "fas fa-microchip",
"help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
},
"max_memory": {
"type": "string",
"description": "Maximum amount of memory that can be requested for any single job.",
"default": "128.GB",
"fa_icon": "fas fa-memory",
"pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
"help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
},
"max_time": {
"type": "string",
"description": "Maximum amount of time that can be requested for any single job.",
"default": "240.h",
"fa_icon": "far fa-clock",
"pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$",
"help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input_output_options"
},
{
"$ref": "#/definitions/pipeline_options"
},
{
"$ref": "#/definitions/save_intermediates"
},
{
"$ref": "#/definitions/reference_genome_files"
},
{
"$ref": "#/definitions/read_trimming_and_adapter_removal"
},
{
"$ref": "#/definitions/star_general"
},
{
"$ref": "#/definitions/generic_options"
},
{
"$ref": "#/definitions/max_job_request_options"
},
{
"$ref": "#/definitions/institutional_config_options"
}
]
}