-
Notifications
You must be signed in to change notification settings - Fork 0
/
nextflow_schema.json
214 lines (214 loc) · 9.13 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
{
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
    "title": ". pipeline parameters",
    "description": "",
    "type": "object",
    "$defs": {
        "input_output_options": {
            "title": "input_output_options",
            "type": "object",
            "description": "Define where the pipeline should find input data and save output data.",
            "default": "",
            "properties": {
                "input": {
                    "type": "string",
                    "default": "/nfs/production/rdf/metagenomics/users/vangelis/plp_flatfiles_pgsql_2/sequence_explorer_protein.csv.bz2",
                    "description": "The pipeline main input data. Either sequence_explorer_protein from MGnify proteins, or a fasta file with fasta_input_mode set to true."
                },
                "fasta_input_mode": {
                    "type": "boolean",
                    "default": false,
                    "description": "If this flag is set, the pipeline starts from a fasta-file, without executing the preprocessing steps required for the sequence_explorer_protein file from MGnify proteins."
                },
                "esm_conda_path": {
                    "type": "string",
                    "default": "/hps/nobackup/rdf/metagenomics/service-team/users/vangelis/miniconda3/envs/esmfold_gpu",
                    "description": "predict_structures ESMfold conda env"
                },
                "db_config_file": {
                    "type": "string",
                    "default": "/nfs/production/rdf/metagenomics/users/vangelis/mgnifams/assets/db_config.ini",
                    "description": "secrets for sqlite db"
                },
                "db_schema_file": {
                    "type": "string",
                    "default": "/hps/nobackup/rdf/metagenomics/service-team/users/vangelis/mgnifams/assets/db_schema.sqlite",
                    "description": "schema for sqlite db"
                },
                "outdir": {
                    "type": "string",
                    "default": "/nfs/production/rdf/metagenomics/users/vangelis/mgnifams/data/output",
                    "description": "Directory where the pipeline saves its output data."
                }
            },
            "required": [
                "input",
                "esm_conda_path",
                "outdir"
            ]
        },
        "module_parameters": {
            "title": "module_parameters",
            "type": "object",
            "description": "Input values for modules",
            "default": "",
            "properties": {
                "compress_mode": {
                    "type": "string",
                    "default": "none",
                    "description": "compression mode of starting CSV input"
                },
                "min_sequence_length": {
                    "type": "integer",
                    "default": 100,
                    "description": "Sequence (and slice) length threshold to continue to clustering"
                },
                "linclust_seq_identity": {
                    "type": "number",
                    "default": 0.5,
                    "description": "mmseqs/linclust parameter for minimum sequence identity"
                },
                "linclust_coverage": {
                    "type": "number",
                    "default": 0.9,
                    "description": "mmseqs/linclust parameter for minimum sequence coverage ratio"
                },
                "linclust_cov_mode": {
                    "type": "integer",
                    "default": 0,
                    "description": "mmseqs/linclust parameter for coverage mode: 0 for both, 1 for target and 2 for query sequence"
                },
                "input_csv_chunk_size": {
                    "type": "integer",
                    "default": 50000000,
                    "description": "Chunk size for parallel slicing and filtering of sequence space"
                },
                "minimum_members": {
                    "type": "integer",
                    "default": 50,
                    "description": "Minimum number of members a cluster is allowed to have to continue with family generation"
                },
                "num_cluster_chunks": {
                    "type": "integer",
                    "description": "Number of chunks the linclust clustering file will be divided into for parallel family generation"
                },
                "deeptmhmm_chunk_size": {
                    "type": "integer",
                    "default": 100,
                    "description": "Chunk size for parallel tm prediction"
                },
                "tm_fraction_threshold": {
                    "type": "number",
                    "description": "Predicted transmembrane fraction of total length allowed for families"
                },
                "redundant_threshold": {
                    "type": "number",
                    "default": 0.95,
                    "description": "AA Jaccard Index threshold to deem two families similar, hence removing one as redundant"
                },
                "similarity_threshold": {
                    "type": "number",
                    "default": 0.5,
                    "description": "Family Jaccard Index threshold to deem two families similar, requiring further investigation at AA level"
                },
                "db_name": {
                    "type": "string",
                    "default": "pfam",
                    "description": "db used in hhblits/hhsearch"
                },
                "hh_mode": {
                    "type": "string",
                    "default": "hhblits",
                    "description": "hhblits (fast) or hhsearch (sensitive)"
                },
                "pdb_chunk_size": {
                    "type": "integer",
                    "default": 50,
                    "description": "Chunk size for parallel structure prediction"
                },
                "pdb_chunk_size_long": {
                    "type": "integer",
                    "default": 10,
                    "description": "Chunk size for parallel structure prediction for long sequences"
                },
                "compute_mode": {
                    "type": "string",
                    "default": "gpu",
                    "description": "GPU or CPU mode, depending on sequence length"
                }
            }
        },
        "reference_databases": {
            "title": "reference_databases",
            "type": "object",
            "description": "Paths to downloaded databases used by the pipeline.",
            "default": "",
            "properties": {
                "hhdb_folder_path": {
                    "type": "string",
                    "description": "annotate_models",
                    "default": "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/hh/pfamA_35.0"
                },
                "foldseek_db_path": {
                    "type": "string",
                    "description": "annotate_structures",
                    "default": "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/foldseek/8-ef4e960"
                }
            },
            "required": [
                "hhdb_folder_path",
                "foldseek_db_path"
            ]
        },
        "generic_options": {
            "title": "generic_options",
            "type": "object",
            "description": "General",
            "default": "",
            "properties": {
                "help": {
                    "type": "boolean",
                    "hidden": true,
                    "description": "Display help text."
                },
                "publish_dir_mode": {
                    "type": "string",
                    "default": "copy",
                    "description": "Method used to save pipeline results to output directory.",
                    "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                    "fa_icon": "fas fa-copy",
                    "enum": [
                        "symlink",
                        "rellink",
                        "link",
                        "copy",
                        "copyNoFollow",
                        "move"
                    ],
                    "hidden": true
                },
                "validate_params": {
                    "type": "boolean",
                    "description": "Boolean whether to validate parameters against the schema at runtime",
                    "default": true,
                    "fa_icon": "fas fa-check-square",
                    "hidden": true
                }
            }
        }
    },
    "allOf": [
        {
            "$ref": "#/$defs/input_output_options"
        },
        {
            "$ref": "#/$defs/module_parameters"
        },
        {
            "$ref": "#/$defs/reference_databases"
        },
        {
            "$ref": "#/$defs/generic_options"
        }
    ]
}