-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNextflowForge.py
308 lines (266 loc) · 10.7 KB
/
NextflowForge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
import streamlit as st
# Helper functions to generate content for the Nextflow file
def generate_nextflow_file(
    project_info, parameters, processes, environment, output_config, scheduler
):
    """
    Generate the content of a Nextflow configuration file or script based on
    the collected inputs.

    :param project_info: Dict with 'name', 'description', 'author_name' and
        'author_email' keys describing the project.
    :param parameters: List of dicts, each with 'name', 'type', 'default' and
        'description' keys for one workflow parameter.
    :param processes: List of dicts, each with 'name', 'input', 'output' and
        'command' keys describing one Nextflow process.
    :param environment: Dict with a 'container' key ('None', 'Docker',
        'Singularity' or 'Conda') and, depending on that choice, an optional
        'docker_image' or 'conda_file_name' key.
    :param output_config: Dict with 'output_dir', 'generate_logs' and
        'file_naming' keys.
    :param scheduler: Dict with 'scheduler' ('None', 'SLURM', 'SGE') and
        'queue' keys.
    :return: String representing the content of the Nextflow file.
    """
    # Header: project information rendered as Nextflow comments.
    content = f"// Nextflow Workflow - {project_info['name']}\n"
    content += f"// Description: {project_info['description']}\n"
    content += (
        f"// Author: {project_info['author_name']} ({project_info['author_email']})\n\n"
    )

    # Workflow parameters block. String values are quoted; every other
    # declared type is emitted verbatim.
    if parameters:
        content += "params {\n"
        for param in parameters:
            param_line = f"    {param['name']} = "
            if param["type"] == "String":
                param_line += f"'{param['default']}'"
            else:
                param_line += f"{param['default']}"
            content += param_line + f" // {param['description']}\n"
        content += "}\n\n"

    # Environment setup. Use .get() because the UI only stores
    # 'docker_image' / 'conda_file_name' when the user actually supplied one;
    # direct indexing raised KeyError when "Conda" was selected but no YAML
    # file had been uploaded.
    if environment.get("container") == "Docker" and environment.get("docker_image"):
        content += f"process.container = '{environment['docker_image']}'\n\n"
    elif environment.get("container") == "Conda" and environment.get("conda_file_name"):
        content += f"process.conda = '{environment['conda_file_name']}'\n\n"

    # Output configuration. Only emit publishDir when a directory was given,
    # so we never write an empty "process.publishDir = ''" line.
    if output_config:
        if output_config.get("output_dir"):
            content += f"process.publishDir = '{output_config['output_dir']}'\n"
        if output_config.get("generate_logs"):
            content += "process.debug = true\n"
        if output_config.get("file_naming"):
            content += f"process.filePattern = '{output_config['file_naming']}'\n"
        content += "\n"

    # Process definitions: one "process <name> { ... }" stanza per entry.
    for process in processes:
        content += f"process {process['name']} {{\n"
        content += "    input:\n"
        content += f"    {process['input']}\n"
        content += "    output:\n"
        content += f"    {process['output']}\n"
        content += "    script:\n"
        content += f"    \"\"\"\n{process['command']}\n\"\"\"\n"
        content += "}\n\n"

    # Scheduler / cluster settings (skipped entirely when "None" selected).
    if scheduler.get("scheduler", "None") != "None":
        content += "// Scheduler Settings\n"
        content += f"process.executor = '{scheduler['scheduler']}'\n"
        if scheduler.get("queue"):
            content += f"process.queue = '{scheduler['queue']}'\n"
        content += "\n"

    return content
def collect_parameters():
    """
    Collect user-defined parameters for the Nextflow workflow via Streamlit
    widgets.

    Streamlit reruns the entire script on every widget interaction, so a
    plain local list (``parameters = []``) is recreated on each rerun and any
    previously added parameter is silently lost. The accumulated parameters
    are therefore kept in ``st.session_state`` so they survive reruns.

    :return: List of parameter dicts with 'name', 'type', 'default' and
        'description' keys.
    """
    # Persist the accumulated parameters across Streamlit reruns.
    if "parameters" not in st.session_state:
        st.session_state["parameters"] = []

    param_name = st.text_input("Parameter Name")
    param_type = st.selectbox(
        "Parameter Type", ["String", "Integer", "Boolean", "Float"]
    )
    param_default = st.text_input("Default Value")
    param_description = st.text_area("Parameter Description")

    if st.button("Add Parameter"):
        # Require at least a name and a default before accepting the entry.
        if param_name and param_default:
            st.session_state["parameters"].append(
                {
                    "name": param_name,
                    "type": param_type,
                    "default": param_default,
                    "description": param_description,
                }
            )
            st.success(f"Parameter '{param_name}' added!")
    return st.session_state["parameters"]
def collect_processes():
    """
    Collect process definitions for the Nextflow workflow via Streamlit
    widgets.

    As with :func:`collect_parameters`, Streamlit reruns the whole script on
    every interaction, so a plain local list would be reset each rerun and
    previously added processes lost. The accumulated processes are kept in
    ``st.session_state`` so they survive reruns.

    :return: List of process dicts with 'name', 'command', 'input' and
        'output' keys.
    """
    # Persist the accumulated processes across Streamlit reruns.
    if "processes" not in st.session_state:
        st.session_state["processes"] = []

    process_name = st.text_input("Process Name")
    command = st.text_area("Command to Run")
    input_files = st.text_area("Input Files Dependencies (e.g., file1 from input1)")
    output_files = st.text_area("Output Files Declarations (e.g., file1 into output1)")

    if st.button("Add Process"):
        # Require at least a name and a command before accepting the entry.
        if process_name and command:
            st.session_state["processes"].append(
                {
                    "name": process_name,
                    "command": command,
                    "input": input_files,
                    "output": output_files,
                }
            )
            st.success(f"Process '{process_name}' added!")
    return st.session_state["processes"]
# Streamlit App
# Flat top-to-bottom script: Streamlit reruns this entire file on every
# widget interaction, re-collecting all of the values below each time.
st.title("Nextflow Workflow Generator")
# Step 1: Workflow Information
st.markdown("### Step 1: Workflow Information")
st.markdown(
    """
**What is this step?**
Here, you provide basic information about the workflow:
- **Project Name**: A unique name for the workflow.
- **Description**: A brief description of the analysis or tasks the workflow will perform.
- **Author**: Information about the creator of the workflow.
**Example**:
- Project Name: `RNA_Seq_Analysis`
- Description: `This workflow analyzes RNA-Seq data to identify differential gene expression between conditions.`
- Author: `Jane Doe`, `[email protected]`
"""
)
# Metadata used for the header comments of the generated Nextflow file.
project_info = {
    "name": st.text_input("Project Name"),
    "description": st.text_area("Workflow Description"),
    "author_name": st.text_input("Author Name"),
    "author_email": st.text_input("Author Email"),
}
# Step 2: Input Files
st.markdown("### Step 2: Input Files")
st.markdown(
    """
**What is this step?**
Here, you upload the files that will be used in your analysis:
- **Primary Data Files**: These are the main datasets you'll be analyzing (e.g., FASTQ files for sequencing data).
- **Reference Files**: Files used as a reference (e.g., a genome reference in `.fa` format).
- **Config Files**: Additional configuration settings in a file.
**Example**:
- Primary Data File: `sample_data.fastq`
- Reference File: `human_genome.fa`
- Config File: `workflow_settings.config`
"""
)
# NOTE(review): these three upload results are collected but never referenced
# again in this file — presumably reserved for future use; confirm.
uploaded_data_files = st.file_uploader(
    "Upload Primary Data Files", accept_multiple_files=True
)
uploaded_reference_files = st.file_uploader(
    "Upload Reference Files", accept_multiple_files=True
)
uploaded_config_files = st.file_uploader(
    "Upload Additional Config Files", accept_multiple_files=True
)
# Step 3: Pipeline Parameters
st.markdown("### Step 3: Pipeline Parameters")
st.markdown(
    """
**What is this step?**
This is where you define parameters that the workflow will use. Parameters allow you to change how the workflow behaves:
- **Parameter Name**: A unique name for the parameter.
- **Parameter Type**: The type of value (String, Integer, Boolean, etc.).
- **Default Value**: The initial value, which you can modify later.
- **Description**: A short explanation of what the parameter does.
**Example**:
- Parameter Name: `read_length`
- Type: `Integer`
- Default Value: `150`
- Description: `The length of reads in the sequencing data.`
"""
)
# Parameter entries feed the "params { ... }" block of the generated file.
parameters = collect_parameters()
# Step 4: Environment Setup
st.markdown("### Step 4: Environment Setup")
st.markdown(
    """
**What is this step?**
This step determines how the software and tools are managed in your workflow:
- **Docker/Singularity**: Choose a container image to ensure consistent execution.
- **Conda**: Upload a Conda environment file for software management without containers.
**Example**:
- Using Docker: `biocontainers/samtools:v1.9.0_cv4`
"""
)
containerization = st.selectbox(
    "Container Option", ["None", "Docker", "Singularity", "Conda"]
)
# NOTE(review): "Singularity" is selectable here but generate_nextflow_file
# only handles the Docker and Conda cases — confirm whether that is intended.
environment = {"container": containerization}
if containerization == "Docker":
    environment["docker_image"] = st.text_input("Docker Image Name")
elif containerization == "Conda":
    conda_file = st.file_uploader("Upload Conda Environment YAML")
    if conda_file:
        # Only the uploaded file's name is recorded; the YAML content itself
        # is not saved anywhere by this script.
        environment["conda_file_name"] = conda_file.name
# Step 5: Output Configuration
st.markdown("### Step 5: Output Configuration")
st.markdown(
    """
**What is this step?**
This step configures the output of the workflow:
- **Output Directory**: Where the results should be saved.
- **Generate Debug Logs**: Whether to generate additional log files for debugging.
- **File Naming Pattern**: A pattern for naming the output files.
**Example**:
- Output Directory: `results/`
- Generate Debug Logs: `Yes`
- File Naming Pattern: `sample_{sample_id}.txt`
"""
)
# Drives the publishDir / debug / filePattern lines of the generated file.
output_config = {
    "output_dir": st.text_input("Output Directory"),
    "generate_logs": st.checkbox("Generate Debug Logs"),
    "file_naming": st.text_input("Result File Naming Pattern (Optional)"),
}
# Step 6: Process Steps Definition
st.markdown("### Step 6: Process Steps Definition")
st.markdown(
    """
**What is this step?**
Define the tasks (processes) that make up your workflow:
- **Process Name**: The name of the task.
- **Command**: The specific command to be executed.
- **Input Files**: Files needed for the command.
- **Output Files**: Expected output files from the command.
**Example**:
- Process Name: `align_reads`
- Command: `bwa mem -t 8 ref.fa sample.fastq > aligned.bam`
- Input Files: `ref.fa, sample.fastq`
- Output Files: `aligned.bam`
"""
)
# Process entries become "process <name> { ... }" stanzas in the output.
processes = collect_processes()
# Step 7: Additional Configurations
st.markdown("### Step 7: Additional Configurations")
st.markdown(
    """
**What is this step?**
Here, you can specify advanced configurations like using a cluster scheduler for running the workflow:
- **Scheduler**: Choose a scheduler like SLURM if running on a cluster.
- **Queue Name**: The name of the queue (if applicable).
**Example**:
- Scheduler: `SLURM`
- Queue Name: `bioinformatics_queue`
"""
)
scheduler = {
    "scheduler": st.selectbox("Scheduler (Cluster)", ["None", "SLURM", "SGE"]),
    "queue": st.text_input("Queue Name (If Applicable)"),
}
# Preview & Download
st.header("Preview & Download")
if st.button("Generate Nextflow File"):
    # Assemble all collected inputs into the final Nextflow file content.
    nextflow_content = generate_nextflow_file(
        project_info=project_info,
        parameters=parameters,
        processes=processes,
        environment=environment,
        output_config=output_config,
        scheduler=scheduler,
    )
    # Show the generated text and offer it for download as workflow.nf.
    st.text_area("Nextflow File Preview", value=nextflow_content, height=300)
    st.download_button(
        "Download Nextflow File", data=nextflow_content, file_name="workflow.nf"
    )