forked from lconde-ucl/merge_fastq
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.nf
155 lines (125 loc) · 4.75 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env nextflow
/*
========================================================================================
nf-core/merge_fastq
========================================================================================
nf-core/merge_fastq Analysis Pipeline.
#### Homepage / Documentation
https://github.com/nf-core/merge_fastq
----------------------------------------------------------------------------------------
*/
// Prints the pipeline usage / help text to the log.
// Fix: the usage example previously said `--outputdir`, but the parameter
// actually read by the pipeline (and documented below) is `--outdir`.
def helpMessage() {
    log.info"""
    =======================================================
    ,--./,-.
    ___ __ __ __ ___ /,-._.--~\'
    |\\ | |__ __ / ` / \\ |__) |__ } {
    | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-,
    `._,._,\'
    nf-core/merge_fastq v${workflow.manifest.version}
    =======================================================
    Usage:
    The typical command for running the pipeline is as follows:
    nextflow run nf-core/merge_fastq --inputdir fastq_files --outdir merged_fastq_files
    Optional arguments:
    --inputdir Path to input data [fastq_files] - multiple directories separated by commas
    --outdir The output directory where the results will be saved [merged_fastq_files]
    --toremove Optional suffix to remove from input sample names, e.g. sample_toremove_S1_L001_R1_001.fastq.gz
    --suffix Optional suffix for output sample names, e.g. sample_suffix_R[1,2].fastq.gz
    --readlen Optional maximum read length for hard trimming with fastp
    """.stripIndent()
}
// Show help message and exit when --help is passed on the command line.
params.help = false
if (params.help){
helpMessage()
exit 0
}
// Default pipeline parameters (each overridable with --<name> on the CLI):
params.inputdir = "fastq_files"        // input directory/directories, comma-separated
params.outdir = 'merged_fastq_files'   // where merged (and optionally trimmed) files are published
params.suffix = ''                     // optional suffix appended to output sample names
params.toremove = ''                   // optional suffix stripped from input sample names
params.readlen = -1                    // max read length for fastp hard trimming; <0 disables trimming
// Header: echo the key pipeline / runtime settings at startup.
// Fix: workflow.* values were previously printed as literal text
// ("workflow.manifest.version" etc.) because the ${} interpolation was
// missing; the duplicated 'Working dir' line was also removed.
println "========================================================"
println " M E R G E _ F A S T Q P I P E L I N E "
println "========================================================"
println "['Pipeline Name'] = ameynert/merge_fastq"
println "['Pipeline Version'] = ${workflow.manifest.version}"
println "['Inputdir'] = $params.inputdir"
println "['Output dir'] = $params.outdir"
println "['Working dir'] = ${workflow.workDir}"
println "['Container Engine'] = ${workflow.containerEngine}"
println "['Current home'] = $HOME"
println "['Current user'] = $USER"
println "['Current path'] = $PWD"
println "['Script dir'] = ${workflow.projectDir}"
println "['Config Profile'] = ${workflow.profile}"
println "========================================================"
// Split the comma-separated --inputdir value, resolve each entry to a file
// object, and re-join with commas so identify_groups receives one
// comma-separated argument on its command line.
input_dir_strings = params.inputdir.split(',')
input_dir_files = input_dir_strings.collect { dir -> file(dir) }.join(',')
// Identify groups of FastQ files to merge.
// Runs the project helper script over all input directories; everything the
// script writes to stdout is emitted on group_output_ch. Downstream
// splitCsv/map treats that output as CSV rows of
// (sample_name, read_end, files) — presumably the script emits that layout;
// TODO confirm against identify_fastq_files_to_merge.py itself.
process identify_groups {
output:
stdout into group_output_ch
script:
"""
identify_fastq_files_to_merge.py ${input_dir_files} ${params.toremove}
"""
}
// Split the identify_groups stdout into CSV rows; each row becomes a
// (sample_name, read_end, files) tuple on group_ch, one per merge job.
group_output_ch
.splitCsv()
.map { row -> tuple(row[0], row[1], row[2]) }
.set { group_ch }
// Merge FastQ files for one (sample, read end) group.
// Publishing strategy depends on whether trimming will follow:
//  - no trimming (readlen < 0): the merged .gz and .log files are MOVED to
//    the output dir, since this process is the last step;
//  - trimming enabled: only the merge log is COPIED here; the merged reads
//    stay in the work dir so trim_reads can consume and publish them.
process merge_fastq {
if (params.readlen < 0) {
publishDir params.outdir, mode: 'move'
} else {
publishDir params.outdir, mode: 'copy',
saveAs: { filename ->
// NOTE(review): `> 0` (not `>= 0`) means a file named exactly
// "merge.log" would NOT be published; works here because outputs
// are always named "<sample>.merge.log".
if (filename.indexOf("merge.log") > 0) filename
else null
}
}
input:
tuple val(sample_name), val(read_end), val(files) from group_ch
output:
file("*.log")
tuple val(sample_name), file("*.gz") into reads_output_ch
script:
"""
merge_and_rename_NGI_fastq_files.py ${files} ${sample_name} ${read_end} ./ ${params.suffix} > ${sample_name}.merge.log
"""
}
// Collect both read-end files of each sample into a single tuple:
// (sample_name, [R1.gz, R2.gz]).
reads_output_ch
.groupTuple()
.set { reads_ch }
// Hard-trim reads to params.readlen with fastp, then publish.
// Only instantiated when --readlen is positive; otherwise merge_fastq has
// already moved the final files to the output dir.
if (params.readlen > 0) {
// Trim FASTQ files
process trim_reads {
publishDir params.outdir, mode: 'move'
input:
// Consistency fix: use `tuple` (as in merge_fastq) instead of the
// deprecated `set` alias — same semantics in DSL1.
tuple val(sample_name), file(reads) from reads_ch
output:
file(reads)
script:
// assumes exactly paired reads, reads[0]=R1 and reads[1]=R2 — TODO
// confirm groupTuple always yields two files per sample here.
"""
fastp -b ${params.readlen} -i ${reads[0]} -I ${reads[1]} -o tmp.${reads[0]} -O tmp.${reads[1]}
mv tmp.${reads[0]} ${reads[0]}
mv tmp.${reads[1]} ${reads[1]}
"""
}
}
// Completion handler: report success or failure once the run finishes.
// Fix: the completion log tag said "[nf-core/test]", a leftover from the
// template; corrected to this pipeline's name.
workflow.onComplete {
    println ( workflow.success ? "Merging done! transferring merged files and wrapping up..." : "Oops .. something went wrong" )
    log.info "[nf-core/merge_fastq] Pipeline Complete"
}