diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
new file mode 100644
index 0000000..e01bb42
--- /dev/null
+++ b/assets/methods_description_template.yml
@@ -0,0 +1,35 @@
+id: "ebi-metagenomics/emg-viral-pipeline-methods-description"
+description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
+section_name: "ebi-metagenomics/emg-viral-pipeline Methods Description"
+section_href: "https://github.com/EBI-Metagenomics/emg-viral-pipeline"
+plot_type: "html"
+data: |
+
Methods
+ Data was processed using ebi-metagenomics/emg-viral-pipeline v${workflow.manifest.version} (${doi_text}; Krakau et al., 2022) of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.
+ The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:
+ ${workflow.commandLine}
+ ${tool_citations}
+ References
+
+ -
+ Informative Regions In Viral Genomes
+ Viruses (2021)
+ doi: 10.3390/v13061164
+ Moreno-Gallego, Jaime Leonardo, and Alejandro Reyes
+
+ -
+ VIRify: an integrated detection, annotation and taxonomic classification pipeline using virus-specific protein profile hidden Markov models
+ bioRxiv
+ doi: 10.1101/2022.08.22.504484
+ Rangel-Pineros, Guillermo, et al.
+
+ ${tool_bibliography}
+
+
+
Notes:
+
+ ${nodoi_text}
+ - The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
+ - You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
+
+
diff --git a/assets/mgnify_logo.png b/assets/mgnify_logo.png
new file mode 100644
index 0000000..fe6112b
Binary files /dev/null and b/assets/mgnify_logo.png differ
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
new file mode 100644
index 0000000..986fed1
--- /dev/null
+++ b/assets/multiqc_config.yml
@@ -0,0 +1,61 @@
+report_comment: >
+
+ This report has been generated by the ebi-metagenomics/emg-viral-pipeline pipeline.
+
+report_section_order:
+ "ebi-metagenomics/emg-viral-pipeline-methods-description":
+ order: -1000
+ software_versions:
+ order: -1001
+ "ebi-metagenomics/emg-viral-pipeline-summary":
+ order: -1002
+
+export_plots: true
+
+data_format: "yaml"
+
+run_modules:
+ - fastqc
+ - fastp
+
+## Module order
+module_order:
+ - fastqc
+ - fastp
+
+## File name cleaning
+extra_fn_clean_exts:
+ - "_fastp"
+
+## Prettification
+custom_logo: "mgnify_logo.png"
+custom_logo_url: https://github.com/ebi-metagenomics/emg-viral-pipeline/
+custom_logo_title: "ebi-metagenomics/emg-viral-pipeline"
+
+## General Stats customisation
+table_columns_visible:
+  "fastp":
+    pct_duplication: False
+    after_filtering_q30_rate: False
+    after_filtering_q30_bases: False
+    filtering_result_passed_filter_reads: True  # visibility flag, not a placement weight; was 3300 (truthy), so the column stays shown
+    after_filtering_gc_content: False
+    pct_surviving: True
+    pct_adapter: True
+
+table_columns_placement:
+ "fastp":
+ pct_duplication: 3000
+ after_filtering_q30_rate: 3100
+ after_filtering_q30_bases: 3200
+ filtering_result_passed_filter_reads: 3300
+ after_filtering_gc_content: 3400
+ pct_surviving: 3500
+ pct_adapter: 3600
+
+custom_table_header_config:
+ general_stats_table:
+ "Total length":
+ hidden: True
+ N50:
+ hidden: True
diff --git a/configs/conda.config b/configs/conda.config
index 5afceea..a77e536 100644
--- a/configs/conda.config
+++ b/configs/conda.config
@@ -1,24 +1,20 @@
process {
- withLabel: annotation { conda = "$baseDir/envs/python3.yaml" }
- withLabel: assign { conda = "$baseDir/envs/python3.yaml" }
- withLabel: balloon { conda = "$baseDir/envs/balloon.yaml" }
- withLabel: basics { conda = "$baseDir/envs/python3.yaml" }
- withLabel: blast { conda = "$baseDir/envs/blast.yaml" }
- withLabel: fastp { conda = "$baseDir/envs/fastp.yaml" }
- withLabel: fastqc { conda = "$baseDir/envs/fastqc.yaml" }
- withLabel: hmmscan { conda = "$baseDir/envs/hmmer.yaml" }
- withLabel: kaiju { conda = "$baseDir/envs/kaiju.yaml" }
- withLabel: krona { conda = "$baseDir/envs/krona.yaml" }
- withLabel: plot_contig_map { conda = "$baseDir/envs/r.yaml" }
- withLabel: multiqc { conda = "$baseDir/envs/multiqc.yaml" }
- withLabel: parse { conda = "$baseDir/envs/python3.yaml" }
- withLabel: prodigal { conda = "$baseDir/envs/prodigal.yaml" }
- withLabel: phanotate { conda = "$baseDir/envs/phanotate.yaml" }
- withLabel: python3 { conda = "$baseDir/envs/python3.yaml" }
- withLabel: ratio_evalue { conda = "$baseDir/envs/python3.yaml" }
- withLabel: ruby { conda = "$baseDir/envs/ruby.yaml" }
- withLabel: spades { conda = "$baseDir/envs/spades.yaml" }
- withLabel: virsorter { conda = "$baseDir/envs/virsorter.yaml" }
- withLabel: virfinder { conda = "$baseDir/envs/virfinder.yaml" }
- withLabel: checkV { conda = "$baseDir/envs/checkv.yaml" }
+    withName: ANNOTATION { conda = "$baseDir/envs/python3.yaml" } // per-process conda environments; the selector keyword must be spelled exactly `withName`
+    withName: ASSIGN { conda = "$baseDir/envs/python3.yaml" }
+    withName: BALLOON { conda = "$baseDir/envs/balloon.yaml" }
+    withLabel: basics { conda = "$baseDir/envs/python3.yaml" } // lowercase entry was a label selector before this change - TODO confirm a process still carries this label
+    withName: BLAST { conda = "$baseDir/envs/blast.yaml" }
+    withName: HMMSCAN { conda = "$baseDir/envs/hmmer.yaml" }
+    withName: KAIJU { conda = "$baseDir/envs/kaiju.yaml" }
+    withName: KRONA { conda = "$baseDir/envs/krona.yaml" }
+    withName: PLOT_CONTIG_MAP { conda = "$baseDir/envs/r.yaml" }
+    withName: PARSE { conda = "$baseDir/envs/python3.yaml" }
+    withName: PRODIGAL { conda = "$baseDir/envs/prodigal.yaml" }
+    withName: PHANOTATE { conda = "$baseDir/envs/phanotate.yaml" }
+    withLabel: python3 { conda = "$baseDir/envs/python3.yaml" } // label selector, as in the previous revision - TODO confirm the label is still in use
+    withName: RATIO_EVALUE { conda = "$baseDir/envs/python3.yaml" }
+    withLabel: ruby { conda = "$baseDir/envs/ruby.yaml" } // label selector, as in the previous revision - TODO confirm the label is still in use
+    withName: VIRSORTER { conda = "$baseDir/envs/virsorter.yaml" }
+    withName: VIRFINDER { conda = "$baseDir/envs/virfinder.yaml" }
+    withName: CHECKV { conda = "$baseDir/envs/checkv.yaml" }
}
\ No newline at end of file
diff --git a/configs/modules.config b/configs/modules.config
index d0c3d8c..09936f8 100644
--- a/configs/modules.config
+++ b/configs/modules.config
@@ -9,10 +9,19 @@
*/
process {
- withName: ANNOTATION {
+ withName: 'ANNOTATION' {
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/annotation/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/annotation/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_annotation.tsv"
@@ -20,16 +29,34 @@ process {
]
}
- withName: ASSIGN {
+ withName: 'ASSIGN' {
publishDir = [
[
- path: "${params.output}/${name}/${params.taxdir}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.taxdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_taxonomy.tsv"
],
[
- path: "${params.output}/${name}/${params.finaldir}/taxonomy",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/taxonomy/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_taxonomy.tsv"
@@ -40,7 +67,16 @@ process {
withName: BALLOON {
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/balloon/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/balloon/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.{pdf,svg}"
@@ -49,17 +85,33 @@ process {
}
withName: BLAST {
- errorStrategy 'retry'
- maxRetries 1
publishDir = [
[
- path: "${params.output}/${assembly_name}/${params.blastdir}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.blastdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.blast"
],
[
- path: "${params.output}/${assembly_name}/${params.finaldir}/blast/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/blast/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.filtered.blast"
@@ -68,17 +120,33 @@ process {
}
withName: BLAST_FILTER {
- errorStrategy 'retry'
- maxRetries 1
publishDir = [
[
- path: "${params.output}/${assembly_name}/${params.blastdir}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.blastdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.meta"
],
[
- path: "${params.output}/${assembly_name}/${params.finaldir}/blast/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/blast/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.meta"
@@ -89,16 +157,19 @@ process {
withName: CHECKV {
publishDir = [
[
- path: "${params.output}/${name}/${params.checkvdir}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.checkvdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${confidence_set_name}"
- ],
- [
- path: "${params.output}/${name}/${params.checkvdir}/",
- mode: params.publish_dir_mode,
- failOnError: false,
- pattern: "*.tsv"
+ pattern: "*_quality_summary.tsv"
],
]
}
@@ -106,10 +177,19 @@ process {
withName: GENERATE_CHROMOMAP_TABLE {
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/chromomap/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/chromomap/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${id}.filtered-*.contigs.txt"
+ pattern: "*.filtered-*.contigs.txt"
]
]
}
@@ -117,13 +197,31 @@ process {
withName: GENERATE_KRONA_TABLE {
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.plotdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.krona.tsv"
],
[
- path: "${params.output}/${name}/${params.finaldir}/krona/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/krona/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.krona.tsv"
@@ -134,16 +232,34 @@ process {
withName: GENERATE_SANKEY_TABLE {
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.plotdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${set_name}.sankey.*"
+ pattern: "*.sankey.*"
],
[
- path: "${params.output}/${name}/${params.finaldir}/sankey/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/sankey/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${set_name}.sankey.filtered-${params.sankey}.json"
+ pattern: "*.sankey.filtered-${params.sankey}.json"
]
]
}
@@ -151,7 +267,16 @@ process {
withName: CHROMOMAP {
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/chromomap/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/chromomap/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.html"
@@ -159,24 +284,22 @@ process {
]
}
- withName: FILTER_READS {
- publishDir = [
- [
- path: "${params.output}/${name}/",
- mode: params.publish_dir_mode,
- failOnError: false,
- pattern: "${name}.filtered.fastq"
- ]
- ]
- }
-
withName: HMM_POSTPROCESSING {
publishDir = [
[
- path: "${params.output}/${name}/${params.hmmerdir}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.hmmerdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${set_name}_modified.tsv"
+ pattern: "*_modified.tsv"
]
]
}
@@ -184,10 +307,19 @@ process {
withName: HMMSCAN {
publishDir = [
[
- path: "${params.output}/${name}/${params.hmmerdir}/${params.db}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.hmmerdir}/${params.databases}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${set_name}_${params.db}_hmmscan.tbl"
+ pattern: "*_${params.databases}_hmmscan.tbl"
]
]
}
@@ -195,10 +327,19 @@ process {
withName: KAIJU {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}.out"
+ pattern: "*.out"
]
]
}
@@ -206,13 +347,31 @@ process {
withName: KRONA {
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}/krona/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.plotdir}/krona/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.krona.html"
],
[
- path: "${params.output}/${name}/${params.finaldir}/krona/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/krona/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.krona.html"
@@ -223,10 +382,19 @@ process {
withName: LENGTH_FILTERING {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}*filt*.fasta"
+ pattern: "*filt*.fasta"
]
]
}
@@ -234,13 +402,31 @@ process {
withName: MASHMAP {
publishDir = [
[
- path: "${params.output}/${assembly_name}/",
+ path: "${params.output}/",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.tsv"
],
[
- path: "${params.output}/${assembly_name}/${params.finaldir}/mashmap",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/mashmap/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.tsv"
@@ -251,10 +437,19 @@ process {
withName: MULTIQC {
publishDir = [
[
- path: "${params.output}/${name}/${params.assemblydir}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.assemblydir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}_multiqc_report.html"
+ pattern: "*_multiqc_report.html"
]
]
}
@@ -262,22 +457,49 @@ process {
withName: PARSE {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.fna"
],
[
- path: "${params.output}/${name}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "virsorter_metadata.tsv"
],
[
- path: "${params.output}/${name}/${params.finaldir}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}_virus_predictions.log"
+ pattern: "*_virus_predictions.log"
]
]
}
@@ -285,7 +507,16 @@ process {
withName: PHANOTATE {
publishDir = [
[
- path: "${params.output}/${name}/${params.phanotatedir}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.phanotatedir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.faa"
@@ -294,21 +525,36 @@ process {
}
withName: PLOT_CONTIG_MAP {
- errorStrategy {
- task.exitStatus = 1 ? 'ignore' : 'terminate'
- }
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.plotdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${set_name}_mapping_results"
+ pattern: "*_mapping_results"
],
[
- path: "${params.output}/${name}/${params.finaldir}/annotation/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/annotation/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${set_name}_prot_ann_table_filtered.tsv"
+ pattern: "*_prot_ann_table_filtered.tsv"
]
]
}
@@ -316,27 +562,51 @@ process {
withName: PPRMETA {
publishDir = [
[
- path: "${params.output}/${name}/${params.virusdir}/pprmeta",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.virusdir}/pprmeta/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}_pprmeta.csv"
+ pattern: "*_pprmeta.csv"
]
]
}
withName: PRODIGAL {
- errorStrategy {
- task.exitStatus = 18 ? 'ignore' : 'terminate'
- }
publishDir = [
[
- path: "${params.output}/${assembly_name}/${params.prodigaldir}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.prodigaldir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.faa"
],
[
- path: "${params.output}/${assembly_name}/${params.finaldir}/cds/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/cds/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.faa"
@@ -345,15 +615,21 @@ process {
}
withName: RATIO_EVALUE {
- errorStrategy {
- task.exitStatus = 1 ? 'ignore' : 'terminate'
- }
publishDir = [
[
- path: "${params.output}/${name}/ratio_evalue_tables",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/ratio_evalue_tables/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${set_name}_modified_informative.tsv"
+ pattern: "*_modified_informative.tsv"
]
]
}
@@ -361,10 +637,19 @@ process {
withName: RENAME {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}_renamed.fasta"
+ pattern: "*_renamed.fasta"
]
]
}
@@ -372,13 +657,31 @@ process {
withName: RESTORE {
publishDir = [
[
- path: "${params.output}/${name}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_original.fasta"
],
[
- path: "${params.output}/${name}/${params.finaldir}/contigs/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/contigs/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_original.fasta"
@@ -389,13 +692,31 @@ process {
withName: SANKEY {
publishDir = [
[
- path: "${params.output}/${name}/${params.plotdir}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.plotdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.sankey.html"
],
[
- path: "${params.output}/${name}/${params.finaldir}/sankey/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/sankey/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.sankey.html"
@@ -406,24 +727,39 @@ process {
withName: SPADES {
publishDir = [
[
- path: "${params.output}/${name}/${params.assemblydir}",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.assemblydir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}.fasta"
+ pattern: "*.fasta"
]
]
}
withName: VIRFINDER {
- errorStrategy {
- task.exitStatus = 1 ? 'ignore' : 'terminate'
- }
publishDir = [
[
- path: "${params.output}/${name}/${params.virusdir}/virfinder",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.virusdir}/virfinder/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
- pattern: "${name}.txt"
+ pattern: "*.txt"
]
]
}
@@ -431,7 +767,16 @@ process {
withName: VIRSORTER {
publishDir = [
[
- path: "${params.output}/${name}/${params.virusdir}/",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.virusdir}/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false
]
@@ -439,10 +784,18 @@ process {
}
withName: WRITE_GFF {
- errorStrategy 'ignore'
publishDir = [
[
- path: "${params.output}/${name}/${params.finaldir}/gff",
+ path: "${params.output}",
+ saveAs: {
+ filename -> {
+ if ( filename.equals('versions.yml') ) {
+ return null;
+ }
+ def output_file = new File(filename);
+ return "${meta.id}/${params.finaldir}/gff/${output_file.name}";
+ }
+ },
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.gff"
diff --git a/configs/node.config b/configs/node.config
deleted file mode 100644
index 75da6a8..0000000
--- a/configs/node.config
+++ /dev/null
@@ -1,30 +0,0 @@
-process {
- //errorStrategy = "retry"
- //maxRetries = 1
- withName: ANNOTATION { cpus = 1; memory = '4.0 GB' }
- withName: ASSIGN { cpus = 1; memory = '4.0 GB' }
- withName: BALLOON { cpus = 1; memory = '2.0 GB' }
- withLabel: basics { cpus = 1; memory = '4.0 GB' }
- withName: BLAST { cpus = 12; memory = '12.0 GB' }
- withName: CHROMOMAP { cpus = 1; memory = '4.0 GB' }
- withName: CHECKV { cpus = 24; memory = '16.0 GB' }
- withName: FASTP { cpus = 12; memory = '12.0 GB' }
- withName: FASTQC { cpus = 4; memory = '8.0 GB' }
- withName: HMMSCAN { cpus = 24; memory = '24.0 GB' }
- withName: KAIJU { cpus = 12; memory = '40.0 GB' }
- withName: KRONA { cpus = 2; memory = '4.0 GB' }
- withName: PLOT_CONTIG_MAP { cpus = 1; memory = '4.0 GB' }
- withName: PPRMETA { cpus = 8; memory = '16.0 GB' }
- withName: MULTIQC { cpus = 4; memory = '8.0 GB' }
- withName: PARSE { cpus = 1; memory = '4.0 GB' }
- withName: PRODIGAL { cpus = 8; memory = '8.0 GB' }
- withName: PHANONATE { cpus = 1; memory = '4.0 GB' }
- withLabel: python3 { cpus = 1; memory = '4.0 GB' }
- withName: RATIO_EVALUE { cpus = 1; memory = '4.0 GB' }
- withLabel: ruby { cpus = 1; memory = '4.0 GB' }
- withName: SPADES { cpus = 12; memory = '40.0 GB' }
- withName: SANKEY { cpus = 1; memory = '2.0 GB' }
- withName: VIRSORTER { cpus = 12; memory = '12.0 GB' }
- withName: VIRFINDER { cpus = 1; memory = '12.0 GB' }
- withName: MASHMAP { cpus = 4; memory = '4.0 GB' }
-}
diff --git a/modules/local/annotation/main.nf b/modules/local/annotation/main.nf
index 7aedbc2..fa798e4 100644
--- a/modules/local/annotation/main.nf
+++ b/modules/local/annotation/main.nf
@@ -15,16 +15,16 @@ process ANNOTATION {
help="Name of processing .fna file to write correct output name")
*/
- tag "${name}"
- label 'process_low'
+ tag "${meta.id} ${set_name}"
+ label 'process_single'
container 'quay.io/microbiome-informatics/virify-python3:1.1'
input:
- tuple val(name), val(set_name), file(tab), file(faa)
+ tuple val(meta), val(set_name), path(tab), path(faa)
output:
- tuple val(name), val(set_name), file("*_annotation.tsv")
+ tuple val(meta), val(set_name), path("*_annotation.tsv"), emit: annotations
script:
"""
diff --git a/modules/local/assign/main.nf b/modules/local/assign/main.nf
index 4fda3a2..da4445e 100644
--- a/modules/local/assign/main.nf
+++ b/modules/local/assign/main.nf
@@ -4,18 +4,18 @@ process ASSIGN {
provides the taxonomic lineage of each viral contig, based on the corresponding ViPhOG annotations'''
*/
- tag "${name}"
- label 'process_low'
+ tag "${meta.id} ${set_name}"
+ label 'process_single'
container 'quay.io/microbiome-informatics/virify-python3:1.1'
input:
- tuple val(name), val(set_name), file(tab)
- file(db)
- file(factor)
+ tuple val(meta), val(set_name), path(tab)
+ path(db)
+ path(factor)
output:
- tuple val(name), val(set_name), file("*_taxonomy.tsv")
+ tuple val(meta), val(set_name), path("*_taxonomy.tsv")
script:
"""
diff --git a/modules/local/balloon/main.nf b/modules/local/balloon/main.nf
index 06c8ee6..c0f934c 100644
--- a/modules/local/balloon/main.nf
+++ b/modules/local/balloon/main.nf
@@ -1,14 +1,14 @@
process BALLOON {
- tag "${name}"
- label 'process_medium'
+ tag "${meta.id}"
+ label 'process_single'
container 'nanozoo/r_balloon:3.1.1--64f0f7d'
input:
- tuple val(name), val(set_name), file(tbl)
+ tuple val(meta), val(set_name), path(tbl)
output:
- path ("*.{pdf,svg}") optional true
+ path("*.{pdf,svg}"), optional: true
script:
"""
@@ -22,16 +22,16 @@ process BALLOON {
fi
# genus
- grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$2!="" && \$2 !~ /^0/){print SAMPLE"\\tgenus\\t"\$2}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' > \$NAME"_summary.tsv"
+ grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$2!="" && \$2 !~ /^0/){print SAMPLE"\\tgenus\\t"\$2}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' > \$NAME"_summary.tsv"
# subfamily
- grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$3!="" && \$3 !~ /^0/){print SAMPLE"\\tsubfamily\\t"\$3}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
+ grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$3!="" && \$3 !~ /^0/){print SAMPLE"\\tsubfamily\\t"\$3}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
# family
- grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$4!="" && \$4 !~ /^0/){print SAMPLE"\\tfamily\\t"\$4}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
+ grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$4!="" && \$4 !~ /^0/){print SAMPLE"\\tfamily\\t"\$4}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
# order
- grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$5!="" && \$5 !~ /^0/){print SAMPLE"\\torder\\t"\$5}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
+ grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$5!="" && \$5 !~ /^0/){print SAMPLE"\\torder\\t"\$5}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv"
if [ -s \$NAME"_summary.tsv" ]; then
balloon.R "\${NAME}_summary.tsv" "\${NAME}_balloon.svg" 10 8
diff --git a/modules/local/blast/main.nf b/modules/local/blast/main.nf
index 73b162c..48e24e3 100644
--- a/modules/local/blast/main.nf
+++ b/modules/local/blast/main.nf
@@ -1,15 +1,15 @@
process BLAST {
label 'process_high'
- tag "${assembly_name}"
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/microbiome-informatics/blast:2.9.0'
input:
- tuple val(assembly_name), val(confidence_set_name), file(fasta)
+ tuple val(meta), val(confidence_set_name), path(fasta)
file(db)
output:
- tuple val(assembly_name), val(confidence_set_name), file("${confidence_set_name}.blast"), file("${confidence_set_name}.filtered.blast")
+ tuple val(meta), val(confidence_set_name), path("${confidence_set_name}.blast"), path("${confidence_set_name}.filtered.blast")
script:
if (task.attempt.toString() == '1')
diff --git a/modules/local/blast_filter/main.nf b/modules/local/blast_filter/main.nf
index 623aa4d..620a4d3 100644
--- a/modules/local/blast_filter/main.nf
+++ b/modules/local/blast_filter/main.nf
@@ -1,14 +1,14 @@
process BLAST_FILTER {
- label 'process_low'
- tag "${assembly_name}"
+ label 'process_single'
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(assembly_name), val(confidence_set_name), file(blast), file(blast_filtered)
- file(db)
+ tuple val(meta), val(confidence_set_name), path(blast), path(blast_filtered)
+ path(db)
output:
- tuple val(assembly_name), val(confidence_set_name), file("*.meta")
+ tuple val(meta), val(confidence_set_name), path("*.meta") // set name is a plain value taken from the input tuple, not a file to collect
script:
if (task.attempt.toString() == '1')
diff --git a/modules/local/checkv/main.nf b/modules/local/checkv/main.nf
index 11bcea9..049ea4e 100644
--- a/modules/local/checkv/main.nf
+++ b/modules/local/checkv/main.nf
@@ -1,27 +1,22 @@
process CHECKV {
- label 'process_medium'
- tag "${name}"
+ label 'process_high'
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/microbiome-informatics/checkv:0.8.1__1'
input:
- tuple val(name), val(confidence_set_name), file(fasta), file(contigs)
- file(database)
+ tuple val(meta), val(confidence_set_name), path(fasta)
+ path(database)
output:
- tuple val(name), val(confidence_set_name), file("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/")
+ tuple val(meta), val(confidence_set_name), path("${confidence_set_name}_quality_summary.tsv")
script:
- if (confidence_set_name == 'prophages') {
- """
- checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name}
- cp ${confidence_set_name}/quality_summary.tsv ${confidence_set_name}_quality_summary.tsv
- """
- } else {
+
"""
- checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name}
+ checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name}
cp ${confidence_set_name}/quality_summary.tsv ${confidence_set_name}_quality_summary.tsv
"""
- }
+
stub:
"""
mkdir negative_result_${confidence_set_name}.tsv
diff --git a/modules/local/chromomap/main.nf b/modules/local/chromomap/main.nf
index 5d7af19..7fb4393 100644
--- a/modules/local/chromomap/main.nf
+++ b/modules/local/chromomap/main.nf
@@ -1,17 +1,17 @@
process GENERATE_CHROMOMAP_TABLE {
- label 'process_low'
- tag "${name}"
+ label 'process_single'
+ tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/bioruby:2.0.1'
input:
- tuple val(name), val(set_name), file(assembly), file(annotation_table)
+ tuple val(meta), val(set_name), path(assembly), path(annotation_table)
output:
- tuple val(name), val(set_name), file("${id}.filtered-*.contigs.txt"), file("${id}.filtered-*.anno.txt")
+ tuple val(meta), val(set_name), path("${id}.filtered-*.contigs.txt"), path("${id}.filtered-*.anno.txt")
script:
id = set_name
- if (set_name == "all") { id = name }
+ if (set_name == "all") { id = meta.id }
"""
# combine
if [[ ${set_name} == "all" ]]; then
@@ -29,18 +29,19 @@ process GENERATE_CHROMOMAP_TABLE {
}
process CHROMOMAP {
- label 'process_medium'
+ label 'process_low'
+ tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/r_chromomap:0.3'
input:
- tuple val(name), val(set_name), file(contigs), file(annotations)
+ tuple val(meta), val(set_name), path(contigs), path(annotations)
output:
- tuple val(name), val(set_name), file("*.html") optional true
+ tuple val(meta), val(set_name), path("*.html"), optional: true // no HTML is an accepted outcome
script:
id = set_name
- if (set_name == "all") { id = name }
+ if (set_name == "all") { id = meta.id }
"""
#!/usr/bin/env Rscript
diff --git a/modules/local/fastp/main.nf b/modules/local/fastp/main.nf
deleted file mode 100644
index c85d5d4..0000000
--- a/modules/local/fastp/main.nf
+++ /dev/null
@@ -1,21 +0,0 @@
-process FASTP {
-
-/* Comments:
- -m, --merge
- for paired-end input, merge each pair of reads into a single read if they are overlapped.
- The merged reads will be written to the file given by --merged_out, the unmerged reads will be
- written to the files specified by --out1 and --out2. The merging mode is disabled by default.
-*/
- tag "${name}"
- label 'process_medium'
- container 'quay.io/biocontainers/fastp:0.20.1--h8b12597_0'
-
- input:
- tuple val(name), file(reads)
- output:
- tuple val(name), file("${name}*.fastp.fastq.gz")
- script:
- """
- fastp -i ${reads[0]} -I ${reads[1]} --thread ${task.cpus} -o ${name}.R1.fastp.fastq.gz -O ${name}.R2.fastp.fastq.gz
- """
-}
\ No newline at end of file
diff --git a/modules/local/fastqc/main.nf b/modules/local/fastqc/main.nf
deleted file mode 100644
index 199adb3..0000000
--- a/modules/local/fastqc/main.nf
+++ /dev/null
@@ -1,15 +0,0 @@
-process FASTQC {
- tag "${name}"
- label 'process_low'
- container 'quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1'
-
- input:
- tuple val(name), file(reads)
- output:
- tuple val(name), file("fastqc/${name}*fastqc*")
- script:
- """
- mkdir fastqc
- fastqc -t ${task.cpus} -o fastqc *.fastq.gz
- """
-}
\ No newline at end of file
diff --git a/modules/local/filter_reads/main.nf b/modules/local/filter_reads/main.nf
deleted file mode 100644
index 5454154..0000000
--- a/modules/local/filter_reads/main.nf
+++ /dev/null
@@ -1,19 +0,0 @@
-process FILTER_READS {
- tag "${name}"
- label 'process_low'
-
- input:
- tuple val(name), file(kaiju_filtered), file(fastq)
-
- output:
- tuple val(name), file("${name}.filtered.fastq")
- tuple val(name), file("${name}.filtered.fasta")
-
- script:
- """
- sed '/^@/!d;s//>/;N' ${fastq} > ${name}.fasta
- faSomeRecords ${name}.fasta ${kaiju_filtered} ${name}.filtered.fasta
- faToFastq ${name}.filtered.fasta ${name}.filtered.fastq
- rm -f ${name}.fasta
- """
-}
diff --git a/modules/local/get_db/checkv.nf b/modules/local/get_db/checkv.nf
index 315ad8a..6aeff6d 100644
--- a/modules/local/get_db/checkv.nf
+++ b/modules/local/get_db/checkv.nf
@@ -1,7 +1,14 @@
process checkVGetDB {
- label 'noDocker'
- if (params.cloudProcess) { publishDir "${params.databases}/checkv", mode: 'copy' }
- else { storeDir "${params.databases}/checkv" }
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
+ if (params.cloudProcess) {
+ publishDir "${params.databases}/checkv", mode: 'copy'
+ }
+ else {
+ storeDir "${params.databases}/checkv"
+ }
+
output:
path("checkv-db-v*", type: 'dir')
script:
diff --git a/modules/local/get_db/imgvr.nf b/modules/local/get_db/imgvr.nf
index f97e7f1..635c45a 100644
--- a/modules/local/get_db/imgvr.nf
+++ b/modules/local/get_db/imgvr.nf
@@ -1,5 +1,7 @@
process imgvrGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/imgvr/", mode: 'copy', pattern: "IMG_VR_2018-07-01_4"
}
diff --git a/modules/local/get_db/kaiju.nf b/modules/local/get_db/kaiju.nf
index ee10fa4..d9cb9e9 100644
--- a/modules/local/get_db/kaiju.nf
+++ b/modules/local/get_db/kaiju.nf
@@ -1,5 +1,6 @@
process kaijuGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
if (params.cloudProcess) {
publishDir "${params.databases}/kaiju/", mode: 'copy', pattern: "viruses"//pattern: "nr_euk"
}
diff --git a/modules/local/get_db/meta.nf b/modules/local/get_db/meta.nf
index 59cd1e0..187c847 100644
--- a/modules/local/get_db/meta.nf
+++ b/modules/local/get_db/meta.nf
@@ -1,6 +1,7 @@
-
process metaGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/models", mode: 'copy', pattern: "additional_data_vpHMMs_${params.meta_version}.tsv"
}
diff --git a/modules/local/get_db/ncbi.nf b/modules/local/get_db/ncbi.nf
index e1ef496..d66a0f8 100644
--- a/modules/local/get_db/ncbi.nf
+++ b/modules/local/get_db/ncbi.nf
@@ -1,5 +1,7 @@
process ncbiGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/ncbi/", mode: 'copy', pattern: "ete3_ncbi_tax.sqlite"
}
diff --git a/modules/local/get_db/pvogs.nf b/modules/local/get_db/pvogs.nf
index e1e2e61..bf175cb 100644
--- a/modules/local/get_db/pvogs.nf
+++ b/modules/local/get_db/pvogs.nf
@@ -1,5 +1,8 @@
process pvogsGetDB {
- label 'noDocker'
+
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/", mode: 'copy', pattern: "pvogs"
}
diff --git a/modules/local/get_db/rvdb.nf b/modules/local/get_db/rvdb.nf
index 0f850ce..64626f8 100644
--- a/modules/local/get_db/rvdb.nf
+++ b/modules/local/get_db/rvdb.nf
@@ -1,5 +1,7 @@
process rvdbGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/", mode: 'copy', pattern: "rvdb"
}
diff --git a/modules/local/get_db/viphog.nf b/modules/local/get_db/viphog.nf
index 3364a2a..993a990 100644
--- a/modules/local/get_db/viphog.nf
+++ b/modules/local/get_db/viphog.nf
@@ -1,5 +1,7 @@
process viphogGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/", mode: 'copy', pattern: "vpHMM_database_${params.viphog_version}"
}
diff --git a/modules/local/get_db/virfinder.nf b/modules/local/get_db/virfinder.nf
index 6ce014b..21fa3df 100644
--- a/modules/local/get_db/virfinder.nf
+++ b/modules/local/get_db/virfinder.nf
@@ -1,5 +1,7 @@
process virfinderGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/virfinder/", mode: 'copy', pattern: "VF.modEPV_k8.rda"
}
diff --git a/modules/local/get_db/virsorter.nf b/modules/local/get_db/virsorter.nf
index 9ebccd0..6b43242 100644
--- a/modules/local/get_db/virsorter.nf
+++ b/modules/local/get_db/virsorter.nf
@@ -1,5 +1,7 @@
process virsorterGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/virsorter/", mode: 'copy', pattern: "virsorter-data"
}
diff --git a/modules/local/get_db/vogdb.nf b/modules/local/get_db/vogdb.nf
index 4c7fab1..fa9ba22 100644
--- a/modules/local/get_db/vogdb.nf
+++ b/modules/local/get_db/vogdb.nf
@@ -1,5 +1,7 @@
process vogdbGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/", mode: 'copy', pattern: "vogdb"
}
diff --git a/modules/local/get_db/vpf.nf b/modules/local/get_db/vpf.nf
index d8432f0..4020154 100644
--- a/modules/local/get_db/vpf.nf
+++ b/modules/local/get_db/vpf.nf
@@ -1,5 +1,7 @@
process vpfGetDB {
- label 'noDocker'
+ label 'process_low'
+ container 'nanozoo/template:3.8--ccd0653'
+
if (params.cloudProcess) {
publishDir "${params.databases}/", mode: 'copy', pattern: "vpf"
}
diff --git a/modules/local/help.nf b/modules/local/help.nf
index 42d2739..28e3a97 100644
--- a/modules/local/help.nf
+++ b/modules/local/help.nf
@@ -81,7 +81,7 @@ def helpMSG() {
${c_yellow}HPC computing:${c_reset}
Especially for execution of the workflow on a HPC (LSF, SLURM) adjust the following parameters if needed:
- --databases defines the path where databases are stored [default: $params.dbs]
+ --databases defines the path where databases are stored [default: $params.databases]
--workdir defines the path where nextflow writes tmp files [default: $params.workdir]
--singularity_cachedir defines the path where images (singularity) are cached [default: $params.singularity_cachedir]
diff --git a/modules/local/hmm_postprocessing/main.nf b/modules/local/hmm_postprocessing/main.nf
index af99cff..705424d 100644
--- a/modules/local/hmm_postprocessing/main.nf
+++ b/modules/local/hmm_postprocessing/main.nf
@@ -3,16 +3,16 @@ process HMM_POSTPROCESSING {
input: File_hmmer_ViPhOG.tbl
output: File_hmmer_ViPhOG_modified.tbl
*/
- tag "${name}"
- label 'process_low'
+ tag "${meta.id} ${set_name}"
+ label 'process_single'
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), val(set_name), file(hmmer_tbl), file(faa)
+ tuple val(meta), val(set_name), path(hmmer_tbl), path(faa)
output:
- tuple val(name), val(set_name), file("${set_name}_modified.tsv"), file(faa)
+ tuple val(meta), val(set_name), path("${set_name}_modified.tsv"), path(faa)
script:
"""
diff --git a/modules/local/hmmscan/main.nf b/modules/local/hmmscan/main.nf
index 82cf7b7..c8e4473 100644
--- a/modules/local/hmmscan/main.nf
+++ b/modules/local/hmmscan/main.nf
@@ -1,28 +1,28 @@
process HMMSCAN {
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_high'
container 'quay.io/microbiome-informatics/hmmer:3.1b2'
input:
- tuple val(name), val(set_name), file(faa)
- file(db)
+ tuple val(meta), val(set_name), path(faa)
+ path(db)
output:
- tuple val(name), val(set_name), file("${set_name}_${params.db}_hmmscan.tbl"), file(faa)
+ tuple val(meta), val(set_name), path("${set_name}_${params.db}_hmmscan.tbl"), path(faa) // params.db is the HMM database name compared with "viphogs" below; params.databases is the database directory and must not appear in file names
script:
"""
if [[ ${params.db} == "viphogs" ]]; then
if [[ ${params.version} == "v1" ]]; then
hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.db}_hmmscan.tbl ${db}/${db}.hmm ${faa}
else
hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.db}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa}
#filter evalue for models that dont have any GA cutoff
awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.db}_hmmscan_cutga.tbl > ${set_name}_${params.db}_hmmscan.tbl
fi
else
hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.db}_hmmscan.tbl ${db}/${db}.hmm ${faa}
fi
"""
}
diff --git a/modules/local/kaiju/main.nf b/modules/local/kaiju/main.nf
index c461b2f..c2e2ac9 100644
--- a/modules/local/kaiju/main.nf
+++ b/modules/local/kaiju/main.nf
@@ -5,29 +5,29 @@ process KAIJU {
TODO: include viruses.taxids
*/
- label 'process_medium'
- tag "${name}"
+ label 'process_high'
+ tag "${meta.id}"
container 'quay.io/biocontainers/kaiju:1.7.2--hdbcaa40_0'
input:
- tuple val(name), file(fastq)
- file(database)
+ tuple val(meta), path(fastq)
+ path(database)
output:
- tuple val(name), file("${name}.out")
- tuple val(name), file("${name}.out.krona")
+ tuple val(meta), path("${meta.id}.out")
+ tuple val(meta), path("${meta.id}.out.krona")
shell:
if (params.illumina) {
'''
- kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq[0]} -j !{fastq[1]} -o !{name}.out
- kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{name}.out -o !{name}.out.krona
+ kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq[0]} -j !{fastq[1]} -o !{meta.id}.out
+ kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{meta.id}.out -o !{meta.id}.out.krona
'''
}
if (params.fasta) {
'''
- kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq} -o !{name}.out
- kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{name}.out -o !{name}.out.krona
+ kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq} -o !{meta.id}.out
+ kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{meta.id}.out -o !{meta.id}.out.krona
'''
}
}
diff --git a/modules/local/krona/main.nf b/modules/local/krona/main.nf
index deb8c04..ad18021 100644
--- a/modules/local/krona/main.nf
+++ b/modules/local/krona/main.nf
@@ -1,21 +1,21 @@
process GENERATE_KRONA_TABLE {
- label 'process_low'
- tag "${name}"
+ label 'process_single'
+ tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), val(set_name), file(tbl)
+ tuple val(meta), val(set_name), path(tbl)
output:
- tuple val(name), val(set_name), file("*.krona.tsv")
+ tuple val(meta), val(set_name), path("*.krona.tsv")
script:
"""
if [[ "${set_name}" == "all" ]]; then
- grep contig_ID *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq > ${name}.tmp
- grep -v "contig_ID" *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq >> ${name}.tmp
- cp ${name}.tmp ${name}.tsv
- generate_counts_table.py -f ${name}.tsv -o ${name}.krona.tsv
+ grep contig_ID *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq > ${meta.id}.tmp
+ grep -v "contig_ID" *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq >> ${meta.id}.tmp
+ cp ${meta.id}.tmp ${meta.id}.tsv
+ generate_counts_table.py -f ${meta.id}.tsv -o ${meta.id}.krona.tsv
else
generate_counts_table.py -f ${tbl} -o ${set_name}.krona.tsv
fi
@@ -24,18 +24,18 @@ process GENERATE_KRONA_TABLE {
process KRONA {
label 'process_low'
-
+ tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/krona:2.7.1'
input:
- tuple val(name), val(set_name), file(krona_file)
+ tuple val(meta), val(set_name), file(krona_file)
output:
file("*.krona.html")
script:
"""
if [[ ${set_name} == "all" ]]; then
- ktImportText -o ${name}.krona.html ${krona_file}
+ ktImportText -o ${meta.id}.krona.html ${krona_file}
else
ktImportText -o ${set_name}.krona.html ${krona_file}
fi
diff --git a/modules/local/length_filtering/main.nf b/modules/local/length_filtering/main.nf
index b7fc090..4c2efd3 100644
--- a/modules/local/length_filtering/main.nf
+++ b/modules/local/length_filtering/main.nf
@@ -1,18 +1,18 @@
process LENGTH_FILTERING {
- label 'process_low'
- tag "${name}"
+ label 'process_single'
+ tag "${meta.id}"
container 'quay.io/biocontainers/biopython:1.75'
input:
- tuple val(name), file(fasta), file(map)
+ tuple val(meta), path(fasta), path(map)
output:
- tuple val(name), file("${name}*filt*.fasta"), env(CONTIGS)
+ tuple val(meta), path("${meta.id}*filt*.fasta"), env(CONTIGS)
script:
"""
filter_contigs_len.py -f ${fasta} -l ${params.length} -o ./
- CONTIGS=\$(grep ">" ${name}*filt*.fasta | wc -l)
+ CONTIGS=\$(grep ">" ${meta.id}*filt*.fasta | wc -l)
"""
}
diff --git a/modules/local/mashmap/main.nf b/modules/local/mashmap/main.nf
index 5312397..e0fac7e 100644
--- a/modules/local/mashmap/main.nf
+++ b/modules/local/mashmap/main.nf
@@ -1,14 +1,14 @@
process MASHMAP {
label 'process_medium'
- tag "${assembly_name}"
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/microbiome-informatics/mashmap:2.0'
input:
- tuple val(assembly_name), val(confidence_set_name), file(fasta)
- file(reference)
+ tuple val(meta), val(confidence_set_name), path(fasta)
+ path(reference)
output:
- file("${confidence_set_name}_mashmap_hits.tsv")
+ path("${confidence_set_name}_mashmap_hits.tsv")
script:
"""
diff --git a/modules/local/multiqc/main.nf b/modules/local/multiqc/main.nf
deleted file mode 100644
index da54e9b..0000000
--- a/modules/local/multiqc/main.nf
+++ /dev/null
@@ -1,15 +0,0 @@
-process MULTIQC {
- label 'process_low'
- tag "${name}"
- container 'quay.io/biocontainers/multiqc:1.9--py_1'
-
- input:
- tuple val(name), file(fastqc)
- output:
- tuple val(name), file("${name}_multiqc_report.html")
-
- script:
- """
- multiqc -i ${name} .
- """
-}
diff --git a/modules/local/parse/main.nf b/modules/local/parse/main.nf
index 41949a7..ec23045 100644
--- a/modules/local/parse/main.nf
+++ b/modules/local/parse/main.nf
@@ -1,21 +1,21 @@
process PARSE {
label 'process_low'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), file(fasta), val(contig_number), file(virfinder), file(virsorter), file(pprmeta)
+ tuple val(meta), path(fasta), val(contig_number), path(virfinder), path(virsorter), path(pprmeta)
when:
contig_number.toInteger() > 0
output:
- tuple val(name), file("*.fna"), file('virsorter_metadata.tsv'), file("${name}_virus_predictions.log"), optional: true
+ tuple val(meta), path("*.fna"), path('virsorter_metadata.tsv'), path("${meta.id}_virus_predictions.log"), optional: true
script:
"""
touch virsorter_metadata.tsv
- parse_viral_pred.py -a ${fasta} -f ${virfinder} -p ${pprmeta} -s ${virsorter}/Predicted_viral_sequences/*.fasta &> ${name}_virus_predictions.log
+ parse_viral_pred.py -a ${fasta} -f ${virfinder} -p ${pprmeta} -s ${virsorter}/Predicted_viral_sequences/*.fasta &> ${meta.id}_virus_predictions.log
"""
}
diff --git a/modules/local/phanotate/main.nf b/modules/local/phanotate/main.nf
index 5f72fe2..53b1843 100644
--- a/modules/local/phanotate/main.nf
+++ b/modules/local/phanotate/main.nf
@@ -1,13 +1,13 @@
process PHANOTATE {
label 'process_low'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/biocontainers/phanotate:1.5.0--h30d9df9_2'
input:
- tuple val(name), file(fasta)
+ tuple val(meta), path(fasta)
output:
- tuple val(name), stdout, file("*.faa")
+ tuple val(meta), stdout, path("*.faa")
script:
"""
diff --git a/modules/local/plot_contig_map/main.nf b/modules/local/plot_contig_map/main.nf
index bc55bf2..f7eeaaf 100644
--- a/modules/local/plot_contig_map/main.nf
+++ b/modules/local/plot_contig_map/main.nf
@@ -1,14 +1,14 @@
process PLOT_CONTIG_MAP {
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_low'
container 'quay.io/microbiome-informatics/virify-plot-contig-map:1'
input:
- tuple val(name), val(set_name), file(tab)
+ tuple val(meta), val(set_name), path(tab)
output:
- tuple val(name), val(set_name), file("${set_name}_mapping_results"), file("${set_name}_prot_ann_table_filtered.tsv")
+ tuple val(meta), val(set_name), path("${set_name}_mapping_results"), path("${set_name}_prot_ann_table_filtered.tsv")
script:
"""
diff --git a/modules/local/pprmeta/main.nf b/modules/local/pprmeta/main.nf
index e1f6288..86b5659 100644
--- a/modules/local/pprmeta/main.nf
+++ b/modules/local/pprmeta/main.nf
@@ -1,22 +1,22 @@
process PPRMETA {
label 'process_medium'
- tag "${name}"
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/pprmeta:1.1'
input:
- tuple val(name), file(fasta), val(contig_number)
+ tuple val(meta), path(fasta), val(contig_number)
path(pprmeta_git)
when:
contig_number.toInteger() > 0
output:
- tuple val(name), file("${name}_pprmeta.csv")
+ tuple val(meta), path("${meta.id}_pprmeta.csv")
script:
"""
[ -d "pprmeta" ] && cp pprmeta/* .
- ./PPR_Meta ${fasta} ${name}_pprmeta.csv
+ ./PPR_Meta ${fasta} ${meta.id}_pprmeta.csv
"""
}
@@ -24,7 +24,8 @@ process PPRMETA {
// need to implement this so its fixed
process pprmetaGet {
- label 'noDocker'
+ container 'nanozoo/template:3.8--ccd0653'
+ label 'process_single'
if (params.cloudProcess) {
publishDir "${params.databases}/pprmeta", mode: 'copy', pattern: "*"
}
diff --git a/modules/local/prodigal/main.nf b/modules/local/prodigal/main.nf
index 5b429f0..44d1875 100644
--- a/modules/local/prodigal/main.nf
+++ b/modules/local/prodigal/main.nf
@@ -1,13 +1,13 @@
process PRODIGAL {
- label 'process_high'
- tag "${name}"
+ label 'process_medium'
+ tag "${meta.id} ${confidence_set_name}"
container 'quay.io/biocontainers/prodigal:2.6.3--hec16e2b_4'
input:
- tuple val(assembly_name), val(confidence_set_name), file(fasta)
+ tuple val(meta), val(confidence_set_name), path(fasta)
output:
- tuple val(assembly_name), val(confidence_set_name), file("*.faa")
+ tuple val(meta), val(confidence_set_name), path("*.faa")
script:
"""
diff --git a/modules/local/ratio_evalue/main.nf b/modules/local/ratio_evalue/main.nf
index 28ff720..9a73c12 100644
--- a/modules/local/ratio_evalue/main.nf
+++ b/modules/local/ratio_evalue/main.nf
@@ -7,17 +7,17 @@ process RATIO_EVALUE {
out PRJNA530103_small_modified_informative.tsv
*/
- tag "${name}"
+ tag "${meta.id} ${set_name}"
label 'process_low'
container 'quay.io/microbiome-informatics/virify-python3:1.1'
input:
- tuple val(name), val(set_name), file(modified_table), file(faa)
- file(model_metadata)
+ tuple val(meta), val(set_name), path(modified_table), path(faa)
+ path(model_metadata)
output:
- tuple val(name), val(set_name), file("${set_name}_modified_informative.tsv"), file(faa), optional: true
+ tuple val(meta), val(set_name), path("${set_name}_modified_informative.tsv"), path(faa), optional: true
script:
"""
diff --git a/modules/local/rename/main.nf b/modules/local/rename/main.nf
index 8a54128..cb4c87a 100644
--- a/modules/local/rename/main.nf
+++ b/modules/local/rename/main.nf
@@ -3,24 +3,26 @@ process RENAME {
usage: rename_fasta.py [-h] -i INPUT [-m MAP] -o OUTPUT {rename,restore} ...
*/
- label 'process_low'
- tag "${name}"
+ label 'process_single'
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), file(fasta)
+ tuple val(meta), path(fasta)
output:
- tuple val(name), file("${name}_renamed.fasta"), file("${name}_map.tsv")
+ tuple val(meta), path("${meta.id}_renamed.fasta"), path("${meta.id}_map.tsv")
script:
"""
if [[ ${fasta} =~ \\.gz\$ ]]; then
zcat ${fasta} > tmp.fasta
+ echo "compressed"
else
cp ${fasta} tmp.fasta
+ echo "uncompressed"
fi
- rename_fasta.py -i tmp.fasta -m ${name}_map.tsv -o ${name}_renamed.fasta rename
+ rename_fasta.py -i tmp.fasta -m ${meta.id}_map.tsv -o ${meta.id}_renamed.fasta rename
"""
}
diff --git a/modules/local/restore/main.nf b/modules/local/restore/main.nf
index 4327181..a9b9f7d 100644
--- a/modules/local/restore/main.nf
+++ b/modules/local/restore/main.nf
@@ -2,16 +2,16 @@ process RESTORE {
/*
usage: rename_fasta.py [-h] -i INPUT [-m MAP] -o OUTPUT {rename,restore} ...
*/
- tag "${name}"
- label 'process_low'
+ tag "${meta.id}"
+ label 'process_single'
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), file(fasta), file(map)
+ tuple val(meta), path(fasta), path(map)
output:
- tuple val(name), env(BN), file("*_original.fasta")
+ tuple val(meta), env(BN), path("*_original.fasta")
script:
"""
diff --git a/modules/local/sankey/main.nf b/modules/local/sankey/main.nf
index b1f2252..0bc7171 100644
--- a/modules/local/sankey/main.nf
+++ b/modules/local/sankey/main.nf
@@ -1,13 +1,13 @@
process GENERATE_SANKEY_TABLE {
label 'process_low'
- tag "${name}"
+ tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/bioruby:2.0.1'
input:
- tuple val(name), val(set_name), file(krona_table)
+ tuple val(meta), val(set_name), path(krona_table)
output:
- tuple val(name), val(set_name), file("${set_name}.sankey.filtered-${params.sankey}.json"), file("${set_name}.sankey.tsv")
+ tuple val(meta), val(set_name), path("${set_name}.sankey.filtered-${params.sankey}.json"), path("${set_name}.sankey.tsv")
script:
"""
@@ -22,19 +22,19 @@ process GENERATE_SANKEY_TABLE {
process SANKEY {
- label 'process_medium'
-
+ label 'process_low'
+ tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/sankeyd3:0.12.3'
input:
- tuple val(name), val(set_name), file(json), file(tsv)
+ tuple val(meta), val(set_name), path(json), path(tsv)
output:
- tuple val(name), val(set_name), file("*.sankey.html")
+ tuple val(meta), val(set_name), path("*.sankey.html")
script:
id = set_name
- if (set_name == "all") { id = name }
+ if (set_name == "all") { id = meta.id }
"""
#!/usr/bin/env Rscript
diff --git a/modules/local/spades/main.nf b/modules/local/spades/main.nf
deleted file mode 100644
index cdf4483..0000000
--- a/modules/local/spades/main.nf
+++ /dev/null
@@ -1,17 +0,0 @@
-process SPADES {
-
- label 'process_medium'
- tag "${name}"
- container 'quay.io/biocontainers/spades:3.15.5--h95f258a_1'
-
- input:
- tuple val(name), file(reads)
- output:
- tuple val(name), file("${name}.fasta")
-
- script:
- """
- spades.py --meta --only-assembler -1 !{reads[0]} -2 !{reads[1]} -t !{task.cpus} -o assembly
- mv assembly/contigs.fasta !{name}.fasta
- """
-}
\ No newline at end of file
diff --git a/modules/local/virfinder/main.nf b/modules/local/virfinder/main.nf
index f4d8f96..69e645f 100644
--- a/modules/local/virfinder/main.nf
+++ b/modules/local/virfinder/main.nf
@@ -1,22 +1,22 @@
process VIRFINDER {
- tag "${name}"
- label 'process_high'
+ tag "${meta.id}"
+ label 'process_medium'
container 'quay.io/microbiome-informatics/virfinder:1.1__eb8032e'
input:
- tuple val(name), file(fasta), val(contig_number)
+ tuple val(meta), path(fasta), val(contig_number)
path model
when:
contig_number.toInteger() > 0
output:
- tuple val(name), file("${name}.txt")
+ tuple val(meta), path("${meta.id}.txt")
script:
"""
run_virfinder.Rscript ${model} ${fasta} .
- awk '{print \$1"\\t"\$2"\\t"\$3"\\t"\$4}' ${name}*.tsv > ${name}.txt
+ awk '{print \$1"\\t"\$2"\\t"\$3"\\t"\$4}' ${meta.id}*.tsv > ${meta.id}.txt
"""
}
diff --git a/modules/local/virsorter/main.nf b/modules/local/virsorter/main.nf
index 3d5a2bd..23b5382 100644
--- a/modules/local/virsorter/main.nf
+++ b/modules/local/virsorter/main.nf
@@ -1,17 +1,17 @@
process VIRSORTER {
- label 'process_high'
- tag "${name}"
+ label 'process_medium'
+ tag "${meta.id}"
container 'quay.io/microbiome-informatics/virsorter:1.0.6_edfeb8c5e72'
input:
- tuple val(name), file(fasta), val(contig_number)
+ tuple val(meta), path(fasta), val(contig_number)
path(database)
when:
contig_number.toInteger() > 0
output:
- tuple val(name), file("*")
+ tuple val(meta), path("*")
script:
if (params.virome)
diff --git a/modules/local/write_gff/main.nf b/modules/local/write_gff/main.nf
index 198b221..e759530 100644
--- a/modules/local/write_gff/main.nf
+++ b/modules/local/write_gff/main.nf
@@ -1,17 +1,14 @@
process WRITE_GFF {
- tag "${name}"
- label 'process_medium'
+ tag "${meta.id}"
+ label 'process_low'
container 'quay.io/microbiome-informatics/virify-python3:1.2'
input:
- tuple val(name), path(fasta)
- path(viphos_annotations)
- path(taxonomies)
- path(quality_summaries)
+ tuple val(meta), path(fasta), path(viphos_annotations), path(taxonomies), path(quality_summaries)
output:
- path("${name}_virify.gff")
+ path("${meta.id}_virify.gff")
script:
"""
@@ -19,9 +16,9 @@ process WRITE_GFF {
-v ${viphos_annotations.join(' ')} \
-c ${quality_summaries.join(' ')} \
-t ${taxonomies.join(' ')} \
- -s ${name} \
+ -s ${meta.id} \
-a ${fasta}
- gt gff3validator ${name}_virify.gff
+ gt gff3validator ${meta.id}_virify.gff
"""
}
diff --git a/modules/local/fastp/fastp.yaml b/modules/nf-core/checkv/endtoend/environment.yml
similarity index 64%
rename from modules/local/fastp/fastp.yaml
rename to modules/nf-core/checkv/endtoend/environment.yml
index b4df7d8..8646fff 100644
--- a/modules/local/fastp/fastp.yaml
+++ b/modules/nf-core/checkv/endtoend/environment.yml
@@ -1,6 +1,5 @@
-name: fastp
channels:
- - bioconda
- conda-forge
+ - bioconda
dependencies:
- - fastp=0.20.0
+ - bioconda::checkv=1.0.1
\ No newline at end of file
diff --git a/modules/nf-core/checkv/endtoend/main.nf b/modules/nf-core/checkv/endtoend/main.nf
new file mode 100644
index 0000000..635c9fa
--- /dev/null
+++ b/modules/nf-core/checkv/endtoend/main.nf
@@ -0,0 +1,63 @@
+process CHECKV_ENDTOEND { // Runs `checkv end_to_end` on one FASTA file and emits the per-sample result tables and sequences
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/checkv:1.0.1--pyhdfd78af_0':
+ 'biocontainers/checkv:1.0.1--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ path db // handed to checkv through the -d option
+
+ output:
+ tuple val(meta), path ("${prefix}/quality_summary.tsv") , emit: quality_summary
+ tuple val(meta), path ("${prefix}/completeness.tsv") , emit: completeness
+ tuple val(meta), path ("${prefix}/contamination.tsv") , emit: contamination
+ tuple val(meta), path ("${prefix}/complete_genomes.tsv"), emit: complete_genomes
+ tuple val(meta), path ("${prefix}/proviruses.fna") , emit: proviruses
+ tuple val(meta), path ("${prefix}/viruses.fna") , emit: viruses
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}" // deliberately not 'def': the output: paths above interpolate ${prefix}
+
+ """
+ checkv \\
+ end_to_end \\
+ $args \\
+ -t $task.cpus \\
+ -d $db \\
+ $fasta \\
+ $prefix
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ checkv: \$(checkv -h 2>&1 | sed -n 's/^.*CheckV v//; s/: assessing.*//; 1p')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: '' // NOTE(review): not referenced by the stub script below
+ prefix = task.ext.prefix ?: "${meta.id}" // same un-def'd prefix as in script:, needed by the output: paths
+
+ """
+ mkdir -p ${prefix}
+ touch ${prefix}/quality_summary.tsv
+ touch ${prefix}/completeness.tsv
+ touch ${prefix}/contamination.tsv
+ touch ${prefix}/complete_genomes.tsv
+ touch ${prefix}/proviruses.fna
+ touch ${prefix}/viruses.fna
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ checkv: \$(checkv -h 2>&1 | sed -n 's/^.*CheckV v//; s/: assessing.*//; 1p')
+ END_VERSIONS
+ """
+}
\ No newline at end of file
diff --git a/modules/nf-core/checkv/endtoend/meta.yml b/modules/nf-core/checkv/endtoend/meta.yml
new file mode 100644
index 0000000..c74d091
--- /dev/null
+++ b/modules/nf-core/checkv/endtoend/meta.yml
@@ -0,0 +1,107 @@
+name: "checkv_endtoend"
+description: Assess the quality of metagenome-assembled viral genomes.
+keywords:
+ - checkv
+ - checkm
+ - mag
+ - metagenome
+ - quality
+ - isolates
+ - virus
+ - completeness
+ - contamination
+tools:
+ - "checkv":
+ description: Assess the quality of metagenome-assembled viral genomes.
+ homepage: https://bitbucket.org/berkeleylab/checkv/src/master/
+ documentation: https://bitbucket.org/berkeleylab/checkv/src/master/
+ tool_dev_url: https://bitbucket.org/berkeleylab/checkv/src/master/
+ doi: "10.1038/s41587-020-00774-7"
+ licence: ["BSD License"]
+ identifier: biotools:checkv
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: fasta file
+ pattern: "*.{fasta,fna,fa}"
+ - - db:
+ type: directory
+ description: Directory pointing to checkV database
+output:
+ - quality_summary:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/quality_summary.tsv:
+ type: file
+ description: CheckV's main output containing integrated results from the three
+ main modules (contamination, completeness, complete genomes) with overall
+ quality of contigs
+ pattern: "${prefix}/quality_summary.tsv"
+ - completeness:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/completeness.tsv:
+ type: file
+ description: CheckV's detailed overview table on estimating completeness
+ pattern: "${prefix}/completeness.tsv"
+ - contamination:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/contamination.tsv:
+ type: file
+ description: CheckV's detailed overview table on estimating contamination
+ pattern: "${prefix}/contamination.tsv"
+ - complete_genomes:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/complete_genomes.tsv:
+ type: file
+ description: CheckV's detailed overview table on the identified putative complete
+ genomes
+ pattern: "${prefix}/complete_genomes.tsv"
+ - proviruses:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/proviruses.fna:
+ type: file
+ description: CheckV's extracted proviruses contigs
+ pattern: "${prefix}/proviruses.fna"
+ - viruses:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'sample', bin:'1' ]
+ - ${prefix}/viruses.fna:
+ type: file
+ description: CheckV's extracted virus contigs
+ pattern: "${prefix}/viruses.fna"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@Joon-Klaps"
+maintainers:
+ - "@Joon-Klaps"
\ No newline at end of file
diff --git a/modules/local/spades/spades.yaml b/modules/nf-core/fastp/environment.yml
similarity index 64%
rename from modules/local/spades/spades.yaml
rename to modules/nf-core/fastp/environment.yml
index b6db0fd..de9463b 100644
--- a/modules/local/spades/spades.yaml
+++ b/modules/nf-core/fastp/environment.yml
@@ -1,6 +1,5 @@
-name: spades
channels:
- - bioconda
- conda-forge
+ - bioconda
dependencies:
- - spades=3.14
+ - bioconda::fastp=0.23.4
\ No newline at end of file
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
new file mode 100644
index 0000000..08200cd
--- /dev/null
+++ b/modules/nf-core/fastp/main.nf
@@ -0,0 +1,125 @@
+process FASTP { // Trims reads with fastp; handles interleaved, single-end and paired-end input in three separate script branches
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
+ 'biocontainers/fastp:0.23.4--h5f740d0_0' }"
+
+ input:
+ tuple val(meta), path(reads)
+ path adapter_fasta
+ val discard_trimmed_pass
+ val save_trimmed_fail
+ val save_merged
+
+ output:
+ tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
+ tuple val(meta), path('*.json') , emit: json
+ tuple val(meta), path('*.html') , emit: html
+ tuple val(meta), path('*.log') , emit: log
+ tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
+ tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
+ def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+ def out_fq1 = discard_trimmed_pass ? '' : ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" )
+ def out_fq2 = discard_trimmed_pass ? '' : "--out2 ${prefix}_2.fastp.fastq.gz" // '' rather than Elvis: `flag ?: x` would interpolate a literal "true" into the command
+ // Added soft-links to original fastqs for consistent naming in MultiQC
+ // Use single ended for interleaved. Add --interleaved_in in config. NOTE(review): this branch always writes ${prefix}.fastp.fastq.gz, regardless of discard_trimmed_pass.
+ if ( task.ext.args?.contains('--interleaved_in') ) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+ fastp \\
+ --stdout \\
+ --in1 ${prefix}.fastq.gz \\
+ --thread $task.cpus \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2) \\
+ | gzip -c > ${prefix}.fastp.fastq.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ } else if (meta.single_end) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+ fastp \\
+ --in1 ${prefix}.fastq.gz \\
+ $out_fq1 \\
+ --thread $task.cpus \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2)
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ } else {
+ def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
+ """
+ [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
+ [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
+ fastp \\
+ --in1 ${prefix}_1.fastq.gz \\
+ --in2 ${prefix}_2.fastq.gz \\
+ $out_fq1 \\
+ $out_fq2 \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $merge_fastq \\
+ --thread $task.cpus \\
+ --detect_adapter_for_pe \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2)
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ }
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
+ def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz"
+ def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : ""
+ def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz"
+ """
+ $touch_reads
+ $touch_fail_fastq
+ $touch_merged
+ touch "${prefix}.fastp.json"
+ touch "${prefix}.fastp.html"
+ touch "${prefix}.fastp.log"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
new file mode 100644
index 0000000..bece97e
--- /dev/null
+++ b/modules/nf-core/fastp/meta.yml
@@ -0,0 +1,113 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+ - trimming
+ - quality control
+ - fastq
+tools:
+ - fastp:
+ description: |
+ A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
+ documentation: https://github.com/OpenGene/fastp
+ doi: 10.1093/bioinformatics/bty560
+ licence: ["MIT"]
+ identifier: biotools:fastp
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively. If you wish to run interleaved paired-end data, supply as single-end data
+ but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+ - - adapter_fasta:
+ type: file
+ description: File in FASTA format containing possible adapters to remove.
+ pattern: "*.{fasta,fna,fas,fa}"
+ - - discard_trimmed_pass:
+ type: boolean
+ description: Specify true to not write any reads that pass trimming thresholds.
+ This can be used to run fastp for the output report only.
+ - - save_trimmed_fail:
+ type: boolean
+ description: Specify true to save files that failed to pass trimming thresholds
+ ending in `*.fail.fastq.gz`
+ - - save_merged:
+ type: boolean
+ description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz`
+output:
+ - reads:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.fastp.fastq.gz":
+ type: file
+ description: The trimmed/modified/unmerged fastq reads
+ pattern: "*fastp.fastq.gz"
+ - json:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.json":
+ type: file
+ description: Results in JSON format
+ pattern: "*.json"
+ - html:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.html":
+ type: file
+ description: Results in HTML format
+ pattern: "*.html"
+ - log:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.log":
+ type: file
+ description: fastp log file
+ pattern: "*.log"
+ - reads_fail:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.fail.fastq.gz":
+ type: file
+ description: Reads that failed the preprocessing
+ pattern: "*fail.fastq.gz"
+ - reads_merged:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.merged.fastq.gz":
+ type: file
+ description: Reads that were successfully merged
+ pattern: "*.{merged.fastq.gz}"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@kevinmenden"
+maintainers:
+ - "@drpatelh"
+ - "@kevinmenden"
\ No newline at end of file
diff --git a/modules/local/multiqc/multiqc.yaml b/modules/nf-core/fastqc/environment.yml
similarity index 63%
rename from modules/local/multiqc/multiqc.yaml
rename to modules/nf-core/fastqc/environment.yml
index 3bc368b..8b76b92 100644
--- a/modules/local/multiqc/multiqc.yaml
+++ b/modules/nf-core/fastqc/environment.yml
@@ -1,6 +1,5 @@
-name: multiqc
channels:
- - bioconda
- conda-forge
+ - bioconda
dependencies:
- - multiqc=1.8
+ - bioconda::fastqc=0.12.1
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
new file mode 100644
index 0000000..feee5f0
--- /dev/null
+++ b/modules/nf-core/fastqc/main.nf
@@ -0,0 +1,64 @@
+process FASTQC { // Runs FastQC on the reads after linking them to prefix-based names
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
+ 'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(reads)
+
+ output:
+ tuple val(meta), path("*.html"), emit: html
+ tuple val(meta), path("*.zip") , emit: zip
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ // Build [old name, new name] pairs; the bash while loop below links each input to its prefix-based name
+ def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
+ def rename_to = old_new_pairs*.join(' ').join(' ') // flat "old new old new ..." list, consumed two words at a time by printf
+ def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') // new names only; these are what fastqc is run on
+
+ // The total RAM allocated by FastQC equals the number of threads (--threads) times the per-thread amount (--memory)
+ // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222
+ // Dividing task.memory by task.cpus keeps the total within the amount of RAM requested by the label
+ def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus
+ // Clamp to the range FastQC accepts for --memory (100 - 10000)
+ def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
+
+ """
+ printf "%s %s\\n" $rename_to | while read old_name new_name; do
+ [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+ done
+
+ fastqc \\
+ $args \\
+ --threads $task.cpus \\
+ --memory $fastqc_memory \\
+ $renamed_files
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.html
+ touch ${prefix}.zip
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+ END_VERSIONS
+ """
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
new file mode 100644
index 0000000..40c8711
--- /dev/null
+++ b/modules/nf-core/fastqc/meta.yml
@@ -0,0 +1,66 @@
+name: fastqc
+description: Run FastQC on sequenced reads
+keywords:
+ - quality control
+ - qc
+ - adapters
+ - fastq
+tools:
+ - fastqc:
+ description: |
+ FastQC gives general quality metrics about your reads.
+ It provides information about the quality score distribution
+ across your reads, the per base sequence content (%A/C/G/T).
+ You get information about adapter contamination and other
+ overrepresented sequences.
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+ licence: ["GPL-2.0-only"]
+ identifier: biotools:fastqc
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+output:
+ - html:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.html":
+ type: file
+ description: FastQC report
+ pattern: "*_{fastqc.html}"
+ - zip:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.zip":
+ type: file
+ description: FastQC report archive
+ pattern: "*_{fastqc.zip}"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
+maintainers:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
\ No newline at end of file
diff --git a/modules/local/fastqc/fastqc.yaml b/modules/nf-core/multiqc/environment.yml
similarity index 63%
rename from modules/local/fastqc/fastqc.yaml
rename to modules/nf-core/multiqc/environment.yml
index eef652b..e1d226f 100644
--- a/modules/local/fastqc/fastqc.yaml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -1,6 +1,5 @@
-name: fastqc
channels:
- - bioconda
- conda-forge
+ - bioconda
dependencies:
- - fastqc=0.11.8
+ - bioconda::multiqc=1.25.1
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
new file mode 100644
index 0000000..a91446d
--- /dev/null
+++ b/modules/nf-core/multiqc/main.nf
@@ -0,0 +1,63 @@
+process MULTIQC { // Runs MultiQC over the task directory ('.') with optional config, logo and sample-renaming files
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' :
+ 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }"
+
+ input:
+ path multiqc_files, stageAs: "?/*" // NOTE(review): presumably stages each item in its own numbered subdirectory to avoid name clashes; confirm against the Nextflow stageAs docs
+ path(multiqc_config)
+ path(extra_multiqc_config)
+ path(multiqc_logo)
+ path(replace_names)
+ path(sample_names)
+
+ output:
+ path "*multiqc_report.html", emit: report
+ path "*_data" , emit: data
+ path "*_plots" , optional:true, emit: plots
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' // despite the name, a complete --filename option (or empty), not a bare prefix
+ def config = multiqc_config ? "--config $multiqc_config" : ''
+ def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' // second --config, placed after the first on the command line
+ def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' // logo path is injected as inline config via --cl-config
+ def replace = replace_names ? "--replace-names ${replace_names}" : ''
+ def samples = sample_names ? "--sample-names ${sample_names}" : ''
+ """
+ multiqc \\
+ --force \\
+ $args \\
+ $config \\
+ $prefix \\
+ $extra_config \\
+ $logo \\
+ $replace \\
+ $samples \\
+ .
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ mkdir multiqc_data
+ touch multiqc_plots
+ touch multiqc_report.html
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+ END_VERSIONS
+ """
+}
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
new file mode 100644
index 0000000..2621b2a
--- /dev/null
+++ b/modules/nf-core/multiqc/meta.yml
@@ -0,0 +1,78 @@
+name: multiqc
+description: Aggregate results from bioinformatics analyses across many samples into
+ a single report
+keywords:
+ - QC
+ - bioinformatics tools
+ - Beautiful stand-alone HTML report
+tools:
+ - multiqc:
+ description: |
+ MultiQC searches a given directory for analysis logs and compiles a HTML report.
+ It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.
+ homepage: https://multiqc.info/
+ documentation: https://multiqc.info/docs/
+ licence: ["GPL-3.0-or-later"]
+ identifier: biotools:multiqc
+input:
+ - - multiqc_files:
+ type: file
+ description: |
+ List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
+ - - multiqc_config:
+ type: file
+ description: Optional config yml for MultiQC
+ pattern: "*.{yml,yaml}"
+ - - extra_multiqc_config:
+ type: file
+ description: Second optional config yml for MultiQC. Will override common sections
+ in multiqc_config.
+ pattern: "*.{yml,yaml}"
+ - - multiqc_logo:
+ type: file
+ description: Optional logo file for MultiQC
+ pattern: "*.{png}"
+ - - replace_names:
+ type: file
+ description: |
+ Optional two-column sample renaming file. First column a set of
+ patterns, second column a set of corresponding replacements. Passed via
+ MultiQC's `--replace-names` option.
+ pattern: "*.{tsv}"
+ - - sample_names:
+ type: file
+ description: |
+ Optional TSV file with headers, passed to MultiQC's `--sample-names`
+ option.
+ pattern: "*.{tsv}"
+output:
+ - report:
+ - "*multiqc_report.html":
+ type: file
+ description: MultiQC report file
+ pattern: "multiqc_report.html"
+ - data:
+ - "*_data":
+ type: directory
+ description: MultiQC data dir
+ pattern: "multiqc_data"
+ - plots:
+ - "*_plots":
+ type: file
+ description: Plots created by MultiQC
+ pattern: "*_plots"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@abhi18av"
+ - "@bunop"
+ - "@drpatelh"
+ - "@jfy133"
+maintainers:
+ - "@abhi18av"
+ - "@bunop"
+ - "@drpatelh"
+ - "@jfy133"
\ No newline at end of file
diff --git a/modules/nf-core/prodigal/environment.yml b/modules/nf-core/prodigal/environment.yml
new file mode 100644
index 0000000..b9455d6
--- /dev/null
+++ b/modules/nf-core/prodigal/environment.yml
@@ -0,0 +1,6 @@
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::prodigal=2.6.3
+ - conda-forge::pigz=2.6
\ No newline at end of file
diff --git a/modules/nf-core/prodigal/main.nf b/modules/nf-core/prodigal/main.nf
new file mode 100644
index 0000000..916f97e
--- /dev/null
+++ b/modules/nf-core/prodigal/main.nf
@@ -0,0 +1,64 @@
+process PRODIGAL { // Streams the (optionally gzipped) genome into prodigal and compresses all four result files with pigz
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' :
+ 'biocontainers/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' }"
+
+ input:
+ tuple val(meta), path(genome)
+ val(output_format) // passed to prodigal -f and reused as the extension of the annotation file
+
+ output:
+ tuple val(meta), path("${prefix}.${output_format}.gz"), emit: gene_annotations
+ tuple val(meta), path("${prefix}.fna.gz"), emit: nucleotide_fasta
+ tuple val(meta), path("${prefix}.faa.gz"), emit: amino_acid_fasta
+ tuple val(meta), path("${prefix}_all.txt.gz"), emit: all_gene_annotations
+ path "versions.yml", emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}" // deliberately not 'def': the output: paths above interpolate ${prefix}
+ """
+ pigz -cdf ${genome} | prodigal \\
+ $args \\
+ -f $output_format \\
+ -d "${prefix}.fna" \\
+ -o "${prefix}.${output_format}" \\
+ -a "${prefix}.faa" \\
+ -s "${prefix}_all.txt"
+
+ pigz -nm ${prefix}.fna
+ pigz -nm ${prefix}.${output_format}
+ pigz -nm ${prefix}.faa
+ pigz -nm ${prefix}_all.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p')
+ pigz: \$(pigz -V 2>&1 | sed 's/pigz //g')
+ END_VERSIONS
+ """
+
+ stub:
+ // Only prefix is needed here; placeholders go through gzip so every .gz output is a valid archive.
+ prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ echo "" | gzip > ${prefix}.fna.gz
+ echo "" | gzip > ${prefix}.${output_format}.gz
+ echo "" | gzip > ${prefix}.faa.gz
+ echo "" | gzip > ${prefix}_all.txt.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p')
+ pigz: \$(pigz -V 2>&1 | sed 's/pigz //g')
+ END_VERSIONS
+ """
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/prodigal/meta.yml b/modules/nf-core/prodigal/meta.yml
new file mode 100644
index 0000000..d59ff5c
--- /dev/null
+++ b/modules/nf-core/prodigal/meta.yml
@@ -0,0 +1,79 @@
+name: prodigal
+description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) is a
+ microbial (bacterial and archaeal) gene finding program
+keywords:
+ - prokaryotes
+ - gene finding
+ - microbial
+tools:
+ - prodigal:
+ description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm)
+ is a microbial (bacterial and archaeal) gene finding program
+ homepage: https://github.com/hyattpd/Prodigal
+ documentation: https://github.com/hyattpd/prodigal/wiki
+ tool_dev_url: https://github.com/hyattpd/Prodigal
+ doi: "10.1186/1471-2105-11-119"
+ licence: ["GPL v3"]
+ identifier: biotools:prodigal
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - genome:
+ type: file
+ description: fasta/fasta.gz file
+ - - output_format:
+ type: string
+ description: Output format ("gbk"/"gff"/"sqn"/"sco")
+output:
+ - gene_annotations:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}.${output_format}.gz:
+ type: file
+ description: gene annotations in output_format given as input
+ pattern: "*.{gbk,gff,sqn,sco}.gz"
+ - nucleotide_fasta:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}.fna.gz:
+ type: file
+ description: nucleotide sequences file
+ pattern: "*.fna.gz"
+ - amino_acid_fasta:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}.faa.gz:
+ type: file
+ description: protein translations file
+ pattern: "*.faa.gz"
+ - all_gene_annotations:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}_all.txt.gz:
+ type: file
+ description: complete starts file
+ pattern: "*_all.txt.gz"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@grst"
+maintainers:
+ - "@grst"
\ No newline at end of file
diff --git a/modules/nf-core/spades/environment.yml b/modules/nf-core/spades/environment.yml
new file mode 100644
index 0000000..569eb73
--- /dev/null
+++ b/modules/nf-core/spades/environment.yml
@@ -0,0 +1,5 @@
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::spades=4.0.0
\ No newline at end of file
diff --git a/modules/nf-core/spades/main.nf b/modules/nf-core/spades/main.nf
new file mode 100644
index 0000000..46f11c2
--- /dev/null
+++ b/modules/nf-core/spades/main.nf
@@ -0,0 +1,102 @@
+process SPADES { // Runs spades.py on Illumina and/or long reads (or a YAML dataset), then renames and gzips whichever outputs exist
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/spades:4.0.0--h5fb382e_1' :
+ 'biocontainers/spades:4.0.0--h5fb382e_1' }"
+
+ input:
+ tuple val(meta), path(illumina), path(pacbio), path(nanopore)
+ path yml
+ path hmm
+
+ output:
+ tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds
+ tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs
+ tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts
+ tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters
+ tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa
+ tuple val(meta), path('*.warnings.log') , optional:true, emit: warnings
+ tuple val(meta), path('*.spades.log') , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def maxmem = task.memory.toGiga() // NOTE(review): presumably fails if no memory directive is configured (task.memory unset); confirm
+ def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : ""
+ def pacbio_reads = pacbio ? "--pacbio $pacbio" : ""
+ def nanopore_reads = nanopore ? "--nanopore $nanopore" : ""
+ def custom_hmms = hmm ? "--custom-hmms $hmm" : ""
+ def reads = yml ? "--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" // a YAML dataset replaces all per-technology read options
+ """
+ spades.py \\
+ $args \\
+ --threads $task.cpus \\
+ --memory $maxmem \\
+ $custom_hmms \\
+ $reads \\
+ -o ./
+ mv spades.log ${prefix}.spades.log
+
+ if [ -f scaffolds.fasta ]; then
+ mv scaffolds.fasta ${prefix}.scaffolds.fa
+ gzip -n ${prefix}.scaffolds.fa
+ fi
+ if [ -f contigs.fasta ]; then
+ mv contigs.fasta ${prefix}.contigs.fa
+ gzip -n ${prefix}.contigs.fa
+ fi
+ if [ -f transcripts.fasta ]; then
+ mv transcripts.fasta ${prefix}.transcripts.fa
+ gzip -n ${prefix}.transcripts.fa
+ fi
+ if [ -f assembly_graph_with_scaffolds.gfa ]; then
+ mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa
+ gzip -n ${prefix}.assembly.gfa
+ fi
+
+ if [ -f gene_clusters.fasta ]; then
+ mv gene_clusters.fasta ${prefix}.gene_clusters.fa
+ gzip -n ${prefix}.gene_clusters.fa
+ fi
+
+ if [ -f warnings.log ]; then
+ mv warnings.log ${prefix}.warnings.log
+ fi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p')
+ END_VERSIONS
+ """
+
+ stub:
+ // Stub: create one placeholder per declared output so downstream wiring can
+ // be exercised without running SPAdes.
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ // The options assembled in script: (args, --memory, read/HMM flags) are
+ // intentionally not rebuilt here: the stub never uses them, and evaluating
+ // task.memory.toGiga() would make the stub fail when no memory is configured.
+ // Compressed outputs go through gzip so the placeholders are valid archives.
+ // All optional outputs are produced, including the warnings log.
+ """
+ echo "" | gzip > ${prefix}.scaffolds.fa.gz
+ echo "" | gzip > ${prefix}.contigs.fa.gz
+ echo "" | gzip > ${prefix}.transcripts.fa.gz
+ echo "" | gzip > ${prefix}.gene_clusters.fa.gz
+ echo "" | gzip > ${prefix}.assembly.gfa.gz
+ touch ${prefix}.spades.log
+ touch ${prefix}.warnings.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p')
+ END_VERSIONS
+ """
+}
\ No newline at end of file
diff --git a/modules/nf-core/spades/meta.yml b/modules/nf-core/spades/meta.yml
new file mode 100644
index 0000000..65d260d
--- /dev/null
+++ b/modules/nf-core/spades/meta.yml
@@ -0,0 +1,151 @@
+name: spades
+description: Assembles a small genome (bacterial, fungal, viral)
+keywords:
+  - genome
+  - assembly
+  - genome assembler
+  - small genome
+  - de novo assembler
+tools:
+  - spades:
+      description: SPAdes (St. Petersburg genome assembler) is intended for both standard
+        isolates and single-cell MDA bacteria assemblies.
+      homepage: http://cab.spbu.ru/files/release3.15.0/manual.html
+      documentation: http://cab.spbu.ru/files/release3.15.0/manual.html
+      tool_dev_url: https://github.com/ablab/spades
+      doi: 10.1089/cmb.2012.0021
+      licence: ["GPL v2"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - illumina:
+        type: file
+        description: |
+          List of input FastQ (Illumina or PacBio CCS reads) files
+          of size 1 and 2 for single-end and paired-end data,
+          respectively. This input data type is required.
+    - pacbio:
+        type: file
+        description: |
+          List of input PacBio CLR FastQ files of size 1.
+    - nanopore:
+        type: file
+        description: |
+          List of input FastQ files of size 1, originating from Oxford Nanopore technology.
+  - - yml:
+        type: file
+        description: |
+          Path to yml file containing read information.
+          The raw FASTQ files listed in this YAML file MUST be supplied to the respective illumina/pacbio/nanopore input channel(s) _in addition_ to this YML.
+          File entries in this yml must contain only the file name and no paths.
+        pattern: "*.{yml,yaml}"
+  - - hmm:
+        type: file
+        description: File or directory with amino acid HMMs for Spades HMM-guided mode.
+output:
+  - scaffolds:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*.fa.gz"
+      - "*.scaffolds.fa.gz":
+          type: file
+          description: |
+            Gzip-compressed FASTA file containing the assembled scaffolds
+            (SPAdes scaffolds.fasta, renamed with the output prefix)
+          pattern: "*.fa.gz"
+  - contigs:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*.fa.gz"
+      - "*.contigs.fa.gz":
+          type: file
+          description: |
+            Gzip-compressed FASTA file containing the assembled contigs
+            (SPAdes contigs.fasta, renamed with the output prefix)
+          pattern: "*.fa.gz"
+  - transcripts:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*.fa.gz"
+      - "*.transcripts.fa.gz":
+          type: file
+          description: |
+            Gzip-compressed FASTA file containing the assembled transcripts
+            (SPAdes transcripts.fasta, renamed with the output prefix)
+          pattern: "*.fa.gz"
+  - gene_clusters:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*.fa.gz"
+      - "*.gene_clusters.fa.gz":
+          type: file
+          description: |
+            Gzip-compressed FASTA file containing the gene clusters
+            (SPAdes gene_clusters.fasta, renamed with the output prefix)
+          pattern: "*.fa.gz"
+  - gfa:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*.gfa.gz"
+      - "*.assembly.gfa.gz":
+          type: file
+          description: |
+            Gzip-compressed GFA file containing the assembly graph
+            (SPAdes assembly_graph_with_scaffolds.gfa, renamed with the output prefix)
+          pattern: "*.gfa.gz"
+  - warnings:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.warnings.log":
+          type: file
+          description: |
+            SPAdes warnings log (warnings.log, renamed with the output prefix);
+            only produced when SPAdes wrote a warnings.log file
+  - log:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*.spades.log"
+      - "*.spades.log":
+          type: file
+          description: |
+            Log file of the SPAdes run,
+            named with the output prefix
+          pattern: "*.spades.log"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@JoseEspinosa"
+  - "@drpatelh"
+  - "@d4straub"
+maintainers:
+  - "@JoseEspinosa"
+  - "@drpatelh"
+  - "@d4straub"
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 34fa99b..8a08d30 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -63,15 +63,30 @@ params {
finaldir = '08-final'
// location for autodownload data like databases
- dbs = 'nextflow-autodownload-databases'
+ databases = 'nextflow-autodownload-databases'
// optional profile configurations, mostly necessary for HPC execution [lsf, slurm]
workdir = 'work'
singularity_cachedir = 'singularity'
publish_dir_mode = 'copy'
+
+ // MultiQC options
+ multiqc_config = null
+ multiqc_title = null
+ multiqc_logo = null
+ multiqc_methods_description = null
+
+ // Max resource options
+ // Defaults only, expecting to be overwritten
+ max_memory = '1.TB'
+ max_cpus = 32
+ max_time = '168.h' // 7 days
}
+includeConfig 'configs/base.config'
+includeConfig 'configs/modules.config'
+
profiles {
//executors
@@ -81,33 +96,28 @@ profiles {
cpus = params.max_cores
}
workDir = params.workdir
- params.databases = params.dbs
params.cloudProcess = false
includeConfig 'configs/local.config'
}
lsf {
workDir = params.workdir
- params.databases = params.dbs
executor {
name = "lsf"
queueSize = 200
}
params.cloudProcess = true
process.cache = "lenient"
- includeConfig 'configs/node.config'
}
slurm {
workDir = params.workdir
- params.databases = params.dbs
executor {
name = "slurm"
queueSize = 200
}
params.cloudProcess = true
process.cache = "lenient"
- includeConfig 'configs/node.config'
}
@@ -137,7 +147,6 @@ profiles {
cpus = params.max_cores
}
workDir = params.workdir
- params.databases = params.dbs
params.cloudProcess = false
includeConfig 'configs/local.config'
docker { enabled = true }
@@ -152,7 +161,6 @@ profiles {
params.cloudProcess = true
process.cache = "lenient"
- includeConfig 'configs/node.config'
singularity {
enabled = true
@@ -177,13 +185,38 @@ profiles {
}
params.cloudProcess = true
- includeConfig 'configs/node.config'
docker { enabled = true }
+ }
+}
- // we need a docker also for basic functionalities in the cloud
- process {
- withLabel: noDocker { cpus = 4; memory = '8.0 GB'; container = 'nanozoo/template:3.8--ccd0653' }
+def check_max(obj, type) { // Cap a requested resource at the matching params.max_* limit; type is 'memory', 'time' or 'cpus'
+ if (type == 'memory') {
+ try {
+ if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) // compareTo == 1 is treated as "request exceeds the limit"
+ return params.max_memory as nextflow.util.MemoryUnit
+ else
+ return obj
+ } catch (all) { // any failure (e.g. an unusable params.max_memory) is reported and the request is returned unchanged
+ println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
+ return obj
+ }
+ } else if (type == 'time') {
+ try {
+ if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) // same clamping rule as for memory, using Duration
+ return params.max_time as nextflow.util.Duration
+ else
+ return obj
+ } catch (all) { // any failure (e.g. an unusable params.max_time) is reported and the request is returned unchanged
+ println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
+ return obj
+ }
+ } else if (type == 'cpus') {
+ try {
+ return Math.min( obj, params.max_cpus as int ) // the smaller of the request and params.max_cpus
+ } catch (all) { // any failure (e.g. a non-integer params.max_cpus) is reported and the request is returned unchanged
+ println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
+ return obj
+ }
+ } // any other type value falls through without an explicit return
-}
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 47a5461..3e400e2 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -148,19 +148,19 @@
"description": "Input parameters",
"properties": {
"virome": {
- "type": "string",
+ "type": "boolean",
"description": "VirSorter parameter, set when running a data set mostly composed of viruses"
},
"chromomap": {
- "type": "string",
+ "type": "boolean",
"description": "feature to activate chromomap plot"
},
"balloon": {
- "type": "string",
+ "type": "boolean",
"description": "feature to activate balloon plot"
},
"onlyannotate": {
- "type": "string",
+ "type": "boolean",
"description": "Only annotate the input FASTA (no virus prediction, only contig length filtering)"
},
"mashmap_len": {
@@ -169,7 +169,7 @@
"description": "Mashmap mapping segment length, shorter sequences will be ignored"
},
"mashmap": {
- "type": "string",
+ "type": "boolean",
"description": "Map the viral contigs against the provided reference"
},
"evalue": {
@@ -189,7 +189,7 @@
},
"factor": {
"type": "string",
- "default": "/Users/kates/Desktop/EBI/MGnify/pipelines/emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv",
+ "default": "emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv",
"description": "Path to file with viral assemblies metadata, including taxon-specific factors"
},
"sankey": {
@@ -292,7 +292,7 @@
"fa_icon": "fas fa-dna",
"description": "Nextflow arguments",
"properties": {
- "dbs": {
+ "databases": {
"type": "string",
"default": "nextflow-autodownload-databases",
"description": "directory path to databases"
@@ -307,13 +307,8 @@
"default": "singularity",
"description": "singularity folder"
},
- "databases": {
- "type": "string",
- "default": "nextflow-autodownload-databases",
- "description": "directory path to databases"
- },
"cloudProcess": {
- "type": "string",
+ "type": "boolean",
"description": "run on cloud"
}
}
diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf
index 57810b2..6c672ce 100644
--- a/subworkflows/local/annotate.nf
+++ b/subworkflows/local/annotate.nf
@@ -69,10 +69,10 @@ workflow ANNOTATE {
ANNOTATION( RATIO_EVALUE.out )
// plot visuals --> PDFs
- PLOT_CONTIG_MAP( ANNOTATION.out )
+ PLOT_CONTIG_MAP( ANNOTATION.out.annotations )
// assign lineages
- ASSIGN( ANNOTATION.out, ncbi_db, factor_file )
+ ASSIGN( ANNOTATION.out.annotations, ncbi_db, factor_file )
// blast IMG/VR for more information
if (params.blastextend) {
@@ -93,25 +93,22 @@ workflow ANNOTATE {
}
CHECKV(
- predicted_contigs.combine( contigs.map { name, fasta -> fasta }),
- checkv_db
+ predicted_contigs,
+ checkv_db.first()
)
-
- viphos_annotations = ANNOTATION.out.map { _, __, annotations -> annotations }.collect()
- taxonomy_annotations = ASSIGN.out.map { _, __, taxonomy -> taxonomy }.collect()
- checkv_results = CHECKV.out.map { _, __, quality_summary, ___ -> quality_summary }.collect()
+
+ viphos_annotations = ANNOTATION.out.annotations.map{meta, type, annotation -> [meta, annotation]}.groupTuple()
+ taxonomy_annotations = ASSIGN.out.map{meta, type, annotation -> [meta, annotation]}.groupTuple()
+ checkv_results = CHECKV.out.map{meta, type, quality -> [meta, quality]}.groupTuple()
WRITE_GFF(
- contigs.first(),
- viphos_annotations,
- taxonomy_annotations,
- checkv_results
+ contigs.join(viphos_annotations).join(taxonomy_annotations).join(checkv_results)
)
-
- predicted_contigs_filtered = predicted_contigs.map { id, set_name, fasta -> [set_name, id, fasta] }
- plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { id, set_name, dir, table -> [set_name, table] }
+
+ predicted_contigs_filtered = predicted_contigs.map { meta, set_name, fasta -> [set_name, meta, fasta] }
+ plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { meta, set_name, dir, table -> [set_name, table] }
chromomap_ch = predicted_contigs_filtered.join(plot_contig_map_filtered).map { set_name, assembly_name, fasta, tsv -> [assembly_name, set_name, fasta, tsv]}
-
+
emit:
assign_output = ASSIGN.out
chromomap = chromomap_ch
diff --git a/subworkflows/local/assemble_illumina.nf b/subworkflows/local/assemble_illumina.nf
index 7fe36be..d45aa6c 100644
--- a/subworkflows/local/assemble_illumina.nf
+++ b/subworkflows/local/assemble_illumina.nf
@@ -2,24 +2,39 @@
Optional assembly step, not fully implemented and tested.
*/
-include { FASTP } from '../../modules/local/fastp'
-include { FASTQC } from '../../modules/local/fastqc'
-include { MULTIQC } from '../../modules/local/multiqc'
-include { SPADES } from '../../modules/local/spades'
+include { FASTP } from '../../modules/nf-core/fastp'
+include { FASTQC as FASTQC_BEFORE } from '../../modules/nf-core/fastqc'
+include { FASTQC as FASTQC_AFTER } from '../../modules/nf-core/fastqc'
+include { SPADES } from '../../modules/nf-core/spades'
workflow ASSEMBLE_ILLUMINA {
take: reads
main:
+ // QC before filtering
+ FASTQC_BEFORE(reads)
+
// trimming
- FASTP(reads)
+ FASTP(
+ reads,
+ [],
+ false,
+ false,
+ false
+ )
+
+ // QC after filtering
+ FASTQC_AFTER(FASTP.out.reads)
- // read QC
- MULTIQC(FASTQC(FSATP.out))
-
// assembly
- SPADES(FASTP.out)
+    SPADES(FASTP.out.reads.map { meta, fastq -> [meta, fastq, [], []] }, [], []) // SPADES inputs: [meta, illumina, pacbio, nanopore], yml, hmm; only Illumina reads are supplied
+
+ ch_multiqc_files = Channel.empty()
+ ch_multiqc_files = ch_multiqc_files.mix( FASTQC_BEFORE.out.zip.collect{it[1]}.ifEmpty([]) )
+ ch_multiqc_files = ch_multiqc_files.mix( FASTP.out.json.collect{it[1]}.ifEmpty([]) )
+ ch_multiqc_files = ch_multiqc_files.mix( FASTQC_AFTER.out.zip.collect{it[1]}.ifEmpty([]) )
emit:
- assembly = SPADES.out
+ assembly = SPADES.out.contigs
+ ch_multiqc_files = ch_multiqc_files
}
\ No newline at end of file
diff --git a/subworkflows/local/detect.nf b/subworkflows/local/detect.nf
index bc4a37e..ea9fd64 100644
--- a/subworkflows/local/detect.nf
+++ b/subworkflows/local/detect.nf
@@ -18,14 +18,12 @@ workflow DETECT {
main:
- renamed_ch = assembly_renamed_length_filtered.map {name, renamed_fasta, map, _, __ -> {
- tuple(name, renamed_fasta, map)
- }
+ renamed_ch = assembly_renamed_length_filtered.map {
+ meta, renamed_fasta, map, _, __ -> tuple(meta, renamed_fasta, map)
}
- length_filtered_ch = assembly_renamed_length_filtered.map { name, _, __, filtered_fasta, contig_number -> {
- tuple(name, filtered_fasta, contig_number)
- }
+ length_filtered_ch = assembly_renamed_length_filtered.map {
+ meta, _, __, filtered_fasta, contig_number -> tuple(meta, filtered_fasta, contig_number)
}
// virus detection --> VirSorter, VirFinder and PPR-Meta
@@ -37,5 +35,5 @@ workflow DETECT {
PARSE( length_filtered_ch.join( VIRFINDER.out ).join( VIRSORTER.out ).join( PPRMETA.out ) )
emit:
- detect_output = PARSE.out.join(renamed_ch).transpose().map{ name, fasta, vs_meta, log, renamed_fasta, map -> tuple (name, fasta, map) }
+ detect_output = PARSE.out.join(renamed_ch).transpose().map{ meta, fasta, vs_meta, log, renamed_fasta, map -> tuple (meta, fasta, map) }
}
\ No newline at end of file
diff --git a/subworkflows/local/preprocess.nf b/subworkflows/local/preprocess.nf
index 86cb231..efff437 100644
--- a/subworkflows/local/preprocess.nf
+++ b/subworkflows/local/preprocess.nf
@@ -12,12 +12,12 @@ workflow PREPROCESS {
main:
- RENAME(assembly)
+ RENAME(assembly) // out: (meta, renamed.fasta, map)
// filter contigs by length
- LENGTH_FILTERING(RENAME.out)
+ LENGTH_FILTERING(RENAME.out) // out: (meta, filt_fasta, env)
emit:
- // tuple val(name), file("${name}_renamed.fasta"), file("${name}_map.tsv"), file("${name}*filt*.fasta"), env(CONTIGS)
+ // tuple val(meta), file("${meta.id}_renamed.fasta"), file("${meta.id}_map.tsv"), file("${meta.id}*filt*.fasta"), env(CONTIGS)
preprocessed_data = RENAME.out.join(LENGTH_FILTERING.out, by: 0)
}
\ No newline at end of file
diff --git a/workflows/virify.nf b/workflows/virify.nf
index a892dc0..3344c2b 100755
--- a/workflows/virify.nf
+++ b/workflows/virify.nf
@@ -4,19 +4,27 @@
* INPUT CHANNELS
**************************/
-input_ch = Channel.empty()
-mashmap_ref_ch = Channel.empty()
-factor_file = Channel.empty()
+input_ch = Channel.empty()
+mashmap_ref_ch = Channel.empty()
+factor_file = Channel.empty()
+ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
+ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.fromPath("$projectDir/assets/mgnify_logo.png")
+ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+
include { samplesheetToList } from 'plugin/nf-schema'
if ( params.samplesheet ) {
groupReads = { id, assembly, fq1, fq2 ->
if (fq1 == []) {
- return tuple(id, assembly)
+ return tuple(["id": id],
+ assembly
+ )
} else {
if (params.assemble) {
- return tuple(id, [fq1, fq2])
+ return tuple(["id": id],
+ [fq1, fq2])
}
else {
exit 1, "input missing, use [--assemble] flag with raw reads"
@@ -26,10 +34,11 @@ if ( params.samplesheet ) {
samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json"))
input_ch = samplesheet.map(groupReads)
}
+
// one sample of assembly
if (params.fasta) {
input_ch = Channel.fromPath( params.fasta, checkIfExists: true)
- .map { file -> tuple(file.simpleName, file) }
+ .map { file -> tuple(["id": file.simpleName], file) }
}
// mashmap input
@@ -41,6 +50,11 @@ if (params.mashmap) {
if (params.factor) {
factor_file = file( params.factor, checkIfExists: true)
}
+/**************************
+* MODULES
+**************************/
+
+include { MULTIQC } from '../modules/nf-core/multiqc'
/**************************
* SUB WORKFLOWS
@@ -83,6 +97,7 @@ workflow VIRIFY {
}
// ----------- rename fasta + length filtering
+ // out: (meta, renamed_fasta, map, filtered_fasta, env)
PREPROCESS( assembly_ch )
// ----------- if --onlyannotate - skip DETECT step
@@ -98,11 +113,12 @@ workflow VIRIFY {
DOWNLOAD_DATABASES.out.virfinder_db,
DOWNLOAD_DATABASES.out.pprmeta_git
)
- postprocess_input_ch = DETECT.out
+ // (meta, fasta, map)
+ postprocess_input_ch = DETECT.out.detect_output
}
// ----------- POSTPROCESS: restore fasta file
- POSTPROCESS(postprocess_input_ch)
+ POSTPROCESS(postprocess_input_ch) // out: (meta, type(HC/LC/PP), fasta)
// ----------- ANNOTATE
ANNOTATE(
@@ -126,5 +142,15 @@ workflow VIRIFY {
ANNOTATE.out.assign_output,
ANNOTATE.out.chromomap
)
+
+ if (params.assemble) {
+ ch_multiqc_files = ASSEMBLE_ILLUMINA.out.ch_multiqc_files
+ MULTIQC(
+ ch_multiqc_files.collect(),
+ ch_multiqc_config.toList(),
+ ch_multiqc_custom_config.toList(),
+ ch_multiqc_logo.toList()
+ )
+ }
}