diff --git a/assets/dummy_file b/assets/dummy_file new file mode 100644 index 0000000..e69de29 diff --git a/modules/local/assigntaxaspecies.nf b/modules/local/assigntaxaspecies.nf index ae29078..c932d7a 100644 --- a/modules/local/assigntaxaspecies.nf +++ b/modules/local/assigntaxaspecies.nf @@ -1,89 +1,101 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process ASSIGNTAXASPECIES { - tag '$bam' - label 'process_me' + label 'process_medium' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': - 'biocontainers/YOUR-TOOL-HERE' }" + container "ghcr.io/h3abionet/tada:dev" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - path bam - + path(st) + path(ref) + path(sp) + output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - path "*.bam", emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - + path("tax_final.RDS"), emit: taxtab + path("bootstrap_final.RDS"), emit: bootstraps + when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. 
https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    // Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
+    def runSpecies = sp.name != "dummy_file" ? "TRUE" : "FALSE"
     """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        $bam
+    #!/usr/bin/env Rscript
+    suppressPackageStartupMessages(library(dada2))
+
+    seqs <- readRDS("${st}")
+    seqtab <- seqs\$seq
+
+    # Assign taxonomy
+    tax <- NULL
+    boots <- NULL

-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        assigntaxaspecies: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    if ( ${params.tax_batch} == 0 || length(seqtab) < ${params.tax_batch} ) { # no batching; run everything at once
+        cat("Running all ASVs in one batch\\n")
+        tax <- assignTaxonomy(seqtab, "${ref}",
+                        multithread=${task.cpus},
+                        tryRC = TRUE,
+                        outputBootstraps = TRUE,
+                        minBoot = ${params.min_boot},
+                        verbose = TRUE)
+        boots <- tax\$boot
+        if (${runSpecies}) {
+            # addSpecies expects the taxonomy matrix, not the list
+            # returned when outputBootstraps = TRUE
+            tax <- addSpecies(tax\$tax, "${sp}",
+                        tryRC = TRUE,
+                        verbose = TRUE)
+        } else {
+            tax <- tax\$tax
+        }
+    } else {
+        # process ASVs in batches (see https://github.com/benjjneb/dada2/issues/1429)
+        to_split <- seq(1, length(seqtab), by = ${params.tax_batch})
+        to_split2 <- c(to_split[2:length(to_split)]-1, length(seqtab))
+
+        for(i in 1:length(to_split)){
+            cat(paste("Running ASVs", to_split[i], "to", to_split2[i], "\\n"))
+            seqtab2 <- seqtab[to_split[i]:to_split2[i]]
+            tax2 <- assignTaxonomy(seqtab2, "${ref}",
+                        multithread=${task.cpus},
+                        tryRC = TRUE,
+                        outputBootstraps = TRUE,
+                        minBoot = ${params.min_boot},
+                        verbose = TRUE)
+
+            if (is.null(boots)) {
+                boots <- tax2\$boot
+            } else {
+                boots <- rbind(boots, tax2\$boot)
+            }
+
+            if (${runSpecies}) {
+                tax2 <- addSpecies(tax2\$tax,
+                            refFasta = "${sp}",
+                            tryRC = TRUE,
+                            verbose = TRUE)
+            } else {
+                tax2 <- tax2\$tax
+            }
+            if (is.null(tax)) {
+                tax <- tax2
+            } else {
+                tax <- rbind(tax, tax2)
+            }
+        }
+    }
+
+    # make sure these are in the same order;
+    # they should be, but we don't assume this
+    rownames(tax) <- seqs[rownames(tax),]\$id
+    rownames(boots) <- seqs[rownames(boots),]\$id
+
+    # Write original data
+    saveRDS(tax, "tax_final.RDS")
+    saveRDS(boots, "bootstrap_final.RDS")
     """

     stub:
     def args = task.ext.args ?: ''
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        assigntaxaspecies: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch tax_final.RDS
+    touch bootstrap_final.RDS
     """
 }
diff --git a/modules/local/dadainfer.nf b/modules/local/dadainfer.nf
index 40f2f51..79ae2c9 100644
--- a/modules/local/dadainfer.nf
+++
b/modules/local/dadainfer.nf @@ -8,8 +8,6 @@ process DADAINFER { tuple val(readmode), file(err), file(reads) output: - // TODO nf-core: List additional required output channels/values here - // path "versions.yml" , emit: versions path("all.dd.${readmode}.RDS"), emit: inferred when: @@ -52,11 +50,6 @@ process DADAINFER { stub: def args = task.ext.args ?: '' - - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ # add some real stuff here touch all.dd.${readmode}.RDS diff --git a/modules/local/decipher.nf b/modules/local/decipher.nf index 7dafc2c..d150814 100644 --- a/modules/local/decipher.nf +++ b/modules/local/decipher.nf @@ -1,47 +1,13 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process DECIPHER { - tag '$bam' - label 'process_me' + label 'process_medium' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': - 'biocontainers/YOUR-TOOL-HERE' }" + container "ghcr.io/h3abionet/tada:dev" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
-    path bam
+    path(seqs)

     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    path "*.bam", emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    path "versions.yml"           , emit: versions
+    path("aligned_seqs.fna"), optional: true, emit: alignment

     when:
     task.ext.when == null || task.ext.when

@@ -49,41 +15,21 @@ process DECIPHER {

     script:
     def args = task.ext.args ?: ''
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
     """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        $bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        decipher: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    #!/usr/bin/env Rscript
+    suppressPackageStartupMessages(library(dada2))
+    suppressPackageStartupMessages(library(DECIPHER))
+
+    seqs <- readDNAStringSet("${seqs}")
+    alignment <- AlignSeqs(seqs,
+               anchor=NA,
+               processors = ${task.cpus})
+    writeXStringSet(alignment, "aligned_seqs.fna")
     """

     stub:
     def args = task.ext.args ?: ''
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        decipher: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch aligned_seqs.fna
     """
 }
diff --git a/modules/local/fasttree.nf b/modules/local/fasttree.nf
index 9297be2..1c035cc 100644
--- a/modules/local/fasttree.nf
+++ b/modules/local/fasttree.nf
@@ -1,89 +1,30 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-// https://github.com/nf-core/modules/tree/master/modules/nf-core/
-// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-// https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-// All other parameters MUST be provided using the "task.ext" directive, see here:
-// https://www.nextflow.io/docs/latest/process.html#ext
-// where "task.ext" is a string.
-// Any parameters that need to be evaluated in the context of a particular sample
-// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process FASTTREE { - tag '$bam' - label 'process_m' + label 'process_medium' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': - 'biocontainers/YOUR-TOOL-HERE' }" + container "quay.io/biocontainers/fasttree:2.1.10--h14c3975_3" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - path bam + path(aln) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - path "*.bam", emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + path("fasttree.newick"), emit: treeGTR when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;)
-    """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        $bam
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fasttree: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    """
+    OMP_NUM_THREADS=${task.cpus} FastTree -nt \\
+        -gtr -gamma -spr 4 -mlacc 2 -slownni \\
+        -out fasttree.newick \\
+        ${aln}
     """

     stub:
     def args = task.ext.args ?: ''
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fasttree: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch fasttree.newick
     """
 }
diff --git a/modules/local/filterandtrim.nf b/modules/local/filterandtrim.nf
index d5ce8e6..c626a78 100644
--- a/modules/local/filterandtrim.nf
+++ b/modules/local/filterandtrim.nf
@@ -1,67 +1,24 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-// https://github.com/nf-core/modules/tree/master/modules/nf-core/
-// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-// https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-// All other parameters MUST be provided using the "task.ext" directive, see here:
-// https://www.nextflow.io/docs/latest/process.html#ext
-// where "task.ext" is a string.
-// Any parameters that need to be evaluated in the context of a particular sample
-// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-// unless there is a run-time, storage advantage in implementing in this way
-// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//   bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-// list (`[]`) instead of a file can be used to work around this issue.
-
 process FILTERANDTRIM {
     tag "$meta.id"
     label 'process_medium'

-    // TODO nf-core: List required Conda package(s).
-    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
-    // TODO: pin to a versioned docker instance
     container "ghcr.io/h3abionet/tada:dev"

     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
tuple val(meta), path(reads) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - // tuple val(meta), path("*.fastq.gz"), emit: trimmed_reads - // tuple val(meta), path("*.txt"), emit: trimmed_report tuple val(meta), path("${meta.id}.R1.filtered.fastq.gz"), optional: true, emit: trimmed_R1 tuple val(meta), path("${meta.id}.R2.filtered.fastq.gz"), optional: true, emit: trimmed_R2 tuple val(meta), path("${meta.id}.R[12].filtered.fastq.gz"), optional: true, emit: trimmed path("*.trimmed.txt"), emit: trimmed_report - // TODO nf-core: List additional required output channels/values here - // path "versions.yml" , emit: versions - when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) """ #!/usr/bin/env Rscript suppressPackageStartupMessages(library(dada2)) @@ -91,10 +48,6 @@ process FILTERANDTRIM { stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ touch ${prefix}.R1.filtered.fastq.gz touch ${prefix}.R2.filtered.fastq.gz diff --git a/modules/local/learnerrors.nf b/modules/local/learnerrors.nf index caaf4fe..45089e9 100644 --- a/modules/local/learnerrors.nf +++ b/modules/local/learnerrors.nf @@ -8,8 +8,6 @@ process LEARNERRORS { tuple val(readmode), file(reads) output: - // TODO nf-core: List additional required output channels/values here - // path "versions.yml" , emit: versions tuple val(readmode), file("errors.${readmode}.RDS"), emit: error_models // path("errors.R[12].RDS"), emit: errorModelsPerSample path("${readmode}.err.pdf"), emit: pdf @@ -65,10 +63,6 @@ process LEARNERRORS { stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: 
https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ # TODO: make a proper stub """ diff --git a/modules/local/mergetrimtables.nf b/modules/local/mergetrimtables.nf index 4d255dd..0f34f92 100644 --- a/modules/local/mergetrimtables.nf +++ b/modules/local/mergetrimtables.nf @@ -8,10 +8,6 @@ process MERGETRIMTABLES { output: path("all.trimmed.csv"), emit: trimmed_report - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - // tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - // path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -35,10 +31,6 @@ process MERGETRIMTABLES { stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 """ touch "all.trimmed.csv" """ diff --git a/modules/local/phangorn.nf b/modules/local/phangorn.nf index c8a6ee4..d4ca35d 100644 --- a/modules/local/phangorn.nf +++ b/modules/local/phangorn.nf @@ -1,47 +1,15 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process PHANGORN { - tag '$bam' - label 'process_me' + label 'process_medium' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-        'biocontainers/YOUR-TOOL-HERE' }"
+    container "ghcr.io/h3abionet/tada:dev"

     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    path bam
+    path(aln)

     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    path "*.bam", emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    path "versions.yml"           , emit: versions
+    path("unrooted.phangorn.RDS"), emit: treeRDS
+    path("unrooted.phangorn.newick"), emit: tree
+    path("unrooted.phangorn.GTR.newick"), emit: treeGTR

     when:
     task.ext.when == null || task.ext.when

@@ -49,41 +17,30 @@ process PHANGORN {

     script:
     def args = task.ext.args ?: ''
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
     """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        $bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        phangorn: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    #!/usr/bin/env Rscript
+    suppressPackageStartupMessages(library(phangorn))
+
+    phang.align <- read.phyDat("${aln}",
+                    format = "fasta",
+                    type = "DNA")
+
+    dm <- dist.ml(phang.align)
+    treeNJ <- NJ(dm) # Note, tip order != sequence order
+    fit <- pml(treeNJ, data=phang.align)
+    write.tree(fit\$tree, file = "unrooted.phangorn.newick")
+
+    ## pml() may warn that negative edge lengths were changed to 0
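+    # update() swaps in the parameters for a GTR+G+I fit: k=4 discrete Gamma
+    # rate categories and an initial 20% invariant sites; optim.pml() below
+    # then optimizes both along with the GTR rates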
+    fitGTR <- update(fit, k=4, inv=0.2)
+    fitGTR <- optim.pml(fitGTR, model="GTR", optInv=TRUE, optGamma=TRUE,
+                    rearrangement = "stochastic", control = pml.control(trace = 0))
+    saveRDS(fitGTR, "unrooted.phangorn.RDS")
+    write.tree(fitGTR\$tree, file = "unrooted.phangorn.GTR.newick")
     """

     stub:
     def args = task.ext.args ?: ''
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        phangorn: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch unrooted.phangorn.RDS
+    touch unrooted.phangorn.newick
+    touch unrooted.phangorn.GTR.newick
     """
 }
diff --git a/modules/local/plotqualityprofile.nf b/modules/local/plotqualityprofile.nf
index c0e8f10..111928a 100644
--- a/modules/local/plotqualityprofile.nf
+++ b/modules/local/plotqualityprofile.nf
@@ -1,20 +1,3 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-// https://github.com/nf-core/modules/tree/master/modules/nf-core/
-// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-// https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-// All other parameters MUST be provided using the "task.ext" directive, see here:
-// https://www.nextflow.io/docs/latest/process.html#ext
-// where "task.ext" is a string.
-// Any parameters that need to be evaluated in the context of a particular sample
-// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-// unless there is a run-time, storage advantage in implementing in this way
-// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//   bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-// list (`[]`) instead of a file can be used to work around this issue.
-
 process PLOTQUALITYPROFILE {
     tag "$meta.id"
     label 'process_low'
@@ -23,12 +6,6 @@ process PLOTQUALITYPROFILE {
     container "ghcr.io/h3abionet/tada:dev"

     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
     tuple val(meta), path(reads)

     output:
@@ -45,15 +22,6 @@ process PLOTQUALITYPROFILE {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g.
https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
     """
     #!/usr/bin/env Rscript
     suppressPackageStartupMessages(library(dada2))
@@ -71,10 +39,6 @@ process PLOTQUALITYPROFILE {
     stub:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
     touch ${prefix}.PDF
     """
 }
diff --git a/modules/local/pooledseqtable.nf b/modules/local/pooledseqtable.nf
index 6185e22..d8ec902 100644
--- a/modules/local/pooledseqtable.nf
+++ b/modules/local/pooledseqtable.nf
@@ -8,11 +8,9 @@ process POOLEDSEQTABLE {
     path(filts)

     output:
-    path("seqtab.filtered.RDS"), emit: seqtable
-    path("all.merged.RDS"), optional: true, emit: merged
-    path("seqtab.full.RDS"), emit: seqtabQC// we keep this for comparison and possible QC
-    // TODO nf-core: List additional required output channels/values here
-    // path "versions.yml"           , emit: versions
+    path("seqtab.filtered.RDS"), emit: filtered_seqtable
+    path("all.merged.RDS"), optional: true, emit: merged_seqs
+    path("seqtab.full.RDS"), emit: full_seqtable // we keep this for comparison and possible QC

     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/removechimeras.nf b/modules/local/removechimeras.nf
index a99f754..bd6b2c5 100644
--- a/modules/local/removechimeras.nf
+++ b/modules/local/removechimeras.nf
@@ -1,91 +1,41 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-// https://github.com/nf-core/modules/tree/master/modules/nf-core/
-// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-// https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-// All other parameters MUST be provided using the "task.ext" directive, see here:
-// https://www.nextflow.io/docs/latest/process.html#ext
-// where "task.ext" is a string.
-// Any parameters that need to be evaluated in the context of a particular sample
-// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-// unless there is a run-time, storage advantage in implementing in this way
-// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//   bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow.
However, using an empty
-// list (`[]`) instead of a file can be used to work around this issue.
-
 process REMOVECHIMERAS {
-    tag "$meta.id"
-    label 'process_me'
+    label 'process_medium'

-    // TODO nf-core: List required Conda package(s).
-    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-        'biocontainers/YOUR-TOOL-HERE' }"
+    container "ghcr.io/h3abionet/tada:dev"

     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    tuple val(meta), path(bam)
+    path(st)

     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    tuple val(meta), path("*.bam"), emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    path "versions.yml"           , emit: versions
+    path("seqtab.nonchim.RDS"), emit: nonchim_seqtable

     when:
     task.ext.when == null || task.ext.when

     script:
+    def chimOpts = params.removeBimeraDenovo_options ? ", ${params.removeBimeraDenovo_options}" : ""
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
     """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        -o ${prefix}.bam \\
-        -T $prefix \\
-        $bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        removechimeras: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    #!/usr/bin/env Rscript
+    suppressPackageStartupMessages(library(dada2))
+    st.all <- readRDS("${st}")
+
+    # Remove chimeras
+    seqtab <- removeBimeraDenovo(
+        st.all,
+        method="consensus",
+        multithread=${task.cpus},
+        verbose=TRUE ${chimOpts}
+        )
+
+    saveRDS(seqtab, "seqtab.nonchim.RDS")
     """

     stub:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        removechimeras: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch seqtab.nonchim.RDS
     """
 }
diff --git a/modules/local/renameasvs.nf b/modules/local/renameasvs.nf
index 0ce3890..16574b6 100644
--- a/modules/local/renameasvs.nf
+++ b/modules/local/renameasvs.nf
@@ -1,47 +1,16 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-// https://github.com/nf-core/modules/tree/master/modules/nf-core/
-// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-// https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-// All other parameters MUST be provided using the "task.ext" directive, see here:
-// https://www.nextflow.io/docs/latest/process.html#ext
-// where "task.ext" is a string.
-// Any parameters that need to be evaluated in the context of a particular sample
-// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-// unless there is a run-time, storage advantage in implementing in this way
-// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//   bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-// list (`[]`) instead of a file can be used to work around this issue.
-
 process RENAMEASVS {
-    tag '$bam'
     label 'process_low'

-    // TODO nf-core: List required Conda package(s).
-    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': - 'biocontainers/YOUR-TOOL-HERE' }" + container "ghcr.io/h3abionet/tada:dev" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - path bam + path(st) + path(rawst) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - path "*.bam", emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + path("seqtab_final.${params.id_type}.RDS"), emit: seqtable_renamed + path("asvs.${params.id_type}.nochim.fna"), emit: nonchimeric_asvs + path("readmap.RDS"), emit: readmap when: task.ext.when == null || task.ext.when @@ -49,41 +18,46 @@ process RENAMEASVS { script: def args = task.ext.args ?: '' - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - renameasvs: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS + #!/usr/bin/env Rscript + suppressPackageStartupMessages(library(dada2)) + suppressPackageStartupMessages(library(ShortRead)) + suppressPackageStartupMessages(library(digest)) + + # read RDS w/ data + st <- readRDS("${st}") + st.raw <- readRDS("${rawst}") + + # get sequences + seqs <- colnames(st) + seqs.raw <- colnames(st.raw) + + # get IDs based on idType + ids_study <- switch("${params.id_type}", simple=paste("ASV", 1:ncol(st), sep = ""), + md5=sapply(colnames(st), digest, algo="md5")) + ids_study.raw <- switch("${params.id_type}", simple=paste("ASV", 1:ncol(st.raw), sep = ""), + md5=sapply(colnames(st.raw), digest, algo="md5")) + + # sub IDs + colnames(st) <- unname(ids_study) + colnames(st.raw) <- unname(ids_study.raw) + + # generate FASTA + seqs.dna <- ShortRead(sread = DNAStringSet(seqs), id = BStringSet(ids_study)) + # Write out fasta file. 
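+    # IDs are either sequential ("ASV1".."ASVn") or md5 digests of each
+    # sequence, depending on params.id_type above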
+    writeFasta(seqs.dna, file = 'asvs.${params.id_type}.nochim.fna')
+
+    seqs.dna.raw <- ShortRead(sread = DNAStringSet(seqs.raw), id = BStringSet(ids_study.raw))
+    writeFasta(seqs.dna.raw, file = 'asvs.${params.id_type}.raw.fna')
+
+    # Write modified data (note we only keep the no-chimera reads for the next stage)
+    saveRDS(st, "seqtab_final.${params.id_type}.RDS")
+    # index the readmap by sequence so downstream steps can map sequences to IDs
+    saveRDS(data.frame(id = ids_study, seq = seqs, row.names = seqs), "readmap.RDS")
     """

     stub:
     def args = task.ext.args ?: ''
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        renameasvs: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch seqtab_final.${params.id_type}.RDS
+    touch asvs.${params.id_type}.nochim.fna
+    touch readmap.RDS
     """
 }
diff --git a/modules/local/roottree.nf b/modules/local/roottree.nf
index ea20d67..284dda1 100644
--- a/modules/local/roottree.nf
+++ b/modules/local/roottree.nf
@@ -1,47 +1,15 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-// https://github.com/nf-core/modules/tree/master/modules/nf-core/
-// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-// https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-// All other parameters MUST be provided using the "task.ext" directive, see here:
-// https://www.nextflow.io/docs/latest/process.html#ext
-// where "task.ext" is a string.
-// Any parameters that need to be evaluated in the context of a particular sample
-// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-// unless there is a run-time, storage advantage in implementing in this way
-// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//   bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-// list (`[]`) instead of a file can be used to work around this issue.
-
 process ROOTTREE {
-    tag '$bam'
-    label 'process_low'
+    label 'process_medium'

-    // TODO nf-core: List required Conda package(s).
-    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-        'biocontainers/YOUR-TOOL-HERE' }"
+    container "ghcr.io/h3abionet/tada:dev"

     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g.
indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    path bam
+    path(tree)
+    val(tree_tool)

     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    path "*.bam", emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    path "versions.yml"           , emit: versions
+    path("rooted.${tree_tool}.newick"), emit: rooted_tree
+    path("rooted.${tree_tool}.RDS"), emit: rooted_tree_RDS

     when:
     task.ext.when == null || task.ext.when

@@ -49,41 +17,23 @@ process ROOTTREE {

     script:
     def args = task.ext.args ?: ''
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
     """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        $bam
+    #!/usr/bin/env Rscript
+    suppressPackageStartupMessages(library(phangorn))
+    suppressPackageStartupMessages(library(ape))
+
+    tree <- read.tree(file = "${tree}")
+
+    midtree <- midpoint(tree)
+
+    write.tree(midtree, file = "rooted.${tree_tool}.newick")
+    saveRDS(midtree, "rooted.${tree_tool}.RDS")

-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        roottree: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
     """

     stub:
     def args = task.ext.args ?: ''
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        roottree: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch rooted.${tree_tool}.newick
+    touch rooted.${tree_tool}.RDS
     """
 }
diff --git a/nextflow.config b/nextflow.config
index 419f5a9..ad7232b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -47,8 +47,8 @@ params {
     pacbio_strict_match = true

     // setting these assumes use of cutadapt for trimming
-    fwd_adapter = false
-    rev_adapter = false
+    fwd_adapter = ""
+    rev_adapter = ""
     trim_for = 0
     trim_rev = 0
     trunc_for = 0
@@ -80,14 +80,14 @@ params {
     min_asv_len = 0 // Only run if set > 1

     // Chimera detection
     chimera_detection = true
-    // removeBimeraDenovoOptions = false
+    removeBimeraDenovo_options = ""

     // Taxonomic assignment
     tax_assignment_method = 'rdp' // default: RDP classifier implementation in dada2
-
reference = false - species = false + reference = "" + species = "" min_boot = 50 // default for dada2 - tax_levels = false + tax_levels = "" tax_batch = 0 // batch size of ASVs to run through assignTaxonomy/assignSpecies, 0 = run everything // alignment diff --git a/nextflow_schema.json b/nextflow_schema.json index 75c9487..7c3cee8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -437,6 +437,9 @@ }, "rescue_unmerged": { "type": "boolean" + }, + "removeBimeraDenovo_options": { + "type": "string" } } } \ No newline at end of file diff --git a/workflows/tada.nf b/workflows/tada.nf index 771a166..1dbb3e9 100644 --- a/workflows/tada.nf +++ b/workflows/tada.nf @@ -15,6 +15,15 @@ include { MERGETRIMTABLES } from '../modules/local/mergetrimtables' include { LEARNERRORS } from '../modules/local/learnerrors' include { DADAINFER } from '../modules/local/dadainfer' include { POOLEDSEQTABLE } from '../modules/local/pooledseqtable' +include { REMOVECHIMERAS } from '../modules/local/removechimeras' +include { RENAMEASVS } from '../modules/local/renameasvs' +include { ASSIGNTAXASPECIES } from '../modules/local/assigntaxaspecies' +include { DECIPHER } from '../modules/local/decipher' +include { PHANGORN } from '../modules/local/phangorn' +include { FASTTREE } from '../modules/local/fasttree' +include { ROOTTREE } from '../modules/local/roottree' + + include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -104,11 +113,10 @@ workflow TADA { ch_trimmed_reads = FILTERANDTRIM.out.trimmed ch_reports = FILTERANDTRIM.out.trimmed_report.collect() - MERGETRIMTABLES( ch_reports ) - + // Channel setup // We need to group data depending on which downstream steps are needed. There @@ -122,7 +130,6 @@ workflow TADA { // the two groups can be processed in parallel. So we set up the channels with // this in mind. No sample ID info is really needed. ch_trimmed_infer = FILTERANDTRIM.out.trimmed_R1 - // .concat(filteredReadsR2.ifEmpty([])) .map { [ 'R1', it[1]] } .concat(FILTERANDTRIM.out.trimmed_R2.map {['R2', it[1]] } ) .groupTuple(sort: true) @@ -142,6 +149,9 @@ workflow TADA { ch_infer = LEARNERRORS.out.error_models.join(ch_trimmed_infer) // this is always in pooled mode at the moment, should be adjusted + + // if (params.pool == "T" || params.pool == 'pseudo') { + DADAINFER( ch_infer ) @@ -155,150 +165,62 @@ workflow TADA { DADAINFER.out.inferred.collect(), ch_trimmed ) - // if (params.pool == "T" || params.pool == 'pseudo') { - // process DadaInfer { - // tag { "DadaInfer:${readmode}" } - // publishDir "${params.outdir}/dada2-Derep-Pooled", mode: "copy", overwrite: true - - // input: - // // DADA2 step runs on all R1 and/or on all R2 - // tuple val(readmode), file(err), file(reads) from errorModelsPooled - // .join(ReadsInfer) - - // output: - // // Note that the mode ('merged', 'R1', 'R2') can now potentially allow SE read analysis - // file("all.dd.${readmode}.RDS") into dadaMerge,dadaToReadTracking - - // when: - // params.precheck == false - - // script: - // dadaOpt = !params.dadaOpt.isEmpty() ? "'${params.dadaOpt.collect{k,v->"$k=$v"}.join(", ")}'" : 'NA' - // template "DadaPooled.R" - // } - - // // This one is a little tricky. We can't know a priori how many instances of reads (R1 and R2) - // // are present outside the process, but we can determine this internally within the process - // // when we collect all of them. 
- // // So, here we check the size of the collected channel containing the denoised models; if - // // there are two then this is a paired-end run, otherwise it's single-end. Logic is in the R script - - // process PooledSeqTable { - // tag { "PooledSeqTable:${readmode}" } - // publishDir "${params.outdir}/dada2-OriginalSeqTable", mode: "copy", overwrite: true - - // input: - // // we don't care about the mode here, so only get the dds (dada-inferred) RDS files - // file(dds) from dadaMerge.collect() - // // we don't care about the mode here, we only grab the reads - // file(filts) from ReadsMerge - // .map { it[1] } - // .flatten() - // .collect() - - // output: - // tuple val(readmode), file("seqtab.${readmode}.RDS") into seqTable,rawSeqTableToRename - // file "all.merged.RDS" optional true into mergerTracking,mergerQC - // file "seqtab.original.*.RDS" into seqtabQC// we keep this for comparison and possible QC - - // when: - // params.precheck == false - - // script: - // // We could switch this to 'paired' vs 'single-end' as well - // readmode = dds.size() == 2 ? 'merged' : 'R1' - // template "SeqTables.R" - // } - // } else { - - // process PerSampleInferDerepAndMerge { - // tag { "PerSampleInferDerepAndMerge:${meta.id}" } - // publishDir "${params.outdir}/dada2-Derep-Single/Per-Sample", mode: "copy", overwrite: true - - // input: - // tuple val(meta), file(reads) from readsToPerSample - // file(errs) from errorModelsPerSample.collect() - - // output: - // file("${meta.id}.{R1,merged}.RDS") into combinedReads - // tuple val(meta), file("${meta.id}.dd.R{1,2}.RDS") into perSampleDadaToMerge - // val(readmode) into modeSeqTable - - // when: - // params.precheck == false - - // script: - // dadaOpt = !params.dadaOpt.isEmpty() ? "'${params.dadaOpt.collect{k,v->"$k=$v"}.join(", ")}'" : 'NA' - // readmode = errs.size() == 2 ? 
'merged' : 'R1'
    //         template "PerSampleDadaInfer.R"
    //     }

    //     process MergeDadaRDS {
    //         tag { "mergeDadaRDS" }
    //         publishDir "${params.outdir}/dada2-Derep-Single", mode: "copy", overwrite: true

    //         input:
    //         file(dds) from perSampleDadaToMerge
    //             .map { it[1] }
    //             .flatten()
    //             .collect()

    //         output:
    //         file("all.dd.R{1,2}.RDS") into dadaToReadTracking

    //         when:
    //         params.precheck == false

    //         script:
    //         template "MergePerSampleDada.R"
    //     }

    //     process SequenceTable {
    //         tag { "SequenceTable:${readmode}" }
    //         publishDir "${params.outdir}/dada2-Derep-Single", mode: "copy", overwrite: true

    //         input:
    //         file(mr) from combinedReads.collect()
    //         val(readmode) from modeSeqTable.first()

    //         output:
    //         tuple val(readmode), file("seqtab.${readmode}.RDS") into seqTable,rawSeqTableToRename
    //         file "all.merged.RDS" optional true into mergerTracking,mergerQC
    //         file "seqtab.original.${readmode}.RDS" into seqtabQC // we keep this for comparison and possible QC

    //         when:
    //         params.precheck == false

    //         script:
    //         template "PerSampleSeqTable.R"
    //     }
    // }
+    REMOVECHIMERAS(
+        POOLEDSEQTABLE.out.filtered_seqtable
+    )
-    // } else if (params.seqTables) { // TODO maybe we should check the channel here
-    //     process MergeSeqTables {
-    //         tag { "MergeSeqTables" }
-    //         publishDir "${params.outdir}/dada2-MergedSeqTable", mode: 'copy'
+    RENAMEASVS(
+        REMOVECHIMERAS.out.nonchim_seqtable,
+        POOLEDSEQTABLE.out.filtered_seqtable
+    )
-    //         input:
-    //         file(st) from dada2SeqTabs
-    //             .map { it[1] }
-    //             .collect()
+    // Subworkflows-Taxonomic assignment (optional)
+    taxtab = Channel.empty()
+    if (params.reference) {
+        ref_file = file(params.reference, checkIfExists: true)
+        species_file = params.species ? file(params.species, checkIfExists: true) : file("${projectDir}/assets/dummy_file")
+
+        ASSIGNTAXASPECIES(
+            RENAMEASVS.out.readmap,
+            ref_file,
+            species_file
+        )
+        taxtab = ASSIGNTAXASPECIES.out.taxtab
+    }
+
+    // Subworkflows-Alignment + Phylogenetic Tree (optional)
+    DECIPHER(
+        RENAMEASVS.out.nonchimeric_asvs
+    )
-    //         output:
-    //         tuple val("merged"), file("seqtab.merged.RDS") into seqTable, rawSeqTableToRename
+    ch_tree = Channel.empty()
+    ch_tool = Channel.empty()
+    // this seems like the sort of thing a function map
+    // would be useful for...
+    if (params.run_tree == 'phangorn') {
+        PHANGORN(
+            DECIPHER.out.alignment
+        )
+        ch_tree = PHANGORN.out.treeGTR
+    } else if (params.run_tree == 'fasttree') {
+        FASTTREE(
+            DECIPHER.out.alignment
+        )
+        ch_tree = FASTTREE.out.treeGTR
+    } else if (params.run_tree) {
+        // fail fast on an unrecognized tool rather than silently skipping tree building
+        error "Unknown run_tree option: '${params.run_tree}' (expected 'phangorn' or 'fasttree')"
+    }
-    //         script:
-    //         template "MergeSeqTables.R"
-    //     }
-    // Channel.empty().into { SEChimera;RawSEChimeraToRename;trimmedReadTracking;dadaToReadTracking;mergerTracking;mergerQC }
-    // }
+    ROOTTREE(
+        ch_tree,
+        params.run_tree
+    )
-    // Subworkflows-Taxonomic assignment (optional)
+    // Subworkflows-Alternative outputs
-    // Subworkflows-Alignment + Phylogenetic Tree
-    // Subworkflows-Alternative outputs

     //
     // Collate and save software versions