get up to rooted tree running

h3abionet · Apr 1, 2024 · 09d3bef · 09d3bef
1 parent c475678
commit 09d3bef
Show file tree

Hide file tree

Showing 17 changed files with 276 additions and 727 deletions.
diff --git a/assets/dummy_file b/assets/dummy_file
diff --git a/modules/local/assigntaxaspecies.nf b/modules/local/assigntaxaspecies.nf
@@ -1,89 +1,101 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/modules/nf-core/
-//               You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-//               All other parameters MUST be provided using the "task.ext" directive, see here:
-//               https://www.nextflow.io/docs/latest/process.html#ext
-//               where "task.ext" is a string.
-//               Any parameters that need to be evaluated in the context of a particular sample
-//               e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-//               unless there is a run-time, storage advantage in implementing in this way
-//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//                 bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-//               list (`[]`) instead of a file can be used to work around this issue.
-
 process ASSIGNTAXASPECIES {
-    tag '$bam'
-    label 'process_me'
+    label 'process_medium'
 
-    // TODO nf-core: List required Conda package(s).
-    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-        'biocontainers/YOUR-TOOL-HERE' }"
+    container "ghcr.io/h3abionet/tada:dev"
 
     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    path bam
-
+    path(st)
+    path(ref)
+    path(sp)
+
     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    path "*.bam", emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    path "versions.yml"           , emit: versions
-
+    path("tax_final.RDS"), emit: taxtab
+    path("bootstrap_final.RDS"), emit: bootstraps
+
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
-
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
+    def runSpecies = sp.name != "dummy_file" ? "TRUE" : "FALSE"
     """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        $bam
+    #!/usr/bin/env Rscript
+    suppressPackageStartupMessages(library(dada2))
+
+    seqs <- readRDS("${st}")
+    seqtab <- seqs\$seq
+
+    # Assign taxonomy
+    tax <- NULL
+    boots <- NULL
 
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        assigntaxaspecies: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    if ( ${params.tax_batch} == 0 || length(seqtab) <= ${params.tax_batch} ) { # batching off, or everything fits in one batch: single call
+        cat("Running all samples\\n")
+        tax <- assignTaxonomy(seqtab, "${ref}",
+                        multithread=${task.cpus},
+                        tryRC = TRUE,
+                        outputBootstraps = TRUE,
+                        minBoot = ${params.min_boot},
+                        verbose = TRUE)
+        boots <- tax\$boot # keep the bootstraps before tax is reduced to the taxonomy table
+        if (${runSpecies}) {
+            tax <- addSpecies(tax\$tax, "${sp}", # pass only the taxonomy table, same as the batched branch
+                 tryRC = TRUE,
+                 verbose = TRUE)
+        } else {
+            tax <- tax\$tax
+        }
+    } else {
+        # see https://github.com/benjjneb/dada2/issues/1429 for this
+        to_split <- seq(1, length(seqtab), by = ${params.tax_batch}) # first index of every batch
+        to_split2 <- c(to_split[-1] - 1, length(seqtab)) # last index of every batch; [-1] is safe even with one batch
+
+        for (i in seq_along(to_split)) {
+            cat(paste("Running all samples from",to_split[i], "to", to_split2[i], "\\n"))
+            seqtab2 <- seqtab[to_split[i]:to_split2[i]]
+            tax2 <- assignTaxonomy(seqtab2, "${ref}",
+                    multithread=${task.cpus},
+                    tryRC = TRUE,
+                    outputBootstraps = TRUE,
+                    minBoot = ${params.min_boot},
+                    verbose = TRUE)
+
+            if (is.null(boots)) { # first batch initialises the accumulator
+                boots <- tax2\$boot
+            } else {
+                boots <- rbind(boots, tax2\$boot)
+            }
+
+            if (${runSpecies}) {
+                tax2 <- addSpecies(tax2\$tax,
+                    refFasta = "${sp}",
+                    tryRC = TRUE,
+                    verbose = TRUE)
+            } else {
+                tax2 <- tax2\$tax
+            }
+            if (is.null(tax)) { # first batch initialises the accumulator
+                tax <- tax2
+            } else {
+                tax <- rbind(tax, tax2)
+            }
+        }
+    }
+
+    # make sure these are the same order
+    # they should be, but we don't assume this
+    rownames(tax) <- seqs[rownames(tax),]\$id
+    rownames(boots) <- seqs[rownames(boots),]\$id
+
+    # Write original data
+    saveRDS(tax, "tax_final.RDS")
+    saveRDS(boots, "bootstrap_final.RDS")
     """
 
     stub:
     def args = task.ext.args ?: ''
 
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        assigntaxaspecies: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch tax_final.RDS bootstrap_final.RDS  # empty placeholders for the two declared outputs
     """
 }
diff --git a/modules/local/dadainfer.nf b/modules/local/dadainfer.nf
@@ -8,8 +8,6 @@ process DADAINFER {
     tuple val(readmode), file(err), file(reads)
 
     output:
-    // TODO nf-core: List additional required output channels/values here
-    // path "versions.yml"           , emit: versions
     path("all.dd.${readmode}.RDS"), emit: inferred
 
     when:
@@ -52,11 +50,6 @@ process DADAINFER {
 
     stub:
     def args = task.ext.args ?: ''
-
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
     # add some real stuff here
     touch all.dd.${readmode}.RDS

diff --git a/modules/local/decipher.nf b/modules/local/decipher.nf
@@ -1,89 +1,35 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/modules/nf-core/
-//               You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-//               All other parameters MUST be provided using the "task.ext" directive, see here:
-//               https://www.nextflow.io/docs/latest/process.html#ext
-//               where "task.ext" is a string.
-//               Any parameters that need to be evaluated in the context of a particular sample
-//               e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-//               unless there is a run-time, storage advantage in implementing in this way
-//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//                 bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-//               list (`[]`) instead of a file can be used to work around this issue.
-
 process DECIPHER {
-    tag '$bam'
-    label 'process_me'
+    label 'process_medium'
 
-    // TODO nf-core: List required Conda package(s).
-    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-        'biocontainers/YOUR-TOOL-HERE' }"
+    container "ghcr.io/h3abionet/tada:dev"
 
     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    path bam
+    path(seqs)
 
     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    path "*.bam", emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    path "versions.yml"           , emit: versions
+    path("aligned_seqs.fna"), optional: true, emit: alignment
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
 
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
     """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        $bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        decipher: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    #!/usr/bin/env Rscript
+    suppressPackageStartupMessages(library(dada2))
+    suppressPackageStartupMessages(library(DECIPHER))
+
+    seqs <- readDNAStringSet("${seqs}") # read the file supplied through the seqs path input
+    alignment <- AlignSeqs(seqs, # align the sequences; processors below follows the task CPU allocation
+               anchor=NA,
+               processors = ${task.cpus})
+    writeXStringSet(alignment, "aligned_seqs.fna") # file name matches the declared output path
     """
 
     stub:
     def args = task.ext.args ?: ''
 
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        decipher: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
     """
 }
diff --git a/modules/local/fasttree.nf b/modules/local/fasttree.nf
@@ -1,89 +1,30 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/modules/nf-core/
-//               You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-//               All other parameters MUST be provided using the "task.ext" directive, see here:
-//               https://www.nextflow.io/docs/latest/process.html#ext
-//               where "task.ext" is a string.
-//               Any parameters that need to be evaluated in the context of a particular sample
-//               e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-//               unless there is a run-time, storage advantage in implementing in this way
-//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//                 bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-//               list (`[]`) instead of a file can be used to work around this issue.
-
 process FASTTREE {
-    tag '$bam'
-    label 'process_m'
+    label 'process_medium'
 
-    // TODO nf-core: List required Conda package(s).
-    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-        'biocontainers/YOUR-TOOL-HERE' }"
+    container "quay.io/biocontainers/fasttree:2.1.10--h14c3975_3"
 
     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    path bam
+    path(aln)
 
     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    path "*.bam", emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    path "versions.yml"           , emit: versions
+    path("fasttree.newick"), emit: treeGTR
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
-
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
-    """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        $bam
 
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fasttree: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    """
+    OMP_NUM_THREADS=${task.cpus} FastTree -nt \\
+        -gtr -gamma -spr 4 -mlacc 2 -slownni $args \\
+        -out fasttree.newick \\
+        ${aln}
     """
 
     stub:
-    def args = task.ext.args ?: ''
 
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
     """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fasttree: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
+    touch fasttree.newick  # empty placeholder for the declared output
     """
 }