Skip to content

Commit

Permalink
get up to rooted tree running
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Apr 1, 2024
1 parent c475678 commit 09d3bef
Show file tree
Hide file tree
Showing 17 changed files with 276 additions and 727 deletions.
Empty file added assets/dummy_file
Empty file.
150 changes: 81 additions & 69 deletions modules/local/assigntaxaspecies.nf
Original file line number Diff line number Diff line change
@@ -1,89 +1,101 @@
// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
// https://github.com/nf-core/modules/tree/master/modules/nf-core/
// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
// https://nf-co.re/join
// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
// All other parameters MUST be provided using the "task.ext" directive, see here:
// https://www.nextflow.io/docs/latest/process.html#ext
// where "task.ext" is a string.
// Any parameters that need to be evaluated in the context of a particular sample
// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
// unless there is a run-time, storage advantage in implementing in this way
// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
// bwa mem | samtools view -B -T ref.fasta
// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
// list (`[]`) instead of a file can be used to work around this issue.

process ASSIGNTAXASPECIES {
tag '$bam'
label 'process_me'
label 'process_medium'

// TODO nf-core: List required Conda package(s).
// Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
// For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
// TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
'biocontainers/YOUR-TOOL-HERE' }"
container "ghcr.io/h3abionet/tada:dev"

input:
// TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
// MUST be provided as an input via a Groovy Map called "meta".
// This information may not be required in some instances e.g. indexing reference genome files:
// https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
// TODO nf-core: Where applicable please provide/convert compressed files as input/output
// e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
path bam

path(st)
path(ref)
path(sp)

output:
// TODO nf-core: Named file extensions MUST be emitted for ALL output channels
path "*.bam", emit: bam
// TODO nf-core: List additional required output channels/values here
path "versions.yml" , emit: versions

path("tax_final.RDS"), emit: taxtab
path("bootstrap_final.RDS"), emit: bootstraps

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''

// TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
// If the software is unable to output a version number on the command-line then it can be manually specified
// e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
// Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
// TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
// TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
// using the Nextflow "task" variable e.g. "--threads $task.cpus"
// TODO nf-core: Please replace the example samtools command below with your module's command
// TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
def runSpecies = sp.name != "dummy_file" ? "TRUE" : "FALSE"
"""
samtools \\
sort \\
$args \\
-@ $task.cpus \\
$bam
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library(dada2))
seqs <- readRDS("${st}")
seqtab <- seqs\$seq
# Assign taxonomy
tax <- NULL
boots <- NULL
cat <<-END_VERSIONS > versions.yml
"${task.process}":
assigntaxaspecies: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
# Assign taxonomy (and optionally species) to every sequence in `seqtab`,
# either in a single call or in fixed-size batches to bound memory use.
# Results: `tax` (taxonomy matrix) and `boots` (bootstrap matrix).
batch_size <- ${params.tax_batch}   # 0 disables batching
n_seqs <- length(seqtab)

if (batch_size == 0 || n_seqs <= batch_size) { # no batch, run normally
    cat("Running all samples\\n")
    res <- assignTaxonomy(seqtab, "${ref}",
        multithread = ${task.cpus},
        tryRC = TRUE,
        outputBootstraps = TRUE,
        minBoot = ${params.min_boot},
        verbose = TRUE)
    boots <- res\$boot
    if (${runSpecies}) {
        # outputBootstraps = TRUE makes assignTaxonomy() return a list, so
        # hand addSpecies() the taxonomy matrix only (as the batched path does).
        tax <- addSpecies(res\$tax,
            refFasta = "${sp}",
            tryRC = TRUE,
            verbose = TRUE)
    } else {
        tax <- res\$tax
    }
} else {
    # see https://github.com/benjjneb/dada2/issues/1429 for this
    batch_starts <- seq(1, n_seqs, by = batch_size)
    # Clamp the last batch to the number of sequences; this also stays
    # correct if there is only a single batch.
    batch_ends <- pmin(batch_starts + batch_size - 1, n_seqs)

    tax_parts <- vector("list", length(batch_starts))
    boot_parts <- vector("list", length(batch_starts))

    for (i in seq_along(batch_starts)) {
        cat(paste("Running all samples from", batch_starts[i], "to", batch_ends[i], "\\n"))
        seq_batch <- seqtab[batch_starts[i]:batch_ends[i]]
        res <- assignTaxonomy(seq_batch, "${ref}",
            multithread = ${task.cpus},
            tryRC = TRUE,
            outputBootstraps = TRUE,
            minBoot = ${params.min_boot},
            verbose = TRUE)
        boot_parts[[i]] <- res\$boot
        if (${runSpecies}) {
            tax_parts[[i]] <- addSpecies(res\$tax,
                refFasta = "${sp}",
                tryRC = TRUE,
                verbose = TRUE)
        } else {
            tax_parts[[i]] <- res\$tax
        }
    }

    # Stack the per-batch tables once, in batch order.
    tax <- do.call(rbind, tax_parts)
    boots <- do.call(rbind, boot_parts)
}
# Replace the row names of both tables with the matching `id` values from
# `seqs`, looked up by the current row name rather than by position. The row
# order should already match `seqs`, but we don't assume this.
# NOTE(review): assumes `seqs` is a data.frame whose row names are the
# sequences themselves -- confirm against the process that writes this RDS.
rownames(tax) <- seqs[rownames(tax),]\$id
rownames(boots) <- seqs[rownames(boots),]\$id
# Save the re-labelled taxonomy and bootstrap tables; these file names are the
# ones declared as the `taxtab` and `bootstraps` outputs of this process.
saveRDS(tax, "tax_final.RDS")
saveRDS(boots, "bootstrap_final.RDS")
"""

stub:
def args = task.ext.args ?: ''

// TODO nf-core: A stub section should mimic the execution of the original module as best as possible
// Have a look at the following examples:
// Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
// Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
"""
touch ${prefix}.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
assigntaxaspecies: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
"""
}
7 changes: 0 additions & 7 deletions modules/local/dadainfer.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ process DADAINFER {
tuple val(readmode), file(err), file(reads)

output:
// TODO nf-core: List additional required output channels/values here
// path "versions.yml" , emit: versions
path("all.dd.${readmode}.RDS"), emit: inferred

when:
Expand Down Expand Up @@ -52,11 +50,6 @@ process DADAINFER {

stub:
def args = task.ext.args ?: ''

// TODO nf-core: A stub section should mimic the execution of the original module as best as possible
// Have a look at the following examples:
// Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
// Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
"""
# add some real stuff here
touch all.dd.${readmode}.RDS
Expand Down
80 changes: 13 additions & 67 deletions modules/local/decipher.nf
Original file line number Diff line number Diff line change
@@ -1,89 +1,35 @@
// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
// https://github.com/nf-core/modules/tree/master/modules/nf-core/
// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
// https://nf-co.re/join
// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
// All other parameters MUST be provided using the "task.ext" directive, see here:
// https://www.nextflow.io/docs/latest/process.html#ext
// where "task.ext" is a string.
// Any parameters that need to be evaluated in the context of a particular sample
// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
// unless there is a run-time, storage advantage in implementing in this way
// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
// bwa mem | samtools view -B -T ref.fasta
// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
// list (`[]`) instead of a file can be used to work around this issue.

process DECIPHER {
tag '$bam'
label 'process_me'
label 'process_medium'

// TODO nf-core: List required Conda package(s).
// Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
// For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
// TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
'biocontainers/YOUR-TOOL-HERE' }"
container "ghcr.io/h3abionet/tada:dev"

input:
// TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
// MUST be provided as an input via a Groovy Map called "meta".
// This information may not be required in some instances e.g. indexing reference genome files:
// https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
// TODO nf-core: Where applicable please provide/convert compressed files as input/output
// e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
path bam
path(seqs)

output:
// TODO nf-core: Named file extensions MUST be emitted for ALL output channels
path "*.bam", emit: bam
// TODO nf-core: List additional required output channels/values here
path "versions.yml" , emit: versions
path("aligned_seqs.fna"), optional: true, emit: alignment

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''

// TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
// If the software is unable to output a version number on the command-line then it can be manually specified
// e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
// Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
// TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
// TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
// using the Nextflow "task" variable e.g. "--threads $task.cpus"
// TODO nf-core: Please replace the example samtools command below with your module's command
// TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
"""
samtools \\
sort \\
$args \\
-@ $task.cpus \\
$bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
decipher: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library(dada2))
suppressPackageStartupMessages(library(DECIPHER))
# Load the input sequences from the staged file.
seqs <- readDNAStringSet("${seqs}")
# Align all sequences, using as many processors as the task was allocated.
# NOTE(review): anchor=NA presumably turns anchoring off -- confirm against
# the AlignSeqs documentation.
alignment <- AlignSeqs(seqs,
anchor=NA,
processors = ${task.cpus})
# Write the alignment to the file declared as the `alignment` output.
writeXStringSet(alignment, "aligned_seqs.fna")
"""

stub:
def args = task.ext.args ?: ''

// TODO nf-core: A stub section should mimic the execution of the original module as best as possible
// Have a look at the following examples:
// Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
// Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
"""
touch ${prefix}.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
decipher: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
"""
}
77 changes: 9 additions & 68 deletions modules/local/fasttree.nf
Original file line number Diff line number Diff line change
@@ -1,89 +1,30 @@
// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
// https://github.com/nf-core/modules/tree/master/modules/nf-core/
// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
// https://nf-co.re/join
// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
// All other parameters MUST be provided using the "task.ext" directive, see here:
// https://www.nextflow.io/docs/latest/process.html#ext
// where "task.ext" is a string.
// Any parameters that need to be evaluated in the context of a particular sample
// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
// unless there is a run-time, storage advantage in implementing in this way
// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
// bwa mem | samtools view -B -T ref.fasta
// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
// list (`[]`) instead of a file can be used to work around this issue.

process FASTTREE {
tag '$bam'
label 'process_m'
label 'process_medium'

// TODO nf-core: List required Conda package(s).
// Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
// For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
// TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
'biocontainers/YOUR-TOOL-HERE' }"
container "quay.io/biocontainers/fasttree:2.1.10--h14c3975_3"

input:
// TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
// MUST be provided as an input via a Groovy Map called "meta".
// This information may not be required in some instances e.g. indexing reference genome files:
// https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
// TODO nf-core: Where applicable please provide/convert compressed files as input/output
// e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
path bam
path(aln)

output:
// TODO nf-core: Named file extensions MUST be emitted for ALL output channels
path "*.bam", emit: bam
// TODO nf-core: List additional required output channels/values here
path "versions.yml" , emit: versions
path("fasttree.newick"), emit: treeGTR

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''

// TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
// If the software is unable to output a version number on the command-line then it can be manually specified
// e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
// Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
// TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
// TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
// using the Nextflow "task" variable e.g. "--threads $task.cpus"
// TODO nf-core: Please replace the example samtools command below with your module's command
// TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
"""
samtools \\
sort \\
$args \\
-@ $task.cpus \\
$bam

cat <<-END_VERSIONS > versions.yml
"${task.process}":
fasttree: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
"""
# Run FastTree on the input alignment and write the result to
# fasttree.newick, the file declared as the treeGTR output.
# NOTE(review): OMP_NUM_THREADS presumably only has an effect on an OpenMP
# build of FastTree (often shipped as FastTreeMP); confirm that the binary
# called here is actually multithreaded.
OMP_NUM_THREADS=${task.cpus} FastTree -nt \\
-gtr -gamma -spr 4 -mlacc 2 -slownni \\
-out fasttree.newick \\
${aln}
"""

stub:
def args = task.ext.args ?: ''

// TODO nf-core: A stub section should mimic the execution of the original module as best as possible
// Have a look at the following examples:
// Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
// Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
"""
touch ${prefix}.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fasttree: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
"""
}
Loading

0 comments on commit 09d3bef

Please sign in to comment.