From d0786b7a057e4478b67ffcba60d7dda3f038a9b1 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 14:54:40 +0100 Subject: [PATCH] add --dada_addspecies_allowmultiple and --dada_taxonomy_rc --- CHANGELOG.md | 1 + conf/modules.config | 9 ++++----- docs/usage.md | 8 ++++---- nextflow.config | 4 +++- nextflow_schema.json | 10 ++++++++++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77c3a6bc..22dd3c52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` - [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` +- [#681](https://github.com/nf-core/ampliseq/pull/681) - For DADA2, with `--dada_addspecies_allowmultiple` multiple exact species matches are reported and with `--dada_taxonomy_rc` reverse-complement matches are also considered in taxonomic classification ### `Changed` diff --git a/conf/modules.config b/conf/modules.config index cbc04cd0..e02e9342 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -355,8 +355,7 @@ process { ext.seed = "${params.seed}" ext.args = [ 'minBoot = 50', - params.pacbio ? "tryRC = TRUE" : - params.iontorrent ? "tryRC = TRUE" : "" + params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE" ].join(',').replaceAll('(,)*$', "") publishDir = [ [ @@ -375,9 +374,9 @@ process { withName: DADA2_ADDSPECIES { ext.seed = "${params.seed}" ext.args = [ - 'allowMultiple = FALSE, n = 1e5', - params.pacbio ? "tryRC = TRUE" : - params.iontorrent ? 
"tryRC = TRUE" : "" + 'n = 1e5', + params.dada_addspecies_allowmultiple ? "allowMultiple = TRUE" : "", + params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE" ].join(',').replaceAll('(,)*$', "") publishDir = [ [ diff --git a/docs/usage.md b/docs/usage.md index 38c2cc23..f8625b2f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -221,18 +221,18 @@ Pre-configured reference taxonomy databases are: | Database key | DADA2 | SINTAX | Kraken2 | QIIME2 | Target genes | | ------------ | ----- | ------ | ------- | ------ | --------------------------------------------- | | silva | + | - | + | + | 16S rRNA | -| gtdb | + | - | - | - | 16S rRNA | +| gtdb | +¹ | - | - | - | 16S rRNA | | sbdi-gtdb | + | - | - | - | 16S rRNA | | rdp | + | - | + | - | 16S rRNA | -| greengenes | - | - | + | (+)¹ | 16S rRNA | +| greengenes | - | - | + | (+)² | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | | unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) | | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | -| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses² | +| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses³ | -¹: de-replicated at 85%, only for testing purposes; ²: quality of results might vary +¹[`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only for testing purposes; ³: quality of results might vary Special features of taxonomic classification tools: diff --git a/nextflow.config b/nextflow.config index 831a43a2..7f35f9e4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -107,13 +107,15 @@ params { dada_ref_tax_custom = null dada_ref_tax_custom_sp = null cut_dada_ref_taxonomy = false + dada_addspecies_allowmultiple = false + dada_taxonomy_rc = false 
sintax_ref_taxonomy = null qiime_ref_taxonomy = null qiime_ref_tax_custom = null kraken2_ref_taxonomy = null kraken2_assign_taxlevels = null kraken2_ref_tax_custom = null - kraken2_confidence = 0 + kraken2_confidence = 0.0 // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index b8afed35..37d528fd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -390,6 +390,16 @@ "help_text": "Expected amplified sequences are extracted from the DADA2 reference taxonomy using the primer sequences, that might improve classification. This is not applied to species classification (assignSpecies) but only for lower taxonomic levels (assignTaxonomy).", "description": "If the expected amplified sequences are extracted from the DADA2 reference taxonomy database" }, + "dada_addspecies_allowmultiple": { + "type": "boolean", + "help_text": "Defines the behavior when multiple exact matches against different species are returned. By default only unambiguous identifications are returned. If TRUE, a concatenated string of all exactly matched species is returned.", + "description": "If multiple exact matches against different species are returned" + }, + "dada_taxonomy_rc": { + "type": "boolean", + "help_text": "Reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence.", + "description": "If reverse-complement of each sequence will also be tested for classification" + }, "pplace_tree": { "type": "string", "description": "Newick file with reference phylogenetic tree. Requires also `--pplace_aln` and `--pplace_model`."