nf-core · d4straub · Dec 20, 2023 · Dec 19, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification
 - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref`
 - [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh`
+- [#681](https://github.com/nf-core/ampliseq/pull/681) - For DADA2, with `--dada_addspecies_allowmultiple` multiple exact species matches are reported and with `--dada_taxonomy_rc` reverse-complement matches are also considered in taxonomic classification
 
 ### `Changed`
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -355,8 +355,7 @@ process {
         ext.seed = "${params.seed}"
         ext.args = [
             'minBoot = 50',
-            params.pacbio ? "tryRC = TRUE" :
-                params.iontorrent ? "tryRC = TRUE" : ""
+            params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE"
         ].join(',').replaceAll('(,)*$', "")
         publishDir = [
             [
@@ -375,9 +374,9 @@ process {
     withName: DADA2_ADDSPECIES {
         ext.seed = "${params.seed}"
         ext.args = [
-            'allowMultiple = FALSE, n = 1e5',
-            params.pacbio ? "tryRC = TRUE" :
-                params.iontorrent ? "tryRC = TRUE" : ""
+            'n = 1e5',
+            params.dada_addspecies_allowmultiple ? "allowMultiple = TRUE" : "",
+            params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE"
         ].join(',').replaceAll('(,)*$', "")
         publishDir = [
             [

diff --git a/docs/usage.md b/docs/usage.md
@@ -221,18 +221,18 @@ Pre-configured reference taxonomy databases are:
 | Database key | DADA2 | SINTAX | Kraken2 | QIIME2 | Target genes                                  |
 | ------------ | ----- | ------ | ------- | ------ | --------------------------------------------- |
 | silva        | +     | -      | +       | +      | 16S rRNA                                      |
-| gtdb         | +     | -      | -       | -      | 16S rRNA                                      |
+| gtdb         | +¹    | -      | -       | -      | 16S rRNA                                      |
 | sbdi-gtdb    | +     | -      | -       | -      | 16S rRNA                                      |
 | rdp          | +     | -      | +       | -      | 16S rRNA                                      |
-| greengenes   | -     | -      | +       | (+)¹   | 16S rRNA                                      |
+| greengenes   | -     | -      | +       | (+)²   | 16S rRNA                                      |
 | pr2          | +     | -      | -       | -      | 18S rRNA                                      |
 | unite-fungi  | +     | +      | -       | +      | eukaryotic nuclear ribosomal ITS region       |
 | unite-alleuk | +     | +      | -       | +      | eukaryotic nuclear ribosomal ITS region       |
 | coidb        | +     | +      | -       | -      | eukaryotic Cytochrome Oxidase I (COI)         |
 | midori2-co1  | +     | -      | -       | -      | eukaryotic Cytochrome Oxidase I (COI)         |
-| standard     | -     | -      | +       | -      | any in genomes of archaea, bacteria, viruses² |
+| standard     | -     | -      | +       | -      | any in genomes of archaea, bacteria, viruses³ |
 
-¹: de-replicated at 85%, only for testing purposes; ²: quality of results might vary
+¹: [`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only for testing purposes; ³: quality of results might vary
 
 Special features of taxonomic classification tools:
 

diff --git a/nextflow.config b/nextflow.config
@@ -107,13 +107,15 @@ params {
     dada_ref_tax_custom      = null
     dada_ref_tax_custom_sp   = null
     cut_dada_ref_taxonomy    = false
+    dada_addspecies_allowmultiple = false
+    dada_taxonomy_rc         = false
     sintax_ref_taxonomy      = null
     qiime_ref_taxonomy       = null
     qiime_ref_tax_custom     = null
     kraken2_ref_taxonomy     = null
     kraken2_assign_taxlevels = null
     kraken2_ref_tax_custom   = null
-    kraken2_confidence       = 0
+    kraken2_confidence       = 0.0
 
     // MultiQC options
     multiqc_config             = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -390,6 +390,16 @@
                     "help_text": "Expected amplified sequences are extracted from the DADA2 reference taxonomy using the primer sequences, that might improve classification. This is not applied to species classification (assignSpecies) but only for lower taxonomic levels (assignTaxonomy).",
                     "description": "If the expected amplified sequences are extracted from the DADA2 reference taxonomy database"
                 },
+                "dada_addspecies_allowmultiple": {
+                    "type": "boolean",
+                    "help_text": "Defines the behavior when multiple exact matches against different species are returned. By default only unambiguous identifications are returned. If TRUE, a concatenated string of all exactly matched species is returned.",
+                    "description": "If multiple exact matches against different species are returned"
+                },
+                "dada_taxonomy_rc": {
+                    "type": "boolean",
+                    "help_text": "The reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence.",
+                    "description": "If the reverse-complement of each sequence will also be tested for classification"
+                },
                 "pplace_tree": {
                     "type": "string",
                     "description": "Newick file with reference phylogenetic tree. Requires also `--pplace_aln` and `--pplace_model`."