diff --git a/CHANGELOG.md b/CHANGELOG.md index b0879526..5a1a239f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` +- [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` + ### `Changed` ### `Fixed` diff --git a/bin/taxref_reformat_phytoref.sh b/bin/taxref_reformat_phytoref.sh new file mode 100755 index 00000000..c61c081e --- /dev/null +++ b/bin/taxref_reformat_phytoref.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the assignTaxonomy() fasta file: assignTaxonomy.fna +cat PhytoRef_with_taxonomy.fasta | sed '/>/s/>[^|]*|/>/' | sed '/>/s/|/;/g' > assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cat PhytoRef_with_taxonomy.fasta | sed '/^>/s/>\([^|]\+\)|.*|\([^|]\+\)/>\1 \2/' > addSpecies.fna diff --git a/bin/taxref_reformat_zehr-nifh.sh b/bin/taxref_reformat_zehr-nifh.sh new file mode 100755 index 00000000..54171f51 --- /dev/null +++ b/bin/taxref_reformat_zehr-nifh.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the assignTaxonomy() fasta file: assignTaxonomy.fna +cp *.fasta assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),[0-9]* \(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c80820ec..263f4390 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -25,22 +25,6 @@ params { fmtscript = "taxref_reformat_coidb.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" } - 'midori2-co1' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ 
"http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." - fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } - 'midori2-co1=gb250' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." 
- fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } 'gtdb' { title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ] @@ -76,6 +60,30 @@ params { fmtscript = "taxref_reformat_gtdb.sh" dbversion = "GTDB R05-RS95 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/)" } + 'midori2-co1' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." + fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } + 'midori2-co1=gb250' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." 
+ fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } + 'phytoref' { + title = "PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes" + file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ] + citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401." + fmtscript = "taxref_reformat_phytoref.sh" + dbversion = "unversioned" + taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species" + } 'pr2' { title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0" file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ] @@ -239,6 +247,22 @@ params { dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"] } + 'zehr-nifh' { + title = "Zehr lab nifH database - version 2.0.5" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 
2.0.5" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } + 'zehr-nifh=2.5.0' { + title = "Zehr lab nifH database - version 2.0.5" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 2.0.5" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } } //QIIME2 taxonomic reference databases qiime_ref_databases { diff --git a/docs/output.md b/docs/output.md index 62433cea..c2b93537 100644 --- a/docs/output.md +++ b/docs/output.md @@ -140,7 +140,7 @@ DADA2 reduces sequence errors and dereplicates sequences by quality filtering, d - `ASV_table.tsv`: Counts for each ASV sequence. - `DADA2_stats.tsv`: Tracking read numbers through DADA2 processing steps, for each sample. - `DADA2_table.rds`: DADA2 ASV table as R object. - - `DADA2_tables.tsv`: DADA2 ASV table. + - `DADA2_table.tsv`: DADA2 ASV table. - `dada2/args/`: Directory containing files with all parameters for DADA2 steps. - `dada2/log/`: Directory containing log files for DADA2 steps. 
- `dada2/QC/` diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da..d2e5faa9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -335,36 +335,39 @@ "description": "Name of supported database, and optionally also version number", "default": "silva=138", "enum": [ - "midori2-co1=gb250", - "midori2-co1", + "coidb", + "coidb=221216", + "gtdb", "gtdb=R05-RS95", "gtdb=R06-RS202", "gtdb=R07-RS207", "gtdb=R08-RS214", - "gtdb", - "coidb", - "coidb=221216", - "pr2=5.0.0", - "pr2=4.14.0", - "pr2=4.13.0", + "midori2-co1", + "midori2-co1=gb250", + "phytoref", "pr2", - "rdp=18", + "pr2=4.13.0", + "pr2=4.14.0", + "pr2=5.0.0", "rdp", + "rdp=18", "sbdi-gtdb", - "sbdi-gtdb=R07-RS207-1", - "sbdi-gtdb=R06-RS202-3", "sbdi-gtdb=R06-RS202-1", + "sbdi-gtdb=R06-RS202-3", + "sbdi-gtdb=R07-RS207-1", + "silva", "silva=132", "silva=138", - "silva", - "unite-fungi=9.0", - "unite-fungi=8.3", - "unite-fungi=8.2", - "unite-fungi", - "unite-alleuk=9.0", - "unite-alleuk=8.3", + "unite-alleuk", "unite-alleuk=8.2", - "unite-alleuk" + "unite-alleuk=8.3", + "unite-alleuk=9.0", + "unite-fungi", + "unite-fungi=8.2", + "unite-fungi=8.3", + "unite-fungi=9.0", + "zehr-nifh", + "zehr-nifh=2.5.0" ] }, "dada_ref_tax_custom": {