Skip to content

Commit

Permalink
Merge pull request #675 from erikrikarddaniel/add-zehr-nifh
Browse files Browse the repository at this point in the history
Add Zehr nifH database
  • Loading branch information
erikrikarddaniel authored Dec 12, 2023
2 parents 4e10651 + 09ae4b7 commit a1ad717
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 36 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref`
- [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh`

### `Changed`

### `Fixed`
Expand Down
7 changes: 7 additions & 0 deletions bin/taxref_reformat_phytoref.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh

# Converts the PhytoRef fasta download into the two fasta files used for DADA2
# taxonomy assignment. Only header lines are rewritten; all other lines pass
# through unchanged.

phytoref_fasta=PhytoRef_with_taxonomy.fasta

# assignTaxonomy.fna -- input for assignTaxonomy(): on every line containing
# '>', drop the leading header field (everything up to and including the first
# '|'), then turn each remaining '|' into ';'.
cat "$phytoref_fasta" | sed -e '/>/s/>[^|]*|/>/' -e '/>/s/|/;/g' > assignTaxonomy.fna

# addSpecies.fna -- input for addSpecies(): on header lines, keep only the
# first and the last '|'-separated fields, joined by a single space. Headers
# with fewer than two '|' do not match and are left as they are.
cat "$phytoref_fasta" | sed '/^>/s/>\([^|]\+\)|.*|\([^|]\+\)/>\1 \2/' > addSpecies.fna
7 changes: 7 additions & 0 deletions bin/taxref_reformat_zehr-nifh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh

# Builds the two DADA2 reference fasta files from the *.fasta and *.csv files
# found in the current directory.

# assignTaxonomy.fna -- input for assignTaxonomy(): the fasta file is used as
# is. The copy only works when the glob matches a single file.
cp *.fasta assignTaxonomy.fna

# addSpecies.fna -- input for addSpecies(): take csv columns 2, 6 and 7, drop
# the header row (the one starting with "sequence,"), and write each remaining
# row as a two-line fasta record:
#   >{column 7} {column 6 with its leading "<digits> " part removed}
#   {column 2}
# The splitting is purely comma-based (no csv quoting is interpreted).
cut -d, -f 2,6,7 *.csv \
    | sed -e '/^sequence,/d' -e 's/\(.*\),[0-9]* \(.*\),\(.*\)/>\3 \2\n\1/' \
    > addSpecies.fna
56 changes: 40 additions & 16 deletions conf/ref_databases.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,6 @@ params {
fmtscript = "taxref_reformat_coidb.sh"
dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)"
}
// MIDORI2 CO1 entry without a version suffix. Every field is identical to the
// 'midori2-co1=gb250' entry, so both keys point at release GB250.
'midori2-co1' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
fmtscript = "taxref_reformat_midori2.sh"
// dbversion repeats the download URL from `file` verbatim.
dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
// MIDORI2 CO1 entry pinned to release GB250 (the "=gb250" suffix of the key).
// Every field is identical to the unsuffixed 'midori2-co1' entry.
'midori2-co1=gb250' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
fmtscript = "taxref_reformat_midori2.sh"
// dbversion repeats the download URL from `file` verbatim.
dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
'gtdb' {
title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1"
file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ]
Expand Down Expand Up @@ -76,6 +60,30 @@ params {
fmtscript = "taxref_reformat_gtdb.sh"
dbversion = "GTDB R05-RS95 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/)"
}
// MIDORI2 CO1 entry without a version suffix. Every field is identical to the
// 'midori2-co1=gb250' entry, so both keys point at release GB250.
'midori2-co1' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
fmtscript = "taxref_reformat_midori2.sh"
// dbversion repeats the download URL from `file` verbatim.
dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
// MIDORI2 CO1 entry pinned to release GB250 (the "=gb250" suffix of the key).
// Every field is identical to the unsuffixed 'midori2-co1' entry.
'midori2-co1=gb250' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
fmtscript = "taxref_reformat_midori2.sh"
// dbversion repeats the download URL from `file` verbatim.
dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
// PhytoRef entry. There is no version-suffixed variant of this key, and the
// download URL carries no release identifier.
'phytoref' {
title = "PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes"
// The downloaded file name (PhytoRef_with_taxonomy.fasta) is the exact name
// that taxref_reformat_phytoref.sh reads, so the two must be kept in sync.
file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ]
citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401."
fmtscript = "taxref_reformat_phytoref.sh"
dbversion = "unversioned"
// Ten levels are listed here.
// NOTE(review): presumably these match the ';'-separated header fields written to assignTaxonomy.fna -- confirm against the data.
taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species"
}
'pr2' {
title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0"
file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ]
Expand Down Expand Up @@ -239,6 +247,22 @@ params {
dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)"
shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"]
}
// Zehr lab nifH database, entry without a version suffix.
// The upstream release is v2.0.5, as shown by the file names in `file` and by
// the citation. `title` and `dbversion` previously read "2.5.0" (transposed
// digits) and have been corrected to match the downloaded files.
'zehr-nifh' {
title = "Zehr lab nifH database - version 2.0.5"
// Two downloads: a fasta and a csv. taxref_reformat_zehr-nifh.sh picks them up
// via the *.fasta and *.csv globs.
file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ]
citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213"
fmtscript = "taxref_reformat_zehr-nifh.sh"
dbversion = "Zehr-nifH v. 2.0.5"
// No Species level is listed for this database.
taxlevels = "Domain,Phylum,Class,Order,Family,Genus"
}
// Zehr lab nifH database, version-suffixed entry.
// The key keeps its "=2.5.0" spelling because it is the value users pass to
// --dada_ref_taxonomy and the one listed in the schema enum; renaming it would
// break existing invocations. The release actually downloaded is v2.0.5 (see
// the file names in `file` and the citation), so `title` and `dbversion` now
// report 2.0.5 instead of the transposed "2.5.0".
'zehr-nifh=2.5.0' {
title = "Zehr lab nifH database - version 2.0.5"
// Two downloads: a fasta and a csv. taxref_reformat_zehr-nifh.sh picks them up
// via the *.fasta and *.csv globs.
file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ]
citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213"
fmtscript = "taxref_reformat_zehr-nifh.sh"
dbversion = "Zehr-nifH v. 2.0.5"
// No Species level is listed for this database.
taxlevels = "Domain,Phylum,Class,Order,Family,Genus"
}
}
//QIIME2 taxonomic reference databases
qiime_ref_databases {
Expand Down
2 changes: 1 addition & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ DADA2 reduces sequence errors and dereplicates sequences by quality filtering, d
- `ASV_table.tsv`: Counts for each ASV sequence.
- `DADA2_stats.tsv`: Tracking read numbers through DADA2 processing steps, for each sample.
- `DADA2_table.rds`: DADA2 ASV table as R object.
- `DADA2_tables.tsv`: DADA2 ASV table.
- `DADA2_table.tsv`: DADA2 ASV table.
- `dada2/args/`: Directory containing files with all parameters for DADA2 steps.
- `dada2/log/`: Directory containing log files for DADA2 steps.
- `dada2/QC/`
Expand Down
41 changes: 22 additions & 19 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -335,36 +335,39 @@
"description": "Name of supported database, and optionally also version number",
"default": "silva=138",
"enum": [
"midori2-co1=gb250",
"midori2-co1",
"coidb",
"coidb=221216",
"gtdb",
"gtdb=R05-RS95",
"gtdb=R06-RS202",
"gtdb=R07-RS207",
"gtdb=R08-RS214",
"gtdb",
"coidb",
"coidb=221216",
"pr2=5.0.0",
"pr2=4.14.0",
"pr2=4.13.0",
"midori2-co1",
"midori2-co1=gb250",
"phytoref",
"pr2",
"rdp=18",
"pr2=4.13.0",
"pr2=4.14.0",
"pr2=5.0.0",
"rdp",
"rdp=18",
"sbdi-gtdb",
"sbdi-gtdb=R07-RS207-1",
"sbdi-gtdb=R06-RS202-3",
"sbdi-gtdb=R06-RS202-1",
"sbdi-gtdb=R06-RS202-3",
"sbdi-gtdb=R07-RS207-1",
"silva",
"silva=132",
"silva=138",
"silva",
"unite-fungi=9.0",
"unite-fungi=8.3",
"unite-fungi=8.2",
"unite-fungi",
"unite-alleuk=9.0",
"unite-alleuk=8.3",
"unite-alleuk",
"unite-alleuk=8.2",
"unite-alleuk"
"unite-alleuk=8.3",
"unite-alleuk=9.0",
"unite-fungi",
"unite-fungi=8.2",
"unite-fungi=8.3",
"unite-fungi=9.0",
"zehr-nifh",
"zehr-nifh=2.5.0"
]
},
"dada_ref_tax_custom": {
Expand Down

0 comments on commit a1ad717

Please sign in to comment.