Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PhytoRef #674

Merged
merged 5 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref`

### `Changed`

### `Fixed`
Expand Down
7 changes: 7 additions & 0 deletions bin/taxref_reformat_phytoref.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
#
# Reformat the PhytoRef FASTA download into the two reference files named
# after the DADA2 functions that consume them.
#
# Input  (current directory): PhytoRef_with_taxonomy.fasta
# Output (current directory): assignTaxonomy.fna, addSpecies.fna
#
# Header lines are assumed to be '|'-delimited, with the sequence identifier
# as the first field and the species as the last field -- TODO confirm
# against the upstream file. Sequence lines are passed through unchanged.

# Stop at the first failing command so that a missing or unreadable input is
# reported as an error instead of silently producing empty output files.
set -e

input=PhytoRef_with_taxonomy.fasta

# Write the assignTaxonomy() fasta file: assignTaxonomy.fna
# On header lines: drop everything from '>' up to and including the first '|'
# (the leading identifier field), then turn the remaining '|' into ';'.
sed -e '/>/s/>[^|]*|/>/' -e '/>/s/|/;/g' "$input" > assignTaxonomy.fna

# Write the addSpecies() fasta file: addSpecies.fna
# On header lines: keep only the first and the last '|'-separated fields,
# joined by a single space. '[^|][^|]*' is the portable BRE spelling of
# "one or more non-pipe characters"; '\+' is a GNU extension that other sed
# implementations may treat as a literal '+'.
sed '/^>/s/>\([^|][^|]*\)|.*|\([^|][^|]*\)/>\1 \2/' "$input" > addSpecies.fna
40 changes: 24 additions & 16 deletions conf/ref_databases.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,6 @@ params {
fmtscript = "taxref_reformat_coidb.sh"
dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)"
}
// Reference database entry 'midori2-co1' (key without a version suffix).
// Every value below is identical to the 'midori2-co1=gb250' entry.
'midori2-co1' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
// Download location(s): a single gzipped FASTA from the GenBank250 release.
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
// Name of the reformatting script for this database.
// NOTE(review): presumably resolved against the pipeline's bin/ directory -- confirm.
fmtscript = "taxref_reformat_midori2.sh"
// Version label: release name followed by the source URL.
dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
// Comma-separated rank names, Phylum through Species.
// NOTE(review): presumably labels the taxonomy columns of the result -- confirm against the consumer.
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
// Reference database entry 'midori2-co1=gb250' (key pinned to release GB250).
// Every value below is identical to the unversioned 'midori2-co1' entry.
'midori2-co1=gb250' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
// Download location(s): a single gzipped FASTA from the GenBank250 release.
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
// Name of the reformatting script for this database.
// NOTE(review): presumably resolved against the pipeline's bin/ directory -- confirm.
fmtscript = "taxref_reformat_midori2.sh"
// Version label: release name followed by the source URL.
dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
// Comma-separated rank names, Phylum through Species.
// NOTE(review): presumably labels the taxonomy columns of the result -- confirm against the consumer.
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
'gtdb' {
title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1"
file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ]
Expand Down Expand Up @@ -76,6 +60,30 @@ params {
fmtscript = "taxref_reformat_gtdb.sh"
dbversion = "GTDB R05-RS95 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/)"
}
// Reference database entry 'midori2-co1' (key without a version suffix).
// Every value below is identical to the 'midori2-co1=gb250' entry.
'midori2-co1' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
// Download location(s): a single gzipped FASTA from the GenBank250 release.
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
// Name of the reformatting script for this database.
// NOTE(review): presumably resolved against the pipeline's bin/ directory -- confirm.
fmtscript = "taxref_reformat_midori2.sh"
// Version label: release name followed by the source URL.
dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
// Comma-separated rank names, Phylum through Species.
// NOTE(review): presumably labels the taxonomy columns of the result -- confirm against the consumer.
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
// Reference database entry 'midori2-co1=gb250' (key pinned to release GB250).
// Every value below is identical to the unversioned 'midori2-co1' entry.
'midori2-co1=gb250' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
// Download location(s): a single gzipped FASTA from the GenBank250 release.
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
// Name of the reformatting script for this database.
// NOTE(review): presumably resolved against the pipeline's bin/ directory -- confirm.
fmtscript = "taxref_reformat_midori2.sh"
// Version label: release name followed by the source URL.
dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
// Comma-separated rank names, Phylum through Species.
// NOTE(review): presumably labels the taxonomy columns of the result -- confirm against the consumer.
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
// Reference database entry 'phytoref': PhytoRef plastid 16S rRNA database.
// Selected with `--dada_ref_taxonomy phytoref`.
'phytoref' {
title = "PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes"
// Download location(s): a single uncompressed FASTA served over plain http.
// Its file name, PhytoRef_with_taxonomy.fasta, is the input name that
// bin/taxref_reformat_phytoref.sh reads.
file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ]
citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401."
// Reformatting script that writes assignTaxonomy.fna and addSpecies.fna.
fmtscript = "taxref_reformat_phytoref.sh"
// NOTE(review): no release identifier is recorded here, whereas e.g. the
// midori2-co1 entries store a release name plus source URL -- confirm
// whether upstream publishes a version that could be used instead.
dbversion = "unknown"
// Ten comma-separated rank names, Domain through Species.
// NOTE(review): presumably must match the number of ';'-separated header
// fields that the reformatting script leaves in assignTaxonomy.fna -- confirm.
taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species"
}
'pr2' {
title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0"
file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ]
Expand Down
2 changes: 1 addition & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ DADA2 reduces sequence errors and dereplicates sequences by quality filtering, d
- `ASV_table.tsv`: Counts for each ASV sequence.
- `DADA2_stats.tsv`: Tracking read numbers through DADA2 processing steps, for each sample.
- `DADA2_table.rds`: DADA2 ASV table as R object.
- `DADA2_tables.tsv`: DADA2 ASV table.
- `DADA2_table.tsv`: DADA2 ASV table.
- `dada2/args/`: Directory containing files with all parameters for DADA2 steps.
- `dada2/log/`: Directory containing log files for DADA2 steps.
- `dada2/QC/`
Expand Down
39 changes: 20 additions & 19 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -335,36 +335,37 @@
"description": "Name of supported database, and optionally also version number",
"default": "silva=138",
"enum": [
"midori2-co1=gb250",
"midori2-co1",
"coidb",
"coidb=221216",
"gtdb",
"gtdb=R05-RS95",
"gtdb=R06-RS202",
"gtdb=R07-RS207",
"gtdb=R08-RS214",
"gtdb",
"coidb",
"coidb=221216",
"pr2=5.0.0",
"pr2=4.14.0",
"pr2=4.13.0",
"midori2-co1",
"midori2-co1=gb250",
"phytoref",
"pr2",
"rdp=18",
"pr2=4.13.0",
"pr2=4.14.0",
"pr2=5.0.0",
"rdp",
"rdp=18",
"sbdi-gtdb",
"sbdi-gtdb=R07-RS207-1",
"sbdi-gtdb=R06-RS202-3",
"sbdi-gtdb=R06-RS202-1",
"sbdi-gtdb=R06-RS202-3",
"sbdi-gtdb=R07-RS207-1",
"silva",
"silva=132",
"silva=138",
"silva",
"unite-fungi=9.0",
"unite-fungi=8.3",
"unite-fungi=8.2",
"unite-fungi",
"unite-alleuk=9.0",
"unite-alleuk=8.3",
"unite-alleuk",
"unite-alleuk=8.2",
"unite-alleuk"
"unite-alleuk=8.3",
"unite-alleuk=9.0",
"unite-fungi",
"unite-fungi=8.2",
"unite-fungi=8.3",
"unite-fungi=9.0"
]
},
"dada_ref_tax_custom": {
Expand Down
Loading