From b89983e8d0fc390162fbe448db94ee695d665a70 Mon Sep 17 00:00:00 2001 From: Vijini Mallawaarachchi Date: Tue, 19 Mar 2024 20:52:36 +1030 Subject: [PATCH] DOC: Update README.md --- README.md | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 7c538c8..4265f3d 100644 --- a/README.md +++ b/README.md @@ -169,24 +169,27 @@ The ConDiGA logo was generated using [DALL·E 3](https://openai.com/dall-e-3) fr ## Citation -ConDiGA has been accepted for publication at [Microbiome](https://microbiomejournal.biomedcentral.com/). Stay tuned for the Microbiome publication. +ConDiGA is published in [Microbiome](https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-024-01775-3) at DOI: [10.1186/s40168-024-01775-3](https://doi.org/10.1186/s40168-024-01775-3). -If you use ConDiGA in your work, please cite the [bioRxiv preprint](https://www.biorxiv.org/content/10.1101/2023.04.19.537311v1) for now as follows. +If you use ConDiGA in your work, please cite it as +> Wu, E., Mallawaarachchi, V., Zhao, J. et al. Contigs directed gene annotation (ConDiGA) for accurate protein sequence database construction in metaproteomics. Microbiome 12, 58 (2024). https://doi.org/10.1186/s40168-024-01775-3 ```bibtex -@article {Wu2023.04.19.537311, - author = {Enhui Wu and Vijini Mallawaarachchi and Jinzhi Zhao and Yi Yang and Hebin Liu and Xiaoqing Wang and Chengpin Shen and Yu Lin and Liang Qiao}, - title = {Contigs directed gene annotation (ConDiGA) for accurate protein sequence database construction in metaproteomics}, - elocation-id = {2023.04.19.537311}, - year = {2023}, - doi = {10.1101/2023.04.19.537311}, - publisher = {Cold Spring Harbor Laboratory}, - abstract = {Microbiota are closely associated to human health and disease. Metaproteomics can provide a direct means to identify microbial proteins in microbiota for compositional and functional characterization.
However, in-depth and accurate metaproteomics is still limited due to the extreme complexity and high diversity of microbiota samples. One of the main challenges is constructing a protein sequence database that best fits the microbiota sample. Herein, we proposed an accurate taxonomic annotation pipeline from metagenomic data for deep metaproteomic coverage, namely contigs directed gene annotation (ConDiGA). We mixed 12 known bacterial species to derive a synthetic microbial community to benchmark metagenomic and metaproteomic pipelines. With the optimized taxonomic annotation strategy by ConDiGA, we built a protein sequence database from the metagenomic data for metaproteomic analysis and identified about 12,000 protein groups, which was very close to the result obtained with the reference proteome protein sequence database of the 12 species. We also demonstrated the practicability of the method in real fecal samples, achieved deep proteome coverage of human gut microbiome, and compared the function and taxonomy of gut microbiota at metagenomic level and metaproteomic level. Our study can tackle the current taxonomic annotation reliability problem in metagenomics-derived protein sequence database for metaproteomics. The unique dataset of metagenomic and the metaproteomic data of the 12 bacterial species is publicly available as a standard benchmarking sample for evaluating various analysis pipelines. 
The code of ConDiGA is open access at GitHub for the analysis of real microbiota samples.Competing Interest StatementThe authors have declared no competing interest.}, - URL = {https://www.biorxiv.org/content/early/2023/04/20/2023.04.19.537311}, - eprint = {https://www.biorxiv.org/content/early/2023/04/20/2023.04.19.537311.full.pdf}, - journal = {bioRxiv} +@article{Wu2024, +author={Wu, Enhui and Mallawaarachchi, Vijini and Zhao, Jinzhi and Yang, Yi and Liu, Hebin and Wang, Xiaoqing and Shen, Chengpin and Lin, Yu and Qiao, Liang}, +title={Contigs directed gene annotation (ConDiGA) for accurate protein sequence database construction in metaproteomics}, +journal={Microbiome}, +year={2024}, +month={Mar}, +day={19}, +volume={12}, +number={1}, +pages={58}, +abstract={Microbiota are closely associated with human health and disease. Metaproteomics can provide a direct means to identify microbial proteins in microbiota for compositional and functional characterization. However, in-depth and accurate metaproteomics is still limited due to the extreme complexity and high diversity of microbiota samples. It is generally recommended to use metagenomic data from the same samples to construct the protein sequence database for metaproteomic data analysis. Although different metagenomics-based database construction strategies have been developed, an optimization of gene taxonomic annotation has not been reported, which, however, is extremely important for accurate metaproteomic analysis.}, +issn={2049-2618}, +doi={10.1186/s40168-024-01775-3}, +url={https://doi.org/10.1186/s40168-024-01775-3} } - ``` **NOTE:** The database created by ConDiGA is described as MD3 in the manuscript. @@ -194,4 +197,4 @@ Also, please cite the following tools used by ConDiGA, the assembler and the rel * Zhu W, Lomsadze A, Borodovsky M. Ab initio gene identification in metagenomic sequences. Nucleic acids research, 38 (12): 132-132 (2010). 
[https://doi.org/10.1093/nar/gkq275](https://doi.org/10.1093/nar/gkq275) * Li H. Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34:3094-3100 (2018). [https://doi.org/10.1093/bioinformatics/bty191](https://doi.org/10.1093/bioinformatics/bty191) -* Woodcroft BJ, Newell R, CoverM: Read coverage calculator for metagenomics (2017). [https://github.com/wwood/CoverM](https://github.com/wwood/CoverM) \ No newline at end of file +* Woodcroft BJ, Newell R, CoverM: Read coverage calculator for metagenomics (2017). [https://github.com/wwood/CoverM](https://github.com/wwood/CoverM)