diff --git a/joss/paper.bib b/joss/paper.bib index 5623ad62f..f994eac26 100644 --- a/joss/paper.bib +++ b/joss/paper.bib @@ -57,3 +57,50 @@ @article{abdi:2013 year={2013}, pages={149–179}, } + +@article{xia:2009, + author = {Xia, Jianguo and Psychogios, Nick and Young, Nelson and Wishart, David S.}, + title = {{MetaboAnalyst}: a web server for metabolomic data analysis and interpretation}, + journal = {Nucleic Acids Research}, + volume = {37}, + number = {suppl\_2}, + pages = {W652--W660}, + year = {2009}, + month = may, + abstract = "{Metabolomics is a newly emerging field of ‘omics’ research that is concerned with characterizing large numbers of metabolites using NMR, chromatography and mass spectrometry. It is frequently used in biomarker identification and the metabolic profiling of cells, tissues or organisms. The data processing challenges in metabolomics are quite unique and often require specialized (or expensive) data analysis software and a detailed knowledge of cheminformatics, bioinformatics and statistics. In an effort to simplify metabolomic data analysis while at the same time improving user accessibility, we have developed a freely accessible, easy-to-use web server for metabolomic data analysis called MetaboAnalyst. Fundamentally, MetaboAnalyst is a web-based metabolomic data processing tool not unlike many of today's web-based microarray analysis packages. It accepts a variety of input data (NMR peak lists, binned spectra, MS peak lists, compound/concentration data) in a wide variety of formats. It also offers a number of options for metabolomic data processing, data normalization, multivariate statistical analysis, graphing, metabolite identification and pathway mapping. In particular, MetaboAnalyst supports such techniques as: fold change analysis, t-tests, PCA, PLS-DA, hierarchical clustering and a number of more sophisticated statistical or machine learning methods. 
It also employs a large library of reference spectra to facilitate compound identification from most kinds of input spectra. MetaboAnalyst guides users through a step-by-step analysis pipeline using a variety of menus, information hyperlinks and check boxes. Upon completion, the server generates a detailed report describing each method used, embedded with graphical and tabular outputs. MetaboAnalyst is capable of handling most kinds of metabolomic data and was designed to perform most of the common kinds of metabolomic data analyses. MetaboAnalyst is accessible at http://www.metaboanalyst.ca}", + issn = {0305-1048}, + doi = {10.1093/nar/gkp356}, + url = {https://doi.org/10.1093/nar/gkp356}, + eprint = {https://academic.oup.com/nar/article-pdf/37/suppl\_2/W652/3933058/gkp356.pdf}, +} + +@article{chong:2018, + author = {Chong, Jasmine and Soufan, Othman and Li, Carin and Caraus, Iurie and Li, Shuzhao and Bourque, Guillaume and Wishart, David S. and Xia, Jianguo}, + title = {{MetaboAnalyst} 4.0: towards more transparent and integrative metabolomics analysis}, + journal = {Nucleic Acids Research}, + volume = {46}, + number = {W1}, + pages = {W486--W494}, + year = {2018}, + month = may, + abstract = "{We present a new update to MetaboAnalyst (version 4.0) for comprehensive metabolomic data analysis, interpretation, and integration with other omics data. Since the last major update in 2015, MetaboAnalyst has continued to evolve based on user feedback and technological advancements in the field. 
For this year's update, four new key features have been added to MetaboAnalyst 4.0, including: (1) real-time R command tracking and display coupled with the release of a companion MetaboAnalystR package; (2) a MS Peaks to Pathways module for prediction of pathway activity from untargeted mass spectral data using the mummichog algorithm; (3) a Biomarker Meta-analysis module for robust biomarker identification through the combination of multiple metabolomic datasets and (4) a Network Explorer module for integrative analysis of metabolomics, metagenomics, and/or transcriptomics data. The user interface of MetaboAnalyst 4.0 has been reengineered to provide a more modern look and feel, as well as to give more space and flexibility to introduce new functions. The underlying knowledgebases (compound libraries, metabolite sets, and metabolic pathways) have also been updated based on the latest data from the Human Metabolome Database (HMDB). A Docker image of MetaboAnalyst is also available to facilitate download and local installation of MetaboAnalyst. 
MetaboAnalyst 4.0 is freely available at http://metaboanalyst.ca.}", + issn = {0305-1048}, + doi = {10.1093/nar/gky310}, + url = {https://doi.org/10.1093/nar/gky310}, + eprint = {https://academic.oup.com/nar/article-pdf/46/W1/W486/25110162/gky310.pdf}, +} + +@article{armitage:2015, +author = {Armitage, Emily Grace and Godzien, Joanna and Alonso-Herranz, Vanesa and López-Gonzálvez, Ángeles and Barbas, Coral}, +title = {Missing value imputation strategies for metabolomics data}, +journal = {Electrophoresis}, +volume = {36}, +number = {24}, +pages = {3050--3060}, +keywords = {CE-MS, Data, False-discovery rate, Imputation, k-nearest neighbour, Metabolomics, Missing values}, +doi = {10.1002/elps.201500352}, +url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/elps.201500352}, +eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/elps.201500352}, +abstract = {The origin of missing values can be caused by different reasons and depending on these origins missing values should be considered differently and dealt with in different ways. In this research, four methods of imputation have been compared with respect to revealing their effects on the normality and variance of data, on statistical significance and on the approximation of a suitable threshold to accept missing data as truly missing. Additionally, the effects of different strategies for controlling familywise error rate or false discovery and how they work with the different strategies for missing value imputation have been evaluated. Missing values were found to affect normality and variance of data and k-means nearest neighbour imputation was the best method tested for restoring this. Bonferroni correction was the best method for maximizing true positives and minimizing false positives and it was observed that as low as 40\% missing data could be truly missing. 
The range between 40 and 70\% missing values was defined as a “gray area” and therefore a strategy has been proposed that provides a balance between the optimal imputation strategy that was k-means nearest neighbor and the best approximation of positioning real zeros.}, +year = {2015} +} diff --git a/joss/paper.md b/joss/paper.md index d969e1da0..8b143a250 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -349,13 +349,9 @@ analysis methods. # Related Work One of the most commonly used tools for the analysis of metabolomics data is -MetaboAnalyst, initially released in 2009 (Xia et al., MetaboAnalyst: a web -server for metabolomics data analysis and integration, Nucleic Acids Research, -37, W652, 2009) and currently on version 4 (Chong et al., Using MetaboAnalyst -4.0 for Comprehensive and Integrative Metabolomics Data Analysis, Curr. -Protocols in Bioinformatics, 68, e86, 2019). MetaboAnalyst has a wide range -of capabilities including data processing, statistical analysis and pathway -enrichment analyses. +MetaboAnalyst, initially released in 2009 [@xia:2009] and currently on version 4 +[@chong:2018]. MetaboAnalyst has a wide range of capabilities including data +processing, statistical analysis and pathway enrichment analyses. The initial motivation for the development of Viime was to readily ingest, integrate and analyze metabolomics data from multiple platforms--a unique @@ -374,14 +370,11 @@ metabolites detected by different platforms. Viime offers another value-added feature during data ingestion: imputation of missing data. MetaboAnalyst replaces all missing values with 1/5 of the positive values of the corresponding column. Viime uses a more sophisticated imputation -strategy (Armitage EG, Godzien J, Alonso-Herranz V, López-Gonzálvez A, Barbas C. -Missing value imputation strategies for metabolomics data. Electrophoresis 2015, -36, 3050–3060. 
https://doi.org/10.1002/elps.201500352), heuristically -classifying missing data as Missing Not At Random (MNAR) or Missing Completely -At Random (MCAR). For MNAR data, the user can choose to replace the values with -either zeros or half of the minimum value of that variable, while the MCAR -options include imputation by Random Forest, K-Nearest Neighbors, the mean -value, or the median value. +strategy [@armitage:2015], heuristically classifying missing data as Missing Not +At Random (MNAR) or Missing Completely At Random (MCAR). For MNAR data, the user +can choose to replace the values with either zeros or half of the minimum value +of that variable, while the MCAR options include imputation by Random Forest, +K-Nearest Neighbors, the mean value, or the median value. Finally, Viime includes powerful interactive data manipulation and visualization tools, improving upon tools such as MetaboAnalyst. Both platforms enable a