-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathreferences.bib
465 lines (428 loc) · 42 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
@article{geoquery,
  author  = {Davis, Sean and Meltzer, Paul},
  title   = {{GEOquery}: a bridge between the {Gene Expression Omnibus} ({GEO}) and {BioConductor}},
  journal = {Bioinformatics},
  year    = {2007},
  volume  = {23},
  number  = {14},
  pages   = {1846--1847},
  doi     = {10.1093/bioinformatics/btm254},
}
@article{LiE7111,
  author    = {Li, Siming and Mi, Lin and Yu, Lei and Yu, Qi and Liu, Tongyu and Wang, Guo-Xiao and Zhao, Xu-Yun and Wu, Jun and Lin, Jiandie D.},
  title     = {{Zbtb7b} engages the long noncoding {RNA} {Blnc1} to drive brown and beige fat development and thermogenesis},
  journal   = {Proceedings of the National Academy of Sciences},
  year      = {2017},
  volume    = {114},
  number    = {34},
  pages     = {E7111--E7120},
  doi       = {10.1073/pnas.1703494114},
  issn      = {0027-8424},
  publisher = {National Academy of Sciences},
  url       = {http://www.pnas.org/content/114/34/E7111},
  eprint    = {http://www.pnas.org/content/114/34/E7111.full.pdf},
  abstract  = {Brown and beige fat function has important implications for metabolic physiology and the treatment of metabolic disorders. How transcription factors interface with long noncoding RNAs (lncRNAs), an emerging class of regulatory factors, to drive development and thermogenesis of brown/beige fat remains essentially unknown. Here we identified Zbtb7b as an activator of the thermogenic gene program through a genome-wide functional screen and showed that it plays an essential role in cold-induced thermogenesis and beige fat formation. Mechanistically, Zbtb7b forms a ribonucleoprotein transcriptional complex with the lncRNA Blnc1 and drives thermogenic gene expression via a feedforward loop. This work illustrates the emerging concept of a protein{\textendash}lncRNA regulatory network in the control of adipose tissue biology and energy metabolism. Brown and beige adipocytes convert chemical energy into heat through uncoupled respiration to defend against cold stress. Beyond thermogenesis, brown and beige fats engage other metabolic tissues via secreted factors to influence systemic energy metabolism. How the protein and long noncoding RNA (lncRNA) regulatory networks act in concert to regulate key aspects of thermogenic adipocyte biology remains largely unknown. Here we developed a genome-wide functional screen to interrogate the transcription factors and cofactors in thermogenic gene activation and identified zinc finger and BTB domain-containing 7b (Zbtb7b) as a potent driver of brown fat development and thermogenesis and cold-induced beige fat formation. Zbtb7b is required for activation of the thermogenic gene program in brown and beige adipocytes. Genetic ablation of Zbtb7b impaired cold-induced transcriptional remodeling in brown fat, rendering mice sensitive to cold temperature, and diminished browning of inguinal white fat. Proteomic analysis revealed a mechanistic link between Zbtb7b and the lncRNA regulatory pathway through which Zbtb7b recruits the brown fat lncRNA 1 (Blnc1)/heterogeneous nuclear ribonucleoprotein U (hnRNPU) ribonucleoprotein complex to activate thermogenic gene expression in adipocytes. These findings illustrate the emerging concept of a protein{\textendash}lncRNA regulatory network in the control of adipose tissue biology and energy metabolism.},
}
@article{oligo,
  author    = {Carvalho, Benilton S. and Irizarry, Rafael A.},
  title     = {A Framework for Oligonucleotide Microarray Preprocessing},
  journal   = {Bioinformatics},
  year      = {2010},
  volume    = {26},
  number    = {19},
  pages     = {2363--2367},
  doi       = {10.1093/bioinformatics/btq431},
  issn      = {1367-4803},
  publisher = {Oxford University Press},
  address   = {Oxford, UK},
}
@manual{pdmogene,
  author = {Carvalho, Benilton},
  title  = {pd.mogene.2.1.st: Platform Design Info for Affymetrix MoGene-2.1-st},
  note   = {R package version 3.14.1},
  year   = {2015},
}
@article{AQM,
  author  = {Kauffmann, Audrey and Gentleman, Robert and Huber, Wolfgang},
  title   = {{arrayQualityMetrics}---a {Bioconductor} package for quality assessment of microarray data},
  journal = {Bioinformatics},
  year    = {2009},
  volume  = {25},
  number  = {3},
  pages   = {415--416},
}
@book{ggplot2,
  author    = {Wickham, Hadley},
  title     = {{ggplot2}: Elegant Graphics for Data Analysis},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {2009},
  isbn      = {978-0-387-98140-6},
  url       = {http://ggplot2.org},
}
@manual{ggrepel,
  author = {Slowikowski, Kamil},
  title  = {{ggrepel}: Repulsive Text and Label Geoms for `{ggplot2}'},
  year   = {2017},
  note   = {R package version 0.7.0},
  url    = {https://CRAN.R-project.org/package=ggrepel},
}
@manual{pvca,
  author = {Bushel, Pierre},
  title  = {pvca: Principal Variance Component Analysis (PVCA)},
  note   = {R package version 1.18.0},
  year   = {2013},
}
@article{rmaIri,
  author  = {Irizarry, Rafael A. and Hobbs, Bridget and Collin, Francois and Beazer-Barclay, Yasmin D. and Antonellis, Kristen J. and Scherf, Uwe and Speed, Terence P.},
  title   = {Exploration, normalization, and summaries of high density oligonucleotide array probe level data},
  journal = {Biostatistics},
  year    = {2003},
  volume  = {4},
  number  = {2},
  pages   = {249--264},
  doi     = {10.1093/biostatistics/4.2.249},
  url     = {http://dx.doi.org/10.1093/biostatistics/4.2.249},
}
@article{Hackstadt2009,
  author    = {Hackstadt, Amber J. and Hess, Ann M.},
  title     = {Filtering for increased power for microarray data analysis},
  journal   = {BMC Bioinformatics},
  year      = {2009},
  month     = jan,
  volume    = {10},
  pages     = {11},
  doi       = {10.1186/1471-2105-10-11},
  issn      = {1471-2105},
  pmid      = {19133141},
  publisher = {BioMed Central},
  url       = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC2661050},
  mendeley-groups = {Springer},
  abstract  = {BACKGROUND Due to the large number of hypothesis tests performed during the process of routine analysis of microarray data, a multiple testing adjustment is certainly warranted. However, when the number of tests is very large and the proportion of differentially expressed genes is relatively low, the use of a multiple testing adjustment can result in very low power to detect those genes which are truly differentially expressed. Filtering allows for a reduction in the number of tests and a corresponding increase in power. Common filtering methods include filtering by variance, average signal or MAS detection call (for Affymetrix arrays). We study the effects of filtering in combination with the Benjamini-Hochberg method for false discovery rate control and q-value for false discovery rate estimation. RESULTS Three case studies are used to compare three different filtering methods in combination with the two false discovery rate methods and three different preprocessing methods. For the case studies considered, filtering by detection call and variance (on the original scale) consistently led to an increase in the number of differentially expressed genes identified. On the other hand, filtering by variance on the log2 scale had a detrimental effect when paired with MAS5 or PLIER preprocessing methods, even when the testing was done on the log2 scale. A simulation study was done to further examine the effect of filtering by variance. We find that filtering by variance leads to higher power, often with a decrease in false discovery rate, when paired with either of the false discovery rate methods considered. This holds regardless of the proportion of genes which are differentially expressed or whether we assume dependence or independence among genes. CONCLUSION The case studies show that both detection call and variance filtering are viable methods of filtering which can increase the number of differentially expressed genes identified. The simulation study demonstrates that when paired with a false discovery rate method, filtering by variance can increase power while still controlling the false discovery rate. Filtering out 50{\%} of probe sets seems reasonable as long as the majority of genes are not expected to be differentially expressed.},
}
@article{Jeanmougin2010,
  author    = {Jeanmougin, Marine and de Reynies, Aurelien and Marisa, Laetitia and Paccard, Caroline and Nuel, Gregory and Guedj, Mickael},
  editor    = {Shedden, Kerby},
  title     = {Should We Abandon the t-Test in the Analysis of Gene Expression Microarray Data: A Comparison of Variance Modeling Strategies},
  journal   = {PLOS ONE},
  year      = {2010},
  month     = sep,
  volume    = {5},
  number    = {9},
  pages     = {e12336},
  doi       = {10.1371/journal.pone.0012336},
  issn      = {1932-6203},
  publisher = {Public Library of Science},
  url       = {http://dx.plos.org/10.1371/journal.pone.0012336},
  mendeley-groups = {Springer},
  abstract  = {High-throughput post-genomic studies are now routinely and promisingly investigated in biological and biomedical research. The main statistical approach to select genes differentially expressed between two groups is to apply a t-test, which is subject of criticism in the literature. Numerous alternatives have been developed based on different and innovative variance modeling strategies. However, a critical issue is that selecting a different test usually leads to a different gene list. In this context and given the current tendency to apply the t-test, identifying the most efficient approach in practice remains crucial. To provide elements to answer, we conduct a comparison of eight tests representative of variance modeling strategies in gene expression data: Welch's t-test, ANOVA [1], Wilcoxon's test, SAM [2], RVM [3], limma [4], VarMixt [5] and SMVar [6]. Our comparison process relies on four steps (gene list analysis, simulations, spike-in data and re-sampling) to formulate comprehensive and robust conclusions about test performance, in terms of statistical power, false-positive rate, execution time and ease of use. Our results raise concerns about the ability of some methods to control the expected number of false positives at a desirable level. Besides, two tests (limma and VarMixt) show significant improvement compared to the t-test, in particular to deal with small sample sizes. In addition limma presents several practical advantages, so we advocate its application to analyze gene expression data.},
}
@incollection{Smyth2005,
  author    = {Smyth, Gordon K.},
  title     = {{limma}: Linear Models for Microarray Data},
  booktitle = {Bioinformatics and Computational Biology Solutions Using {R} and {Bioconductor}},
  editor    = {Gentleman, R. and Carey, V. and Dudoit, S. and Irizarry, R. and Huber, W.},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {2005},
  pages     = {397--420},
  doi       = {10.1007/0-387-29362-0_23},
  url       = {http://link.springer.com/10.1007/0-387-29362-0_23},
  mendeley-groups = {Springer},
}
@article{Smyth2004,
  author   = {Smyth, Gordon K.},
  title    = {Linear Models and Empirical {Bayes} Methods for Assessing Differential Expression in Microarray Experiments},
  journal  = {Statistical Applications in Genetics and Molecular Biology},
  year     = {2004},
  month    = jan,
  volume   = {3},
  number   = {1},
  pages    = {1--25},
  doi      = {10.2202/1544-6115.1027},
  issn     = {1544-6115},
  pmid     = {16646809},
  url      = {https://www.degruyter.com/view/j/sagmb.2004.3.issue-1/sagmb.2004.3.1.1027/sagmb.2004.3.1.1027.xml},
  mendeley-groups = {Springer},
  abstract = {The problem of identifying differentially expressed genes in designed microarray experiments is considered. Lonnstedt and Speed (2002) derived an expression for the posterior odds of differential expression in a replicated two-color experiment using a simple hierarchical parametric model. The purpose of this paper is to develop the hierarchical model of Lonnstedt and Speed (2002) into a practical approach for general microarray experiments with arbitrary numbers of treatments and RNA samples. The model is reset in the context of general linear models with arbitrary coefficients and contrasts of interest. The approach applies equally well to both single channel and two color microarray experiments. Consistent, closed form estimators are derived for the hyperparameters in the model. The estimators proposed have robust behavior even for small numbers of arrays and allow for incomplete data arising from spot filtering or spot quality weights. The posterior odds statistic is reformulated in terms of a moderated t-statistic in which posterior residual standard deviations are used in place of ordinary standard deviations. The empirical Bayes approach is equivalent to shrinkage of the estimated sample variances towards a pooled estimate, resulting in far more stable inference when the number of arrays is small. The use of moderated t-statistics has the advantage over the posterior odds that the number of hyperparameters which need to estimated is reduced; in particular, knowledge of the non-null prior for the fold changes are not required. The moderated t-statistic is shown to follow a t-distribution with augmented degrees of freedom. The moderated t inferential approach extends to accommodate tests of composite null hypotheses through the use of moderated F-statistics. The performance of the methods is demonstrated in a simulation study. Results are presented for two publicly available data sets.},
}
@article{Tusher2001,
  author   = {Tusher, Virginia Goss and Tibshirani, Robert and Chu, Gilbert},
  title    = {Significance analysis of microarrays applied to the ionizing radiation response},
  journal  = {Proceedings of the National Academy of Sciences of the United States of America},
  year     = {2001},
  month    = apr,
  volume   = {98},
  number   = {9},
  pages    = {5116--5121},
  doi      = {10.1073/pnas.091062498},
  issn     = {0027-8424},
  pmid     = {11309499},
  url      = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC33173},
  mendeley-groups = {Springer},
  abstract = {Microarrays can measure the expression of thousands of genes to identify changes in expression between different biological states. Methods are needed to determine the significance of these changes while accounting for the enormous number of genes. We describe a method, Significance Analysis of Microarrays (SAM), that assigns a score to each gene on the basis of change in gene expression relative to the standard deviation of repeated measurements. For genes with scores greater than an adjustable threshold, SAM uses permutations of the repeated measurements to estimate the percentage of genes identified by chance, the false discovery rate (FDR). When the transcriptional response of human cells to ionizing radiation was measured by microarrays, SAM identified 34 genes that changed at least 1.5-fold with an estimated FDR of 12{\%}, compared with FDRs of 60 and 84{\%} by using conventional methods of analysis. Of the 34 genes, 19 were involved in cell cycle regulation and 3 in apoptosis. Surprisingly, four nucleotide excision repair genes were induced, suggesting that this repair pathway for UV-damaged DNA might play a previously unrecognized role in repairing DNA damaged by ionizing radiation.},
}
@article{Chrominski2015,
  author    = {Chrominski, Kornel and Tkacz, Magdalena},
  title     = {Comparison of High-Level Microarray Analysis Methods in the Context of Result Consistency},
  journal   = {PLOS ONE},
  year      = {2015},
  volume    = {10},
  number    = {6},
  pages     = {e0128845},
  doi       = {10.1371/journal.pone.0128845},
  issn      = {1932-6203},
  publisher = {Public Library of Science},
  url       = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0128845},
  mendeley-groups = {Springer},
  abstract  = {Motivation When we were asked for help with high-level microarray data analysis (on Affymetrix HGU-133A microarray), we faced the problem of selecting an appropriate method. We wanted to select a method that would yield "the best result" (detected as many "really" differentially expressed genes (DEGs) as possible, without false positives and false negatives). However, life scientists could not help us -- they use their "favorite" method without special argumentation. We also did not find any norm or recommendation. Therefore, we decided to examine it for our own purpose. We considered whether the results obtained using different methods of high-level microarray data analyses Significant Analysis of Microarrays, Rank Products, Bland-Altman, Mann-Whitney test, T test and the Linear Models for Microarray Data -- would be in agreement. Initially, we conducted a comparative analysis of the results on eight real data sets from microarray experiments (from the Array Express database). The results were surprising. On the same array set, the set of DEGs by different methods were significantly different. We also applied the methods to artificial data sets and determined some measures that allow the preparation of the overall scoring of tested methods for future recommendation. Results We found a very low level concordance of results from tested methods on real array sets. The number of common DEGs (detected by all six methods on fixed array sets, checked on eight array sets) ranged from 6 to 433 (22,283 total array readings). Results on artificial data sets were better than those on the real data. However, they were not fully satisfying. We scored tested methods on accuracy, recall, precision, f-measure and Matthews correlation coefficient. Based on the overall scoring, the best methods were SAM and LIMMA. We also found TT to be acceptable. The worst scoring was MW. Based on our study, we recommend: 1. Carefully taking into account the need for study when choosing a method, 2. Making high-level analysis with more than one method and then only taking the genes that are common to all methods (which seems to be reasonable) and 3. Being very careful (while summarizing facts) about sets of differentially expressed genes: different methods discover different sets of DEGs.},
}
@article{Tan2011,
  author    = {Tan, Yuande and Liu, Yin},
  title     = {Comparison of methods for identifying differentially expressed genes across multiple conditions from microarray data},
  journal   = {Bioinformation},
  year      = {2011},
  volume    = {7},
  number    = {8},
  pages     = {400--404},
  issn      = {0973-2063},
  pmid      = {22347782},
  publisher = {Biomedical Informatics Publishing Group},
  url       = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC3280440},
  mendeley-groups = {Springer},
  abstract  = {Identification of genes differentially expressed across multiple conditions has become an important statistical problem in analyzing large-scale microarray data. Many statistical methods have been developed to address the challenging problem. Therefore, an extensive comparison among these statistical methods is extremely important for experimental scientists to choose a valid method for their data analysis. In this study, we conducted simulation studies to compare six statistical methods: the Bonferroni (B-) procedure, the Benjamini and Hochberg (BH-) procedure, the Local false discovery rate (Localfdr) method, the Optimal Discovery Procedure (ODP), the Ranking Analysis of F-statistics (RAF), and the Significant Analysis of Microarray data (SAM) in identifying differentially expressed genes. We demonstrated that the strength of treatment effect, the sample size, proportion of differentially expressed genes and variance of gene expression will significantly affect the performance of different methods. The simulated results show that ODP exhibits an extremely high power in indentifying differentially expressed genes, but significantly underestimates the False Discovery Rate (FDR) in all different data scenarios. The SAM has poor performance when the sample size is small, but is among the best-performing methods when the sample size is large. The B-procedure is stringent and thus has a low power in all data scenarios. Localfdr and RAF show comparable statistical behaviors with the BH-procedure with favorable power and conservativeness of FDR estimation. RAF performs the best when proportion of differentially expressed genes is small and treatment effect is weak, but Localfdr is better than RAF when proportion of differentially expressed genes is large.},
}
@article{Allison2006,
  author   = {Allison, David B. and Cui, Xiangqin and Page, Grier P. and Sabripour, Mahyar},
  title    = {Microarray data analysis: From disarray to consolidation and consensus},
  journal  = {Nature Reviews Genetics},
  year     = {2006},
  volume   = {7},
  number   = {1},
  pages    = {55--65},
  doi      = {10.1038/nrg1749},
  issn     = {1471-0056},
  pmid     = {16369572},
  mendeley-groups = {Springer},
  abstract = {In just a few years, microarrays have gone from obscurity to being almost ubiquitous in biological research. At the same time, the statistical methodology for microarray analysis has progressed from simple visual assessments of results to a weekly deluge of papers that describe purportedly novel algorithms for analysing changes in gene expression. Although the many procedures that are available might be bewildering to biologists who wish to apply them, statistical geneticists are recognizing commonalities among the different methods. Many are special cases of more general models, and points of consensus are emerging about the general approaches that warrant use and elaboration.},
}
@article{GregoryAlvord2007,
  author    = {Alvord, W. Gregory and Roayaei, J. A. and Quinones, O. A. and Schneider, K. T.},
  title     = {A microarray analysis for differential gene expression in the soybean genome using {Bioconductor} and {R}},
  journal   = {Briefings in Bioinformatics},
  year      = {2007},
  month     = may,
  volume    = {8},
  number    = {6},
  pages     = {415--431},
  doi       = {10.1093/bib/bbm043},
  issn      = {1467-5463},
  publisher = {Oxford University Press},
  url       = {https://academic.oup.com/bib/article-lookup/doi/10.1093/bib/bbm043},
  mendeley-groups = {Springer},
}
@article{Benjamini1995,
  author  = {Benjamini, Yoav and Hochberg, Yosef},
  title   = {Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing},
  journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
  year    = {1995},
  volume  = {57},
  number  = {1},
  pages   = {289--300},
  url     = {http://www.jstor.org/stable/2346101},
  mendeley-groups = {Springer},
}
@article{biobase,
  author  = {Huber, W. and Carey, V. J. and Gentleman, R. and Anders, S. and Carlson, M. and Carvalho, B. S. and Bravo, H. C. and Davis, S. and Gatto, L. and Girke, T. and Gottardo, R. and Hahne, F. and Hansen, K. D. and Irizarry, R. A. and Lawrence, M. and Love, M. I. and MacDonald, J. and Obenchain, V. and Ole{\'s}, A. K. and Pag{\`e}s, H. and Reyes, A. and Shannon, P. and Smyth, G. K. and Tenenbaum, D. and Waldron, L. and Morgan, M.},
  title   = {Orchestrating high-throughput genomic analysis with {Bioconductor}},
  journal = {Nature Methods},
  year    = {2015},
  volume  = {12},
  number  = {2},
  pages   = {115--121},
  doi     = {10.1038/nmeth.3252},
  url     = {http://www.nature.com/nmeth/journal/v12/n2/full/nmeth.3252.html},
}
@manual{genefilter,
  author = {Gentleman, R. and Carey, V. and Huber, W. and Hahne, F.},
  title  = {genefilter: methods for filtering genes from high-throughput experiments},
  year   = {2017},
  note   = {R package version 1.60.0},
}
@manual{xtable,
  author = {Dahl, David B.},
  title  = {xtable: Export Tables to LaTeX or HTML},
  note   = {R package version 1.8-2},
  url    = {https://CRAN.R-project.org/package=xtable},
  year   = {2016},
}
@incollection{Sanchez-Pla2014,
  author     = {S{\'{a}}nchez-Pla, Alex},
  title      = {{DNA} Microarrays Technology: Overview and Current Status},
  shorttitle = {Chapter 1 - DNA Microarrays Technology},
  booktitle  = {Fundamentals of Advanced Omics Technologies: From Genes to Metabolites},
  editor     = {Sim{\'{o}}, Carolina and Cifuentes, Alejandro and Garc{\'{i}}a-Ca{\~{n}}as, Virginia},
  series     = {Comprehensive Analytical Chemistry},
  volume     = {63},
  publisher  = {Elsevier},
  year       = {2014},
  pages      = {1--23},
  doi        = {10.1016/B978-0-444-62651-6.00001-5},
  issn       = {0166-526X},
  url        = {http://www.sciencedirect.com/science/article/pii/B9780444626516000015},
  keywords   = {Gene Expression,High throughput,microarrays},
  mendeley-groups = {Springer},
  mendeley-tags = {Gene Expression,High throughput,microarrays},
  abstract   = {DNA microarray is a recent technology in which a high number of nucleic acid sequences are bound to a surface and are used to identify and quantify the DNA on a sample by letting both groups of sequences, in the sample and on the array, to hybridize, and subsequently identifying the hybridized sequences. Microarrays have been applied to all types of biological and medical problems, from cancer prognosis to the study of circadian cycles and fruit ripening. The most common types of applications are in gene expression but they have also been heavily used to quantify genetic variation, to detect aberrant numbers of copies associated with diseases, and in many other situations. Microarrays have been the technique of choice during the first decade of the twenty-first century for many applications, but with the advent of next-generation sequencing techniques it may be expected that some of their applications are adopted by this new technology, especially in those applications where microarrays show some limitations.},
}
@article{Sanchez-Pla2012,
  author   = {S{\'{a}}nchez-Pla, Alex and Reverter, Ferran and {Ru{\'{i}}z de Villa}, M. Carme and Comabella, Manuel},
  title    = {Transcriptomics: {mRNA} and alternative splicing},
  journal  = {Journal of Neuroimmunology},
  year     = {2012},
  month    = jul,
  volume   = {248},
  number   = {1--2},
  pages    = {23--31},
  doi      = {10.1016/j.jneuroim.2012.04.008},
  issn     = {0165-5728},
  pmid     = {22626445},
  url      = {http://linkinghub.elsevier.com/retrieve/pii/S0165572812001233},
  mendeley-groups = {Elsevier},
  abstract = {Transcriptomics has emerged as a powerful approach for biomarker discovery. In the present review, the two main types of high throughput transcriptomic technologies - microarrays and next generation sequencing - that can be used to identify candidate biomarkers are briefly described. Microarrays, the mainstream technology of the last decade, have provided hundreds of valuable datasets in a wide variety of diseases including multiple sclerosis (MS), in which this approach has been used to disentangle different aspects of its complex pathogenesis. RNA-seq, the current next generation sequencing approach, is expected to provide similar power as microarrays but extending their capabilities to aspects up to now more difficult to analyse such as alternative splicing and discovery of novel transcripts.},
}
@manual{mogene21,
  author = {MacDonald, James W.},
  title  = {mogene21sttranscriptcluster.db: Affymetrix mogene21 annotation data (chip mogene21sttranscriptcluster)},
  note   = {R package version 8.7.0},
  year   = {2017},
}
@manual{anotate,
  author = {Gentleman, R.},
  title  = {annotate: Annotation for microarrays},
  note   = {R package version 1.56.1},
  year   = {2017},
}
@manual{orgMm,
  author = {Carlson, Marc},
  title  = {org.Mm.eg.db: Genome wide annotation for Mouse},
  note   = {R package version 3.5.0},
  year   = {2017},
}
@manual{gplots,
  author = {Warnes, Gregory R. and Bolker, Ben and Bonebakker, Lodewijk and Gentleman, Robert and Huber, Wolfgang and Liaw, Andy and Lumley, Thomas and Maechler, Martin and Magnusson, Arni and Moeller, Steffen and Schwartz, Marc and Venables, Bill},
  title  = {gplots: Various R Programming Tools for Plotting Data},
  year   = {2016},
  note   = {R package version 3.0.1},
  url    = {https://CRAN.R-project.org/package=gplots},
}
% Textbook, second edition (CRC Press). The pages field holds the page count as exported.
@book{Draghici2012,
  author          = {Draghici, Sorin},
  title           = {Statistics and data analysis for microarrays using {R} and {Bioconductor}},
  edition         = {Second},
  publisher       = {CRC Press},
  year            = {2012},
  pages           = {1042},
  isbn            = {9781439809754},
  url             = {https://www.crcpress.com/Statistics-and-Data-Analysis-for-Microarrays-Using-R-and-Bioconductor/Draghici/p/book/9781439809754},
  mendeley-groups = {Springer},
  abstract        = {2nd ed. "Richly illustrated in color, Statistics and Data Analysis for Microarrays Using R and Bioconductor, Second Edition provides a clear and rigorous description of powerful analysis techniques and algorithms for mining and interpreting biological information. Omitting tedious details, heavy formalisms, and cryptic notations, the text takes a hands-on, example-based approach that teaches students the basics of R and microarray technology as well as how to choose and apply the proper data analysis tool to specific problems. New to the Second Edition Completely updated and double the size of its predecessor, this timely second edition replaces the commercial software with the open source R and Bioconductor environments. Fourteen new chapters cover such topics as the basic mechanisms of the cell, reliability and reproducibility issues in DNA microarrays, basic statistics and linear models in R, experiment design, multiple comparisons, quality control, data pre-processing and normalization, Gene Ontology analysis, pathway analysis, and machine learning techniques. Methods are illustrated with toy examples and real data and the R code for all routines is available on an accompanying CD-ROM. With all the necessary prerequisites included, this best-selling book guides students from very basic notions to advanced analysis techniques in R and Bioconductor. The first half of the text presents an overview of microarrays and the statistical elements that form the building blocks of any data analysis. The second half introduces the techniques most commonly used in the analysis of microarray data"-- "Preface Although the industry once suffered from a lack of qualified targets and candidate drugs, lead scientists must now decide where to start amidst the overload of biological data. In our opinion, this phenomenon has shifted the bottleneck in drug discovery from data collection to data analysis, interpretation and integration.
Life Science Informatics, UBS Warburg Market Report, 2001 One of the most promising tools available today to researchers in life sciences is the microarray technology. Typically, one DNA array will provide hundreds or thousands of gene expression values. However, the immense potential of this technology can only be realized if many such experiments are done. In order to understand the biological phenomena, expression levels need to be compared between species or between healthy and ill individuals or at different time points for the same individual or population of individuals. This approach is currently generating an immense quantity of data. Buried under this humongous pile of numbers lays invaluable biological information. The keys to understanding phenomena from fetal development to cancer may be found in these numbers. Clearly, powerful analysis techniques and algorithms are essential tools in mining these data. However, the computer scientist or statistician that does have the expertise to use advanced analysis techniques usually lacks the biological knowledge necessary to understand even the simplest biological phenomena. At the same time, the scientist having the right background to formulate and test biological hypotheses may feel a little uncomfortable when it comes to analyzing the data thus generated"-- The Cell and Its Basic Mechanisms -- Microarrays -- Reliability and Reproducibility Issues in DNA Microarray Measurements -- Image Processing -- Introduction to R -- Bioconductor: Principles and Illustrations -- Elements of Statistics -- Probability Distributions -- Basic Statistics in R -- Statistical Hypothesis Testing -- Classical Approaches to Data Analysis -- Analysis of Variance (ANOVA) -- Linear Models in R -- Experiment Design -- Multiple Comparisons -- Analysis and Visualization Tools -- Cluster Analysis -- Quality Control -- Data Pre-Processing and Normalization.
Methods for Selecting Differentially Regulated Genes -- The Gene Ontology (GO) -- Functional Analysis and Biological Interpretation of Microarray Data -- Uses, Misuses, and Abuses in GO Profiling -- A Comparison of Several Tools for Ontological Analysis -- Focused Microarrays -- Comparison and Selection -- ID Mapping Issues -- Pathway Analysis -- Machine Learning Techniques -- The Road Ahead.},
}
% Monograph on large-scale statistical inference (Cambridge University Press).
% The pages field holds the page count as exported.
@book{Efron2013,
  author          = {Efron, Bradley},
  title           = {Large-scale inference: empirical {Bayes} methods for estimation, testing, and prediction},
  publisher       = {Cambridge University Press},
  year            = {2013},
  pages           = {263},
  isbn            = {9781107619678},
  url             = {http://admin.cambridge.org/academic/subjects/statistics-probability/statistical-theory-and-methods/large-scale-inference-empirical-bayes-methods-estimation-testing-and-prediction},
  mendeley-groups = {Springer},
  abstract        = {First paperback edition. We live in a new age for statistical inference, where modern scientific technology such as microarrays and fMRI machines routinely produce thousands and sometimes millions of parallel data sets, each with its own estimation or testing problem. Doing thousands of problems at once is more than repeated application of classical methods. Taking an empirical Bayes approach, Bradley Efron, inventor of the bootstrap, shows how information accrues across problems in a way that combines Bayesian and frequentist ideas. Estimation, testing, and prediction blend in this framework, producing opportunities for new methodologies of increased power. New difficulties also arise, easily leading to flawed inferences. This book takes a careful look at both the promise and pitfalls of large-scale statistical inference, with particular attention to false discovery rates, the most successful of the new statistical techniques. Emphasis is on the inferential ideas underlying technical developments, illustrated using a large number of real examples.},
}
% Journal article comparing ontological analysis tools for gene expression data.
@article{Khatri2005,
  author          = {Khatri, Purvesh and Draghici, Sorin},
  title           = {Ontological analysis of gene expression data: current tools, limitations, and open problems},
  shorttitle      = {Ontological analysis of gene expression data},
  journal         = {Bioinformatics},
  volume          = {21},
  number          = {18},
  pages           = {3587--3595},
  month           = sep,
  year            = {2005},
  doi             = {10.1093/bioinformatics/bti565},
  issn            = {1367-4803},
  pmid            = {15994189},
  language        = {eng},
  keywords        = {Algorithms,Computational Biology,Computer Graphics,DNA,Data Interpretation,Database Management Systems,Databases,Gene Expression Profiling,Gene Expression Regulation,Genetic,Humans,Models,Oligonucleotide Array Sequence Analysis,Reference Standards,Sequence Analysis,Software,Statistical},
  mendeley-groups = {Springer},
  abstract        = {Independent of the platform and the analysis methods used, the result of a microarray experiment is, in most cases, a list of differentially expressed genes. An automatic ontological analysis approach has been recently proposed to help with the biological interpretation of such results. Currently, this approach is the de facto standard for the secondary analysis of high throughput experiments and a large number of tools have been developed for this purpose. We present a detailed comparison of 14 such tools using the following criteria: scope of the analysis, visualization capabilities, statistical model(s) used, correction for multiple comparisons, reference microarrays available, installation issues and sources of annotation data. This detailed analysis of the capabilities of these tools will help researchers choose the most appropriate tool for a given type of analysis. More importantly, in spite of the fact that this type of analysis has been generally adopted, this approach has several important intrinsic drawbacks. These drawbacks are associated with all tools discussed and represent conceptual limitations of the current state-of-the-art in ontological analysis. We propose these as challenges for the next generation of secondary data analysis tools.},
}
% Review of the first decade of knowledge base-driven pathway analysis methods.
% The url field holds the article landing page only; the DOI identifies the same article.
@article{Khatri2012,
  author          = {Khatri, Purvesh and Sirota, Marina and Butte, Atul J.},
  title           = {Ten Years of Pathway Analysis: Current Approaches and Outstanding Challenges},
  shorttitle      = {Ten Years of Pathway Analysis},
  journal         = {PLOS Computational Biology},
  volume          = {8},
  number          = {2},
  pages           = {e1002375},
  month           = feb,
  year            = {2012},
  doi             = {10.1371/journal.pcbi.1002375},
  issn            = {1553-7358},
  url             = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1002375},
  file            = {:C$\backslash$:/Users/asanc/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Khatri, Sirota, Butte - 2012 - Ten Years of Pathway Analysis Current Approaches and Outstanding Challenges.html:html},
  keywords        = {Apoptosis,Gene Expression,Genome analysis,Genome annotation,Genomic databases,Human genomics,Proteomic databases,microarrays},
  mendeley-groups = {Springer},
  mendeley-tags   = {Apoptosis,Gene Expression,Genome analysis,Genome annotation,Genomic databases,Human genomics,Proteomic databases,microarrays},
  abstract        = {Pathway analysis has become the first choice for gaining insight into the underlying biology of differentially expressed genes and proteins, as it reduces complexity and has increased explanatory power. We discuss the evolution of knowledge base-driven pathway analysis over its first decade, distinctly divided into three generations. We also discuss the limitations that are specific to each generation, and how they are addressed by successive generations of methods. We identify a number of annotation challenges that must be addressed to enable development of the next generation of pathway analysis methods. Furthermore, we identify a number of methodological challenges that the next generation of methods must tackle to take advantage of the technological advances in genomics and proteomics in order to improve specificity, sensitivity, and relevance of pathway analysis.},
}
% Paper describing the ReactomePA R/Bioconductor package.
@article{Yu2016,
  author          = {Yu, Guangchuang and He, Qing-Yu},
  title           = {{ReactomePA}: an {R/Bioconductor} package for reactome pathway analysis and visualization},
  journal         = {Molecular BioSystems},
  volume          = {12},
  number          = {2},
  pages           = {477--479},
  month           = feb,
  year            = {2016},
  doi             = {10.1039/c5mb00663e},
  issn            = {1742-2051},
  pmid            = {26661513},
  url             = {http://www.ncbi.nlm.nih.gov/pubmed/26661513},
  mendeley-groups = {Springer},
  abstract        = {Reactome is a manually curated pathway annotation database for unveiling high-order biological pathways from high-throughput data. ReactomePA is an R/Bioconductor package providing enrichment analyses, including hypergeometric test and gene set enrichment analyses. A functional analysis can be applied to the genomic coordination obtained from a sequencing experiment to analyze the functional significance of genomic loci including cis-regulatory elements and non-coding regions. Comparison among different experiments is also supported. Moreover, ReactomePA provides several visualization functions to produce highly customizable, publication-quality figures. The source code and documents of ReactomePA are freely available through Bioconductor (http://www.bioconductor.org/packages/ReactomePA).},
}
% Book chapter on software for microarray data analysis.
% NOTE(review): booktitle holds the series name as exported; the title, editor and
% publisher of the individual volume are not recorded here and should be added.
@incollection{Mehta2011,
  author          = {Mehta, Jai Prakash and Rani, Sweta},
  title           = {Software and Tools for Microarray Data Analysis},
  booktitle       = {Methods in Molecular Biology},
  volume          = {784},
  pages           = {41--53},
  year            = {2011},
  doi             = {10.1007/978-1-61779-289-2_4},
  issn            = {1940-6029},
  pmid            = {21898212},
  url             = {http://www.ncbi.nlm.nih.gov/pubmed/21898212},
  mendeley-groups = {Springer},
  abstract        = {A typical microarray experiment results in series of images, depending on the experimental design and number of samples. Software analyses the images to obtain the intensity at each spot and quantify the expression for each transcript. This is followed by normalization, and then various data analysis techniques are applied on the data. The whole analysis pipeline requires a large number of software to accurately handle the massive amount of data. Fortunately, there are large number of freely available and commercial software to churn the massive amount of data to manageable sets of differentially expressed genes, functions, and pathways. This chapter describes the software and tools which can be used to analyze the gene expression data right from the image analysis to gene list, ontology, and pathways.},
}
% Book chapter describing the Gene Expression Omnibus (GEO) database.
% NOTE(review): booktitle holds the series name as exported; the title, editor and
% publisher of the individual volume are not recorded here and should be added.
@incollection{Clough2016,
  author          = {Clough, Emily and Barrett, Tanya},
  title           = {The {Gene Expression Omnibus} Database},
  booktitle       = {Methods in Molecular Biology},
  volume          = {1418},
  pages           = {93--110},
  year            = {2016},
  doi             = {10.1007/978-1-4939-3578-9_5},
  issn            = {1940-6029},
  pmid            = {27008011},
  pmcid           = {PMC4944384},
  url             = {http://www.ncbi.nlm.nih.gov/pubmed/27008011},
  keywords        = {Data mining,Database,Functional genomics,Gene expression,High-throughput sequencing,Microarray},
  mendeley-groups = {Springer},
  abstract        = {The Gene Expression Omnibus (GEO) database is an international public repository that archives and freely distributes high-throughput gene expression and other functional genomics data sets. Created in 2000 as a worldwide resource for gene expression studies, GEO has evolved with rapidly changing technologies and now accepts high-throughput data for many other data applications, including those that examine genome methylation, chromatin structure, and genome-protein interactions. GEO supports community-derived reporting standards that specify provision of several critical study elements including raw data, processed data, and descriptive metadata. The database not only provides access to data for tens of thousands of studies, but also offers various Web-based tools and strategies that enable users to locate data relevant to their specific interests, as well as to visualize and analyze the data. This chapter includes detailed descriptions of methods to query and download GEO data and use the analysis and visualization tools. The GEO homepage is at http://www.ncbi.nlm.nih.gov/geo/.},
}