% references.bib
@book{mingo2017free,
title={Free probability and random matrices},
author={Mingo, James A and Speicher, Roland},
volume={35},
publisher={Springer},
year={2017}
}
@article{adlam2019random,
title={A random matrix perspective on mixtures of nonlinearities for deep learning},
author={Adlam, Ben and Levinson, Jake and Pennington, Jeffrey},
journal={arXiv preprint arXiv:1912.00827},
year={2019}
}
@article{jacot2020kernel,
title={Kernel alignment risk estimator: risk prediction from training data},
author={Jacot, Arthur and {\c{S}}im{\c{s}}ek, Berfin and Spadaro, Francesco and Hongler, Cl{\'e}ment and Gabriel, Franck},
journal={arXiv preprint arXiv:2006.09796},
year={2020}
}
@article{bartlett2020benign,
title={Benign overfitting in linear regression},
author={Bartlett, Peter L and Long, Philip M and Lugosi, G{\'a}bor and Tsigler, Alexander},
journal={Proceedings of the National Academy of Sciences},
volume={117},
number={48},
pages={30063--30070},
year={2020},
publisher={National Acad Sciences}
}
@article{mitra2019understanding,
title={Understanding overfitting peaks in generalization error: Analytical risk curves for $l_2$ and $l_1$ penalized interpolation},
author={Mitra, Partha P},
journal={arXiv preprint arXiv:1906.03667},
year={2019}
}
@article{belkin2020two,
title={Two models of double descent for weak features},
author={Belkin, Mikhail and Hsu, Daniel and Xu, Ji},
journal={SIAM Journal on Mathematics of Data Science},
volume={2},
number={4},
pages={1167--1180},
year={2020},
publisher={SIAM}
}
@inproceedings{rakhlin2019consistency,
title={Consistency of interpolation with {L}aplace kernels is a high-dimensional phenomenon},
author={Rakhlin, Alexander and Zhai, Xiyu},
booktitle={Conference on Learning Theory},
pages={2595--2623},
year={2019},
organization={PMLR}
}
@article{liang2020just,
title={Just interpolate: Kernel ``ridgeless'' regression can generalize},
author={Liang, Tengyuan and Rakhlin, Alexander},
journal={The Annals of Statistics},
volume={48},
number={3},
pages={1329--1347},
year={2020},
publisher={Institute of Mathematical Statistics}
}
@inproceedings{belkin2018understand,
title={To understand deep learning we need to understand kernel learning},
author={Belkin, Mikhail and Ma, Siyuan and Mandal, Soumik},
booktitle={International Conference on Machine Learning},
pages={541--549},
year={2018},
organization={PMLR}
}
@article{nakkiran2019deep,
title={Deep double descent: Where bigger models and more data hurt},
author={Nakkiran, Preetum and Kaplun, Gal and Bansal, Yamini and Yang, Tristan and Barak, Boaz and Sutskever, Ilya},
journal={arXiv preprint arXiv:1912.02292},
year={2019}
}
@article{advani2020high,
title={High-dimensional dynamics of generalization error in neural networks},
author={Advani, Madhu S and Saxe, Andrew M and Sompolinsky, Haim},
journal={Neural Networks},
volume={132},
pages={428--446},
year={2020},
publisher={Elsevier}
}
@article{belkin2019reconciling,
title={Reconciling modern machine-learning practice and the classical bias--variance trade-off},
author={Belkin, Mikhail and Hsu, Daniel and Ma, Siyuan and Mandal, Soumik},
journal={Proceedings of the National Academy of Sciences},
volume={116},
number={32},
pages={15849--15854},
year={2019},
publisher={National Acad Sciences}
}
@article{zhang2021understanding,
title={Understanding deep learning (still) requires rethinking generalization},
author={Zhang, Chiyuan and Bengio, Samy and Hardt, Moritz and Recht, Benjamin and Vinyals, Oriol},
journal={Communications of the ACM},
volume={64},
number={3},
pages={107--115},
year={2021},
publisher={ACM New York, NY, USA}
}
@article{canziani2016analysis,
title={An analysis of deep neural network models for practical applications},
author={Canziani, Alfredo and Paszke, Adam and Culurciello, Eugenio},
journal={arXiv preprint arXiv:1605.07678},
year={2016}
}
@article {bun2016cleaning,
AUTHOR = {Bun, Jo\"{e}l and Bouchaud, Jean-Philippe and Potters, Marc},
TITLE = {Cleaning large correlation matrices: tools from random matrix
theory},
JOURNAL = {Phys. Rep.},
FJOURNAL = {Physics Reports. A Review Section of Physics Letters},
VOLUME = {666},
YEAR = {2017},
PAGES = {1--109},
ISSN = {0370-1573},
MRCLASS = {60B20 (15B52 65F99 91G10)},
MRNUMBER = {3590056},
DOI = {10.1016/j.physrep.2016.10.005},
URL = {https://doi.org/10.1016/j.physrep.2016.10.005},
}
@InProceedings{pennington2017geometry,
title = {Geometry of Neural Network Loss Surfaces via Random Matrix Theory},
author = {Jeffrey Pennington and Yasaman Bahri},
booktitle = {Proceedings of the 34th International Conference on Machine Learning},
pages = {2798--2806},
year = {2017},
volume = {70},
series = {Proceedings of Machine Learning Research},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v70/pennington17a/pennington17a.pdf},
url = {http://proceedings.mlr.press/v70/pennington17a.html},
}
@article {baik2006eigenvalues,
AUTHOR = {Baik, Jinho and Silverstein, Jack W.},
TITLE = {Eigenvalues of large sample covariance matrices of spiked
population models},
JOURNAL = {J. Multivariate Anal.},
FJOURNAL = {Journal of Multivariate Analysis},
VOLUME = {97},
YEAR = {2006},
NUMBER = {6},
PAGES = {1382--1408},
DOI = {10.1016/j.jmva.2005.08.003},
URL = {https://doi.org/10.1016/j.jmva.2005.08.003},
}
@book{bai2010spectral,
title={Spectral analysis of large dimensional random matrices},
url={https://link.springer.com/book/10.1007/978-1-4419-0661-8},
author={Bai, Z. and Silverstein, J.},
volume={20},
year={2010},
publisher={Springer}
}
@phdthesis{liao2019random,
author = {Zhenyu Liao},
title = {A random matrix framework for large dimensional
machine learning and neural networks},
school = {Université Paris-Saclay},
year = 2019
}
@article{domingo2020average,
title={Average-case acceleration for bilinear games and normal matrices},
author={Domingo-Enrich, Carles and Pedregosa, Fabian and Scieur, Damien},
journal={arXiv preprint arXiv:2010.02076},
year={2020}
}
@inproceedings{montgomery1973pair,
title={The pair correlation of zeros of the zeta function},
author={Montgomery, Hugh L},
booktitle={Proc. Symp. Pure Math},
year={1973},
url={http://www-personal.umich.edu/~hlm/paircor1.pdf}
}
@book{anderson2010introduction,
title={An introduction to random matrices},
author={Anderson, Greg W and Guionnet, Alice and Zeitouni, Ofer},
year={2010},
publisher={Cambridge University Press}
}
@article{bouchaud2009financial,
title={Financial applications of random matrix theory: a short review},
author={Bouchaud, Jean-Philippe and Potters, Marc},
journal={arXiv preprint arXiv:0910.1205},
year={2009},
url={https://arxiv.org/pdf/0910.1205}
}
@article{erdos1960evolution,
title={On the evolution of random graphs},
author={Erd{\H{o}}s, Paul and R{\'e}nyi, Alfr{\'e}d},
journal={Publ. Math. Inst. Hung. Acad. Sci},
year={1960},
url={http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.348.530&rep=rep1&type=pdf}
}
@book{tulino2004random,
title={Random matrix theory and wireless communications},
author={Tulino, Antonia M and Verd{\'u}, Sergio},
year={2004},
publisher={Now Publishers Inc},
url={http://dx.doi.org/10.1561/0100000001}
}
@article{wishart1928generalised,
title={The generalised product moment distribution in samples from a normal multivariate population},
author={Wishart, John},
journal={Biometrika},
year={1928},
publisher={JSTOR},
url={https://doi.org/10.2307/2331939}
}
@inproceedings{choromanska2015loss,
title={The loss surfaces of multilayer networks},
author={Choromanska, Anna and Henaff, Mikael and Mathieu, Michael and Arous, G{\'e}rard Ben and LeCun, Yann},
booktitle={Artificial Intelligence and Statistics},
year={2015},
organization={PMLR},
url={https://arxiv.org/pdf/1412.0233.pdf}
}
@inproceedings{yao2020pyhessian,
title={{PyHessian}: Neural networks through the lens of the {H}essian},
author={Yao, Zhewei and Gholami, Amir and Keutzer, Kurt and Mahoney, Michael W},
booktitle={2020 IEEE International Conference on Big Data (Big Data)},
year={2020},
organization={IEEE},
url={https://github.com/amirgholami/PyHessian}
}
@book{mehta2004random,
title={Random matrices},
author={Mehta, Madan Lal},
year={2004},
publisher={Elsevier},
url={https://www.elsevier.com/books/random-matrices/lal-mehta/978-0-12-088409-4}
}
@inproceedings{pedregosa2020acceleration,
title={Acceleration through spectral density estimation},
author={Pedregosa, Fabian and Scieur, Damien},
booktitle={International Conference on Machine Learning},
year={2020},
url={https://arxiv.org/pdf/2002.04756.pdf},
organization={PMLR}
}
@inproceedings{lacotte2020optimal,
title={Optimal randomized first-order methods for least-squares problems},
author={Lacotte, Jonathan and Pilanci, Mert},
booktitle={International Conference on Machine Learning},
year={2020},
organization={PMLR},
url={https://arxiv.org/pdf/2002.09488.pdf}
}
@inproceedings{adlam2020neural,
title={The neural tangent kernel in high dimensions: Triple descent and a multi-scale theory of generalization},
author={Adlam, Ben and Pennington, Jeffrey},
booktitle={International Conference on Machine Learning},
year={2020},
url={https://arxiv.org/pdf/2008.06786.pdf},
organization={PMLR}
}
@article{marvcenko1967distribution,
title={Distribution of eigenvalues for some sets of random matrices},
author={Mar{\v{c}}enko, Vladimir A and Pastur, Leonid Andreevich},
journal={Mathematics of the USSR-Sbornik},
year={1967},
url={https://iopscience.iop.org/article/10.1070/SM1967v001n04ABEH001994/meta},
publisher={IOP Publishing}
}
@article{liao2020random,
title={A random matrix analysis of random Fourier features: beyond the Gaussian kernel, a precise phase transition, and the corresponding double descent},
author={Liao, Zhenyu and Couillet, Romain and Mahoney, Michael W},
journal={arXiv preprint arXiv:2006.05013},
url={https://arxiv.org/pdf/2006.05013.pdf},
year={2020}
}
@article{halko2011finding,
title={Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions},
author={Halko, Nathan and Martinsson, Per-Gunnar and Tropp, Joel A},
journal={SIAM review},
year={2011},
publisher={SIAM},
url={https://www.jstor.org/stable/23065163}
}
@inproceedings{ghorbani2019investigation,
title={An investigation into neural net optimization via hessian eigenvalue density},
author={Ghorbani, Behrooz and Krishnan, Shankar and Xiao, Ying},
booktitle={International Conference on Machine Learning},
year={2019},
organization={PMLR},
url={https://arxiv.org/pdf/1901.10159.pdf}
}
@article{sagun2014explorations,
title={Explorations on high dimensional landscapes},
author={Sagun, Levent and Guney, V Ugur and Arous, Gerard Ben and LeCun, Yann},
journal={arXiv preprint arXiv:1412.6615},
year={2014},
url={https://arxiv.org/pdf/1412.6615.pdf}
}
@inproceedings{baity2018comparing,
title={Comparing dynamics: Deep neural networks versus glassy systems},
author={Baity-Jesi, Marco and Sagun, Levent and Geiger, Mario and Spigler, Stefano and Arous, G{\'e}rard Ben and Cammarota, Chiara and LeCun, Yann and Wyart, Matthieu and Biroli, Giulio},
booktitle={International Conference on Machine Learning},
year={2018},
url={https://arxiv.org/pdf/1803.06969.pdf},
organization={PMLR}
}
@article{gardner1988optimal,
title={Optimal storage properties of neural network models},
author={Gardner, Elizabeth and Derrida, Bernard},
journal={Journal of Physics A: Mathematical and general},
year={1988},
publisher={IOP Publishing},
url={http://www.phys.ens.fr/~derrida/PAPIERS/1988/optimal-storage.pdf}
}
@book{dotsenko1995introduction,
title={An introduction to the theory of spin glasses and neural networks},
author={Dotsenko, Viktor},
year={1995},
url={https://doi.org/10.1142/2460},
publisher={World Scientific}
}
@inproceedings{pennington2017nonlinear,
title={Nonlinear random matrix theory for deep learning},
author={Pennington, Jeffrey and Worah, Pratik},
booktitle={Advances in Neural Information Processing Systems},
year={2017},
url={https://papers.nips.cc/paper/2017/file/0f3d014eead934bbdbacb62a01dc4831-Paper.pdf}
}
@incollection{keating1993riemann,
title={The {Riemann} zeta-function and quantum chaology},
author={Keating, J},
booktitle={Quantum chaos},
year={1993},
publisher={Elsevier},
url={https://arxiv.org/pdf/0708.4223.pdf}
}
@article{odlyzko1987distribution,
title={On the distribution of spacings between zeros of the zeta function},
author={Odlyzko, Andrew M},
journal={Mathematics of Computation},
url={https://doi.org/10.1090/S0025-5718-1987-0866115-0},
year={1987}
}
@article{peche2019note,
title={A note on the {P}ennington-{W}orah distribution},
author={P{\'e}ch{\'e}, Sandrine},
journal={Electronic Communications in Probability},
year={2019},
url={https://doi.org/10.1214/19-ECP262},
publisher={The Institute of Mathematical Statistics and the Bernoulli Society}
}
@article{spielman2011spectral,
title={Spectral sparsification of graphs},
author={Spielman, Daniel A and Teng, Shang-Hua},
journal={SIAM Journal on Computing},
year={2011},
publisher={SIAM},
url={https://arxiv.org/pdf/0808.4134.pdf}
}
@article{paquette2020universality,
title={Universality for the conjugate gradient and MINRES algorithms on sample covariance matrices},
author={Paquette, Elliot and Trogdon, Thomas},
journal={arXiv preprint arXiv:2007.00640},
year={2020},
url={https://arxiv.org/pdf/2007.00640.pdf}
}
@inproceedings{williams2001using,
title={Using the Nystr{\"o}m method to speed up kernel machines},
author={Williams, Christopher and Seeger, Matthias},
booktitle={Proceedings of the 14th Annual Conference on Neural Information Processing Systems},
year={2001},
url={https://papers.nips.cc/paper/2000/hash/19de10adbaa1b2ee13f77f679fa1483a-Abstract.html}
}
@article{mei2019generalization,
title={The generalization error of random features regression: Precise asymptotics and double descent curve},
author={Mei, Song and Montanari, Andrea},
journal={arXiv preprint arXiv:1908.05355},
year={2019},
url={https://arxiv.org/pdf/1908.05355.pdf}
}
@article{drineas2005nystrom,
title={On the Nystr{\"o}m Method for Approximating a Gram Matrix for Improved Kernel-Based Learning.},
author={Drineas, Petros and Mahoney, Michael W and Cristianini, Nello},
journal={Journal of Machine Learning Research},
url={https://www.jmlr.org/papers/volume6/drineas05a/drineas05a.pdf},
year={2005},
}
@article{gower2015randomized,
title={Randomized iterative methods for linear systems},
author={Gower, Robert M and Richt{\'a}rik, Peter},
journal={SIAM Journal on Matrix Analysis and Applications},
year={2015},
url={https://arxiv.org/pdf/1506.03296.pdf},
publisher={SIAM}
}
@inproceedings{rudi2015less,
title={Less is More: Nystr{\"o}m Computational Regularization.},
author={Rudi, Alessandro and Camoriano, Raffaello and Rosasco, Lorenzo},
booktitle={Advances in Neural Information Processing Systems},
url={https://arxiv.org/pdf/1507.04717.pdf},
year={2015}
}
@article{drineas2018lectures,
title={Lectures on randomized numerical linear algebra},
author={Drineas, Petros and Mahoney, Michael W},
journal={The Mathematics of Data},
year={2018},
publisher={American Mathematical Soc.},
url={https://arxiv.org/pdf/1712.08880.pdf}
}
@article{amit1985spin,
title={Spin-glass models of neural networks},
author={Amit, Daniel J and Gutfreund, Hanoch and Sompolinsky, Haim},
journal={Physical Review A},
year={1985},
publisher={APS},
url={https://doi.org/10.1103/PhysRevA.32.1007}
}
@article{JMLR:v21:20-933,
author = {Vardan Papyan},
title = {Traces of Class/Cross-Class Structure Pervade Deep Learning Spectra},
journal = {Journal of Machine Learning Research},
year = {2020},
url = {http://jmlr.org/papers/v21/20-933.html}
}
@article{dauphin2014identifying,
title={Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
author={Dauphin, Yann and Pascanu, Razvan and Gulcehre, Caglar and Cho, Kyunghyun and Ganguli, Surya and Bengio, Yoshua},
journal={arXiv preprint arXiv:1406.2572},
year={2014},
url={https://arxiv.org/pdf/1406.2572.pdf}
}
@article{martin2018implicit,
title={Implicit self-regularization in deep neural networks: Evidence from random matrix theory and implications for learning},
author={Martin, Charles H and Mahoney, Michael W},
journal={arXiv preprint arXiv:1810.01075},
year={2018},
url={https://arxiv.org/pdf/1810.01075.pdf}
}
@article{baskerville2021applicability,
title={Applicability of Random Matrix Theory in Deep Learning},
author={Baskerville, Nicholas P and Granziol, Diego and Keating, Jonathan P},
journal={arXiv preprint arXiv:2102.06740},
year={2021},
url={https://arxiv.org/pdf/2102.06740.pdf}
}
@article{hastie2019surprises,
title={Surprises in high-dimensional ridgeless least squares interpolation},
author={Hastie, Trevor and Montanari, Andrea and Rosset, Saharon and Tibshirani, Ryan J},
journal={arXiv preprint arXiv:1903.08560},
url={https://arxiv.org/pdf/1903.08560.pdf},
year={2019}
}
@article {bai2005CLT,
AUTHOR = {Bai, Z. D. and Silverstein, Jack W.},
TITLE = {{CLT} for linear spectral statistics of large-dimensional sample covariance matrices},
JOURNAL = {Ann. Probab.},
FJOURNAL = {The Annals of Probability},
VOLUME = {32},
YEAR = {2004},
NUMBER = {1A},
PAGES = {553--605},
ISSN = {0091-1798},
MRCLASS = {60F05 (15A52 62G30 62H05)},
MRNUMBER = {2040792},
MRREVIEWER = {Sreenivasan Ravi},
DOI = {10.1214/aop/1078415845},
URL = {https://doi.org/10.1214/aop/1078415845},
}
@article{krbalek2000statistical,
title={The statistical properties of the city transport in Cuernavaca (Mexico) and random matrix ensembles},
author={Krb{\'a}lek, Milan and {\v{S}}eba, Petr},
journal={Journal of Physics A: Mathematical and General},
year={2000},
publisher={IOP Publishing},
url={https://arxiv.org/pdf/nlin/0001015.pdf}
}
@article{jagannath2017random,
title={Random matrices and the New York City subway system},
author={Jagannath, Aukosh and Trogdon, Thomas},
journal={Physical Review E},
year={2017},
publisher={APS},
url={https://arxiv.org/pdf/1703.02537.pdf}
}
@article{wigner1955characteristic,
title={Characteristic Vectors of Bordered Matrices With Infinite Dimensions},
author={Wigner, Eugene},
journal={Annals of Mathematics},
year={1955},
publisher={JSTOR},
url={https://doi.org/10.1007/978-3-662-02781-3_35}
}
@article{Kuczynski1992,
author = {Kuczy{\'{n}}ski, J and Wo{\'{z}}niakowski, H},
doi = {10.1137/0613066},
issn = {0895-4798},
journal = {SIAM Journal on Matrix Analysis and Applications},
month = {oct},
number = {4},
pages = {1094--1122},
title = {{Estimating the Largest Eigenvalue by the Power and Lanczos Algorithms with a Random Start}},
url = {http://epubs.siam.org/doi/10.1137/0613066},
volume = {13},
year = {1992}
}
@inproceedings{Deshpande,
author = {Deshpande, A and Spielman, D A},
booktitle = {46th Annual IEEE Symposium on Foundations of Computer Science (FOCS'05)},
doi = {10.1109/SFCS.2005.44},
isbn = {0-7695-2468-0},
pages = {349--356},
publisher = {IEEE},
title = {{Improved Smoothed Analysis of the Shadow Vertex Simplex Method}},
url = {http://ieeexplore.ieee.org/document/1530727/},
year = {2005}
}
@article{DiagonalRMT,
author = {Pfrang, C W and Deift, P and Menon, G},
journal = {Random matrix theory, interacting particle systems, and integrable systems, MSRI Publications},
pages = {411--442},
title = {{How long does it take to compute the eigenvalues of a random symmetric matrix?}},
url = {http://arxiv.org/abs/1203.4635},
volume = {65},
year = {2014}
}
@article{Vershynin2009a,
author = {Vershynin, R},
journal = {SIAM Journal on Computing},
title = {{Beyond Hirsch Conjecture: Walks on Random Polytopes and Smoothed Complexity of the Simplex Method}},
url = {http://epubs.siam.org/doi/10.1137/070683386},
volume = {39},
year = {2009}
}
@article{PhysRev.120.1698,
title = {"Repulsion of Energy Levels" in Complex Atomic Spectra},
author = {Rosenzweig, Norbert and Porter, Charles E.},
journal = {Phys. Rev.},
year = {1960},
url = {https://link.aps.org/doi/10.1103/PhysRev.120.1698}
}
@book{tao2012topics,
title={Topics in random matrix theory},
author={Tao, Terence},
year={2012},
publisher={American Mathematical Soc.}
}
@article{Liao2021,
abstract = {Given an optimization problem, the Hessian matrix and its eigenspectrum can be used in many ways, ranging from designing more efficient second-order algorithms to performing model analysis and regression diagnostics. When nonlinear models and non-convex problems are considered, strong simplifying assumptions are often made to make Hessian spectral analysis more tractable. This leads to the question of how relevant the conclusions of such analyses are for more realistic nonlinear models. In this paper, we exploit deterministic equivalent techniques from random matrix theory to make a \emph{precise} characterization of the Hessian eigenspectra for a broad family of nonlinear models, including models that generalize the classical generalized linear models, without relying on strong simplifying assumptions used previously. We show that, depending on the data properties, the nonlinear response model, and the loss function, the Hessian can have \emph{qualitatively} different spectral behaviors: of bounded or unbounded support, with single- or multi-bulk, and with isolated eigenvalues on the left- or right-hand side of the bulk. By focusing on such a simple but nontrivial nonlinear model, our analysis takes a step forward to unveil the theoretical origin of many visually striking features observed in more complex machine learning models.},
archivePrefix = {arXiv},
arxivId = {2103.01519},
author = {Liao, Z and Mahoney, M W},
journal = {arXiv preprint arXiv:2103.01519},
eprint = {2103.01519},
month = {mar},
title = {{Hessian Eigenspectra of More Realistic Nonlinear Models}},
url = {http://arxiv.org/abs/2103.01519},
year = {2021}
}
@article{Bourgade2014,
author = {Bourgade, P and Erdős, L and Yau, H-T},
doi = {10.1007/s00220-014-2120-z},
issn = {0010-3616},
journal = {Communications in Mathematical Physics},
month = {nov},
number = {1},
pages = {261--353},
title = {{Edge Universality of Beta Ensembles}},
url = {http://link.springer.com/10.1007/s00220-014-2120-z},
volume = {332},
year = {2014}
}
@book{DeiftOrthogonalPolynomials,
address = {Providence, RI},
author = {Deift, P},
pages = {257},
publisher = {Amer. Math. Soc.},
title = {{Orthogonal Polynomials and Random Matrices: a Riemann-Hilbert Approach}},
year = {2000}
}
@article{Paquette2021,
abstract = {We propose a new framework, inspired by random matrix theory, for analyzing the dynamics of stochastic gradient descent (SGD) when both number of samples and dimensions are large. This framework applies to any fixed stepsize and the finite sum setting. Using this new framework, we show that the dynamics of SGD on a least squares problem with random data become deterministic in the large sample and dimensional limit. Furthermore, the limiting dynamics are governed by a Volterra integral equation. This model predicts that SGD undergoes a phase transition at an explicitly given critical stepsize that ultimately affects its convergence rate, which we also verify experimentally. Finally, when input data is isotropic, we provide explicit expressions for the dynamics and average-case convergence rates (i.e., the complexity of an algorithm averaged over all possible inputs). These rates show significant improvement over the worst-case complexities.},
archivePrefix = {arXiv},
arxivId = {2102.04396},
author = {Paquette, C and Lee, K and Pedregosa, F and Paquette, E},
eprint = {2102.04396},
journal = {arXiv preprint arXiv:2102.04396},
month = {feb},
title = {{SGD in the Large: Average-case Analysis, Asymptotics, and Stepsize Criticality}},
url = {http://arxiv.org/abs/2102.04396},
year = {2021}
}
@article{Erdos2013a,
author = {Erdős, L and Knowles, A and Yau, H-T and Yin, J},
doi = {10.1214/11-AOP734},
issn = {0091-1798},
journal = {The Annals of Probability},
month = {may},
number = {3B},
title = {{Spectral statistics of Erd{\H{o}}s--R{\'e}nyi graphs I: Local semicircle law}},
url = {https://doi.org/10.1214/11-AOP734},
volume = {41},
year = {2013}
}
@article{Baik2005,
author = {Baik, J and {Ben Arous}, G and P{\'{e}}ch{\'{e}}, S},
doi = {10.1214/009117905000000233},
issn = {0091-1798},
journal = {The Annals of Probability},
month = {sep},
number = {5},
title = {{Phase transition of the largest eigenvalue for nonnull complex sample covariance matrices}},
url = {https://projecteuclid.org/journals/annals-of-probability/volume-33/issue-5/Phase-transition-of-the-largest-eigenvalue-for-nonnull-complex-sample/10.1214/009117905000000233.full},
volume = {33},
year = {2005}
}
@article{Bloemendal2013,
author = {Bloemendal, A and Vir{\'{a}}g, B},
doi = {10.1007/s00440-012-0443-2},
issn = {0178-8051},
journal = {Probability Theory and Related Fields},
month = {aug},
number = {3-4},
pages = {795--825},
title = {{Limits of spiked random matrices I}},
url = {http://link.springer.com/10.1007/s00440-012-0443-2},
volume = {156},
year = {2013}
}
@article{Ding2019,
abstract = {We introduce a class of separable sample covariance matrices of the form $\widetilde{\mathcal{Q}}_1 := \widetilde{A}^{1/2} X \widetilde{B} X^* \widetilde{A}^{1/2}$. Here $\widetilde{A}$ and $\widetilde{B}$ are positive definite matrices whose spectrums consist of bulk spectrums plus several spikes, i.e. larger eigenvalues that are separated from the bulks. Conceptually, we call $\widetilde{\mathcal{Q}}_1$ a \emph{spiked separable covariance matrix model}. On the one hand, this model includes the spiked covariance matrix as a special case with $\widetilde{B}=I$. On the other hand, it allows for more general correlations of datasets. In particular, for spatio-temporal datasets, $\widetilde{A}$ and $\widetilde{B}$ represent the spatial and temporal correlations, respectively. In this paper, we study the outlier eigenvalues and eigenvectors, i.e. the principal components, of the spiked separable covariance model $\widetilde{\mathcal{Q}}_1$. We prove the convergence of the outlier eigenvalues $\widetilde{\lambda}_i$ and the generalized components (i.e. $\langle \mathbf{v}, \widetilde{\mathbf{\xi}}_i \rangle$ for any deterministic vector $\mathbf{v}$) of the outlier eigenvectors $\widetilde{\mathbf{\xi}}_i$ with optimal convergence rates. Moreover, we also prove the delocalization of the non-outlier eigenvectors. We state our results in full generality, in the sense that they also hold near the so-called BBP transition and for degenerate outliers. Our results highlight both the similarity and difference between the spiked separable covariance matrix model and the spiked covariance model. In particular, we show that the spikes of both $\widetilde{A}$ and $\widetilde{B}$ will cause outliers of the eigenvalue spectrum, and the eigenvectors can help us to select the outliers that correspond to the spikes of $\widetilde{A}$ (or $\widetilde{B}$).},
archivePrefix = {arXiv},
arxivId = {1905.13060},
author = {Ding, X and Yang, F},
eprint = {1905.13060},
journal = {arXiv preprint arXiv:1905.13060},
month = {may},
title = {{Spiked separable covariance matrices and principal components}},
url = {http://arxiv.org/abs/1905.13060},
year = {2019}
}
@article{Paquette2020a,
abstract = {Average-case analysis computes the complexity of an algorithm averaged over all possible inputs. Compared to worst-case analysis, it is more representative of the typical behavior of an algorithm, but remains largely unexplored in optimization. One difficulty is that the analysis can depend on the probability distribution of the inputs to the model. However, we show that this is not the case for a class of large-scale problems trained with gradient descent including random least squares and one-hidden layer neural networks with random weights. In fact, the halting time exhibits a universality property: it is independent of the probability distribution. With this barrier for average-case analysis removed, we provide the first explicit average-case convergence rates showing a tighter complexity not captured by traditional worst-case analysis. Finally, numerical simulations suggest this universality property holds for a more general class of algorithms and problems.},
archivePrefix = {arXiv},
arxivId = {2006.04299},
author = {Paquette, C and van Merri{\"{e}}nboer, B and Pedregosa, F},
eprint = {2006.04299},
journal = {arXiv preprint arXiv:2006.04299},
month = {jun},
title = {{Halting Time is Predictable for Large Models: A Universality Property and Average-case Analysis}},
url = {http://arxiv.org/abs/2006.04299},
year = {2020}
}
@article{Hestenes1952,
author = {Hestenes, M and Stiefel, E},
journal = {J. Research Nat. Bur. Standards},
pages = {409--436},
title = {{Methods of Conjugate Gradients for Solving Linear Systems}},
volume = {49},
year = {1952}
}
@article{Ding2021,
abstract = {We consider the conjugate gradient algorithm applied to a general class of spiked sample covariance matrices. The main result of the paper is that the norms of the error and residual vectors at any finite step concentrate on deterministic values determined by orthogonal polynomials with respect to a deformed Marchenko--Pastur law. The first-order limits and fluctuations are shown to be universal. Additionally, for the case where the bulk eigenvalues lie in a single interval we show a stronger universality result in that the asymptotic rate of convergence of the conjugate gradient algorithm only depends on the support of the bulk, provided the spikes are well-separated from the bulk. In particular, this shows that the classical condition number bound for the conjugate gradient algorithm is pessimistic for spiked matrices.},
archivePrefix = {arXiv},
arxivId = {2106.13902},
author = {Ding, X and Trogdon, T},
eprint = {2106.13902},
journal = {arXiv preprint arXiv:2106.13902},
month = {jun},
title = {{The conjugate gradient algorithm on a general class of spiked covariance matrices}},
url = {http://arxiv.org/abs/2106.13902},
year = {2021}
}
@article{Dadush2020,
author = {Dadush, D and Huiberts, S},
doi = {10.1137/18M1197205},
issn = {0097-5397},
journal = {SIAM Journal on Computing},
month = {jan},
number = {5},
pages = {STOC18--449--STOC18--499},
title = {{A Friendly Smoothed Analysis of the Simplex Method}},
url = {https://epubs.siam.org/doi/10.1137/18M1197205},
volume = {49},
year = {2020}
}
@incollection{Dantzig1990,
address = {New York, NY, USA},
author = {Dantzig, G B},
booktitle = {A history of scientific computing},
doi = {10.1145/87252.88081},
month = {jun},
pages = {141--151},
publisher = {ACM},
title = {{Origins of the simplex method}},
url = {http://dl.acm.org/doi/10.1145/87252.88081},
year = {1990}
}
@article{kaczmarz1937angenaherte,
title={Angen{\"a}herte Aufl{\"o}sung von {S}ystemen linearer {G}leichungen},
author={Kaczmarz, S},
journal={Bulletin International de l'Acad{\'e}mie Polonaise des Sciences et des Lettres},
note={English translation by Jason Stockmann},
year={1937}
}
@article{TracyWidom,
author = {Tracy, C A and Widom, H},
journal = {Comm. Math. Phys.},
pages = {151--174},
title = {{Level-spacing distributions and the Airy kernel}},
volume = {159},
year = {1994}
}
@article{Deift2017,
arxivId = {1701.01896},
author = {Deift, P and Trogdon, T},
journal = {SIAM Journal on Numerical Analysis},
title = {{Universality for Eigenvalue Algorithms on Sample Covariance Matrices}},
url = {http://arxiv.org/abs/1701.01896},
year = {2017}
}
@article{Kostlan1988,
abstract = {In this paper the statistical properties of problems that occur in numerical linear algebra are studied. Bounds are calculated for the average performance of the power method for the calculation of eigenvectors of symmetric and Hermitian matrices, thus an upper bound is found for the average complexity of eigenvector calculation. The condition number of a matrix is studied and sharp bounds are calculated for the average (and the variance) loss of precision encountered when one solves a system of linear equations.},
author = {Kostlan, E},
doi = {10.1016/0377-0427(88)90402-5},
issn = {03770427},
journal = {Journal of Computational and Applied Mathematics},
keywords = {Complexity,Gaussian ensembles,condition number,large systems,linear algebra,numerical analysis,power method,probability densities,random matrices},
month = {jun},
number = {2-3},
pages = {219--230},
title = {{Complexity theory of numerical linear algebra}},
url = {http://www.sciencedirect.com/science/article/pii/0377042788904025},
volume = {22},
year = {1988}
}
@article{Spielman2004,
author = {Spielman, D A and Teng, S-H},
doi = {10.1145/990308.990310},
issn = {00045411},
journal = {Journal of the ACM},
keywords = {Simplex method,complexity,perturbation,smoothed analysis},
month = {may},
number = {3},
pages = {385--463},
publisher = {ACM},
title = {{Smoothed analysis of algorithms: Why the simplex algorithm usually takes polynomial time}},
url = {http://dl.acm.org/citation.cfm?id=990308.990310},
volume = {51},
year = {2004}
}
@incollection{Dantzig1951,
author = {Dantzig, G B},
booktitle = {Activity Analysis of Production and Allocation},
isbn = {0804748349},
pages = {339--347},
title = {{Maximization of a linear function of variables subject to linear inequalities}},
url = {http://cowles.econ.yale.edu/P/cm/m13/m13-21.pdf},
year = {1951}
}
@article{Smale1983,
author = {Smale, S},
doi = {10.1007/BF02591902},
issn = {0025-5610},
journal = {Mathematical Programming},
month = {oct},
number = {3},
pages = {241--262},
title = {{On the average number of steps of the simplex method of linear programming}},
url = {http://link.springer.com/10.1007/BF02591902},
volume = {27},
year = {1983}
}
@book{Borgwardt1987,
address = {Berlin, Heidelberg},
author = {Borgwardt, K H},
publisher = {Springer-Verlag},
title = {{The simplex method: A probabilistic analysis}},
url={https://www.springer.com/gp/book/9783540170969},
year = {1987}
}
@article{Strohmer2009,
author = {Strohmer, T and Vershynin, R},
doi = {10.1007/s00041-008-9030-4},
issn = {1069-5869},
journal = {Journal of Fourier Analysis and Applications},
month = {apr},
number = {2},
pages = {262--278},
title = {{A Randomized Kaczmarz Algorithm with Exponential Convergence}},
url = {http://link.springer.com/10.1007/s00041-008-9030-4},
volume = {15},
year = {2009}
}
@article{deift2014universality,
title={Universality in numerical computations with random data},
author={Deift, Percy A and Menon, Govind and Olver, Sheehan and Trogdon, Thomas},
journal={Proceedings of the National Academy of Sciences},
year={2014},
publisher={National Acad Sciences},
url={https://doi.org/10.1073/pnas.1413446111}
}
@article{sagun2015universal,
title={Universal halting times in optimization and machine learning},
author={Sagun, Levent and Trogdon, Thomas and LeCun, Yann},
journal={Quarterly of Applied Mathematics},
year={2017},
url={https://doi.org/10.1090/qam/1483}
}
@article{goldstine1951numerical,
title={Numerical inverting of matrices of high order. II},
author={Goldstine, Herman H and von Neumann, John},
journal={Proceedings of the American Mathematical Society},
year={1951},
url={https://doi.org/10.2307/2032484}
}
@article{Knuth92,
author = "D.E. Knuth",
title = "Two notes on notation",
journal = "Amer. Math. Monthly",
volume = "99",
year = "1992",
pages = "403--422",
}
@article{Knowles2017,
author = {Knowles, A and Yin, J},
doi = {10.1007/s00440-016-0730-4},
issn = {0178-8051},
journal = {Probability Theory and Related Fields},
month = {oct},
number = {1-2},
pages = {257--352},
title = {{Anisotropic local laws for random matrices}},
url = {http://link.springer.com/10.1007/s00440-016-0730-4},
volume = {169},
year = {2017}
}
@book{ConcreteMath,
author = "R.L. Graham and D.E. Knuth and O. Patashnik",
title = "Concrete mathematics",
publisher = "Addison-Wesley",
address = "Reading, MA",
year = "1989"
}
@incollection{Er01,
author = "P. Erd{\H o}s",
title = "A selection of problems and results in combinatorics",
booktitle = "Recent trends in combinatorics (Matrahaza, 1995)",
publisher = "Cambridge Univ. Press",
address = "Cambridge",
pages = "1--6",
year = "1995"
}
@article{greenwade93,
author = "George D. Greenwade",
title = "The {C}omprehensive {T}ex {A}rchive {N}etwork ({CTAN})",
year = "1993",
journal = "TUGBoat",
volume = "14",
number = "3",
pages = "342--351"
}