-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcomputer_vision.bib
287 lines (266 loc) · 10.5 KB
/
computer_vision.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
% Lu, Xiong, Parikh, Socher (2016): arXiv preprint 1612.01887, DBLP CoRR record.
% The article "a" in the title is left unbraced so the style controls its case.
@article{DBLP:journals/corr/LuXPS16,
  author        = {Lu, Jiasen and Xiong, Caiming and Parikh, Devi and Socher, Richard},
  title         = {Knowing When to Look: Adaptive Attention via a Visual Sentinel
                   for Image Captioning},
  journal       = {CoRR},
  volume        = {abs/1612.01887},
  year          = {2016},
  url           = {http://arxiv.org/abs/1612.01887},
  archivePrefix = {arXiv},
  eprint        = {1612.01887},
  timestamp     = {Mon, 13 Aug 2018 16:46:57 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/LuXPS16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Xu et al. (2015), "Show, Attend and Tell": arXiv preprint 1502.03044, DBLP CoRR record.
@article{DBLP:journals/corr/XuBKCCSZB15,
  author        = {Xu, Kelvin and Ba, Jimmy and Kiros, Ryan and Cho, Kyunghyun and
                   Courville, Aaron C. and Salakhutdinov, Ruslan and
                   Zemel, Richard S. and Bengio, Yoshua},
  title         = {Show, Attend and Tell: Neural Image Caption Generation with
                   Visual Attention},
  journal       = {CoRR},
  volume        = {abs/1502.03044},
  year          = {2015},
  eprint        = {1502.03044},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1502.03044},
  timestamp     = {Mon, 13 Aug 2018 16:47:52 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/XuBKCCSZB15},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Rennie et al. (2016), self-critical sequence training: arXiv preprint 1612.00563, DBLP CoRR record.
@article{DBLP:journals/corr/RennieMMRG16,
  author        = {Rennie, Steven J. and Marcheret, Etienne and Mroueh, Youssef and
                   Ross, Jarret and Goel, Vaibhava},
  title         = {Self-critical Sequence Training for Image Captioning},
  journal       = {CoRR},
  volume        = {abs/1612.00563},
  year          = {2016},
  eprint        = {1612.00563},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1612.00563},
  timestamp     = {Mon, 13 Aug 2018 16:47:39 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/RennieMMRG16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Anderson et al. (2017), bottom-up and top-down attention: arXiv preprint 1707.07998, DBLP CoRR record.
@article{DBLP:journals/corr/AndersonHBTJGZ17,
  author        = {Anderson, Peter and He, Xiaodong and Buehler, Chris and
                   Teney, Damien and Johnson, Mark and Gould, Stephen and
                   Zhang, Lei},
  title         = {Bottom-Up and Top-Down Attention for Image Captioning and {VQA}},
  journal       = {CoRR},
  volume        = {abs/1707.07998},
  year          = {2017},
  eprint        = {1707.07998},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1707.07998},
  timestamp     = {Tue, 20 Nov 2018 12:24:39 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/AndersonHBTJGZ17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Yin and Ordonez (2017), OBJ2TEXT: arXiv preprint 1707.07102, DBLP CoRR record.
% Only the acronym is braced; the colon stays outside the braces so that
% sentence-casing styles keep the capital on the first word of the subtitle.
@article{DBLP:journals/corr/YinO17,
  author        = {Yin, Xuwang and Ordonez, Vicente},
  title         = {{OBJ2TEXT}: Generating Visually Descriptive Language from Object
                   Layouts},
  journal       = {CoRR},
  volume        = {abs/1707.07102},
  year          = {2017},
  url           = {http://arxiv.org/abs/1707.07102},
  archivePrefix = {arXiv},
  eprint        = {1707.07102},
  timestamp     = {Mon, 13 Aug 2018 16:49:07 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/YinO17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Tan and Le (2019), EfficientNet: arXiv preprint 1905.11946, DBLP CoRR record.
% The camel-case model name is braced so sentence-casing styles cannot
% lowercase its inner capital.
@article{DBLP:journals/corr/abs-1905-11946,
  author        = {Tan, Mingxing and Le, Quoc V.},
  title         = {{EfficientNet}: Rethinking Model Scaling for Convolutional Neural
                   Networks},
  journal       = {CoRR},
  volume        = {abs/1905.11946},
  year          = {2019},
  url           = {http://arxiv.org/abs/1905.11946},
  archivePrefix = {arXiv},
  eprint        = {1905.11946},
  timestamp     = {Mon, 03 Jun 2019 13:42:33 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1905-11946},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Liu et al. (2018), "Show, Tell and Discriminate": arXiv preprint 1803.08314, DBLP CoRR record.
@article{DBLP:journals/corr/abs-1803-08314,
  author        = {Liu, Xihui and Li, Hongsheng and Shao, Jing and Chen, Dapeng and
                   Wang, Xiaogang},
  title         = {Show, Tell and Discriminate: Image Captioning by Self-retrieval
                   with Partially Labeled Data},
  journal       = {CoRR},
  volume        = {abs/1803.08314},
  year          = {2018},
  eprint        = {1803.08314},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1803.08314},
  timestamp     = {Mon, 13 Aug 2018 16:47:36 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1803-08314},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Kilickaya et al. (2016), captioning metric re-evaluation: arXiv preprint 1612.07600, DBLP CoRR record.
@article{DBLP:journals/corr/KilickayaEIE16,
  author        = {Kilickaya, Mert and Erdem, Aykut and Ikizler{-}Cinbis, Nazli and
                   Erdem, Erkut},
  title         = {Re-evaluating Automatic Metrics for Image Captioning},
  journal       = {CoRR},
  volume        = {abs/1612.07600},
  year          = {2016},
  eprint        = {1612.07600},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1612.07600},
  timestamp     = {Mon, 13 Aug 2018 16:48:05 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/KilickayaEIE16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Bartz, Yang, Meinel (2017), STN-OCR: arXiv preprint 1707.08831, DBLP CoRR record.
% Only the acronym is braced; with the colon outside the braces the word
% after it keeps its capital without needing single-letter protection.
@article{DBLP:journals/corr/BartzYM17,
  author        = {Bartz, Christian and Yang, Haojin and Meinel, Christoph},
  title         = {{STN-OCR}: A single Neural Network for Text Detection and Text
                   Recognition},
  journal       = {CoRR},
  volume        = {abs/1707.08831},
  year          = {2017},
  url           = {http://arxiv.org/abs/1707.08831},
  archivePrefix = {arXiv},
  eprint        = {1707.08831},
  timestamp     = {Mon, 13 Aug 2018 16:48:31 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/BartzYM17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Wang and Hu (2017), gated recurrent convolution network for OCR; conference paper.
% The acronym is braced so sentence-casing styles do not lowercase it.
@inproceedings{jianfeng2017deep,
  author    = {Wang, Jianfeng and Hu, Xiaolin},
  title     = {Gated Recurrent Convolution Neural Network for {OCR}},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2017},
}
% Namysl and Konya (2019), lexicon-free OCR: arXiv preprint 1906.01969, DBLP CoRR record.
@article{DBLP:journals/corr/abs-1906-01969,
  author        = {Namysl, Marcin and Konya, Iuliu},
  title         = {Efficient, Lexicon-Free {OCR} using Deep Learning},
  journal       = {CoRR},
  volume        = {abs/1906.01969},
  year          = {2019},
  eprint        = {1906.01969},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1906.01969},
  timestamp     = {Thu, 13 Jun 2019 13:36:00 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1906-01969},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Razavi, van den Oord, Vinyals (2019), VQ-VAE-2: arXiv preprint 1906.00446, DBLP CoRR record.
@article{DBLP:journals/corr/abs-1906-00446,
  author        = {Razavi, Ali and van den Oord, A{\"{a}}ron and Vinyals, Oriol},
  title         = {Generating Diverse High-Fidelity Images with {VQ-VAE-2}},
  journal       = {CoRR},
  volume        = {abs/1906.00446},
  year          = {2019},
  eprint        = {1906.00446},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1906.00446},
  timestamp     = {Thu, 13 Jun 2019 13:36:00 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1906-00446},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Wang et al. (2019), detecting CNN-generated images: arXiv preprint 1912.11035.
% The acronym is braced so sentence-casing styles do not turn it into "Cnn".
@misc{wang2019cnngenerated,
  author        = {Wang, Sheng-Yu and Wang, Oliver and Zhang, Richard and
                   Owens, Andrew and Efros, Alexei A.},
  title         = {{CNN}-generated images are surprisingly easy to spot... for now},
  year          = {2019},
  eprint        = {1912.11035},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% Karras, Laine, Aila (2018), style-based generator: arXiv preprint 1812.04948, DBLP CoRR record.
@article{DBLP:journals/corr/abs-1812-04948,
  author        = {Karras, Tero and Laine, Samuli and Aila, Timo},
  title         = {A Style-Based Generator Architecture for Generative Adversarial
                   Networks},
  journal       = {CoRR},
  volume        = {abs/1812.04948},
  year          = {2018},
  eprint        = {1812.04948},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1812.04948},
  timestamp     = {Tue, 01 Jan 2019 15:01:25 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1812-04948},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Karras et al. (2019), StyleGAN image-quality analysis: CoRR abs/1912.04958.
% The eprint, archivePrefix and url fields are derived from the CoRR volume
% identifier, matching the other CoRR entries in this file.
@article{Karras2019stylegan2,
  author        = {Karras, Tero and Laine, Samuli and Aittala, Miika and
                   Hellsten, Janne and Lehtinen, Jaakko and Aila, Timo},
  title         = {Analyzing and Improving the Image Quality of {StyleGAN}},
  journal       = {CoRR},
  volume        = {abs/1912.04958},
  year          = {2019},
  url           = {https://arxiv.org/abs/1912.04958},
  archivePrefix = {arXiv},
  eprint        = {1912.04958},
}
% Cornia et al. (2019), Meshed-Memory Transformer: arXiv preprint 1912.08226.
% The inline math in the title is wrapped in braces so case-changing styles
% treat it as one protected unit.
@misc{cornia2019m2,
  author        = {Cornia, Marcella and Stefanini, Matteo and Baraldi, Lorenzo and
                   Cucchiara, Rita},
  title         = {{M$^2$}: Meshed-Memory Transformer for Image Captioning},
  year          = {2019},
  eprint        = {1912.08226},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% Perez-Rua et al. (2020), attention for video action modeling: arXiv preprint 2004.01278.
@misc{perezrua2020knowing,
  author        = {Perez-Rua, Juan-Manuel and Martinez, Brais and Zhu, Xiatian and
                   Toisoul, Antoine and Escorcia, Victor and Xiang, Tao},
  title         = {Knowing What, Where and When to Look: Efficient Video Action
                   Modeling with Attention},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2004.01278},
  primaryClass  = {cs.CV},
}
% Luo and Shakhnarovich (2020), diversity-accuracy tradeoff in captioning: arXiv preprint 2002.11848.
@misc{luo2020analysis,
  author        = {Luo, Ruotian and Shakhnarovich, Gregory},
  title         = {Analysis of diversity-accuracy tradeoff in image captioning},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2002.11848},
  primaryClass  = {cs.CL},
}
% Bertasius, Wang, Torresani (2021), space-time attention for video: arXiv 2102.05095.
% The keyword list is de-duplicated; the eprint fields mirror the identifier in
% the url and the single subject class listed in the keywords.
@misc{https://doi.org/10.48550/arxiv.2102.05095,
  author        = {Bertasius, Gedas and Wang, Heng and Torresani, Lorenzo},
  title         = {Is Space-Time Attention All You Need for Video Understanding?},
  publisher     = {arXiv},
  year          = {2021},
  doi           = {10.48550/ARXIV.2102.05095},
  url           = {https://arxiv.org/abs/2102.05095},
  eprint        = {2102.05095},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer
                   and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
% Li et al. (2022), BLIP: arXiv preprint 2201.12086, DBLP CoRR record.
% Only the acronym is braced; the colon stays outside the braces so that
% sentence-casing styles keep the capital on the first word of the subtitle.
@article{DBLP:journals/corr/abs-2201-12086,
  author     = {Li, Junnan and Li, Dongxu and Xiong, Caiming and Hoi, Steven C. H.},
  title      = {{BLIP}: Bootstrapping Language-Image Pre-training for Unified
                Vision-Language Understanding and Generation},
  journal    = {CoRR},
  volume     = {abs/2201.12086},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.12086},
  eprinttype = {arXiv},
  eprint     = {2201.12086},
  timestamp  = {Wed, 02 Feb 2022 15:00:01 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-12086.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Cho et al. (2022), captioning with a CLIP-based reward: arXiv 2205.13115.
% The acronym in the title is braced against sentence-casing, the keyword list
% is de-duplicated, and the eprint fields mirror the identifier in the url.
@misc{https://doi.org/10.48550/arxiv.2205.13115,
  author        = {Cho, Jaemin and Yoon, Seunghyun and Kale, Ajinkya and
                   Dernoncourt, Franck and Bui, Trung and Bansal, Mohit},
  title         = {Fine-grained Image Captioning with {CLIP} Reward},
  publisher     = {arXiv},
  year          = {2022},
  doi           = {10.48550/ARXIV.2205.13115},
  url           = {https://arxiv.org/abs/2205.13115},
  eprint        = {2205.13115},
  archivePrefix = {arXiv},
  keywords      = {Computation and Language (cs.CL), Artificial Intelligence
                   (cs.AI), Computer Vision and Pattern Recognition (cs.CV),
                   FOS: Computer and information sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license},
}
% Ge et al. (2024), Visual Fact Checker; conference paper.
% The acronym brace group in booktitle is kept on one line so that no stray
% space is rendered before the closing parenthesis.
@inproceedings{ge2024visual,
  author    = {Ge, Yunhao and Zeng, Xiaohui and Huffman, Jacob Samuel and
               Lin, Tsung-Yi and Liu, Ming-Yu and Cui, Yin},
  title     = {Visual Fact Checker: Enabling High-Fidelity Detailed Caption
               Generation},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2024},
}