% references.bib
@misc{zhao2023survey,
title={A Survey of Large Language Models},
author={Wayne Xin Zhao and Kun Zhou and Junyi Li and Tianyi Tang and Xiaolei Wang and Yupeng Hou and Yingqian Min and Beichen Zhang and Junjie Zhang and Zican Dong and Yifan Du and Chen Yang and Yushuo Chen and Zhipeng Chen and Jinhao Jiang and Ruiyang Ren and Yifan Li and Xinyu Tang and Zikang Liu and Peiyu Liu and Jian-Yun Nie and Ji-Rong Wen},
year={2023},
eprint={2303.18223},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{vaswani2023attention,
title={Attention Is All You Need},
author={Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},
year={2017},
eprint={1706.03762},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{yang2023harnessing,
title={Harnessing the Power of LLMs in Practice: A Survey on ChatGPT and Beyond},
author={Jingfeng Yang and Hongye Jin and Ruixiang Tang and Xiaotian Han and Qizhang Feng and Haoming Jiang and Bing Yin and Xia Hu},
year={2023},
eprint={2304.13712},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{radford2018improving,
title = {Improving Language Understanding by Generative Pre-Training},
author = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
year = {2018}
}
@misc{Radford2019LanguageMA,
title={Language Models are Unsupervised Multitask Learners},
author={Alec Radford and Jeff Wu and Rewon Child and David Luan and Dario Amodei and Ilya Sutskever},
year={2019},
url={https://api.semanticscholar.org/CorpusID:160025533}
}
@inproceedings{NEURIPS2020_1457c0d6,
author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
booktitle = {Advances in Neural Information Processing Systems},
editor = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
pages = {1877--1901},
publisher = {Curran Associates, Inc.},
title = {Language Models are Few-Shot Learners},
url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf},
volume = {33},
year = {2020}
}
@misc{chen2021evaluating,
title={Evaluating Large Language Models Trained on Code},
author={Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de Oliveira Pinto and Jared Kaplan and Harri Edwards and Yuri Burda and Nicholas Joseph and Greg Brockman and Alex Ray and Raul Puri and Gretchen Krueger and Michael Petrov and Heidy Khlaaf and Girish Sastry and Pamela Mishkin and Brooke Chan and Scott Gray and Nick Ryder and Mikhail Pavlov and Alethea Power and Lukasz Kaiser and Mohammad Bavarian and Clemens Winter and Philippe Tillet and Felipe Petroski Such and Dave Cummings and Matthias Plappert and Fotios Chantzis and Elizabeth Barnes and Ariel Herbert-Voss and William Hebgen Guss and Alex Nichol and Alex Paino and Nikolas Tezak and Jie Tang and Igor Babuschkin and Suchir Balaji and Shantanu Jain and William Saunders and Christopher Hesse and Andrew N. Carr and Jan Leike and Josh Achiam and Vedant Misra and Evan Morikawa and Alec Radford and Matthew Knight and Miles Brundage and Mira Murati and Katie Mayer and Peter Welinder and Bob McGrew and Dario Amodei and Sam McCandlish and Ilya Sutskever and Wojciech Zaremba},
year={2021},
eprint={2107.03374},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{ouyang2022training,
title={Training language models to follow instructions with human feedback},
author={Long Ouyang and Jeff Wu and Xu Jiang and Diogo Almeida and Carroll L. Wainwright and Pamela Mishkin and Chong Zhang and Sandhini Agarwal and Katarina Slama and Alex Ray and John Schulman and Jacob Hilton and Fraser Kelton and Luke Miller and Maddie Simens and Amanda Askell and Peter Welinder and Paul Christiano and Jan Leike and Ryan Lowe},
year={2022},
eprint={2203.02155},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@article{LLMEvaluationSurvey,
title={A Survey on Evaluation of Large Language Models},
author={Chang, Yupeng and Wang, Xu and Wang, Jindong and Wu, Yuan and Zhu, Kaijie and Chen, Hao and Yang, Linyi and Yi, Xiaoyuan and Wang, Cunxiang and Wang, Yidong and Ye, Wei and Zhang, Yue and Chang, Yi and Yu, Philip S. and Yang, Qiang and Xie, Xing},
year={2023},
journal={arXiv preprint arXiv:2307.03109},
url={https://arxiv.org/abs/2307.03109},
}
@article{LLMInstructionTuningSurvey,
title={Instruction Tuning for Large Language Models: A Survey},
author={Zhang, Shengyu and Dong, Linfeng and Li, Xiaoya and Zhang, Sen and Sun, Xiaofei and Wang, Shuhe and Li, Jiwei and Hu, Runyi and Zhang, Tianwei and Wu, Fei and others},
year={2023},
journal={arXiv preprint arXiv:2308.10792},
url={https://arxiv.org/abs/2308.10792},
}
@article{LLMAgentSurvey,
title = {The Rise and Potential of Large Language Model Based Agents: A Survey},
author = {Zhiheng Xi and Wenxiang Chen and Xin Guo and Wei He and Yiwen Ding and Boyang Hong and Ming Zhang and Junzhe Wang and Senjie Jin and Enyu Zhou and Rui Zheng and Xiaoran Fan and Xiao Wang and Limao Xiong and Yuhao Zhou and Weiran Wang and Changhao Jiang and Yicheng Zou and Xiangyang Liu and Zhangyue Yin and Shihan Dou and Rongxiang Weng and Wensen Cheng and Qi Zhang and Wenjuan Qin and Yongyan Zheng and Xipeng Qiu and Xuanjing Huang and Tao Gui},
year = {2023},
journal = {arXiv preprint arXiv:2309.07864},
url = {https://arxiv.org/abs/2309.07864},
}
@article{RATGSurvey,
title = {A Survey on Retrieval-Augmented Text Generation},
author = {Huayang Li and Yixuan Su and Deng Cai and Yan Wang and Lemao Liu},
year = {2022},
journal = {arXiv preprint arXiv:2202.01110},
url = {https://arxiv.org/abs/2202.01110},
}
@book{zhang2023dive,
title = {Dive into Deep Learning},
author = {Zhang, Aston and Lipton, Zachary C. and Li, Mu and Smola, Alexander J.},
publisher = {Cambridge University Press},
url = {https://zh.d2l.ai/index.html},
note = {\url{https://D2L.ai}},
year = {2023}
}
@article{LLMHallucination,
title = {Siren's Song in the AI Ocean: A Survey on Hallucination in Large Language Models},
author = {Zhang, Yue and Li, Yafu and Cui, Leyang and Cai, Deng and Liu, Lemao and Fu, Tingchen and Huang, Xinting and Zhao, Enbo and Zhang, Yu and Chen, Yulong and Wang, Longyue and Luu, Anh Tuan and Bi, Wei and Shi, Freda and Shi, Shuming},
journal = {arXiv preprint arXiv:2309.01219},
year = {2023},
url = {https://arxiv.org/abs/2309.01219}
}
@article{NLPHallucination,
doi = {10.1145/3571730},
url = {https://doi.org/10.1145/3571730},
year = {2023},
volume = {55},
number = {12},
pages = {1--38},
author = {Ziwei Ji and Nayeon Lee and Rita Frieske and Tiezheng Yu and Dan Su and Yan Xu and Etsuko Ishii and Ye Jin Bang and Andrea Madotto and Pascale Fung},
title = {Survey of Hallucination in Natural Language Generation},
journal = {{ACM} Computing Surveys}
}
@misc{AugmentedLM,
title = {Augmented Language Models: a Survey},
author = {Grégoire Mialon and Roberto Dessì and Maria Lomeli and Christoforos Nalmpantis and Ram Pasunuru and Roberta Raileanu and Baptiste Rozière and Timo Schick and Jane Dwivedi-Yu and Asli Celikyilmaz and Edouard Grave and Yann LeCun and Thomas Scialom},
year = {2023},
eprint = {2302.07842},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
url = {https://arxiv.org/abs/2302.07842},
}
@misc{liu2023lost,
title = {Lost in the Middle: How Language Models Use Long Contexts},
author = {Nelson F. Liu and Kevin Lin and John Hewitt and Ashwin Paranjape and Michele Bevilacqua and Fabio Petroni and Percy Liang},
year = {2023},
eprint = {2307.03172},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
url = {https://arxiv.org/abs/2307.03172}
}
@misc{li2023multimodal,
title={Multimodal Foundation Models: From Specialists to General-Purpose Assistants},
author={Chunyuan Li and Zhe Gan and Zhengyuan Yang and Jianwei Yang and Linjie Li and Lijuan Wang and Jianfeng Gao},
year={2023},
eprint={2309.10020},
archivePrefix={arXiv},
primaryClass={cs.CV},
url={https://arxiv.org/abs/2309.10020},
}
@misc{yang2023dawn,
title={The Dawn of LMMs: Preliminary Explorations with GPT-4V(ision)},
author={Zhengyuan Yang and Linjie Li and Kevin Lin and Jianfeng Wang and Chung-Ching Lin and Zicheng Liu and Lijuan Wang},
year={2023},
eprint={2309.17421},
archivePrefix={arXiv},
primaryClass={cs.CV},
url={https://arxiv.org/abs/2309.17421}
}
@article{yao2022react,
title={ReAct: Synergizing Reasoning and Acting in Language Models},
author={Yao, Shunyu and Zhao, Jeffrey and Yu, Dian and Du, Nan and Shafran, Izhak and Narasimhan, Karthik and Cao, Yuan},
journal={arXiv preprint arXiv:2210.03629},
year={2022},
url={https://arxiv.org/abs/2210.03629}
}
@misc{karpas2022mrkl,
title={MRKL Systems: A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning},
author={Ehud Karpas and Omri Abend and Yonatan Belinkov and Barak Lenz and Opher Lieber and Nir Ratner and Yoav Shoham and Hofit Bata and Yoav Levine and Kevin Leyton-Brown and Dor Muhlgay and Noam Rozen and Erez Schwartz and Gal Shachaf and Shai Shalev-Shwartz and Amnon Shashua and Moshe Tenenholtz},
year={2022},
eprint={2205.00445},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{wang2023planandsolve,
title={Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models},
author={Lei Wang and Wanyu Xu and Yihuai Lan and Zhiqiang Hu and Yunshi Lan and Roy Ka-Wei Lee and Ee-Peng Lim},
year={2023},
eprint={2305.04091},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@inbook{multiagentintro,
title = {An Introduction to Multi-Agent Systems},
author = {Balaji, P. G. and Srinivasan, D.},
editor = {Srinivasan, Dipti and Jain, Lakhmi C.},
booktitle = {Innovations in Multi-Agent Systems and Applications - 1},
year = {2010},
publisher = {Springer Berlin Heidelberg},
address = {Berlin, Heidelberg},
pages = {1--27},
isbn = {978-3-642-14435-6},
doi = {10.1007/978-3-642-14435-6_1},
url = {https://doi.org/10.1007/978-3-642-14435-6_1}
}
@misc{talebirad2023multiagent,
title={Multi-Agent Collaboration: Harnessing the Power of Intelligent LLM Agents},
author={Yashar Talebirad and Amirhossein Nadiri},
year={2023},
eprint={2306.03314},
archivePrefix={arXiv},
primaryClass={cs.AI}
}
@misc{sennrich2016neural,
title={Neural Machine Translation of Rare Words with Subword Units},
author={Rico Sennrich and Barry Haddow and Alexandra Birch},
year={2016},
eprint={1508.07909},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{wang2019neural,
title={Neural Machine Translation with Byte-Level Subwords},
author={Changhan Wang and Kyunghyun Cho and Jiatao Gu},
year={2019},
eprint={1909.03341},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@inproceedings{pub37842,
title = {Japanese and Korean Voice Search},
author = {Mike Schuster and Kaisuke Nakajima},
year = {2012},
booktitle = {International Conference on Acoustics, Speech and Signal Processing},
pages = {5149--5152},
url = {https://research.google/pubs/pub37842/},
}
@misc{kudo2018subword,
title={Subword Regularization: Improving Neural Network Translation Models with Multiple Subword Candidates},
author={Taku Kudo},
year={2018},
eprint={1804.10959},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@online{LLMTutorial,
title={A Tutorial on LLM},
author={Haifeng Li},
year={2023},
url={https://medium.com/@haifengl/a-tutorial-to-llm-f78dd4e82efc},
}
@online{yao2022react_online,
title={ReAct: Synergizing Reasoning and Acting in Language Models},
author={Yao, Shunyu and Zhao, Jeffrey and Yu, Dian and Du, Nan and Shafran, Izhak and Narasimhan, Karthik and Cao, Yuan},
year={2022},
url={https://react-lm.github.io/},
}
@online{tsne_online,
title={Introduction to t-SNE},
author={Abid Ali Awan},
year={2023},
url={https://www.datacamp.com/tutorial/introduction-t-sne},
}
@online{ms_prompt_engineer,
title={What are Prompts?},
year={2023},
url={https://learn.microsoft.com/en-us/semantic-kernel/prompt-engineering/},
}