diff --git a/blog/2023-05-03-arena.md b/blog/2023-05-03-arena.md index beadfd3b..6e5e61a7 100644 --- a/blog/2023-05-03-arena.md +++ b/blog/2023-05-03-arena.md @@ -60,7 +60,7 @@ Please note that we periodically release blog posts to update the leaderboard. F - [May 10 Updates](https://lmsys.org/blog/2023-05-10-leaderboard/) - [May 25 Updates](https://lmsys.org/blog/2023-05-25-leaderboard/) - [June 22 Updates](https://lmsys.org/blog/2023-06-22-leaderboard/) -- [Dataset Release](https://lmsys.org/blog/2023-07-20-dataset/) +- [Dataset Release (July 20)](https://lmsys.org/blog/2023-07-20-dataset/) - [Dec. 7 Updates](https://lmsys.org/blog/2023-12-07-leaderboard/) - [Policy Updates (March 1, 2024)](https://lmsys.org/blog/2024-03-01-policy/) @@ -164,7 +164,8 @@ We thank other members of the Vicuna team for valuable feedback and MBZUAI for d - Colab notebook: [https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing](https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing) ## Citation -Please cite our [paper](https://arxiv.org/abs/2403.04132) if you find our work or dataset useful. +Please cite the following [papers](https://arxiv.org/abs/2403.04132) if you find our work useful. + ``` @misc{chiang2024chatbot, title={Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference}, @@ -174,4 +175,20 @@ Please cite our [paper](https://arxiv.org/abs/2403.04132) if you find our work o archivePrefix={arXiv}, primaryClass={cs.AI} } + +@inproceedings{zheng2023judging, + title={Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena}, + author={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric Xing and Hao Zhang and Joseph E. Gonzalez and Ion Stoica}, + booktitle={Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track}, + year={2023}, + url={https://openreview.net/forum?id=uccHPGDlao} +} + +@inproceedings{zheng2024lmsyschatm, + title={LMSYS-Chat-1M: A Large-Scale Real-World LLM Conversation Dataset}, + author={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Tianle Li and Siyuan Zhuang and Zhanghao Wu and Yonghao Zhuang and Zhuohan Li and Zi Lin and Eric Xing and Joseph E. Gonzalez and Ion Stoica and Hao Zhang}, + booktitle={The Twelfth International Conference on Learning Representations}, + year={2024}, + url={https://openreview.net/forum?id=BOfDKxfwt0} +} ``` diff --git a/content/projects.json b/content/projects.json index 3fce4fa7..79904ba0 100644 --- a/content/projects.json +++ b/content/projects.json @@ -1,30 +1,4 @@ [ - { - "name": "Models", - "entries": [ - { - "name": "Vicuna", - "architecture": "Base: Llama", - "size": "Size: 7B, 13B, 33B", - "desc": "An open-source chatbot impressing GPT-4 with 90%* ChatGPT quality.", - "link": "/blog/2023-03-30-vicuna" - }, - { - "name": "LongChat", - "architecture": "Base: Llama", - "size": "Size: 7B, 13B", - "desc": "A series of open-source chatbots with long context length (16K - 32K).", - "link": "/blog/2023-06-29-longchat" - }, - { - "name": "FastChat-T5", - "architecture": "Base: Flan-T5", - "size": "Size: 3B", - "desc": "A commercial-friendly, compact, yet powerful chat assistant.", - "link": "https://huggingface.co/lmsys/fastchat-t5-3b-v1.0" - } - ] - }, { "name": "Evaluation", "entries": [ @@ -109,5 +83,31 @@ "link": "https://huggingface.co/datasets/lmsys/toxic-chat" } ] + }, + { + "name": "Models", + "entries": [ + { + "name": "Vicuna", + "architecture": "Base: Llama", + "size": "Size: 7B, 13B, 33B", + "desc": "An open-source chatbot impressing GPT-4 with 90%* ChatGPT quality.", + "link": "/blog/2023-03-30-vicuna" + }, + { + "name": "LongChat", + "architecture": "Base: Llama", + "size": "Size: 7B, 13B", + "desc": "A series of open-source chatbots with long context length (16K - 32K).", + "link": "/blog/2023-06-29-longchat" + }, + { + "name": "FastChat-T5", + "architecture": "Base: Flan-T5", + "size": "Size: 3B", + "desc": "A commercial-friendly, compact, yet powerful chat assistant.", + "link": "https://huggingface.co/lmsys/fastchat-t5-3b-v1.0" + } + ] } ] diff --git a/src/pages/index.js b/src/pages/index.js index fb313917..5b8c0ef9 100644 --- a/src/pages/index.js +++ b/src/pages/index.js @@ -70,22 +70,22 @@ export default function Home() { - +
-

LMSYS-Chat-1M

+

FastChat


- A large-scale real-world LLM conversation dataset. + An open platform for training, serving, and evaluating LLM-based chatbots.

- +
-

FastChat

+

LMSYS-Chat-1M


- An open platform for training, serving, and evaluating LLM-based chatbots. + A large-scale real-world LLM conversation dataset.