Skip to content

Commit 6015d93

Browse files
committed
reorder projects
1 parent 257a39a commit 6015d93

File tree

1 file changed

+26
-26
lines changed

1 file changed

+26
-26
lines changed

content/projects.json

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
[
2-
{
3-
"name": "Evaluation",
4-
"entries": [
5-
{
6-
"name": "Chatbot Arena",
7-
"architecture": "",
8-
"size": "",
9-
"desc": "A benchmark platform for large language models (LLMs) that features anonymous, randomized battles in a crowdsourced manner. It comes with a leaderboard based on Elo ratings.",
10-
"link": "https://lmarena.ai/"
11-
},
12-
{
13-
"name": "Arena Hard Auto",
14-
"architecture": "",
15-
"size": "",
16-
"desc": "An automatic pipeline converting live data to high quality benchmarks for evaluating chat assistants. The questions are more difficult than those in MT-Bench.",
17-
"link": "https://github.com/lm-sys/arena-hard-auto"
18-
},
19-
{
20-
"name": "MT-Bench",
21-
"architecture": "",
22-
"size": "",
23-
"desc": "A set of challenging, multi-turn, and open-ended questions for evaluating chat assistants. It uses LLM-as-a-judge to evaluate model responses.",
24-
"link": "https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge"
25-
}
26-
]
27-
},
282
{
293
"name": "Systems",
304
"entries": [
@@ -72,6 +46,32 @@
7246
}
7347
]
7448
},
49+
{
50+
"name": "Evaluation",
51+
"entries": [
52+
{
53+
"name": "Chatbot Arena",
54+
"architecture": "",
55+
"size": "",
56+
"desc": "A benchmark platform for large language models (LLMs) that features anonymous, randomized battles in a crowdsourced manner. It comes with a leaderboard based on Elo ratings.",
57+
"link": "https://lmarena.ai/"
58+
},
59+
{
60+
"name": "Arena Hard Auto",
61+
"architecture": "",
62+
"size": "",
63+
"desc": "An automatic pipeline converting live data to high quality benchmarks for evaluating chat assistants. The questions are more difficult than those in MT-Bench.",
64+
"link": "https://github.com/lm-sys/arena-hard-auto"
65+
},
66+
{
67+
"name": "MT-Bench",
68+
"architecture": "",
69+
"size": "",
70+
"desc": "A set of challenging, multi-turn, and open-ended questions for evaluating chat assistants. It uses LLM-as-a-judge to evaluate model responses.",
71+
"link": "https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge"
72+
}
73+
]
74+
},
7575
{
7676
"name": "Datasets",
7777
"entries": [

0 commit comments

Comments
 (0)