Skip to content

Commit 436676f

Browse files
Ahasannn, claude, happy-otter
committed
feat: Update R2-Router to Global KNN (235b+flash+ministral-3b, λ=0.85)
- Switch from Ridge regression to Global KNN (K=28, cosine, distance-weighted)
- Train on sub_10 split (809 queries), route all 8400
- Pool: Qwen3-235B (72.8%), Gemini 2.5 Flash (19.8%), Ministral-3B (7.4%)
- Acc=70.64%, Cost=$0.0496/1kq, Arena(β=0.1)=71.21

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
1 parent d9c5f98 commit 436676f

4 files changed

Lines changed: 73683 additions & 85768 deletions

File tree

model_cost/model_cost.json

Lines changed: 101 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
{
2+
"google/gemma-3n-e4b-it": {
3+
"input_token_price_per_million": 0.02,
4+
"output_token_price_per_million": 0.04
5+
},
26
"gemini-3-pro-preview": {
37
"input_token_price_per_million": 2.0,
48
"output_token_price_per_million": 12.0
@@ -15,6 +19,10 @@
1519
"input_token_price_per_million": 1.25,
1620
"output_token_price_per_million": 10.0
1721
},
22+
"gpt-5.2": {
23+
"input_token_price_per_million": 1.75,
24+
"output_token_price_per_million": 14.0
25+
},
1826
"gpt-5.1-None": {
1927
"input_token_price_per_million": 1.25,
2028
"output_token_price_per_million": 10.0
@@ -87,60 +95,96 @@
8795
"input_token_price_per_million": 0.5,
8896
"output_token_price_per_million": 1.5
8997
},
90-
"z-ai_glm-4.7": {
91-
"input_token_price_per_million": 0.4,
92-
"output_token_price_per_million": 1.5
93-
},
94-
"qwen_qwen3-vl-235b-a22b-instruct": {
95-
"input_token_price_per_million": 0.2,
96-
"output_token_price_per_million": 1.2
97-
},
98-
"qwen_qwen3-coder": {
99-
"input_token_price_per_million": 0.22,
100-
"output_token_price_per_million": 0.95
101-
},
102-
"x-ai_grok-code-fast-1": {
103-
"input_token_price_per_million": 0.2,
104-
"output_token_price_per_million": 1.5
105-
},
106-
"xiaomi_mimo-v2-flash:free": {
107-
"input_token_price_per_million": 0.1,
108-
"output_token_price_per_million": 0.3
109-
},
110-
"openai_gpt-oss-120b": {
111-
"input_token_price_per_million": 0.039,
112-
"output_token_price_per_million": 0.19
113-
},
114-
"qwen_qwen3-235b-a22b-2507": {
115-
"input_token_price_per_million": 0.071,
116-
"output_token_price_per_million": 0.463
117-
},
118-
"qwen/qwen3-235b-a22b-2507": {
119-
"input_token_price_per_million": 0.071,
120-
"output_token_price_per_million": 0.463
121-
},
122-
"qwen/qwen3-next-80b-a3b-instruct": {
123-
"input_token_price_per_million": 0.09,
124-
"output_token_price_per_million": 1.10
125-
},
126-
"claude-haiku-4.5": {
127-
"input_token_price_per_million": 1.0,
128-
"output_token_price_per_million": 5.0
129-
},
130-
"x-ai_grok-4.1-fast": {
131-
"input_token_price_per_million": 0.2,
132-
"output_token_price_per_million": 0.5
133-
},
134-
"mistralai_devstral-2512:free": {
135-
"input_token_price_per_million": 0.05,
136-
"output_token_price_per_million": 0.22
137-
},
138-
"meta-llama_llama-3.3-70b-instruct": {
139-
"input_token_price_per_million": 0.1,
140-
"output_token_price_per_million": 0.32
141-
},
142-
"meta-llama_llama-3.1-405b-instruct": {
143-
"input_token_price_per_million": 3.5,
144-
"output_token_price_per_million": 3.5
145-
}
146-
}
98+
"z-ai_glm-4.7": {
99+
"input_token_price_per_million": 0.4,
100+
"output_token_price_per_million": 1.5
101+
},
102+
"qwen_qwen3-vl-235b-a22b-instruct": {
103+
"input_token_price_per_million": 0.2,
104+
"output_token_price_per_million": 1.2
105+
},
106+
"qwen_qwen3-coder": {
107+
"input_token_price_per_million": 0.22,
108+
"output_token_price_per_million": 0.95
109+
},
110+
"x-ai_grok-code-fast-1": {
111+
"input_token_price_per_million": 0.2,
112+
"output_token_price_per_million": 1.5
113+
},
114+
"xiaomi_mimo-v2-flash:free": {
115+
"input_token_price_per_million": 0.1,
116+
"output_token_price_per_million": 0.3
117+
},
118+
"openai_gpt-oss-120b": {
119+
"input_token_price_per_million": 0.039,
120+
"output_token_price_per_million": 0.19
121+
},
122+
"qwen_qwen3-235b-a22b-2507": {
123+
"input_token_price_per_million": 0.071,
124+
"output_token_price_per_million": 0.463
125+
},
126+
"qwen/qwen3-next-80b-a3b-instruct": {
127+
"input_token_price_per_million": 0.09,
128+
"output_token_price_per_million": 1.1
129+
},
130+
"claude-haiku-4.5": {
131+
"input_token_price_per_million": 1.0,
132+
"output_token_price_per_million": 5.0
133+
},
134+
"x-ai_grok-4.1-fast": {
135+
"input_token_price_per_million": 0.2,
136+
"output_token_price_per_million": 0.5
137+
},
138+
"mistralai_devstral-2512:free": {
139+
"input_token_price_per_million": 0.05,
140+
"output_token_price_per_million": 0.22
141+
},
142+
"meta-llama_llama-3.3-70b-instruct": {
143+
"input_token_price_per_million": 0.1,
144+
"output_token_price_per_million": 0.32
145+
},
146+
"meta-llama_llama-3.1-405b-instruct": {
147+
"input_token_price_per_million": 3.5,
148+
"output_token_price_per_million": 3.5
149+
},
150+
"mistralai/ministral-3-3b-2512": {
151+
"input_token_price_per_million": 0.1,
152+
"output_token_price_per_million": 0.1
153+
},
154+
"mistralai/ministral-3-8b-2512": {
155+
"input_token_price_per_million": 0.15,
156+
"output_token_price_per_million": 0.15
157+
},
158+
"mistralai/ministral-3-14b-2512": {
159+
"input_token_price_per_million": 0.2,
160+
"output_token_price_per_million": 0.2
161+
},
162+
"gpt-4o": {
163+
"input_token_price_per_million": 2.5,
164+
"output_token_price_per_million": 10.0
165+
},
166+
"qwen/qwen3-30b-a3b-instruct-2507": {
167+
"input_token_price_per_million": 0.08,
168+
"output_token_price_per_million": 0.33
169+
},
170+
"Qwen/Qwen3-Coder-Next": {
171+
"input_token_price_per_million": 0.07,
172+
"output_token_price_per_million": 0.3
173+
},
174+
"qwen/qwen3-coder-30b-a3b-instruct": {
175+
"input_token_price_per_million": 0.07,
176+
"output_token_price_per_million": 0.27
177+
},
178+
"qwen/qwen3-235b-a22b-2507": {
179+
"input_token_price_per_million": 0.071,
180+
"output_token_price_per_million": 0.463
181+
},
182+
"moonshotai/kimi-k2.5": {
183+
"input_token_price_per_million": 0.60,
184+
"output_token_price_per_million": 3.00
185+
},
186+
"z-ai/glm-5": {
187+
"input_token_price_per_million": 1.00,
188+
"output_token_price_per_million": 3.20
189+
}
190+
}

router_inference/config/r2-router.json

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44
"router_cls_name": "R2Router",
55
"models": [
66
"qwen/qwen3-235b-a22b-2507",
7-
"qwen/qwen3-next-80b-a3b-instruct",
8-
"gemini-2.0-flash-001",
9-
"claude-haiku-4.5"
7+
"gemini-2.5-flash",
8+
"mistralai/ministral-3-3b-2512"
109
],
11-
"description": "R2-Router: Category-aware Ridge regression predicts per-query quality scores for 4 LLMs × 9 token budgets. Routes via risk = (1-λ)×quality - λ×cost with shrinkage toward category means.",
12-
"lambda": 0.999
10+
"description": "R2-Router: Global KNN (K=28, cosine, distance-weighted) trained on sub_10 split. Routes via risk=(1-lambda)*quality - lambda*tokens*price. Lambda=0.85.",
11+
"lambda": 0.85
1312
}
1413
}

0 commit comments

Comments
 (0)