Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 100 additions & 44 deletions model_cost/model_cost.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
{
"google/gemma-3n-e4b-it": {
"input_token_price_per_million": 0.02,
"output_token_price_per_million": 0.04
},
"gemini-3-pro-preview": {
"input_token_price_per_million": 2.0,
"output_token_price_per_million": 12.0
Expand All @@ -15,6 +19,10 @@
"input_token_price_per_million": 1.25,
"output_token_price_per_million": 10.0
},
"gpt-5.2": {
"input_token_price_per_million": 1.75,
"output_token_price_per_million": 14.0
},
"gpt-5.1-None": {
"input_token_price_per_million": 1.25,
"output_token_price_per_million": 10.0
Expand Down Expand Up @@ -87,48 +95,96 @@
"input_token_price_per_million": 0.5,
"output_token_price_per_million": 1.5
},
"z-ai_glm-4.7": {
"input_token_price_per_million": 0.4,
"output_token_price_per_million": 1.5
},
"qwen_qwen3-vl-235b-a22b-instruct": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 1.2
},
"qwen_qwen3-coder": {
"input_token_price_per_million": 0.22,
"output_token_price_per_million": 0.95
},
"x-ai_grok-code-fast-1": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 1.5
},
"xiaomi_mimo-v2-flash:free": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.3
},
"openai_gpt-oss-120b": {
"input_token_price_per_million": 0.039,
"output_token_price_per_million": 0.19
},
"qwen_qwen3-235b-a22b-2507": {
"input_token_price_per_million": 0.071,
"output_token_price_per_million": 0.463
},
"x-ai_grok-4.1-fast": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 0.5
},
"mistralai_devstral-2512:free": {
"input_token_price_per_million": 0.05,
"output_token_price_per_million": 0.22
},
"meta-llama_llama-3.3-70b-instruct": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.32
},
"meta-llama_llama-3.1-405b-instruct": {
"input_token_price_per_million": 3.5,
"output_token_price_per_million": 3.5
}
"z-ai_glm-4.7": {
"input_token_price_per_million": 0.4,
"output_token_price_per_million": 1.5
},
"qwen_qwen3-vl-235b-a22b-instruct": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 1.2
},
"qwen_qwen3-coder": {
"input_token_price_per_million": 0.22,
"output_token_price_per_million": 0.95
},
"x-ai_grok-code-fast-1": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 1.5
},
"xiaomi_mimo-v2-flash:free": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.3
},
"openai_gpt-oss-120b": {
"input_token_price_per_million": 0.039,
"output_token_price_per_million": 0.19
},
"qwen_qwen3-235b-a22b-2507": {
"input_token_price_per_million": 0.071,
"output_token_price_per_million": 0.463
},
"qwen/qwen3-next-80b-a3b-instruct": {
"input_token_price_per_million": 0.09,
"output_token_price_per_million": 1.1
},
"claude-haiku-4.5": {
"input_token_price_per_million": 1.0,
"output_token_price_per_million": 5.0
},
"x-ai_grok-4.1-fast": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 0.5
},
"mistralai_devstral-2512:free": {
"input_token_price_per_million": 0.05,
"output_token_price_per_million": 0.22
},
"meta-llama_llama-3.3-70b-instruct": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.32
},
"meta-llama_llama-3.1-405b-instruct": {
"input_token_price_per_million": 3.5,
"output_token_price_per_million": 3.5
},
"mistralai/ministral-3-3b-2512": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.1
},
"mistralai/ministral-3-8b-2512": {
"input_token_price_per_million": 0.15,
"output_token_price_per_million": 0.15
},
"mistralai/ministral-3-14b-2512": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 0.2
},
"gpt-4o": {
"input_token_price_per_million": 2.5,
"output_token_price_per_million": 10.0
},
"qwen/qwen3-30b-a3b-instruct-2507": {
"input_token_price_per_million": 0.08,
"output_token_price_per_million": 0.33
},
"Qwen/Qwen3-Coder-Next": {
"input_token_price_per_million": 0.07,
"output_token_price_per_million": 0.3
},
"qwen/qwen3-coder-30b-a3b-instruct": {
"input_token_price_per_million": 0.07,
"output_token_price_per_million": 0.27
},
"qwen/qwen3-235b-a22b-2507": {
"input_token_price_per_million": 0.071,
"output_token_price_per_million": 0.463
},
"moonshotai/kimi-k2.5": {
"input_token_price_per_million": 0.60,
"output_token_price_per_million": 3.00
},
"z-ai/glm-5": {
"input_token_price_per_million": 1.00,
"output_token_price_per_million": 3.20
}
}
16 changes: 16 additions & 0 deletions router_inference/config/r2-router.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"pipeline_params": {
"router_name": "r2-router",
"router_cls_name": "R2Router",
"models": [
"qwen/qwen3-235b-a22b-2507",
"qwen/qwen3-next-80b-a3b-instruct",
"qwen/qwen3-30b-a3b-instruct-2507",
"Qwen/Qwen3-Coder-Next",
"gemini-2.5-flash",
"claude-3-haiku-20240307"
],
"description": "R2-Router: Global KNN (K=80, cosine, distance-weighted) trained on sub_10 split. 6 models, 4 budgets (concise, budget_200, budget_400, budget_800). Routes via risk=(1-lambda)*quality - lambda*tokens*price. Lambda=0.999.",
"lambda": 0.999
}
}
Loading