Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ Create a config file in `./router_inference/config/<router_name>.json`. An examp
{
"pipeline_params": {
"router_name": "your-router",
"router_cls_name": "your_router_class_name",
"models": [
"gpt-4o-mini",
"claude-3-haiku-20240307",
Expand Down Expand Up @@ -129,12 +130,13 @@ For each model in your config, add an entry with the pricing per million tokens

Create your own router class by inheriting from `BaseRouter` and implementing the `_get_prediction()` method. See [`router_inference/router/example_router.py`](./router_inference/router/example_router.py) for a complete example.

Then, modify [`router_inference/generate_prediction_file.py`](./router_inference/generate_prediction_file.py#L150) to use your router class:
Then, modify [`router_inference/router/__init__.py`](./router_inference/router/__init__.py) to include your router class:

```python
# Replace ExampleRouter with your router class
# Import your router class
from router_inference.router.my_router import MyRouter
router = MyRouter(args.router_name)

__all__ = ["BaseRouter", "ExampleRouter", "MyRouter"]
```

Finally, generate the prediction file:
Expand Down
332 changes: 90 additions & 242 deletions model_cost/model_cost.json
Original file line number Diff line number Diff line change
@@ -1,133 +1,21 @@
{
"Qwen/QwQ-32B": {
"input_token_price_per_million": 1.2,
"output_token_price_per_million": 1.2
},
"anthropic/claude-3.5-sonnet": {
"input_token_price_per_million": 3,
"output_token_price_per_million": 15
},
"claude-3-7-sonnet-20250219": {
"input_token_price_per_million": 3,
"output_token_price_per_million": 15
},
"claude-3-haiku-20240307": {
"input_token_price_per_million": 0.25,
"output_token_price_per_million": 1.25
},
"claude-sonnet-4-5": {
"input_token_price_per_million": 3.0,
"output_token_price_per_million": 15.0
},
"codestral-latest": {
"input_token_price_per_million": 0.3,
"output_token_price_per_million": 0.9
},
"deepseek-coder": {
"input_token_price_per_million": 0.25,
"output_token_price_per_million": 1.0
},
"deepseek-reasoner": {
"input_token_price_per_million": 0.28,
"output_token_price_per_million": 0.42
},
"deepseek-v3.2": {
"input_token_price_per_million": 0.28,
"output_token_price_per_million": 0.42
},
"gemini-2.0-flash-001": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.4
},
"gemini-2.5-flash": {
"input_token_price_per_million": 0.3,
"output_token_price_per_million": 2.5
},
"gemini-2.5-flash-lite": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.4
},
"gemini-2.5-pro": {
"input_token_price_per_million": 1.25,
"output_token_price_per_million": 10.0
},
"gemini-3-flash-preview": {
"input_token_price_per_million": 0.5,
"output_token_price_per_million": 3
},
"gemini-3-pro-preview": {
"input_token_price_per_million": 2.0,
"output_token_price_per_million": 12.0
},
"gemini-3.0-pro": {
"input_token_price_per_million": 2.0,
"output_token_price_per_million": 12.0
},
"glm-4-air": {
"input_token_price_per_million": 0.137,
"output_token_price_per_million": 0.137
},
"glm-4-air-250414": {
"input_token_price_per_million": 0.07,
"output_token_price_per_million": 0.07
},
"glm-4-flash": {
"input_token_price_per_million": 0.0137,
"output_token_price_per_million": 0.0137
},
"glm-4-plus": {
"input_token_price_per_million": 0.7,
"output_token_price_per_million": 0.7
},
"glm-4.5-air": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.3
},
"glm-4.6": {
"input_token_price_per_million": 0.28,
"output_token_price_per_million": 1.12
},
"gpt-3.5-turbo": {
"input_token_price_per_million": 1,
"output_token_price_per_million": 2
},
"gpt-3.5-turbo-1106": {
"input_token_price_per_million": 1,
"output_token_price_per_million": 2
},
"gpt-4": {
"input_token_price_per_million": 10,
"output_token_price_per_million": 30
},
"gpt-4-1106-preview": {
"input_token_price_per_million": 10,
"output_token_price_per_million": 30
},
"gpt-4-turbo": {
"input_token_price_per_million": 10,
"output_token_price_per_million": 30
},
"gpt-4.1": {
"input_token_price_per_million": 2.0,
"output_token_price_per_million": 8.0
},
"gpt-4.1-mini": {
"input_token_price_per_million": 0.4,
"output_token_price_per_million": 1.6
"gemini-3-flash-preview": {
"input_token_price_per_million": 0.5,
"output_token_price_per_million": 3
},
"gpt-4.1-nano": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.4
"o4-mini": {
"input_token_price_per_million": 1.1,
"output_token_price_per_million": 4.4
},
"gpt-4o": {
"input_token_price_per_million": 2.5,
"gpt-5.1": {
"input_token_price_per_million": 1.25,
"output_token_price_per_million": 10.0
},
"gpt-4o-mini": {
"input_token_price_per_million": 0.15,
"output_token_price_per_million": 0.6
},
"gpt-5-chat-latest": {
"gpt-5.1-None": {
"input_token_price_per_million": 1.25,
"output_token_price_per_million": 10.0
},
Expand All @@ -139,148 +27,108 @@
"input_token_price_per_million": 0.05,
"output_token_price_per_million": 0.4
},
"gpt-5.1": {
"input_token_price_per_million": 1.25,
"output_token_price_per_million": 10.0
},
"gpt-5.1-None": {
"input_token_price_per_million": 1.25,
"output_token_price_per_million": 10.0
},
"llama-3-1-405b-instruct": {
"input_token_price_per_million": 2.4,
"output_token_price_per_million": 2.4
},
"llama-3-1-8b-instruct": {
"input_token_price_per_million": 0.22,
"output_token_price_per_million": 0.22
},
"llama-3-2-1b-instruct": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.1
},
"llama-3-2-3b-instruct": {
"input_token_price_per_million": 0.15,
"output_token_price_per_million": 0.15
},
"llama-3-3-70b-instruct": {
"input_token_price_per_million": 0.72,
"output_token_price_per_million": 0.72
},
"meta-llama/Llama-3-70b-chat-hf": {
"input_token_price_per_million": 0.88,
"output_token_price_per_million": 0.88
},
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
"input_token_price_per_million": 0.88,
"output_token_price_per_million": 0.88
},
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
"input_token_price_per_million": 0.88,
"output_token_price_per_million": 0.88
},
"meta-llama/llama-3-8b-instruct": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.1
},
"meta-llama_llama-3.1-405b-instruct": {
"input_token_price_per_million": 3.5,
"output_token_price_per_million": 3.5
},
"meta-llama_llama-3.3-70b-instruct": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.32
},
"meta/codellama-34b-instruct": {
"input_token_price_per_million": 3.2,
"output_token_price_per_million": 3.2
"deepseek-reasoner": {
"input_token_price_per_million": 0.28,
"output_token_price_per_million": 0.42
},
"mistral-large-latest": {
"gemini-3.0-pro": {
"input_token_price_per_million": 2.0,
"output_token_price_per_million": 6.0
"output_token_price_per_million": 12.0
},
"mistral-medium": {
"input_token_price_per_million": 0.4,
"output_token_price_per_million": 2.0
"deepseek-v3.2": {
"input_token_price_per_million": 0.28,
"output_token_price_per_million": 0.42
},
"mistral-medium-latest": {
"input_token_price_per_million": 0.4,
"test": {
"input_token_price_per_million": 1.0,
"output_token_price_per_million": 2.0
},
"mistral-small-latest": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.3
},
"mistralai/mistral-7b-instruct": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 0.2
},
"mistralai/mixtral-8x7b-instruct": {
"input_token_price_per_million": 0.6,
"output_token_price_per_million": 0.6
},
"mistralai_devstral-2512:free": {
"input_token_price_per_million": 0.05,
"output_token_price_per_million": 0.22
},
"o4-mini": {
"input_token_price_per_million": 1.1,
"output_token_price_per_million": 4.4
"claude-sonnet-4-5": {
"input_token_price_per_million": 3.0,
"output_token_price_per_million": 15.0
},
"open-mistral-7b": {
"input_token_price_per_million": 0.15,
"output_token_price_per_million": 0.15
"claude-3-haiku-20240307": {
"input_token_price_per_million": 0.25,
"output_token_price_per_million": 1.25
},
"open-mistral-nemo": {
"gpt-4o-mini": {
"input_token_price_per_million": 0.15,
"output_token_price_per_million": 0.15
},
"open-mixtral-8x7b": {
"input_token_price_per_million": 0.6,
"output_token_price_per_million": 0.6
},
"openai_gpt-oss-120b": {
"input_token_price_per_million": 0.039,
"output_token_price_per_million": 0.19
},
"qwen_qwen3-235b-a22b-2507": {
"input_token_price_per_million": 0.071,
"output_token_price_per_million": 0.463
"glm-4.5-air": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.3
},
"qwen_qwen3-coder": {
"input_token_price_per_million": 0.22,
"output_token_price_per_million": 0.95
"glm-4.6": {
"input_token_price_per_million": 0.28,
"output_token_price_per_million": 1.12
},
"qwen_qwen3-vl-235b-a22b-instruct": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 1.2
"glm-4-air-250414": {
"input_token_price_per_million": 0.07,
"output_token_price_per_million": 0.07
},
"qwen_qwen3-vl-235b-a22b-thinking": {
"input_token_price_per_million": 0.3,
"output_token_price_per_million": 1.2
},
"qwen_qwen3-vl-32b-instruct": {
"input_token_price_per_million": 0.5,
"output_token_price_per_million": 1.5
},
"test": {
"input_token_price_per_million": 1.0,
"output_token_price_per_million": 2.0
},
"x-ai_grok-4.1-fast": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 0.5
"gemini-2.5-flash-lite": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.4
},
"x-ai_grok-code-fast-1": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 1.5
"gemini-2.5-flash": {
"input_token_price_per_million": 0.3,
"output_token_price_per_million": 2.5
},
"xiaomi_mimo-v2-flash:free": {
"gemini-2.0-flash-001": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.3
"output_token_price_per_million": 0.4
},
"z-ai_glm-4.7": {
"input_token_price_per_million": 0.4,
"qwen_qwen3-vl-32b-instruct": {
"input_token_price_per_million": 0.5,
"output_token_price_per_million": 1.5
}
},
"z-ai_glm-4.7": {
"input_token_price_per_million": 0.4,
"output_token_price_per_million": 1.5
},
"qwen_qwen3-vl-235b-a22b-instruct": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 1.2
},
"qwen_qwen3-coder": {
"input_token_price_per_million": 0.22,
"output_token_price_per_million": 0.95
},
"x-ai_grok-code-fast-1": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 1.5
},
"xiaomi_mimo-v2-flash:free": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.3
},
"openai_gpt-oss-120b": {
"input_token_price_per_million": 0.039,
"output_token_price_per_million": 0.19
},
"qwen_qwen3-235b-a22b-2507": {
"input_token_price_per_million": 0.071,
"output_token_price_per_million": 0.463
},
"x-ai_grok-4.1-fast": {
"input_token_price_per_million": 0.2,
"output_token_price_per_million": 0.5
},
"mistralai_devstral-2512:free": {
"input_token_price_per_million": 0.05,
"output_token_price_per_million": 0.22
},
"meta-llama_llama-3.3-70b-instruct": {
"input_token_price_per_million": 0.1,
"output_token_price_per_million": 0.32
},
"meta-llama_llama-3.1-405b-instruct": {
"input_token_price_per_million": 3.5,
"output_token_price_per_million": 3.5
}
}
Loading