Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fair-clocks-lick.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"kilo-code": patch
---

feat(fireworks.ai): add MiniMax M2.1 and GLM 4.7, update other models
166 changes: 120 additions & 46 deletions packages/types/src/providers/fireworks.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
import type { ModelInfo } from "../model.js"

/**
 * Model identifiers accepted by the Fireworks AI provider.
 *
 * Each member is the full Fireworks resource path
 * (`accounts/fireworks/models/<model>`) used as a key into the
 * `fireworksModels` catalog below. Note the `p` infix encodes a point
 * release (e.g. `kimi-k2p5` = Kimi K2.5, `glm-4p7` = GLM 4.7).
 */
export type FireworksModelId =
	| "accounts/fireworks/models/kimi-k2p5"
	| "accounts/fireworks/models/kimi-k2-instruct"
	| "accounts/fireworks/models/kimi-k2-instruct-0905"
	| "accounts/fireworks/models/kimi-k2-thinking"
	| "accounts/fireworks/models/minimax-m2"
	| "accounts/fireworks/models/minimax-m2p1"
	| "accounts/fireworks/models/qwen3-235b-a22b"
	| "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
	| "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct"
	| "accounts/fireworks/models/deepseek-r1-0528"
	| "accounts/fireworks/models/deepseek-v3"
	| "accounts/fireworks/models/deepseek-v3-0324"
	| "accounts/fireworks/models/deepseek-v3p1"
	| "accounts/fireworks/models/deepseek-v3p2"
	| "accounts/fireworks/models/glm-4p5"
	| "accounts/fireworks/models/glm-4p5-air"
	| "accounts/fireworks/models/glm-4p6"
	| "accounts/fireworks/models/glm-4p7"
	| "accounts/fireworks/models/gpt-oss-20b"
	| "accounts/fireworks/models/gpt-oss-120b"

Expand All @@ -28,7 +34,8 @@ export const fireworksModels = {
defaultToolProtocol: "native",
inputPrice: 0.6,
outputPrice: 2.5,
cacheReadsPrice: 0.15,
cacheReadsPrice: 0.3,
displayName: "Kimi K2 Instruct 0905",
description:
"Kimi K2 model gets a new version update: Agentic coding: more accurate, better generalization across scaffolds. Frontend coding: improved aesthetics and functionalities on web, 3d, and other tasks. Context length: extended from 128k to 256k, providing better long-horizon support.",
},
Expand All @@ -41,8 +48,18 @@ export const fireworksModels = {
defaultToolProtocol: "native",
inputPrice: 0.6,
outputPrice: 2.5,
description:
"Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.",
deprecated: true,
},
"accounts/fireworks/models/kimi-k2p5": {
maxTokens: 256000,
contextWindow: 256000,
supportsImages: true,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.6,
outputPrice: 3,
cacheReadsPrice: 0.1,
displayName: "Kimi K2.5",
},
"accounts/fireworks/models/kimi-k2-thinking": {
maxTokens: 16000,
Expand All @@ -60,19 +77,41 @@ export const fireworksModels = {
"The kimi-k2-thinking model is a general-purpose agentic reasoning model developed by Moonshot AI. Thanks to its strength in deep reasoning and multi-turn tool use, it can solve even the hardest problems.",
},
"accounts/fireworks/models/minimax-m2": {
maxTokens: 4096,
contextWindow: 204800,
maxTokens: 192000,
contextWindow: 192000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.3,
outputPrice: 1.2,
description:
"MiniMax M2 is a high-performance language model with 204.8K context window, optimized for long-context understanding and generation tasks.",
cacheReadsPrice: 0.15,
displayName: "MiniMax-M2",
},
"accounts/fireworks/models/minimax-m2p1": {
maxTokens: 200000,
contextWindow: 200000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.3,
outputPrice: 1.2,
cacheReadsPrice: 0.15,
displayName: "MiniMax-M2.1",
},
"accounts/fireworks/models/qwen3-235b-a22b": {
maxTokens: 16384,
contextWindow: 128000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.22,
outputPrice: 0.88,
cacheReadsPrice: 0.11,
displayName: "Qwen3 235B A22B",
},
"accounts/fireworks/models/qwen3-235b-a22b-instruct-2507": {
maxTokens: 32768,
maxTokens: 256000,
contextWindow: 256000,
supportsImages: false,
supportsPromptCache: false,
Expand All @@ -81,29 +120,30 @@ export const fireworksModels = {
inputPrice: 0.22,
outputPrice: 0.88,
description: "Latest Qwen3 thinking model, competitive against the best closed source models in Jul 2025.",
displayName: "Qwen3 235B A22B Instruct 2507",
},
"accounts/fireworks/models/qwen3-coder-480b-a35b-instruct": {
maxTokens: 32768,
contextWindow: 256000,
maxTokens: 256_000,
contextWindow: 256_000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.45,
outputPrice: 1.8,
description: "Qwen3's most agentic code model to date.",
cacheReadsPrice: 0.23,
displayName: "Qwen3 Coder 480B A35B Instruct",
},
"accounts/fireworks/models/deepseek-r1-0528": {
maxTokens: 20480,
maxTokens: 160000,
contextWindow: 160000,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 3,
outputPrice: 8,
description:
"05/28 updated checkpoint of Deepseek R1. Its overall performance is now approaching that of leading models, such as O3 and Gemini 2.5 Pro. Compared to the previous version, the upgraded model shows significant improvements in handling complex reasoning tasks, and this version also offers a reduced hallucination rate, enhanced support for function calling, and better experience for vibe coding. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.",
displayName: "DeepSeek R1 0528",
},
"accounts/fireworks/models/deepseek-v3": {
maxTokens: 16384,
Expand All @@ -114,79 +154,113 @@ export const fireworksModels = {
defaultToolProtocol: "native",
inputPrice: 0.9,
outputPrice: 0.9,
deprecated: true,
description:
"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.",
"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek.",
},
"accounts/fireworks/models/deepseek-v3-0324": {
maxTokens: 160000,
contextWindow: 160000,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
inputPrice: 0.9,
outputPrice: 0.9,
displayName: "DeepSeek V3 0324",
},
"accounts/fireworks/models/deepseek-v3p1": {
maxTokens: 16384,
contextWindow: 163840,
maxTokens: 160_000,
contextWindow: 160_000,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.56,
outputPrice: 1.68,
description:
"DeepSeek v3.1 is an improved version of the v3 model with enhanced performance, better reasoning capabilities, and improved code generation. This Mixture-of-Experts (MoE) model maintains the same 671B total parameters with 37B activated per token.",
displayName: "DeepSeek V3.1",
},
"accounts/fireworks/models/deepseek-v3p2": {
maxTokens: 160_000,
contextWindow: 160_000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.56,
outputPrice: 1.68,
cacheReadsPrice: 0.28,
displayName: "Deepseek v3.2",
},
"accounts/fireworks/models/glm-4p5": {
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 131_072,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.55,
outputPrice: 2.19,
description:
"Z.ai GLM-4.5 with 355B total parameters and 32B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.",
displayName: "GLM-4.5",
},
"accounts/fireworks/models/glm-4p5-air": {
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 131_072,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.55,
outputPrice: 2.19,
defaultToolProtocol: "native",
inputPrice: 0.22,
outputPrice: 0.88,
displayName: "GLM-4.5 Air",
description:
"Z.ai GLM-4.5-Air with 106B total parameters and 12B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.",
},
"accounts/fireworks/models/glm-4p6": {
maxTokens: 25344,
contextWindow: 198000,
maxTokens: 198_000,
contextWindow: 198_000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.55,
outputPrice: 2.19,
description:
"Z.ai GLM-4.6 is an advanced coding model with exceptional performance on complex programming tasks. Features improved reasoning capabilities and enhanced code generation quality, making it ideal for software development workflows.",
cacheReadsPrice: 0.28,
displayName: "GLM-4.6",
},
"accounts/fireworks/models/glm-4p7": {
maxTokens: 198_000,
contextWindow: 198_000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.6,
outputPrice: 2.2,
cacheReadsPrice: 0.3,
displayName: "GLM-4.7",
},
"accounts/fireworks/models/gpt-oss-20b": {
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 128_000,
contextWindow: 128_000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.07,
outputPrice: 0.3,
defaultToolProtocol: "native",
inputPrice: 0.05,
outputPrice: 0.2,
cacheReadsPrice: 0.04,
displayName: "GPT-OSS 20B",
description:
"OpenAI gpt-oss-20b: Compact model for local/edge deployments. Optimized for low-latency and resource-constrained environments with chain-of-thought output, adjustable reasoning, and agentic workflows.",
},
"accounts/fireworks/models/gpt-oss-120b": {
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 128_000,
contextWindow: 128_000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.15,
outputPrice: 0.6,
description:
"OpenAI gpt-oss-120b: Production-grade, general-purpose model that fits on a single H100 GPU. Features complex reasoning, configurable effort, full chain-of-thought transparency, and supports function calling, tool use, and structured outputs.",
cacheReadsPrice: 0.08,
displayName: "GPT-OSS 120B",
},
} as const satisfies Record<string, ModelInfo>
Loading
Loading