Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fair-clocks-lick.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"kilo-code": patch
---

feat(fireworks.ai): add MiniMax M2.1 and GLM 4.7, update other models
166 changes: 120 additions & 46 deletions packages/types/src/providers/fireworks.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
import type { ModelInfo } from "../model.js"

/**
 * Model identifiers accepted by the Fireworks AI provider.
 *
 * Each member is the full Fireworks resource path
 * (`accounts/fireworks/models/<model>`) used as a key into the
 * `fireworksModels` catalog below. Note the `p` infix encodes a point
 * release (e.g. `kimi-k2p5` = Kimi K2.5, `glm-4p7` = GLM 4.7).
 */
export type FireworksModelId =
	| "accounts/fireworks/models/kimi-k2p5"
	| "accounts/fireworks/models/kimi-k2-instruct"
	| "accounts/fireworks/models/kimi-k2-instruct-0905"
	| "accounts/fireworks/models/kimi-k2-thinking"
	| "accounts/fireworks/models/minimax-m2"
	| "accounts/fireworks/models/minimax-m2p1"
	| "accounts/fireworks/models/qwen3-235b-a22b"
	| "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
	| "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct"
	| "accounts/fireworks/models/deepseek-r1-0528"
	| "accounts/fireworks/models/deepseek-v3"
	| "accounts/fireworks/models/deepseek-v3-0324"
	| "accounts/fireworks/models/deepseek-v3p1"
	| "accounts/fireworks/models/deepseek-v3p2"
	| "accounts/fireworks/models/glm-4p5"
	| "accounts/fireworks/models/glm-4p5-air"
	| "accounts/fireworks/models/glm-4p6"
	| "accounts/fireworks/models/glm-4p7"
	| "accounts/fireworks/models/gpt-oss-20b"
	| "accounts/fireworks/models/gpt-oss-120b"

Expand All @@ -28,7 +34,8 @@ export const fireworksModels = {
defaultToolProtocol: "native",
inputPrice: 0.6,
outputPrice: 2.5,
cacheReadsPrice: 0.15,
cacheReadsPrice: 0.3,
displayName: "Kimi K2 Instruct 0905",
description:
"Kimi K2 model gets a new version update: Agentic coding: more accurate, better generalization across scaffolds. Frontend coding: improved aesthetics and functionalities on web, 3d, and other tasks. Context length: extended from 128k to 256k, providing better long-horizon support.",
},
Expand All @@ -41,8 +48,18 @@ export const fireworksModels = {
defaultToolProtocol: "native",
inputPrice: 0.6,
outputPrice: 2.5,
description:
"Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.",
deprecated: true,
},
"accounts/fireworks/models/kimi-k2p5": {
maxTokens: 256000,
contextWindow: 256000,
supportsImages: true,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.6,
outputPrice: 3,
cacheReadsPrice: 0.1,
displayName: "Kimi K2.5",
},
"accounts/fireworks/models/kimi-k2-thinking": {
maxTokens: 16000,
Expand All @@ -60,19 +77,41 @@ export const fireworksModels = {
"The kimi-k2-thinking model is a general-purpose agentic reasoning model developed by Moonshot AI. Thanks to its strength in deep reasoning and multi-turn tool use, it can solve even the hardest problems.",
},
"accounts/fireworks/models/minimax-m2": {
maxTokens: 4096,
contextWindow: 204800,
maxTokens: 192000,
contextWindow: 192000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.3,
outputPrice: 1.2,
description:
"MiniMax M2 is a high-performance language model with 204.8K context window, optimized for long-context understanding and generation tasks.",
cacheReadsPrice: 0.15,
displayName: "MiniMax-M2",
},
"accounts/fireworks/models/minimax-m2p1": {
maxTokens: 200000,
contextWindow: 200000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.3,
outputPrice: 1.2,
cacheReadsPrice: 0.15,
displayName: "MiniMax-M2.1",
},
"accounts/fireworks/models/qwen3-235b-a22b": {
maxTokens: 16384,
contextWindow: 128000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.22,
outputPrice: 0.88,
cacheReadsPrice: 0.11,
displayName: "Qwen3 235B A22B",
},
"accounts/fireworks/models/qwen3-235b-a22b-instruct-2507": {
maxTokens: 32768,
maxTokens: 256000,
contextWindow: 256000,
supportsImages: false,
supportsPromptCache: false,
Expand All @@ -81,29 +120,30 @@ export const fireworksModels = {
inputPrice: 0.22,
outputPrice: 0.88,
description: "Latest Qwen3 thinking model, competitive against the best closed source models in Jul 2025.",
displayName: "Qwen3 235B A22B Instruct 2507",
},
"accounts/fireworks/models/qwen3-coder-480b-a35b-instruct": {
maxTokens: 32768,
contextWindow: 256000,
maxTokens: 256_000,
contextWindow: 256_000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.45,
outputPrice: 1.8,
description: "Qwen3's most agentic code model to date.",
cacheReadsPrice: 0.23,
displayName: "Qwen3 Coder 480B A35B Instruct",
},
"accounts/fireworks/models/deepseek-r1-0528": {
maxTokens: 20480,
maxTokens: 160000,
contextWindow: 160000,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 3,
outputPrice: 8,
description:
"05/28 updated checkpoint of Deepseek R1. Its overall performance is now approaching that of leading models, such as O3 and Gemini 2.5 Pro. Compared to the previous version, the upgraded model shows significant improvements in handling complex reasoning tasks, and this version also offers a reduced hallucination rate, enhanced support for function calling, and better experience for vibe coding. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.",
displayName: "DeepSeek R1 0528",
},
"accounts/fireworks/models/deepseek-v3": {
maxTokens: 16384,
Expand All @@ -114,79 +154,113 @@ export const fireworksModels = {
defaultToolProtocol: "native",
inputPrice: 0.9,
outputPrice: 0.9,
deprecated: true,
description:
"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.",
"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek.",
},
"accounts/fireworks/models/deepseek-v3-0324": {
maxTokens: 160000,
contextWindow: 160000,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
inputPrice: 0.9,
outputPrice: 0.9,
displayName: "DeepSeek V3 0324",
},
"accounts/fireworks/models/deepseek-v3p1": {
maxTokens: 16384,
contextWindow: 163840,
maxTokens: 160_000,
contextWindow: 160_000,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.56,
outputPrice: 1.68,
description:
"DeepSeek v3.1 is an improved version of the v3 model with enhanced performance, better reasoning capabilities, and improved code generation. This Mixture-of-Experts (MoE) model maintains the same 671B total parameters with 37B activated per token.",
displayName: "DeepSeek V3.1",
},
"accounts/fireworks/models/deepseek-v3p2": {
maxTokens: 160_000,
contextWindow: 160_000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.56,
outputPrice: 1.68,
cacheReadsPrice: 0.28,
displayName: "Deepseek v3.2",
},
"accounts/fireworks/models/glm-4p5": {
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 131_072,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.55,
outputPrice: 2.19,
description:
"Z.ai GLM-4.5 with 355B total parameters and 32B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.",
displayName: "GLM-4.5",
},
"accounts/fireworks/models/glm-4p5-air": {
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 131_072,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: false,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.55,
outputPrice: 2.19,
defaultToolProtocol: "native",
inputPrice: 0.22,
outputPrice: 0.88,
displayName: "GLM-4.5 Air",
description:
"Z.ai GLM-4.5-Air with 106B total parameters and 12B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.",
},
"accounts/fireworks/models/glm-4p6": {
maxTokens: 25344,
contextWindow: 198000,
maxTokens: 198_000,
contextWindow: 198_000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.55,
outputPrice: 2.19,
description:
"Z.ai GLM-4.6 is an advanced coding model with exceptional performance on complex programming tasks. Features improved reasoning capabilities and enhanced code generation quality, making it ideal for software development workflows.",
cacheReadsPrice: 0.28,
displayName: "GLM-4.6",
},
"accounts/fireworks/models/glm-4p7": {
maxTokens: 198_000,
contextWindow: 198_000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
inputPrice: 0.6,
outputPrice: 2.2,
cacheReadsPrice: 0.3,
displayName: "GLM-4.7",
},
"accounts/fireworks/models/gpt-oss-20b": {
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 128_000,
contextWindow: 128_000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.07,
outputPrice: 0.3,
defaultToolProtocol: "native",
inputPrice: 0.05,
outputPrice: 0.2,
cacheReadsPrice: 0.04,
displayName: "GPT-OSS 20B",
description:
"OpenAI gpt-oss-20b: Compact model for local/edge deployments. Optimized for low-latency and resource-constrained environments with chain-of-thought output, adjustable reasoning, and agentic workflows.",
},
"accounts/fireworks/models/gpt-oss-120b": {
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 128_000,
contextWindow: 128_000,
supportsImages: false,
supportsPromptCache: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
inputPrice: 0.15,
outputPrice: 0.6,
description:
"OpenAI gpt-oss-120b: Production-grade, general-purpose model that fits on a single H100 GPU. Features complex reasoning, configurable effort, full chain-of-thought transparency, and supports function calling, tool use, and structured outputs.",
cacheReadsPrice: 0.08,
displayName: "GPT-OSS 120B",
},
} as const satisfies Record<string, ModelInfo>
Loading
Loading