diff --git a/mycoder.config.js b/mycoder.config.js index 466ff52..8328eef 100644 --- a/mycoder.config.js +++ b/mycoder.config.js @@ -35,6 +35,9 @@ export default { //provider: 'openai', //model: 'qwen2.5-coder:14b', //baseUrl: 'http://192.168.2.66:80/v1-openai', + // Manual override for context window size (in tokens) + // Useful for models that don't have a known context window size + // contextWindow: 16384, maxTokens: 4096, temperature: 0.7, diff --git a/packages/agent/src/core/llm/providers/anthropic.ts b/packages/agent/src/core/llm/providers/anthropic.ts index 97a35d9..2de86fe 100644 --- a/packages/agent/src/core/llm/providers/anthropic.ts +++ b/packages/agent/src/core/llm/providers/anthropic.ts @@ -12,8 +12,18 @@ import { ProviderOptions, } from '../types.js'; -// Cache for model context window sizes -const modelContextWindowCache: Record = {}; +const ANTHROPIC_CONTEXT_WINDOWS: Record = { + 'claude-3-7-sonnet-20250219': 200000, + 'claude-3-7-sonnet-latest': 200000, + 'claude-3-5-sonnet-20241022': 200000, + 'claude-3-5-sonnet-latest': 200000, + 'claude-3-haiku-20240307': 200000, + 'claude-3-opus-20240229': 200000, + 'claude-3-sonnet-20240229': 200000, + 'claude-2.1': 100000, + 'claude-2.0': 100000, + 'claude-instant-1.2': 100000, +}; /** * Anthropic-specific options @@ -87,7 +97,7 @@ function addCacheControlToMessages( function tokenUsageFromMessage( message: Anthropic.Message, model: string, - contextWindow?: number, + contextWindow: number | undefined, ) { const usage = new TokenUsage(); usage.input = message.usage.input_tokens; @@ -97,19 +107,10 @@ function tokenUsageFromMessage( const totalTokens = usage.input + usage.output; - // Use provided context window or fallback to cached value - const maxTokens = contextWindow || modelContextWindowCache[model]; - - if (!maxTokens) { - throw new Error( - `Context window size not available for model: ${model}. 
Make sure to initialize the model properly.`, - ); - } - return { usage, totalTokens, - maxTokens, + contextWindow, }; } @@ -120,13 +121,14 @@ export class AnthropicProvider implements LLMProvider { name: string = 'anthropic'; provider: string = 'anthropic.messages'; model: string; + options: AnthropicOptions; private client: Anthropic; private apiKey: string; private baseUrl?: string; - private modelContextWindow?: number; constructor(model: string, options: AnthropicOptions = {}) { this.model = model; + this.options = options; this.apiKey = options.apiKey ?? ''; this.baseUrl = options.baseUrl; @@ -139,79 +141,18 @@ export class AnthropicProvider implements LLMProvider { apiKey: this.apiKey, ...(this.baseUrl && { baseURL: this.baseUrl }), }); - - // Initialize model context window detection - // This is async but we don't need to await it here - // If it fails, an error will be thrown when the model is used - this.initializeModelContextWindow().catch((error) => { - console.error( - `Failed to initialize model context window: ${error.message}. The model will not work until context window information is available.`, - ); - }); - } - - /** - * Fetches the model context window size from the Anthropic API - * - * @returns The context window size - * @throws Error if the context window size cannot be determined - */ - private async initializeModelContextWindow(): Promise { - try { - const response = await this.client.models.list(); - - if (!response?.data || !Array.isArray(response.data)) { - throw new Error( - `Invalid response from models.list() for ${this.model}`, - ); - } - - // Try to find the exact model - let model = response.data.find((m) => m.id === this.model); - - // If not found, try to find a model that starts with the same name - // This helps with model aliases like 'claude-3-sonnet-latest' - if (!model) { - // Split by '-latest' or '-20' to get the base model name - const parts = this.model.split('-latest'); - const modelPrefix = - parts.length > 1 ? 
parts[0] : this.model.split('-20')[0]; - - if (modelPrefix) { - model = response.data.find((m) => m.id.startsWith(modelPrefix)); - - if (model) { - console.info( - `Model ${this.model} not found, using ${model.id} for context window size`, - ); - } - } - } - - // Using type assertion to access context_window property - // The Anthropic API returns context_window but it may not be in the TypeScript definitions - if (model && 'context_window' in model) { - const contextWindow = (model as any).context_window; - this.modelContextWindow = contextWindow; - // Cache the result for future use - modelContextWindowCache[this.model] = contextWindow; - return contextWindow; - } else { - throw new Error( - `No context window information found for model: ${this.model}`, - ); - } - } catch (error) { - throw new Error( - `Failed to determine context window size for model ${this.model}: ${(error as Error).message}`, - ); - } } /** * Generate text using Anthropic API */ async generateText(options: GenerateOptions): Promise { + // Use configuration contextWindow if provided, otherwise use model-specific value + let modelContextWindow = ANTHROPIC_CONTEXT_WINDOWS[this.model]; + if (!modelContextWindow && this.options.contextWindow) { + modelContextWindow = this.options.contextWindow; + } + const { messages, functions, temperature = 0.7, maxTokens, topP } = options; // Extract system message @@ -227,63 +168,56 @@ export class AnthropicProvider implements LLMProvider { })), ); - try { - const requestOptions: Anthropic.MessageCreateParams = { - model: this.model, - messages: addCacheControlToMessages(formattedMessages), - temperature, - max_tokens: maxTokens || 1024, - system: systemMessage?.content - ? 
[ - { - type: 'text', - text: systemMessage?.content, - cache_control: { type: 'ephemeral' }, - }, - ] - : undefined, - top_p: topP, - tools, - stream: false, - }; + const requestOptions: Anthropic.MessageCreateParams = { + model: this.model, + messages: addCacheControlToMessages(formattedMessages), + temperature, + max_tokens: maxTokens || 1024, + system: systemMessage?.content + ? [ + { + type: 'text', + text: systemMessage?.content, + cache_control: { type: 'ephemeral' }, + }, + ] + : undefined, + top_p: topP, + tools, + stream: false, + }; - const response = await this.client.messages.create(requestOptions); + const response = await this.client.messages.create(requestOptions); - // Extract content and tool calls - const content = - response.content.find((c) => c.type === 'text')?.text || ''; - const toolCalls = response.content - .filter((c) => { - const contentType = c.type; - return contentType === 'tool_use'; - }) - .map((c) => { - const toolUse = c as Anthropic.Messages.ToolUseBlock; - return { - id: toolUse.id, - name: toolUse.name, - content: JSON.stringify(toolUse.input), - }; - }); + // Extract content and tool calls + const content = response.content.find((c) => c.type === 'text')?.text || ''; + const toolCalls = response.content + .filter((c) => { + const contentType = c.type; + return contentType === 'tool_use'; + }) + .map((c) => { + const toolUse = c as Anthropic.Messages.ToolUseBlock; + return { + id: toolUse.id, + name: toolUse.name, + content: JSON.stringify(toolUse.input), + }; + }); - const tokenInfo = tokenUsageFromMessage( - response, - this.model, - this.modelContextWindow, - ); + const tokenInfo = tokenUsageFromMessage( + response, + this.model, + modelContextWindow, + ); - return { - text: content, - toolCalls: toolCalls, - tokenUsage: tokenInfo.usage, - totalTokens: tokenInfo.totalTokens, - maxTokens: tokenInfo.maxTokens, - }; - } catch (error) { - throw new Error( - `Error calling Anthropic API: ${(error as Error).message}`, - ); - } + 
return { + text: content, + toolCalls: toolCalls, + tokenUsage: tokenInfo.usage, + totalTokens: tokenInfo.totalTokens, + contextWindow: tokenInfo.contextWindow, + }; } /** diff --git a/packages/agent/src/core/llm/providers/ollama.ts b/packages/agent/src/core/llm/providers/ollama.ts index 0edfebc..0587bd7 100644 --- a/packages/agent/src/core/llm/providers/ollama.ts +++ b/packages/agent/src/core/llm/providers/ollama.ts @@ -24,8 +24,7 @@ import { // Define model context window sizes for Ollama models // These are approximate and may vary based on specific model configurations -const OLLAMA_MODEL_LIMITS: Record = { - default: 4096, +const OLLAMA_CONTEXT_WINDOWS: Record = { llama2: 4096, 'llama2-uncensored': 4096, 'llama2:13b': 4096, @@ -53,10 +52,12 @@ export class OllamaProvider implements LLMProvider { name: string = 'ollama'; provider: string = 'ollama.chat'; model: string; + options: OllamaOptions; private client: Ollama; constructor(model: string, options: OllamaOptions = {}) { this.model = model; + this.options = options; const baseUrl = options.baseUrl || process.env.OLLAMA_BASE_URL || @@ -136,19 +137,26 @@ export class OllamaProvider implements LLMProvider { const totalTokens = tokenUsage.input + tokenUsage.output; // Extract the base model name without specific parameters - const baseModelName = this.model.split(':')[0]; // Check if model exists in limits, otherwise use base model or default - const modelMaxTokens = - OLLAMA_MODEL_LIMITS[this.model] || - (baseModelName ? 
OLLAMA_MODEL_LIMITS[baseModelName] : undefined) || - 4096; // Default fallback + let contextWindow = OLLAMA_CONTEXT_WINDOWS[this.model]; + if (!contextWindow) { + const baseModelName = this.model.split(':')[0]; + if (baseModelName) { + contextWindow = OLLAMA_CONTEXT_WINDOWS[baseModelName]; + } + + // If still no context window, use the one from configuration if available + if (!contextWindow && this.options.contextWindow) { + contextWindow = this.options.contextWindow; + } + } return { text: content, toolCalls: toolCalls, tokenUsage: tokenUsage, totalTokens, - maxTokens: modelMaxTokens, + contextWindow, }; } diff --git a/packages/agent/src/core/llm/providers/openai.ts b/packages/agent/src/core/llm/providers/openai.ts index 4f84fb2..9241990 100644 --- a/packages/agent/src/core/llm/providers/openai.ts +++ b/packages/agent/src/core/llm/providers/openai.ts @@ -20,8 +20,7 @@ import type { } from 'openai/resources/chat'; // Define model context window sizes for OpenAI models -const OPENAI_MODEL_LIMITS: Record = { - default: 128000, +const OPENAI_CONTEXT_WINDOWS: Record = { 'o3-mini': 200000, 'o1-pro': 200000, o1: 200000, @@ -52,6 +51,7 @@ export class OpenAIProvider implements LLMProvider { name: string = 'openai'; provider: string = 'openai.chat'; model: string; + options: OpenAIOptions; private client: OpenAI; private apiKey: string; private baseUrl?: string; @@ -59,6 +59,7 @@ constructor(model: string, options: OpenAIOptions = {}) { this.model = model; + this.options = options; this.apiKey = options.apiKey ?? 
''; this.baseUrl = options.baseUrl; @@ -136,14 +137,19 @@ export class OpenAIProvider implements LLMProvider { // Calculate total tokens and get max tokens for the model const totalTokens = tokenUsage.input + tokenUsage.output; - const modelMaxTokens = OPENAI_MODEL_LIMITS[this.model] || 8192; // Default fallback + + // Use configuration contextWindow if provided, otherwise use model-specific value + let contextWindow = OPENAI_CONTEXT_WINDOWS[this.model]; + if (!contextWindow && this.options.contextWindow) { + contextWindow = this.options.contextWindow; + } return { text: content, toolCalls, tokenUsage, totalTokens, - maxTokens: modelMaxTokens, + contextWindow, }; } catch (error) { throw new Error(`Error calling OpenAI API: ${(error as Error).message}`); diff --git a/packages/agent/src/core/llm/types.ts b/packages/agent/src/core/llm/types.ts index 50e5c95..9f8b697 100644 --- a/packages/agent/src/core/llm/types.ts +++ b/packages/agent/src/core/llm/types.ts @@ -82,7 +82,7 @@ export interface LLMResponse { tokenUsage: TokenUsage; // Add new fields for context window tracking totalTokens?: number; // Total tokens used in this request - maxTokens?: number; // Maximum allowed tokens for this model + contextWindow?: number; // Maximum allowed tokens for this model } /** @@ -107,5 +107,6 @@ export interface ProviderOptions { apiKey?: string; baseUrl?: string; organization?: string; + contextWindow?: number; // Manual override for context window size [key: string]: any; // Allow for provider-specific options } diff --git a/packages/agent/src/core/toolAgent/__tests__/statusUpdates.test.ts b/packages/agent/src/core/toolAgent/__tests__/statusUpdates.test.ts index 997d73f..bfe1702 100644 --- a/packages/agent/src/core/toolAgent/__tests__/statusUpdates.test.ts +++ b/packages/agent/src/core/toolAgent/__tests__/statusUpdates.test.ts @@ -14,7 +14,7 @@ describe('Status Updates', () => { it('should generate a status update with correct token usage information', () => { // Setup const 
totalTokens = 50000; - const maxTokens = 100000; + const contextWindow = 100000; const tokenTracker = new TokenTracker('test'); // Mock the context @@ -33,7 +33,7 @@ describe('Status Updates', () => { // Execute const statusMessage = generateStatusUpdate( totalTokens, - maxTokens, + contextWindow, tokenTracker, context, ); @@ -58,7 +58,7 @@ describe('Status Updates', () => { it('should include active agents, shells, and sessions', () => { // Setup const totalTokens = 70000; - const maxTokens = 100000; + const contextWindow = 100000; const tokenTracker = new TokenTracker('test'); // Mock the context with active agents, shells, and sessions @@ -92,7 +92,7 @@ describe('Status Updates', () => { // Execute const statusMessage = generateStatusUpdate( totalTokens, - maxTokens, + contextWindow, tokenTracker, context, ); diff --git a/packages/agent/src/core/toolAgent/statusUpdates.ts b/packages/agent/src/core/toolAgent/statusUpdates.ts index e773ade..26debb0 100644 --- a/packages/agent/src/core/toolAgent/statusUpdates.ts +++ b/packages/agent/src/core/toolAgent/statusUpdates.ts @@ -14,12 +14,14 @@ import { ToolContext } from '../types.js'; */ export function generateStatusUpdate( totalTokens: number, - maxTokens: number, + contextWindow: number | undefined, tokenTracker: TokenTracker, context: ToolContext, ): Message { // Calculate token usage percentage - const usagePercentage = Math.round((totalTokens / maxTokens) * 100); + const usagePercentage = contextWindow + ? Math.round((totalTokens / contextWindow) * 100) + : undefined; // Get active sub-agents const activeAgents = context.agentTracker ? getActiveAgents(context) : []; @@ -35,7 +37,9 @@ export function generateStatusUpdate( // Format the status message const statusContent = [ `--- STATUS UPDATE ---`, - `Token Usage: ${formatNumber(totalTokens)}/${formatNumber(maxTokens)} (${usagePercentage}%)`, + contextWindow !== undefined + ? 
`Token Usage: ${formatNumber(totalTokens)}/${formatNumber(contextWindow)} (${usagePercentage}%)` + : '', `Cost So Far: ${tokenTracker.getTotalCost()}`, ``, `Active Sub-Agents: ${activeAgents.length}`, @@ -47,9 +51,11 @@ `Active Browser Sessions: ${activeSessions.length}`, ...activeSessions.map((s) => `- ${s.id}: ${s.description}`), ``, - usagePercentage >= 50 - ? `Your token usage is high (${usagePercentage}%). It is recommended to use the 'compactHistory' tool now to reduce context size.` - : `If token usage gets high (>50%), consider using the 'compactHistory' tool to reduce context size.`, + usagePercentage !== undefined + ? usagePercentage >= 50 + ? `Your token usage is high (${usagePercentage}%). It is recommended to use the 'compactHistory' tool now to reduce context size.` + : `If token usage gets high (>50%), consider using the 'compactHistory' tool to reduce context size.` + : '', `--- END STATUS ---`, ].join('\n'); diff --git a/packages/agent/src/core/toolAgent/toolAgentCore.ts b/packages/agent/src/core/toolAgent/toolAgentCore.ts index a7e09fb..a3d568b 100644 --- a/packages/agent/src/core/toolAgent/toolAgentCore.ts +++ b/packages/agent/src/core/toolAgent/toolAgentCore.ts @@ -151,34 +151,35 @@ export const toolAgent = async ( maxTokens: localContext.maxTokens, }; - const { text, toolCalls, tokenUsage, totalTokens, maxTokens } = + const { text, toolCalls, tokenUsage, totalTokens, contextWindow } = await generateText(provider, generateOptions); tokenTracker.tokenUsage.add(tokenUsage); // Send status updates based on frequency and token usage threshold statusUpdateCounter++; - if (totalTokens && maxTokens) { - const usagePercentage = Math.round((totalTokens / maxTokens) * 100); - const shouldSendByFrequency = - statusUpdateCounter >= STATUS_UPDATE_FREQUENCY; - const shouldSendByUsage = usagePercentage >= TOKEN_USAGE_THRESHOLD; + if (totalTokens) { + let statusTriggered = false; + statusTriggered ||= statusUpdateCounter >= 
STATUS_UPDATE_FREQUENCY; + + if (contextWindow) { + const usagePercentage = Math.round((totalTokens / contextWindow) * 100); + statusTriggered ||= usagePercentage >= TOKEN_USAGE_THRESHOLD; + } // Send status update if either condition is met - if (shouldSendByFrequency || shouldSendByUsage) { + if (statusTriggered) { statusUpdateCounter = 0; const statusMessage = generateStatusUpdate( totalTokens, - maxTokens, + contextWindow, tokenTracker, localContext, ); messages.push(statusMessage); - logger.debug( - `Sent status update to agent (token usage: ${usagePercentage}%)`, - ); + logger.debug(`Sent status update to agent`); } } diff --git a/packages/agent/src/core/types.ts b/packages/agent/src/core/types.ts index e11f4f8..c231e68 100644 --- a/packages/agent/src/core/types.ts +++ b/packages/agent/src/core/types.ts @@ -31,6 +31,7 @@ export type ToolContext = { apiKey?: string; maxTokens: number; temperature: number; + contextWindow?: number; // Manual override for context window size agentTracker: AgentTracker; shellTracker: ShellTracker; browserTracker: SessionTracker; diff --git a/packages/cli/README.md b/packages/cli/README.md index e55a7e5..40217c8 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -125,6 +125,9 @@ export default { // Model settings provider: 'anthropic', model: 'claude-3-7-sonnet-20250219', + // Manual override for context window size (in tokens) + // Useful for models that don't have a known context window size + // contextWindow: 16384, maxTokens: 4096, temperature: 0.7, diff --git a/packages/cli/src/commands/$default.ts b/packages/cli/src/commands/$default.ts index 93acf3e..2b9cfe0 100644 --- a/packages/cli/src/commands/$default.ts +++ b/packages/cli/src/commands/$default.ts @@ -197,6 +197,7 @@ export async function executePrompt( model: config.model, maxTokens: config.maxTokens, temperature: config.temperature, + contextWindow: config.contextWindow, shellTracker: new ShellTracker('mainAgent'), agentTracker: new 
AgentTracker('mainAgent'), browserTracker: new SessionTracker('mainAgent'), diff --git a/packages/cli/src/options.ts b/packages/cli/src/options.ts index 182416a..e0627c4 100644 --- a/packages/cli/src/options.ts +++ b/packages/cli/src/options.ts @@ -9,6 +9,7 @@ export type SharedOptions = { readonly model?: string; readonly maxTokens?: number; readonly temperature?: number; + readonly contextWindow?: number; readonly profile?: boolean; readonly userPrompt?: boolean; readonly upgradeCheck?: boolean; @@ -43,6 +44,10 @@ export const sharedOptions = { type: 'number', description: 'Temperature for text generation (0.0-1.0)', } as const, + contextWindow: { + type: 'number', + description: 'Manual override for context window size in tokens', + } as const, interactive: { type: 'boolean', alias: 'i', diff --git a/packages/cli/src/settings/config.ts b/packages/cli/src/settings/config.ts index 07a3d0a..f6fbd10 100644 --- a/packages/cli/src/settings/config.ts +++ b/packages/cli/src/settings/config.ts @@ -12,6 +12,7 @@ export type Config = { model?: string; maxTokens: number; temperature: number; + contextWindow?: number; // Manual override for context window size customPrompt: string | string[]; profile: boolean; userPrompt: boolean; @@ -90,6 +91,7 @@ export const getConfigFromArgv = (argv: ArgumentsCamelCase) => { model: argv.model, maxTokens: argv.maxTokens, temperature: argv.temperature, + contextWindow: argv.contextWindow, profile: argv.profile, userSession: argv.userSession, headless: argv.headless, diff --git a/packages/docs/docs/providers/ollama.md b/packages/docs/docs/providers/ollama.md index 1425890..2b52bac 100644 --- a/packages/docs/docs/providers/ollama.md +++ b/packages/docs/docs/providers/ollama.md @@ -64,6 +64,11 @@ export default { // Optional: Custom base URL (defaults to http://localhost:11434) // baseUrl: 'http://localhost:11434', + // Manual override for context window size (in tokens) + // This is particularly useful for Ollama models since MyCoder may not 
know + // the context window size for all possible models + contextWindow: 32768, // Example for a 32k context window model + // Other MyCoder settings maxTokens: 4096, temperature: 0.7, @@ -81,6 +86,28 @@ Confirmed models with tool calling support: If using other models, verify their tool calling capabilities before attempting to use them with MyCoder. +## Context Window Configuration + +Ollama supports a wide variety of models, and MyCoder may not have pre-configured context window sizes for all of them. Since the context window size is used to: + +1. Track token usage percentage +2. Determine when to trigger automatic history compaction + +It's recommended to manually set the `contextWindow` configuration option when using Ollama models. This ensures proper token tracking and timely history compaction to prevent context overflow. + +For example, if using a model with a 32k context window: + +```javascript +export default { + provider: 'ollama', + model: 'your-model-name', + contextWindow: 32768, // 32k context window + // other settings... +}; +``` + +You can find the context window size for your specific model in the model's documentation or by checking the Ollama model card. 
+ ## Hardware Requirements Running large language models locally requires significant hardware resources: diff --git a/packages/docs/docs/usage/configuration.md b/packages/docs/docs/usage/configuration.md index 4fb3ba8..79cf1d5 100644 --- a/packages/docs/docs/usage/configuration.md +++ b/packages/docs/docs/usage/configuration.md @@ -23,6 +23,8 @@ export default { // Model settings provider: 'anthropic', model: 'claude-3-7-sonnet-20250219', + // Manual override for context window size (in tokens) + // contextWindow: 16384, maxTokens: 4096, temperature: 0.7, @@ -42,10 +44,11 @@ MyCoder will search for configuration in the following places (in order of prece ### AI Model Selection -| Option | Description | Possible Values | Default | -| ---------- | ------------------------- | ------------------------------------------------- | ---------------------------- | -| `provider` | The AI provider to use | `anthropic`, `openai`, `mistral`, `xai`, `ollama` | `anthropic` | -| `model` | The specific model to use | Depends on provider | `claude-3-7-sonnet-20250219` | +| Option | Description | Possible Values | Default | +| --------------- | ---------------------------------- | ------------------------------------------------- | ---------------------------- | +| `provider` | The AI provider to use | `anthropic`, `openai`, `mistral`, `xai`, `ollama` | `anthropic` | +| `model` | The specific model to use | Depends on provider | `claude-3-7-sonnet-20250219` | +| `contextWindow` | Manual override for context window | Any positive number | Model-specific | Example: @@ -55,6 +58,8 @@ export default { // Use OpenAI as the provider with GPT-4o model provider: 'openai', model: 'gpt-4o', + // Manually set context window size if needed (e.g., for custom or new models) + // contextWindow: 128000, }; ```