Add configurable context window size #364

Merged: 4 commits, Mar 24, 2025
3 changes: 3 additions & 0 deletions mycoder.config.js
@@ -35,6 +35,9 @@ export default {
//provider: 'openai',
//model: 'qwen2.5-coder:14b',
//baseUrl: 'http://192.168.2.66:80/v1-openai',
// Manual override for context window size (in tokens)
// Useful for models that don't have a known context window size
// contextWindow: 16384,
maxTokens: 4096,
temperature: 0.7,

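For reference, a minimal sketch of what the override might look like in a user's mycoder.config.js when pointing at a self-hosted, OpenAI-compatible endpoint whose model is not in any built-in table; the provider, model, baseUrl, and contextWindow values here are illustrative, not taken from this change.

export default {
  provider: 'openai',
  model: 'qwen2.5-coder:14b',
  baseUrl: 'http://localhost:8080/v1-openai', // hypothetical local endpoint
  contextWindow: 32768, // manual override (in tokens) for a model with no known default
  maxTokens: 4096,
  temperature: 0.7,
};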
202 changes: 68 additions & 134 deletions packages/agent/src/core/llm/providers/anthropic.ts
@@ -12,8 +12,18 @@ import {
ProviderOptions,
} from '../types.js';

// Cache for model context window sizes
const modelContextWindowCache: Record<string, number> = {};
const ANTHROPIC_CONTEXT_WINDOWS: Record<string, number> = {
'claude-3-7-sonnet-20250219': 200000,
'claude-3-7-sonnet-latest': 200000,
'claude-3-5-sonnet-20241022': 200000,
'claude-3-5-sonnet-latest': 200000,
'claude-3-haiku-20240307': 200000,
'claude-3-opus-20240229': 200000,
'claude-3-sonnet-20240229': 200000,
'claude-2.1': 100000,
'claude-2.0': 100000,
'claude-instant-1.2': 100000,
};

/**
* Anthropic-specific options
@@ -87,7 +97,7 @@ function addCacheControlToMessages(
function tokenUsageFromMessage(
message: Anthropic.Message,
model: string,
contextWindow?: number,
contextWindow: number | undefined,
) {
const usage = new TokenUsage();
usage.input = message.usage.input_tokens;
@@ -97,19 +107,10 @@ function tokenUsageFromMessage(

const totalTokens = usage.input + usage.output;

// Use provided context window or fallback to cached value
const maxTokens = contextWindow || modelContextWindowCache[model];

if (!maxTokens) {
throw new Error(
`Context window size not available for model: ${model}. Make sure to initialize the model properly.`,
);
}

return {
usage,
totalTokens,
maxTokens,
contextWindow,
};
}

@@ -120,13 +121,14 @@ export class AnthropicProvider implements LLMProvider {
name: string = 'anthropic';
provider: string = 'anthropic.messages';
model: string;
options: AnthropicOptions;
private client: Anthropic;
private apiKey: string;
private baseUrl?: string;
private modelContextWindow?: number;

constructor(model: string, options: AnthropicOptions = {}) {
this.model = model;
this.options = options;
this.apiKey = options.apiKey ?? '';
this.baseUrl = options.baseUrl;

@@ -139,79 +141,18 @@ export class AnthropicProvider implements LLMProvider {
apiKey: this.apiKey,
...(this.baseUrl && { baseURL: this.baseUrl }),
});

// Initialize model context window detection
// This is async but we don't need to await it here
// If it fails, an error will be thrown when the model is used
this.initializeModelContextWindow().catch((error) => {
console.error(
`Failed to initialize model context window: ${error.message}. The model will not work until context window information is available.`,
);
});
}

/**
* Fetches the model context window size from the Anthropic API
*
* @returns The context window size
* @throws Error if the context window size cannot be determined
*/
private async initializeModelContextWindow(): Promise<number> {
try {
const response = await this.client.models.list();

if (!response?.data || !Array.isArray(response.data)) {
throw new Error(
`Invalid response from models.list() for ${this.model}`,
);
}

// Try to find the exact model
let model = response.data.find((m) => m.id === this.model);

// If not found, try to find a model that starts with the same name
// This helps with model aliases like 'claude-3-sonnet-latest'
if (!model) {
// Split by '-latest' or '-20' to get the base model name
const parts = this.model.split('-latest');
const modelPrefix =
parts.length > 1 ? parts[0] : this.model.split('-20')[0];

if (modelPrefix) {
model = response.data.find((m) => m.id.startsWith(modelPrefix));

if (model) {
console.info(
`Model ${this.model} not found, using ${model.id} for context window size`,
);
}
}
}

// Using type assertion to access context_window property
// The Anthropic API returns context_window but it may not be in the TypeScript definitions
if (model && 'context_window' in model) {
const contextWindow = (model as any).context_window;
this.modelContextWindow = contextWindow;
// Cache the result for future use
modelContextWindowCache[this.model] = contextWindow;
return contextWindow;
} else {
throw new Error(
`No context window information found for model: ${this.model}`,
);
}
} catch (error) {
throw new Error(
`Failed to determine context window size for model ${this.model}: ${(error as Error).message}`,
);
}
}

/**
* Generate text using Anthropic API
*/
async generateText(options: GenerateOptions): Promise<LLMResponse> {
// Use the model-specific context window if known, otherwise fall back to the configured override
let modelContextWindow = ANTHROPIC_CONTEXT_WINDOWS[this.model];
if (!modelContextWindow && this.options.contextWindow) {
modelContextWindow = this.options.contextWindow;
}

const { messages, functions, temperature = 0.7, maxTokens, topP } = options;

// Extract system message
@@ -227,63 +168,56 @@ export class AnthropicProvider implements LLMProvider {
})),
);

try {
const requestOptions: Anthropic.MessageCreateParams = {
model: this.model,
messages: addCacheControlToMessages(formattedMessages),
temperature,
max_tokens: maxTokens || 1024,
system: systemMessage?.content
? [
{
type: 'text',
text: systemMessage?.content,
cache_control: { type: 'ephemeral' },
},
]
: undefined,
top_p: topP,
tools,
stream: false,
};
const requestOptions: Anthropic.MessageCreateParams = {
model: this.model,
messages: addCacheControlToMessages(formattedMessages),
temperature,
max_tokens: maxTokens || 1024,
system: systemMessage?.content
? [
{
type: 'text',
text: systemMessage?.content,
cache_control: { type: 'ephemeral' },
},
]
: undefined,
top_p: topP,
tools,
stream: false,
};

const response = await this.client.messages.create(requestOptions);
const response = await this.client.messages.create(requestOptions);

// Extract content and tool calls
const content =
response.content.find((c) => c.type === 'text')?.text || '';
const toolCalls = response.content
.filter((c) => {
const contentType = c.type;
return contentType === 'tool_use';
})
.map((c) => {
const toolUse = c as Anthropic.Messages.ToolUseBlock;
return {
id: toolUse.id,
name: toolUse.name,
content: JSON.stringify(toolUse.input),
};
});
// Extract content and tool calls
const content = response.content.find((c) => c.type === 'text')?.text || '';
const toolCalls = response.content
.filter((c) => {
const contentType = c.type;
return contentType === 'tool_use';
})
.map((c) => {
const toolUse = c as Anthropic.Messages.ToolUseBlock;
return {
id: toolUse.id,
name: toolUse.name,
content: JSON.stringify(toolUse.input),
};
});

const tokenInfo = tokenUsageFromMessage(
response,
this.model,
this.modelContextWindow,
);
const tokenInfo = tokenUsageFromMessage(
response,
this.model,
modelContextWindow,
);

return {
text: content,
toolCalls: toolCalls,
tokenUsage: tokenInfo.usage,
totalTokens: tokenInfo.totalTokens,
maxTokens: tokenInfo.maxTokens,
};
} catch (error) {
throw new Error(
`Error calling Anthropic API: ${(error as Error).message}`,
);
}
return {
text: content,
toolCalls: toolCalls,
tokenUsage: tokenInfo.usage,
totalTokens: tokenInfo.totalTokens,
contextWindow: tokenInfo.contextWindow,
};
}

/**
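All three providers now resolve the context window in the same order: the static per-model table is checked first, and the user-configured contextWindow is only a fallback. A standalone sketch of that pattern, assuming the ANTHROPIC_CONTEXT_WINDOWS table above (the helper itself does not exist in this diff):

function resolveContextWindow(
  knownWindows: Record<string, number>,
  model: string,
  configured?: number,
): number | undefined {
  // Prefer the known per-model value, then the configured override,
  // otherwise leave it undefined so callers can handle the gap.
  return knownWindows[model] ?? configured;
}

// resolveContextWindow(ANTHROPIC_CONTEXT_WINDOWS, 'claude-2.1', 16384) === 100000
// resolveContextWindow(ANTHROPIC_CONTEXT_WINDOWS, 'my-fine-tune', 16384) === 16384 (hypothetical model id)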
24 changes: 16 additions & 8 deletions packages/agent/src/core/llm/providers/ollama.ts
@@ -24,8 +24,7 @@ import {

// Define model context window sizes for Ollama models
// These are approximate and may vary based on specific model configurations
const OLLAMA_MODEL_LIMITS: Record<string, number> = {
default: 4096,
const OLLAMA_CONTEXT_WINDOWS: Record<string, number> = {
llama2: 4096,
'llama2-uncensored': 4096,
'llama2:13b': 4096,
@@ -53,10 +52,12 @@ export class OllamaProvider implements LLMProvider {
name: string = 'ollama';
provider: string = 'ollama.chat';
model: string;
options: OllamaOptions;
private client: Ollama;

constructor(model: string, options: OllamaOptions = {}) {
this.model = model;
this.options = options;
const baseUrl =
options.baseUrl ||
process.env.OLLAMA_BASE_URL ||
@@ -136,19 +137,26 @@ export class OllamaProvider implements LLMProvider {
const totalTokens = tokenUsage.input + tokenUsage.output;

// Extract the base model name without specific parameters
const baseModelName = this.model.split(':')[0];
// Check if model exists in limits, otherwise use base model or default
const modelMaxTokens =
OLLAMA_MODEL_LIMITS[this.model] ||
(baseModelName ? OLLAMA_MODEL_LIMITS[baseModelName] : undefined) ||
4096; // Default fallback
let contextWindow = OLLAMA_CONTEXT_WINDOWS[this.model];
if (!contextWindow) {
const baseModelName = this.model.split(':')[0];
if (baseModelName) {
contextWindow = OLLAMA_CONTEXT_WINDOWS[baseModelName];
}

// If still no context window, use the one from configuration if available
if (!contextWindow && this.options.contextWindow) {
contextWindow = this.options.contextWindow;
}
}

return {
text: content,
toolCalls: toolCalls,
tokenUsage: tokenUsage,
totalTokens,
maxTokens: modelMaxTokens,
contextWindow,
};
}

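As the hunk above shows, Ollama adds one extra step: if the exact tag (e.g. 'llama2:13b') is missing from the table, the base name before the ':' is tried before the configured override. A hedged sketch of that chain; the trimmed table and model names are illustrative:

const KNOWN_WINDOWS: Record<string, number> = { llama2: 4096 }; // subset of OLLAMA_CONTEXT_WINDOWS above

function ollamaContextWindow(model: string, configured?: number): number | undefined {
  const exact = KNOWN_WINDOWS[model];
  if (exact) return exact;
  const base = model.split(':')[0]; // e.g. 'llama2:70b-custom' -> 'llama2'
  const byBase = base ? KNOWN_WINDOWS[base] : undefined;
  return byBase ?? configured;
}

// ollamaContextWindow('llama2:70b-custom', 8192) === 4096  (matched by base name)
// ollamaContextWindow('brand-new-model', 8192) === 8192    (falls back to the configured override)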
14 changes: 10 additions & 4 deletions packages/agent/src/core/llm/providers/openai.ts
@@ -20,8 +20,7 @@ import type {
} from 'openai/resources/chat';

// Define model context window sizes for OpenAI models
const OPENAI_MODEL_LIMITS: Record<string, number> = {
default: 128000,
const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
'o3-mini': 200000,
'o1-pro': 200000,
o1: 200000,
@@ -52,13 +51,15 @@ export class OpenAIProvider implements LLMProvider {
name: string = 'openai';
provider: string = 'openai.chat';
model: string;
options: OpenAIOptions;
private client: OpenAI;
private apiKey: string;
private baseUrl?: string;
private organization?: string;

constructor(model: string, options: OpenAIOptions = {}) {
this.model = model;
this.options = options;
this.apiKey = options.apiKey ?? '';
this.baseUrl = options.baseUrl;

@@ -136,14 +137,19 @@ export class OpenAIProvider implements LLMProvider {

// Calculate total tokens and get max tokens for the model
const totalTokens = tokenUsage.input + tokenUsage.output;
const modelMaxTokens = OPENAI_MODEL_LIMITS[this.model] || 8192; // Default fallback

// Use the model-specific context window if known, otherwise fall back to the configured override
let contextWindow = OPENAI_CONTEXT_WINDOWS[this.model];
if (!contextWindow && this.options.contextWindow) {
contextWindow = this.options.contextWindow;
}

return {
text: content,
toolCalls,
tokenUsage,
totalTokens,
maxTokens: modelMaxTokens,
contextWindow,
};
} catch (error) {
throw new Error(`Error calling OpenAI API: ${(error as Error).message}`);
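The override matters most for the OpenAI provider when it is aimed at an OpenAI-compatible server via baseUrl that serves a model the table does not know. A hypothetical options object, not taken from this diff:

const provider = new OpenAIProvider('qwen2.5-coder:14b', {
  baseUrl: 'http://localhost:8080/v1-openai', // illustrative self-hosted endpoint
  apiKey: 'not-needed-locally',               // illustrative
  contextWindow: 32768,                       // manual override; no table entry exists for this model
});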
3 changes: 2 additions & 1 deletion packages/agent/src/core/llm/types.ts
@@ -82,7 +82,7 @@
tokenUsage: TokenUsage;
// Add new fields for context window tracking
totalTokens?: number; // Total tokens used in this request
maxTokens?: number; // Maximum allowed tokens for this model
contextWindow?: number; // Maximum allowed tokens for this model
}

/**
@@ -107,5 +107,6 @@
apiKey?: string;
baseUrl?: string;
organization?: string;
contextWindow?: number; // Manual override for context window size
[key: string]: any; // Allow for provider-specific options

// Check warning (GitHub Actions / ci) on line 111 in packages/agent/src/core/llm/types.ts: Unexpected any. Specify a different type
}
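Because contextWindow is optional on LLMResponse, a caller that wants to report utilization has to guard against it being absent. An illustrative helper, not part of this PR:

function describeContextUsage(totalTokens?: number, contextWindow?: number): string {
  if (totalTokens === undefined || contextWindow === undefined) {
    return 'context window unknown';
  }
  const pct = ((totalTokens / contextWindow) * 100).toFixed(1);
  return `${totalTokens}/${contextWindow} tokens (${pct}%)`;
}

// describeContextUsage(50000, 200000) === '50000/200000 tokens (25.0%)'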