From 48780e03b53293c2505e5e4f0dcd9e9151e2bbba Mon Sep 17 00:00:00 2001
From: Nathan Hedge <23344786+10Nates@users.noreply.github.com>
Date: Sun, 6 Oct 2024 20:41:23 -0500
Subject: [PATCH] Improved error handling, max context window no longer
 hardcoded, aligned defaults with current advancements in LLMs

---
 README.md        |  3 +-
 package.json     | 21 ++++++-------
 src/extension.ts | 80 ++++++++++++++++++++++++++----------------------
 3 files changed, 56 insertions(+), 48 deletions(-)

diff --git a/README.md b/README.md
index db9a64d..3292803 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,8 @@ A simple to use Ollama autocompletion engine with options exposed and streaming
 
 - Ollama must be serving on the API endpoint applied in settings
   - For installation of Ollama, visit [ollama.ai](https://ollama.ai)
-- Ollama must have the model applied in settings installed. The current default is `llama3-gradient:latest`.
+- Ollama must have the `model` applied in settings installed. The current default is `qwen2.5-coder:latest`.
+- The `prompt window size` should align with the maximum context window of the model.
 
 ## How to Use
 
diff --git a/package.json b/package.json
index 81385d1..7ce4b7d 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
 	"name": "ollama-autocoder",
 	"displayName": "Ollama Autocoder",
 	"description": "A simple to use Ollama autocompletion engine with options exposed and streaming functionality",
-	"version": "0.0.10",
+	"version": "0.1.0",
 	"icon": "icon.png",
 	"publisher": "10nates",
 	"license": "MIT",
@@ -53,7 +53,7 @@
 				},
 				"ollama-autocoder.model": {
 					"type": "string",
-					"default": "llama3-gradient:latest",
+					"default": "qwen2.5-coder:latest",
 					"description": "The model to use for generating completions"
 				},
 				"ollama-autocoder.message header": {
@@ -69,8 +69,8 @@
 				},
 				"ollama-autocoder.prompt window size": {
 					"type": "integer",
-					"default": 10000,
-					"description": "The size of the prompt in characters. NOT tokens, so can be set 1.3-4x the max tokens of the model (varies significantly)."
+					"default": 131072,
+					"description": "The size of the prompt in characters. NOT tokens, so can be set 1.3-4x the max tokens of the model (varies significantly). If unsure, just use a 1:1 token size."
 				},
 				"ollama-autocoder.completion keys": {
 					"type": "string",
@@ -118,14 +118,13 @@
 		"watch": "tsc -watch -p ./"
 	},
 	"devDependencies": {
-		"@types/node": "^16.18.34",
-		"@types/vscode": "^1.73.0",
-		"@typescript-eslint/eslint-plugin": "^6.7.0",
-		"@typescript-eslint/parser": "^6.7.0",
-		"eslint": "^8.26.0",
-		"typescript": "^5.3.2"
+		"@types/node": "^22.7.4",
+		"@types/vscode": "^1.94.0",
+		"@typescript-eslint/eslint-plugin": "^8.8.0",
+		"@typescript-eslint/parser": "^8.8.0",
+		"typescript": "^5.6.2"
 	},
 	"dependencies": {
-		"axios": "^1.6.2"
+		"axios": "^1.7.7"
 	}
 }
diff --git a/src/extension.ts b/src/extension.ts
index 1d338f9..3588685 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -45,6 +45,14 @@ function messageHeaderSub(document: vscode.TextDocument) {
 	return sub;
 }
 
+function handleError(err: any) {
+	if (err.code === 'ERR_CANCELED') return;
+
+	// Show an error message
+	vscode.window.showErrorMessage("Ollama Autocoder encountered an error: " + err.toString() + (err.code ? " (" + err.code + ")" : ""));
+	console.log(err);
+}
+
 // internal function for autocomplete, not directly exposed
 async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationToken?: vscode.CancellationToken) {
 	const document = textEditor.document;
@@ -75,9 +83,9 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
 			progressCancellationToken.onCancellationRequested(cancelPost);
 			vscode.workspace.onDidCloseTextDocument(cancelPost);
 		});
-		
+
 		const completeInput = messageHeaderSub(textEditor.document) + prompt;
-		
+
 		// Make a request to the ollama.ai REST API
 		const response = await axios.post(apiEndpoint, {
 			model: apiModel, // Change this to the model you want to use
@@ -88,7 +96,7 @@
 				num_predict: numPredict,
 				temperature: apiTemperature,
 				stop: ["```"],
-				num_ctx: Math.min(completeInput.length, 1_048_000) // Assumes absolute worst case of 1 char = 1 token
+				num_ctx: Math.min(completeInput.length, promptWindowSize) // Assumes absolute worst case of 1 char = 1 token
 			}
 		}, {
 			cancelToken: axiosCancelToken,
@@ -111,7 +119,7 @@
 				// Get a completion from the response
 				const completion: string = JSON.parse(d.toString()).response;
 				// lastToken = completion;
-				
+
 				if (completion === "") {
 					return;
 				}
@@ -154,11 +162,7 @@
 			await finished;
 
 		} catch (err: any) {
-			// Show an error message
-			vscode.window.showErrorMessage(
-				"Ollama encountered an error: " + err.message
-			);
-			console.log(err);
+			handleError(err);
 		}
 	}
 	);
@@ -176,38 +180,42 @@ async function provideCompletionItems(document: vscode.TextDocument, position: v
 	// Wait before initializing Ollama to reduce compute usage
 	if (responsePreview) await new Promise(resolve => setTimeout(resolve, responsePreviewDelay * 1000));
 	if (cancellationToken.isCancellationRequested) {
-		return [ item ];
+		return [item];
 	}
 
 	// Set the label & inset text to a shortened, non-stream response
 	if (responsePreview) {
-		let prompt = document.getText(new vscode.Range(document.lineAt(0).range.start, position));
-		prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
-		const completeInput = messageHeaderSub(document) + prompt;
-
-		const response_preview = await axios.post(apiEndpoint, {
-			model: apiModel, // Change this to the model you want to use
-			prompt: completeInput,
-			stream: false,
-			raw: true,
-			options: {
-				num_predict: responsePreviewMaxTokens, // reduced compute max
-				temperature: apiTemperature,
-				stop: ['\n', '```'],
-				num_ctx: Math.min(completeInput.length, 1_048_000) // Assumes absolute worst case of 1 char = 1 token
+		try {
+			let prompt = document.getText(new vscode.Range(document.lineAt(0).range.start, position));
+			prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
+			const completeInput = messageHeaderSub(document) + prompt;
+
+			const response_preview = await axios.post(apiEndpoint, {
+				model: apiModel, // Change this to the model you want to use
+				prompt: completeInput,
+				stream: false,
+				raw: true,
+				options: {
+					num_predict: responsePreviewMaxTokens, // reduced compute max
+					temperature: apiTemperature,
+					stop: ['\n', '```'],
+					num_ctx: Math.min(completeInput.length, promptWindowSize) // Assumes absolute worst case of 1 char = 1 token
+				}
+			}, {
+				cancelToken: new axios.CancelToken((c) => {
+					const cancelPost = function () {
+						c("Autocompletion request terminated by completion cancel");
+					};
+					cancellationToken.onCancellationRequested(cancelPost);
+				})
+			});
+
+			if (response_preview.data.response.trim() != "") { // default if empty
+				item.label = response_preview.data.response.trimStart(); // tended to add whitespace at the beginning
+				item.insertText = response_preview.data.response.trimStart();
 			}
-		}, {
-			cancelToken: new axios.CancelToken((c) => {
-				const cancelPost = function () {
-					c("Autocompletion request terminated by completion cancel");
-				};
-				cancellationToken.onCancellationRequested(cancelPost);
-			})
-		});
-
-		if (response_preview.data.response.trim() != "") { // default if empty
-			item.label = response_preview.data.response.trimStart(); // tended to add whitespace at the beginning
-			item.insertText = response_preview.data.response.trimStart();
+		} catch (err: any) {
+			handleError(err);
 		}
 	}
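Usage note: the new defaults above assume a model with a large context window. For a smaller model, the README's advice to align `prompt window size` with the model's maximum context window can be applied through user settings. Below is a minimal settings.json sketch using the setting keys contributed in package.json; the window size shown is illustrative only, not a value taken from this patch.

```jsonc
// .vscode/settings.json — illustrative values; adjust to the model actually pulled in Ollama
{
  // Any Ollama model tag that is installed locally
  "ollama-autocoder.model": "qwen2.5-coder:latest",
  // Measured in characters, not tokens. The patch caps num_ctx at this value,
  // assuming the worst case of 1 character = 1 token, so keeping it at or below
  // the model's maximum context window is the conservative choice.
  "ollama-autocoder.prompt window size": 32768
}
```

Because the setting counts characters rather than tokens, a 1:1 character-to-token value is the safe choice when the tokenizer's actual ratio is unknown, which is exactly the worst case the `Math.min(completeInput.length, promptWindowSize)` expression assumes.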