
Commit

Improved error handling, max context window no longer hardcoded, aligning defaults with current advancements in LLMs
10Nates committed Oct 7, 2024
1 parent cc9e60f commit 48780e0
Showing 3 changed files with 56 additions and 48 deletions.
README.md (2 additions, 1 deletion)
@@ -8,7 +8,8 @@ A simple to use Ollama autocompletion engine with options exposed and streaming

- Ollama must be serving on the API endpoint applied in settings
- For installation of Ollama, visit [ollama.ai](https://ollama.ai)
- Ollama must have the model applied in settings installed. The current default is `llama3-gradient:latest`.
- Ollama must have the `model` applied in settings installed. The current default is `qwen2.5-coder:latest`.
- The `prompt window size` should align with the maximum context window of the model.

## How to Use

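The new README bullet ties `prompt window size` to the model's maximum context window. Since the setting is measured in characters while models are limited in tokens, here is a minimal sketch of the arithmetic behind the "1.3-4x" guidance in the setting description below; the 128K-token (131072) context used for illustration is an assumption, not something stated in this commit.

```typescript
// Illustrative only, not part of the extension: relate a model's token
// context window to the character-based "prompt window size" setting.
function promptWindowSuggestions(maxContextTokens: number) {
    return {
        conservative: maxContextTokens,              // 1:1 chars-to-tokens, never overshoots
        typical: Math.floor(maxContextTokens * 1.3), // lower end of the 1.3-4x guidance
        aggressive: maxContextTokens * 4,            // only for very dense text or code
    };
}

// Assuming a 128K-token context window:
console.log(promptWindowSuggestions(131072));
// => { conservative: 131072, typical: 170393, aggressive: 524288 }
```

Read that way, the new default of 131072 characters is the conservative 1:1 choice for a 128K-token model.
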
package.json (10 additions, 11 deletions)
@@ -2,7 +2,7 @@
"name": "ollama-autocoder",
"displayName": "Ollama Autocoder",
"description": "A simple to use Ollama autocompletion engine with options exposed and streaming functionality",
"version": "0.0.10",
"version": "0.1.0",
"icon": "icon.png",
"publisher": "10nates",
"license": "MIT",
@@ -53,7 +53,7 @@
},
"ollama-autocoder.model": {
"type": "string",
"default": "llama3-gradient:latest",
"default": "qwen2.5-coder:latest",
"description": "The model to use for generating completions"
},
"ollama-autocoder.message header": {
@@ -69,8 +69,8 @@
},
"ollama-autocoder.prompt window size": {
"type": "integer",
"default": 10000,
"description": "The size of the prompt in characters. NOT tokens, so can be set 1.3-4x the max tokens of the model (varies significantly)."
"default": 131072,
"description": "The size of the prompt in characters. NOT tokens, so can be set 1.3-4x the max tokens of the model (varies significantly). If unsure, just use a 1:1 token size."
},
"ollama-autocoder.completion keys": {
"type": "string",
@@ -118,14 +118,13 @@
"watch": "tsc -watch -p ./"
},
"devDependencies": {
"@types/node": "^16.18.34",
"@types/vscode": "^1.73.0",
"@typescript-eslint/eslint-plugin": "^6.7.0",
"@typescript-eslint/parser": "^6.7.0",
"eslint": "^8.26.0",
"typescript": "^5.3.2"
"@types/node": "^22.7.4",
"@types/vscode": "^1.94.0",
"@typescript-eslint/eslint-plugin": "^8.8.0",
"@typescript-eslint/parser": "^8.8.0",
"typescript": "^5.6.2"
},
"dependencies": {
"axios": "^1.6.2"
"axios": "^1.7.7"
}
}
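
The larger `prompt window size` default also feeds the other change in this commit: `num_ctx` in src/extension.ts is now capped by the configured window instead of a hardcoded 1,048,000. A hedged sketch of that clamping follows; only the `num_ctx` line and the `Math.min` pattern come from the diff, the surrounding option values are placeholders.

```typescript
// Sketch: a character-based prompt window bounds the Ollama num_ctx option,
// assuming the worst case of one character per token (as the diff's comment notes).
function buildOptions(prompt: string, promptWindowSize: number) {
    return {
        num_predict: 1000,   // placeholder value
        temperature: 0.5,    // placeholder value
        stop: ["\n"],        // placeholder value
        num_ctx: Math.min(prompt.length, promptWindowSize),
    };
}

// A 500-character prompt with the new 131072 default keeps num_ctx small:
console.log(buildOptions("x".repeat(500), 131072).num_ctx); // 500
```

The effect is that the hardcoded 1,048,000 ceiling is gone; the configured window is now the single source of truth for both the character truncation and the `num_ctx` cap.
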
src/extension.ts (44 additions, 36 deletions)
@@ -45,6 +45,14 @@ function messageHeaderSub(document: vscode.TextDocument) {
return sub;
}

function handleError(err: any) {
if (err.code === 'ERR_CANCELED') return;

// Show an error message
vscode.window.showErrorMessage("Ollama Autocoder encountered an error: " + err.toString() + (err.code ? " (" + err.code + ")" : ""));
console.log(err);
}

// internal function for autocomplete, not directly exposed
async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationToken?: vscode.CancellationToken) {
const document = textEditor.document;
@@ -75,9 +83,9 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
progressCancellationToken.onCancellationRequested(cancelPost);
vscode.workspace.onDidCloseTextDocument(cancelPost);
});

const completeInput = messageHeaderSub(textEditor.document) + prompt;

// Make a request to the ollama.ai REST API
const response = await axios.post(apiEndpoint, {
model: apiModel, // Change this to the model you want to use
@@ -88,7 +96,7 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
num_predict: numPredict,
temperature: apiTemperature,
stop: ["```"],
num_ctx: Math.min(completeInput.length, 1_048_000) // Assumes absolute worst case of 1 char = 1 token
num_ctx: Math.min(completeInput.length, promptWindowSize) // Assumes absolute worst case of 1 char = 1 token
}
}, {
cancelToken: axiosCancelToken,
@@ -111,7 +119,7 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
// Get a completion from the response
const completion: string = JSON.parse(d.toString()).response;
// lastToken = completion;

if (completion === "") {
return;
}
@@ -154,11 +162,7 @@ async function autocompleteCommand(textEditor: vscode.TextEditor, cancellationTo
await finished;

} catch (err: any) {
// Show an error message
vscode.window.showErrorMessage(
"Ollama encountered an error: " + err.message
);
console.log(err);
handleError(err);
}
}
);
@@ -176,38 +180,42 @@ async function provideCompletionItems(document: vscode.TextDocument, position: v
// Wait before initializing Ollama to reduce compute usage
if (responsePreview) await new Promise(resolve => setTimeout(resolve, responsePreviewDelay * 1000));
if (cancellationToken.isCancellationRequested) {
return [ item ];
return [item];
}

// Set the label & inset text to a shortened, non-stream response
if (responsePreview) {
let prompt = document.getText(new vscode.Range(document.lineAt(0).range.start, position));
prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
const completeInput = messageHeaderSub(document) + prompt;

const response_preview = await axios.post(apiEndpoint, {
model: apiModel, // Change this to the model you want to use
prompt: completeInput,
stream: false,
raw: true,
options: {
num_predict: responsePreviewMaxTokens, // reduced compute max
temperature: apiTemperature,
stop: ['\n', '```'],
num_ctx: Math.min(completeInput.length, 1_048_000) // Assumes absolute worst case of 1 char = 1 token
try {
let prompt = document.getText(new vscode.Range(document.lineAt(0).range.start, position));
prompt = prompt.substring(Math.max(0, prompt.length - promptWindowSize), prompt.length);
const completeInput = messageHeaderSub(document) + prompt;

const response_preview = await axios.post(apiEndpoint, {
model: apiModel, // Change this to the model you want to use
prompt: completeInput,
stream: false,
raw: true,
options: {
num_predict: responsePreviewMaxTokens, // reduced compute max
temperature: apiTemperature,
stop: ['\n', '```'],
num_ctx: Math.min(completeInput.length, promptWindowSize) // Assumes absolute worst case of 1 char = 1 token
}
}, {
cancelToken: new axios.CancelToken((c) => {
const cancelPost = function () {
c("Autocompletion request terminated by completion cancel");
};
cancellationToken.onCancellationRequested(cancelPost);
})
});

if (response_preview.data.response.trim() != "") { // default if empty
item.label = response_preview.data.response.trimStart(); // tended to add whitespace at the beginning
item.insertText = response_preview.data.response.trimStart();
}
}, {
cancelToken: new axios.CancelToken((c) => {
const cancelPost = function () {
c("Autocompletion request terminated by completion cancel");
};
cancellationToken.onCancellationRequested(cancelPost);
})
});

if (response_preview.data.response.trim() != "") { // default if empty
item.label = response_preview.data.response.trimStart(); // tended to add whitespace at the beginning
item.insertText = response_preview.data.response.trimStart();
} catch (err: any) {
handleError(err);
}
}

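The extension.ts refactor routes every failure through the new `handleError`, which skips axios cancellations so that aborting a completion no longer pops up an error. Below is a standalone sketch of that pattern, assuming axios v1 behavior in which a cancelled request rejects with an error whose `code` is `"ERR_CANCELED"`; the extension uses `axios.CancelToken`, while this sketch uses the newer `AbortController` and swaps the VS Code notification for `console.error`.

```typescript
import axios from "axios";

// Ignore user-initiated cancellations; report everything else (sketch only).
function handleError(err: any): void {
    if (err.code === "ERR_CANCELED") return;
    console.error(
        "Ollama Autocoder encountered an error: " + err.toString() +
        (err.code ? " (" + err.code + ")" : "")
    );
}

async function demo() {
    const controller = new AbortController();
    const request = axios.post("http://localhost:11434/api/generate", {
        model: "qwen2.5-coder:latest",
        prompt: "function add(",
        stream: false,
    }, { signal: controller.signal });

    controller.abort(); // simulate the user dismissing the completion

    try {
        await request;
    } catch (err) {
        handleError(err); // swallowed: err.code === "ERR_CANCELED"
    }
}

void demo();
```

Running the sketch prints nothing, because the only error raised is the cancellation that `handleError` deliberately ignores.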
