Skip to content

Commit 43ed372

Browse files
authored
feat: improve task model tool calling (#1755)
* feat: improve task model tool calling
* feat(prod): add tokenizer for Hermes-3-Llama-3.1-8B to task configuration
1 parent 301ac8d commit 43ed372

File tree

3 files changed

+16
-9
lines changed

3 files changed

+16
-9
lines changed

chart/env/prod.yaml

+10 -4
Original file line number | Diff line number | Diff line change
@@ -375,11 +375,17 @@ envVars:
375375
]
376376
},
377377
{
378-
"name": "meta-llama/Llama-3.1-8B-Instruct",
379-
"tools": true,
378+
"name": "internal/task",
379+
"tokenizer" : "NousResearch/Hermes-3-Llama-3.1-8B",
380380
"unlisted": true,
381+
"tools" : true,
382+
"endpoints": [
383+
{
384+
"type": "openai",
385+
"baseURL": "https://internal.api-inference.huggingface.co/models/NousResearch/Hermes-3-Llama-3.1-8B/v1"
386+
}
387+
],
381388
"parameters": {
382-
"stop": ["<|eot_id|>", "<|im_end|>"],
383389
"temperature": 0.1,
384390
"max_new_tokens": 256
385391
}
@@ -456,7 +462,7 @@ envVars:
456462
PUBLIC_APP_DISCLAIMER: 1
457463
PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js"
458464
REQUIRE_FEATURED_ASSISTANTS: "true"
459-
TASK_MODEL: "meta-llama/Llama-3.1-8B-Instruct"
465+
TASK_MODEL: "internal/task"
460466
TEXT_EMBEDDING_MODELS: >
461467
[{
462468
"name": "bge-base-en-v1-5-sxa",

src/lib/server/textGeneration/reasoning.ts

+3 -2
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string
1515
];
1616

1717
const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
18-
The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The reasoning follows: \n`;
18+
The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The sentence must be very short, ideally 5 words or less.`;
1919

2020
if (smallModel.tools) {
2121
const summaryTool = {
@@ -25,7 +25,8 @@ The text might be incomplete, try your best to summarize it in one very short se
2525
{
2626
name: "summary",
2727
type: "str",
28-
description: "The short summary of the reasoning steps",
28+
description:
29+
"The short summary of the reasoning steps. 5 words or less. Must start with a gerund.",
2930
paramType: "required",
3031
},
3132
],

src/lib/server/textGeneration/title.ts

+3 -3
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ export async function generateTitle(prompt: string) {
4343
name: "title",
4444
type: "str",
4545
description:
46-
"The title for the conversation. It should be a single short sentence of four words or less and start with a unicode emoji relevant to the conversation.",
46+
"The title for the conversation. It should be 5 words or less and start with a unicode emoji relevant to the query.",
4747
},
4848
],
4949
} as unknown as Tool;
@@ -57,7 +57,7 @@ export async function generateTitle(prompt: string) {
5757
},
5858
],
5959
preprompt:
60-
"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence. Here is the user message: \n",
60+
"The task is to generate conversation titles based on text snippets. You'll never answer the provided question directly, but instead summarize the user's request into a short title.",
6161
tool: titleTool,
6262
endpoint,
6363
});
@@ -76,7 +76,7 @@ export async function generateTitle(prompt: string) {
7676
preprompt:
7777
"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
7878
generateSettings: {
79-
max_new_tokens: 15,
79+
max_new_tokens: 30,
8080
},
8181
})
8282
)

0 commit comments

Comments
 (0)