feat: improve task model tool calling (#1755)

nsarrazin · web-flow · commit 43ed372e9e29 · 2025-03-12T16:06:59.000+01:00
* feat: improve task model tool calling

* feat(prod): add tokenizer for Hermes-3-Llama-3.1-8B to task configuration
diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml
@@ -375,11 +375,17 @@ envVars:
         ]
       },
       {
-        "name": "meta-llama/Llama-3.1-8B-Instruct",
-        "tools": true,
+        "name": "internal/task",
+        "tokenizer" : "NousResearch/Hermes-3-Llama-3.1-8B",
         "unlisted": true,
+        "tools" : true,
+        "endpoints": [
+          {
+            "type": "openai",
+            "baseURL": "https://internal.api-inference.huggingface.co/models/NousResearch/Hermes-3-Llama-3.1-8B/v1"
+          }
+        ],
         "parameters": {
-          "stop": ["<|eot_id|>", "<|im_end|>"],
           "temperature": 0.1,
           "max_new_tokens": 256
         }
@@ -456,7 +462,7 @@ envVars:
   PUBLIC_APP_DISCLAIMER: 1
   PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js"
   REQUIRE_FEATURED_ASSISTANTS: "true"
-  TASK_MODEL: "meta-llama/Llama-3.1-8B-Instruct"
+  TASK_MODEL: "internal/task"
   TEXT_EMBEDDING_MODELS: >
     [{
       "name": "bge-base-en-v1-5-sxa",
diff --git a/src/lib/server/textGeneration/reasoning.ts b/src/lib/server/textGeneration/reasoning.ts
@@ -15,7 +15,7 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string
 	];
 
 	const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
-The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The reasoning follows: \n`;
+The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The sentence must be very short, ideally 5 words or less.`;
 
 	if (smallModel.tools) {
 		const summaryTool = {
@@ -25,7 +25,8 @@ The text might be incomplete, try your best to summarize it in one very short se
 				{
 					name: "summary",
 					type: "str",
-					description: "The short summary of the reasoning steps",
+					description:
+						"The short summary of the reasoning steps. 5 words or less. Must start with a gerund.",
 					paramType: "required",
 				},
 			],
diff --git a/src/lib/server/textGeneration/title.ts b/src/lib/server/textGeneration/title.ts
@@ -43,7 +43,7 @@ export async function generateTitle(prompt: string) {
 					name: "title",
 					type: "str",
 					description:
-						"The title for the conversation. It should be a single short sentence of four words or less and start with a unicode emoji relevant to the conversation.",
+						"The title for the conversation. It should be 5 words or less and start with a unicode emoji relevant to the query.",
 				},
 			],
 		} as unknown as Tool;
@@ -57,7 +57,7 @@ export async function generateTitle(prompt: string) {
 				},
 			],
 			preprompt:
-				"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence. Here is the user message: \n",
+				"The task is to generate conversation titles based on text snippets. You'll never answer the provided question directly, but instead summarize the user's request into a short title.",
 			tool: titleTool,
 			endpoint,
 		});
@@ -76,7 +76,7 @@ export async function generateTitle(prompt: string) {
 			preprompt:
 				"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
 			generateSettings: {
-				max_new_tokens: 15,
+				max_new_tokens: 30,
 			},
 		})
 	)